Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig | 92
-rw-r--r-- arch/x86/Makefile | 4
-rw-r--r-- arch/x86/boot/Makefile | 4
-rw-r--r-- arch/x86/boot/boot.h | 18
-rw-r--r-- arch/x86/boot/cmdline.c | 12
-rw-r--r-- arch/x86/boot/compressed/cmdline.c | 12
-rw-r--r-- arch/x86/boot/compressed/eboot.c | 21
-rw-r--r-- arch/x86/boot/compressed/head_32.S | 8
-rw-r--r-- arch/x86/boot/compressed/head_64.S | 56
-rw-r--r-- arch/x86/boot/compressed/misc.c | 2
-rw-r--r-- arch/x86/boot/compressed/misc.h | 1
-rw-r--r-- arch/x86/boot/header.S | 47
-rw-r--r-- arch/x86/boot/setup.ld | 2
-rw-r--r-- arch/x86/boot/tools/build.c | 81
-rw-r--r-- arch/x86/configs/i386_defconfig | 1
-rw-r--r-- arch/x86/crypto/Makefile | 2
-rw-r--r-- arch/x86/crypto/aes-i586-asm_32.S | 15
-rw-r--r-- arch/x86/crypto/aes-x86_64-asm_64.S | 30
-rw-r--r-- arch/x86/crypto/aesni-intel_asm.S | 23
-rw-r--r-- arch/x86/crypto/aesni-intel_glue.c | 37
-rw-r--r-- arch/x86/crypto/blowfish-x86_64-asm_64.S | 39
-rw-r--r-- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 38
-rw-r--r-- arch/x86/crypto/camellia-x86_64-asm_64.S | 50
-rw-r--r-- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 48
-rw-r--r-- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 35
-rw-r--r-- arch/x86/crypto/crc32-pclmul_asm.S | 246
-rw-r--r-- arch/x86/crypto/crc32-pclmul_glue.c | 201
-rw-r--r-- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 8
-rw-r--r-- arch/x86/crypto/ghash-clmulni-intel_asm.S | 4
-rw-r--r-- arch/x86/crypto/salsa20-i586-asm_32.S | 28
-rw-r--r-- arch/x86/crypto/salsa20-x86_64-asm_64.S | 28
-rw-r--r-- arch/x86/crypto/salsa20_glue.c | 5
-rw-r--r-- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 35
-rw-r--r-- arch/x86/crypto/serpent-sse2-i586-asm_32.S | 20
-rw-r--r-- arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 20
-rw-r--r-- arch/x86/crypto/sha1_ssse3_asm.S | 10
-rw-r--r-- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 35
-rw-r--r-- arch/x86/crypto/twofish-i586-asm_32.S | 11
-rw-r--r-- arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 20
-rw-r--r-- arch/x86/crypto/twofish-x86_64-asm_64.S | 11
-rw-r--r-- arch/x86/ia32/ia32_signal.c | 50
-rw-r--r-- arch/x86/ia32/ia32entry.S | 16
-rw-r--r-- arch/x86/ia32/sys_ia32.c | 171
-rw-r--r-- arch/x86/include/asm/acpi.h | 4
-rw-r--r-- arch/x86/include/asm/amd_nb.h | 17
-rw-r--r-- arch/x86/include/asm/bootparam_utils.h | 38
-rw-r--r-- arch/x86/include/asm/cpufeature.h | 2
-rw-r--r-- arch/x86/include/asm/efi.h | 1
-rw-r--r-- arch/x86/include/asm/fpu-internal.h | 5
-rw-r--r-- arch/x86/include/asm/ftrace.h | 1
-rw-r--r-- arch/x86/include/asm/hpet.h | 5
-rw-r--r-- arch/x86/include/asm/hw_irq.h | 13
-rw-r--r-- arch/x86/include/asm/hypervisor.h | 13
-rw-r--r-- arch/x86/include/asm/ia32.h | 15
-rw-r--r-- arch/x86/include/asm/init.h | 28
-rw-r--r-- arch/x86/include/asm/io_apic.h | 28
-rw-r--r-- arch/x86/include/asm/irq_remapping.h | 40
-rw-r--r-- arch/x86/include/asm/irq_vectors.h | 4
-rw-r--r-- arch/x86/include/asm/kexec.h | 6
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 26
-rw-r--r-- arch/x86/include/asm/kvm_para.h | 10
-rw-r--r-- arch/x86/include/asm/linkage.h | 18
-rw-r--r-- arch/x86/include/asm/mce.h | 84
-rw-r--r-- arch/x86/include/asm/microcode.h | 14
-rw-r--r-- arch/x86/include/asm/microcode_intel.h | 85
-rw-r--r-- arch/x86/include/asm/mmzone_32.h | 6
-rw-r--r-- arch/x86/include/asm/mshyperv.h | 4
-rw-r--r-- arch/x86/include/asm/mwait.h | 3
-rw-r--r-- arch/x86/include/asm/numa.h | 6
-rw-r--r-- arch/x86/include/asm/numa_64.h | 6
-rw-r--r-- arch/x86/include/asm/page.h | 7
-rw-r--r-- arch/x86/include/asm/page_32.h | 1
-rw-r--r-- arch/x86/include/asm/page_64.h | 36
-rw-r--r-- arch/x86/include/asm/page_64_types.h | 22
-rw-r--r-- arch/x86/include/asm/page_types.h | 2
-rw-r--r-- arch/x86/include/asm/parport.h | 4
-rw-r--r-- arch/x86/include/asm/pci.h | 3
-rw-r--r-- arch/x86/include/asm/pci_x86.h | 7
-rw-r--r-- arch/x86/include/asm/perf_event.h | 13
-rw-r--r-- arch/x86/include/asm/pgtable.h | 34
-rw-r--r-- arch/x86/include/asm/pgtable_32.h | 7
-rw-r--r-- arch/x86/include/asm/pgtable_64.h | 8
-rw-r--r-- arch/x86/include/asm/pgtable_64_types.h | 4
-rw-r--r-- arch/x86/include/asm/pgtable_types.h | 5
-rw-r--r-- arch/x86/include/asm/processor.h | 29
-rw-r--r-- arch/x86/include/asm/proto.h | 2
-rw-r--r-- arch/x86/include/asm/realmode.h | 3
-rw-r--r-- arch/x86/include/asm/required-features.h | 8
-rw-r--r-- arch/x86/include/asm/signal.h | 22
-rw-r--r-- arch/x86/include/asm/sys_ia32.h | 16
-rw-r--r-- arch/x86/include/asm/syscalls.h | 13
-rw-r--r-- arch/x86/include/asm/tlbflush.h | 18
-rw-r--r-- arch/x86/include/asm/uaccess.h | 55
-rw-r--r-- arch/x86/include/asm/unistd.h | 2
-rw-r--r-- arch/x86/include/asm/uv/uv.h | 2
-rw-r--r-- arch/x86/include/asm/uv/uv_hub.h | 44
-rw-r--r-- arch/x86/include/asm/uv/uv_mmrs.h | 1496
-rw-r--r-- arch/x86/include/asm/vmx.h | 18
-rw-r--r-- arch/x86/include/asm/x86_init.h | 39
-rw-r--r-- arch/x86/include/asm/xen/events.h | 3
-rw-r--r-- arch/x86/include/asm/xen/page.h | 2
-rw-r--r-- arch/x86/include/asm/xor.h | 491
-rw-r--r-- arch/x86/include/asm/xor_32.h | 309
-rw-r--r-- arch/x86/include/asm/xor_64.h | 305
-rw-r--r-- arch/x86/include/uapi/asm/bootparam.h | 63
-rw-r--r-- arch/x86/include/uapi/asm/mce.h | 87
-rw-r--r-- arch/x86/include/uapi/asm/msr-index.h | 6
-rw-r--r-- arch/x86/include/uapi/asm/signal.h | 8
-rw-r--r-- arch/x86/include/uapi/asm/vmx.h | 9
-rw-r--r-- arch/x86/kernel/Makefile | 6
-rw-r--r-- arch/x86/kernel/acpi/boot.c | 5
-rw-r--r-- arch/x86/kernel/acpi/sleep.c | 2
-rw-r--r-- arch/x86/kernel/amd_gart_64.c | 5
-rw-r--r-- arch/x86/kernel/apb_timer.c | 10
-rw-r--r-- arch/x86/kernel/apic/apic.c | 28
-rw-r--r-- arch/x86/kernel/apic/apic_numachip.c | 1
-rw-r--r-- arch/x86/kernel/apic/io_apic.c | 457
-rw-r--r-- arch/x86/kernel/apic/ipi.c | 2
-rw-r--r-- arch/x86/kernel/apic/x2apic_phys.c | 21
-rw-r--r-- arch/x86/kernel/apic/x2apic_uv_x.c | 206
-rw-r--r-- arch/x86/kernel/apm_32.c | 68
-rw-r--r-- arch/x86/kernel/cpu/amd.c | 68
-rw-r--r-- arch/x86/kernel/cpu/bugs.c | 27
-rw-r--r-- arch/x86/kernel/cpu/common.c | 17
-rw-r--r-- arch/x86/kernel/cpu/hypervisor.c | 7
-rw-r--r-- arch/x86/kernel/cpu/intel.c | 3
-rw-r--r-- arch/x86/kernel/cpu/intel_cacheinfo.c | 9
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 16
-rw-r--r-- arch/x86/kernel/cpu/mcheck/p5.c | 2
-rw-r--r-- arch/x86/kernel/cpu/mcheck/winchip.c | 2
-rw-r--r-- arch/x86/kernel/cpu/mshyperv.c | 54
-rw-r--r-- arch/x86/kernel/cpu/mtrr/generic.c | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 21
-rw-r--r-- arch/x86/kernel/cpu/perf_event.h | 25
-rw-r--r-- arch/x86/kernel/cpu/perf_event_amd.c | 322
-rw-r--r-- arch/x86/kernel/cpu/perf_event_amd_ibs.c | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 6
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 6
-rw-r--r-- arch/x86/kernel/cpu/perf_event_p6.c | 2
-rw-r--r-- arch/x86/kernel/cpu/proc.c | 2
-rw-r--r-- arch/x86/kernel/cpu/vmware.c | 13
-rw-r--r-- arch/x86/kernel/dumpstack.c | 2
-rw-r--r-- arch/x86/kernel/e820.c | 16
-rw-r--r-- arch/x86/kernel/entry_32.S | 55
-rw-r--r-- arch/x86/kernel/entry_64.S | 48
-rw-r--r-- arch/x86/kernel/ftrace.c | 4
-rw-r--r-- arch/x86/kernel/head32.c | 21
-rw-r--r-- arch/x86/kernel/head64.c | 146
-rw-r--r-- arch/x86/kernel/head_32.S | 113
-rw-r--r-- arch/x86/kernel/head_64.S | 212
-rw-r--r-- arch/x86/kernel/hpet.c | 2
-rw-r--r-- arch/x86/kernel/i386_ksyms_32.c | 1
-rw-r--r-- arch/x86/kernel/ioport.c | 3
-rw-r--r-- arch/x86/kernel/kprobes/Makefile | 7
-rw-r--r-- arch/x86/kernel/kprobes/common.h (renamed from arch/x86/kernel/kprobes-common.h) | 11
-rw-r--r-- arch/x86/kernel/kprobes/core.c (renamed from arch/x86/kernel/kprobes.c) | 76
-rw-r--r-- arch/x86/kernel/kprobes/ftrace.c | 93
-rw-r--r-- arch/x86/kernel/kprobes/opt.c (renamed from arch/x86/kernel/kprobes-opt.c) | 2
-rw-r--r-- arch/x86/kernel/kvm.c | 24
-rw-r--r-- arch/x86/kernel/kvmclock.c | 15
-rw-r--r-- arch/x86/kernel/machine_kexec_64.c | 171
-rw-r--r-- arch/x86/kernel/microcode_core.c | 7
-rw-r--r-- arch/x86/kernel/microcode_core_early.c | 76
-rw-r--r-- arch/x86/kernel/microcode_intel.c | 198
-rw-r--r-- arch/x86/kernel/microcode_intel_early.c | 796
-rw-r--r-- arch/x86/kernel/microcode_intel_lib.c | 174
-rw-r--r-- arch/x86/kernel/msr.c | 3
-rw-r--r-- arch/x86/kernel/pci-dma.c | 4
-rw-r--r-- arch/x86/kernel/process.c | 122
-rw-r--r-- arch/x86/kernel/process_64.c | 2
-rw-r--r-- arch/x86/kernel/ptrace.c | 2
-rw-r--r-- arch/x86/kernel/quirks.c | 4
-rw-r--r-- arch/x86/kernel/reboot.c | 2
-rw-r--r-- arch/x86/kernel/rtc.c | 1
-rw-r--r-- arch/x86/kernel/setup.c | 377
-rw-r--r-- arch/x86/kernel/signal.c | 184
-rw-r--r-- arch/x86/kernel/smpboot.c | 2
-rw-r--r-- arch/x86/kernel/step.c | 9
-rw-r--r-- arch/x86/kernel/sys_x86_64.c | 2
-rw-r--r-- arch/x86/kernel/traps.c | 9
-rw-r--r-- arch/x86/kernel/tsc.c | 3
-rw-r--r-- arch/x86/kernel/uprobes.c | 4
-rw-r--r-- arch/x86/kernel/vm86_32.c | 8
-rw-r--r-- arch/x86/kernel/x8664_ksyms_64.c | 3
-rw-r--r-- arch/x86/kernel/x86_init.c | 28
-rw-r--r-- arch/x86/kvm/emulate.c | 673
-rw-r--r-- arch/x86/kvm/i8254.c | 1
-rw-r--r-- arch/x86/kvm/i8259.c | 2
-rw-r--r-- arch/x86/kvm/irq.c | 74
-rw-r--r-- arch/x86/kvm/lapic.c | 140
-rw-r--r-- arch/x86/kvm/lapic.h | 34
-rw-r--r-- arch/x86/kvm/mmu.c | 168
-rw-r--r-- arch/x86/kvm/mmutrace.h | 6
-rw-r--r-- arch/x86/kvm/paging_tmpl.h | 106
-rw-r--r-- arch/x86/kvm/svm.c | 24
-rw-r--r-- arch/x86/kvm/vmx.c | 714
-rw-r--r-- arch/x86/kvm/x86.c | 208
-rw-r--r-- arch/x86/lguest/Kconfig | 1
-rw-r--r-- arch/x86/lguest/boot.c | 3
-rw-r--r-- arch/x86/lib/delay.c | 2
-rw-r--r-- arch/x86/lib/getuser.S | 43
-rw-r--r-- arch/x86/mm/fault.c | 8
-rw-r--r-- arch/x86/mm/init.c | 469
-rw-r--r-- arch/x86/mm/init_32.c | 118
-rw-r--r-- arch/x86/mm/init_64.c | 659
-rw-r--r-- arch/x86/mm/memtest.c | 10
-rw-r--r-- arch/x86/mm/mm_internal.h | 19
-rw-r--r-- arch/x86/mm/numa.c | 43
-rw-r--r-- arch/x86/mm/numa_32.c | 161
-rw-r--r-- arch/x86/mm/numa_64.c | 13
-rw-r--r-- arch/x86/mm/numa_internal.h | 6
-rw-r--r-- arch/x86/mm/pageattr.c | 113
-rw-r--r-- arch/x86/mm/pat.c | 4
-rw-r--r-- arch/x86/mm/pgtable.c | 7
-rw-r--r-- arch/x86/mm/physaddr.c | 60
-rw-r--r-- arch/x86/mm/srat.c | 152
-rw-r--r-- arch/x86/mm/tlb.c | 2
-rw-r--r-- arch/x86/net/bpf_jit_comp.c | 40
-rw-r--r-- arch/x86/pci/acpi.c | 11
-rw-r--r-- arch/x86/pci/bus_numa.c | 4
-rw-r--r-- arch/x86/pci/common.c | 26
-rw-r--r-- arch/x86/pci/fixup.c | 30
-rw-r--r-- arch/x86/pci/legacy.c | 4
-rw-r--r-- arch/x86/pci/mmconfig-shared.c | 24
-rw-r--r-- arch/x86/pci/mmconfig_32.c | 2
-rw-r--r-- arch/x86/pci/mmconfig_64.c | 4
-rw-r--r-- arch/x86/pci/mrst.c | 6
-rw-r--r-- arch/x86/pci/numaq_32.c | 2
-rw-r--r-- arch/x86/pci/pcbios.c | 4
-rw-r--r-- arch/x86/platform/Makefile | 2
-rw-r--r-- arch/x86/platform/efi/efi-bgrt.c | 7
-rw-r--r-- arch/x86/platform/efi/efi.c | 70
-rw-r--r-- arch/x86/platform/efi/efi_64.c | 22
-rw-r--r-- arch/x86/platform/goldfish/Makefile | 1
-rw-r--r-- arch/x86/platform/goldfish/goldfish.c | 51
-rw-r--r-- arch/x86/platform/mrst/mrst.c | 2
-rw-r--r-- arch/x86/platform/olpc/olpc-xo1-pm.c | 8
-rw-r--r-- arch/x86/platform/olpc/olpc-xo1-sci.c | 18
-rw-r--r-- arch/x86/platform/olpc/olpc-xo15-sci.c | 2
-rw-r--r-- arch/x86/platform/scx200/scx200_32.c | 6
-rw-r--r-- arch/x86/platform/sfi/sfi.c | 2
-rw-r--r-- arch/x86/platform/ts5500/Makefile | 1
-rw-r--r-- arch/x86/platform/ts5500/ts5500.c | 339
-rw-r--r-- arch/x86/platform/uv/tlb_uv.c | 14
-rw-r--r-- arch/x86/platform/uv/uv_time.c | 13
-rw-r--r-- arch/x86/power/hibernate_32.c | 2
-rw-r--r-- arch/x86/power/hibernate_64.c | 66
-rw-r--r-- arch/x86/realmode/init.c | 49
-rw-r--r-- arch/x86/syscalls/syscall_32.tbl | 22
-rw-r--r-- arch/x86/syscalls/syscall_64.tbl | 6
-rw-r--r-- arch/x86/tools/insn_sanity.c | 10
-rw-r--r-- arch/x86/tools/relocs.c | 6
-rw-r--r-- arch/x86/um/Kconfig | 6
-rw-r--r-- arch/x86/um/Makefile | 4
-rw-r--r-- arch/x86/um/fault.c | 2
-rw-r--r-- arch/x86/um/shared/sysdep/syscalls_32.h | 5
-rw-r--r-- arch/x86/um/signal.c | 15
-rw-r--r-- arch/x86/um/sys_call_table_32.c | 4
-rw-r--r-- arch/x86/um/syscalls_32.c | 38
-rw-r--r-- arch/x86/vdso/vclock_gettime.c | 2
-rw-r--r-- arch/x86/xen/enlighten.c | 78
-rw-r--r-- arch/x86/xen/mmu.c | 72
-rw-r--r-- arch/x86/xen/setup.c | 5
-rw-r--r-- arch/x86/xen/smp.c | 49
-rw-r--r-- arch/x86/xen/spinlock.c | 1
-rw-r--r-- arch/x86/xen/suspend.c | 2
-rw-r--r-- arch/x86/xen/xen-asm_32.S | 14
-rw-r--r-- arch/x86/xen/xen-ops.h | 2
268 files changed, 9570 insertions, 5389 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 79795af59810..6a9383370311 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1,7 +1,7 @@
1# Select 32 or 64 bit 1# Select 32 or 64 bit
2config 64BIT 2config 64BIT
3 bool "64-bit kernel" if ARCH = "x86" 3 bool "64-bit kernel" if ARCH = "x86"
4 default ARCH = "x86_64" 4 default ARCH != "i386"
5 ---help--- 5 ---help---
6 Say yes to build a 64-bit kernel - formerly known as x86_64 6 Say yes to build a 64-bit kernel - formerly known as x86_64
7 Say no to build a 32-bit kernel - formerly known as i386 7 Say no to build a 32-bit kernel - formerly known as i386
@@ -28,7 +28,6 @@ config X86
28 select HAVE_OPROFILE 28 select HAVE_OPROFILE
29 select HAVE_PCSPKR_PLATFORM 29 select HAVE_PCSPKR_PLATFORM
30 select HAVE_PERF_EVENTS 30 select HAVE_PERF_EVENTS
31 select HAVE_IRQ_WORK
32 select HAVE_IOREMAP_PROT 31 select HAVE_IOREMAP_PROT
33 select HAVE_KPROBES 32 select HAVE_KPROBES
34 select HAVE_MEMBLOCK 33 select HAVE_MEMBLOCK
@@ -40,10 +39,12 @@ config X86
40 select HAVE_DMA_CONTIGUOUS if !SWIOTLB 39 select HAVE_DMA_CONTIGUOUS if !SWIOTLB
41 select HAVE_KRETPROBES 40 select HAVE_KRETPROBES
42 select HAVE_OPTPROBES 41 select HAVE_OPTPROBES
42 select HAVE_KPROBES_ON_FTRACE
43 select HAVE_FTRACE_MCOUNT_RECORD 43 select HAVE_FTRACE_MCOUNT_RECORD
44 select HAVE_FENTRY if X86_64 44 select HAVE_FENTRY if X86_64
45 select HAVE_C_RECORDMCOUNT 45 select HAVE_C_RECORDMCOUNT
46 select HAVE_DYNAMIC_FTRACE 46 select HAVE_DYNAMIC_FTRACE
47 select HAVE_DYNAMIC_FTRACE_WITH_REGS
47 select HAVE_FUNCTION_TRACER 48 select HAVE_FUNCTION_TRACER
48 select HAVE_FUNCTION_GRAPH_TRACER 49 select HAVE_FUNCTION_GRAPH_TRACER
49 select HAVE_FUNCTION_GRAPH_FP_TEST 50 select HAVE_FUNCTION_GRAPH_FP_TEST
@@ -106,6 +107,7 @@ config X86
106 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) 107 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
107 select GENERIC_TIME_VSYSCALL if X86_64 108 select GENERIC_TIME_VSYSCALL if X86_64
108 select KTIME_SCALAR if X86_32 109 select KTIME_SCALAR if X86_32
110 select ALWAYS_USE_PERSISTENT_CLOCK
109 select GENERIC_STRNCPY_FROM_USER 111 select GENERIC_STRNCPY_FROM_USER
110 select GENERIC_STRNLEN_USER 112 select GENERIC_STRNLEN_USER
111 select HAVE_CONTEXT_TRACKING if X86_64 113 select HAVE_CONTEXT_TRACKING if X86_64
@@ -113,7 +115,10 @@ config X86
113 select MODULES_USE_ELF_REL if X86_32 115 select MODULES_USE_ELF_REL if X86_32
114 select MODULES_USE_ELF_RELA if X86_64 116 select MODULES_USE_ELF_RELA if X86_64
115 select CLONE_BACKWARDS if X86_32 117 select CLONE_BACKWARDS if X86_32
116 select GENERIC_SIGALTSTACK 118 select ARCH_USE_BUILTIN_BSWAP
119 select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
120 select OLD_SIGACTION if X86_32
121 select COMPAT_OLD_SIGACTION if IA32_EMULATION
117 122
118config INSTRUCTION_DECODER 123config INSTRUCTION_DECODER
119 def_bool y 124 def_bool y
@@ -222,7 +227,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
222 227
223config HAVE_INTEL_TXT 228config HAVE_INTEL_TXT
224 def_bool y 229 def_bool y
225 depends on EXPERIMENTAL && INTEL_IOMMU && ACPI 230 depends on INTEL_IOMMU && ACPI
226 231
227config X86_32_SMP 232config X86_32_SMP
228 def_bool y 233 def_bool y
@@ -320,6 +325,10 @@ config X86_BIGSMP
320 ---help--- 325 ---help---
321 This option is needed for the systems that have more than 8 CPUs 326 This option is needed for the systems that have more than 8 CPUs
322 327
328config GOLDFISH
329 def_bool y
330 depends on X86_GOLDFISH
331
323if X86_32 332if X86_32
324config X86_EXTENDED_PLATFORM 333config X86_EXTENDED_PLATFORM
325 bool "Support for extended (non-PC) x86 platforms" 334 bool "Support for extended (non-PC) x86 platforms"
@@ -402,6 +411,14 @@ config X86_UV
402# Following is an alphabetically sorted list of 32 bit extended platforms 411# Following is an alphabetically sorted list of 32 bit extended platforms
403# Please maintain the alphabetic order if and when there are additions 412# Please maintain the alphabetic order if and when there are additions
404 413
414config X86_GOLDFISH
415 bool "Goldfish (Virtual Platform)"
416 depends on X86_32
417 ---help---
418 Enable support for the Goldfish virtual platform used primarily
419 for Android development. Unless you are building for the Android
420 Goldfish emulator say N here.
421
405config X86_INTEL_CE 422config X86_INTEL_CE
406 bool "CE4100 TV platform" 423 bool "CE4100 TV platform"
407 depends on PCI 424 depends on PCI
@@ -454,6 +471,16 @@ config X86_MDFLD
454 471
455endif 472endif
456 473
474config X86_INTEL_LPSS
475 bool "Intel Low Power Subsystem Support"
476 depends on ACPI
477 select COMMON_CLK
478 ---help---
479 Select to build support for Intel Low Power Subsystem such as
480 found on Intel Lynxpoint PCH. Selecting this option enables
481 things like clock tree (common clock framework) which are needed
482 by the LPSS peripheral drivers.
483
457config X86_RDC321X 484config X86_RDC321X
458 bool "RDC R-321x SoC" 485 bool "RDC R-321x SoC"
459 depends on X86_32 486 depends on X86_32
@@ -617,7 +644,7 @@ config PARAVIRT
617 644
618config PARAVIRT_SPINLOCKS 645config PARAVIRT_SPINLOCKS
619 bool "Paravirtualization layer for spinlocks" 646 bool "Paravirtualization layer for spinlocks"
620 depends on PARAVIRT && SMP && EXPERIMENTAL 647 depends on PARAVIRT && SMP
621 ---help--- 648 ---help---
622 Paravirtualized spinlocks allow a pvops backend to replace the 649 Paravirtualized spinlocks allow a pvops backend to replace the
623 spinlock implementation with something virtualization-friendly 650 spinlock implementation with something virtualization-friendly
@@ -729,7 +756,7 @@ config GART_IOMMU
729config CALGARY_IOMMU 756config CALGARY_IOMMU
730 bool "IBM Calgary IOMMU support" 757 bool "IBM Calgary IOMMU support"
731 select SWIOTLB 758 select SWIOTLB
732 depends on X86_64 && PCI && EXPERIMENTAL 759 depends on X86_64 && PCI
733 ---help--- 760 ---help---
734 Support for hardware IOMMUs in IBM's xSeries x366 and x460 761 Support for hardware IOMMUs in IBM's xSeries x366 and x460
735 systems. Needed to run systems with more than 3GB of memory 762 systems. Needed to run systems with more than 3GB of memory
@@ -771,7 +798,7 @@ config IOMMU_HELPER
771 798
772config MAXSMP 799config MAXSMP
773 bool "Enable Maximum number of SMP Processors and NUMA Nodes" 800 bool "Enable Maximum number of SMP Processors and NUMA Nodes"
774 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL 801 depends on X86_64 && SMP && DEBUG_KERNEL
775 select CPUMASK_OFFSTACK 802 select CPUMASK_OFFSTACK
776 ---help--- 803 ---help---
777 Enable maximum number of CPUS and NUMA Nodes for this architecture. 804 Enable maximum number of CPUS and NUMA Nodes for this architecture.
@@ -1029,6 +1056,24 @@ config MICROCODE_OLD_INTERFACE
1029 def_bool y 1056 def_bool y
1030 depends on MICROCODE 1057 depends on MICROCODE
1031 1058
1059config MICROCODE_INTEL_LIB
1060 def_bool y
1061 depends on MICROCODE_INTEL
1062
1063config MICROCODE_INTEL_EARLY
1064 bool "Early load microcode"
1065 depends on MICROCODE_INTEL && BLK_DEV_INITRD
1066 default y
1067 help
1068 This option provides functionality to read additional microcode data
1069 at the beginning of initrd image. The data tells kernel to load
1070 microcode to CPU's as early as possible. No functional change if no
1071 microcode data is glued to the initrd, therefore it's safe to say Y.
1072
1073config MICROCODE_EARLY
1074 def_bool y
1075 depends on MICROCODE_INTEL_EARLY
1076
1032config X86_MSR 1077config X86_MSR
1033 tristate "/dev/cpu/*/msr - Model-specific register support" 1078 tristate "/dev/cpu/*/msr - Model-specific register support"
1034 ---help--- 1079 ---help---
@@ -1107,7 +1152,6 @@ config HIGHMEM64G
1107endchoice 1152endchoice
1108 1153
1109choice 1154choice
1110 depends on EXPERIMENTAL
1111 prompt "Memory split" if EXPERT 1155 prompt "Memory split" if EXPERT
1112 default VMSPLIT_3G 1156 default VMSPLIT_3G
1113 depends on X86_32 1157 depends on X86_32
@@ -1184,7 +1228,7 @@ config DIRECT_GBPAGES
1184config NUMA 1228config NUMA
1185 bool "Numa Memory Allocation and Scheduler Support" 1229 bool "Numa Memory Allocation and Scheduler Support"
1186 depends on SMP 1230 depends on SMP
1187 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) 1231 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI))
1188 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) 1232 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
1189 ---help--- 1233 ---help---
1190 Enable NUMA (Non Uniform Memory Access) support. 1234 Enable NUMA (Non Uniform Memory Access) support.
@@ -1253,10 +1297,6 @@ config NODES_SHIFT
1253 Specify the maximum number of NUMA Nodes available on the target 1297 Specify the maximum number of NUMA Nodes available on the target
1254 system. Increases memory reserved to accommodate various tables. 1298 system. Increases memory reserved to accommodate various tables.
1255 1299
1256config HAVE_ARCH_ALLOC_REMAP
1257 def_bool y
1258 depends on X86_32 && NUMA
1259
1260config ARCH_HAVE_MEMORY_PRESENT 1300config ARCH_HAVE_MEMORY_PRESENT
1261 def_bool y 1301 def_bool y
1262 depends on X86_32 && DISCONTIGMEM 1302 depends on X86_32 && DISCONTIGMEM
@@ -1279,7 +1319,7 @@ config ARCH_DISCONTIGMEM_DEFAULT
1279 1319
1280config ARCH_SPARSEMEM_ENABLE 1320config ARCH_SPARSEMEM_ENABLE
1281 def_bool y 1321 def_bool y
1282 depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD 1322 depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD
1283 select SPARSEMEM_STATIC if X86_32 1323 select SPARSEMEM_STATIC if X86_32
1284 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 1324 select SPARSEMEM_VMEMMAP_ENABLE if X86_64
1285 1325
@@ -1593,8 +1633,7 @@ config CRASH_DUMP
1593 For more details see Documentation/kdump/kdump.txt 1633 For more details see Documentation/kdump/kdump.txt
1594 1634
1595config KEXEC_JUMP 1635config KEXEC_JUMP
1596 bool "kexec jump (EXPERIMENTAL)" 1636 bool "kexec jump"
1597 depends on EXPERIMENTAL
1598 depends on KEXEC && HIBERNATION 1637 depends on KEXEC && HIBERNATION
1599 ---help--- 1638 ---help---
1600 Jump between original kernel and kexeced kernel and invoke 1639 Jump between original kernel and kexeced kernel and invoke
@@ -1699,7 +1738,7 @@ config HOTPLUG_CPU
1699config BOOTPARAM_HOTPLUG_CPU0 1738config BOOTPARAM_HOTPLUG_CPU0
1700 bool "Set default setting of cpu0_hotpluggable" 1739 bool "Set default setting of cpu0_hotpluggable"
1701 default n 1740 default n
1702 depends on HOTPLUG_CPU && EXPERIMENTAL 1741 depends on HOTPLUG_CPU
1703 ---help--- 1742 ---help---
1704 Set whether default state of cpu0_hotpluggable is on or off. 1743 Set whether default state of cpu0_hotpluggable is on or off.
1705 1744
@@ -1728,7 +1767,7 @@ config BOOTPARAM_HOTPLUG_CPU0
1728config DEBUG_HOTPLUG_CPU0 1767config DEBUG_HOTPLUG_CPU0
1729 def_bool n 1768 def_bool n
1730 prompt "Debug CPU0 hotplug" 1769 prompt "Debug CPU0 hotplug"
1731 depends on HOTPLUG_CPU && EXPERIMENTAL 1770 depends on HOTPLUG_CPU
1732 ---help--- 1771 ---help---
1733 Enabling this option offlines CPU0 (if CPU0 can be offlined) as 1772 Enabling this option offlines CPU0 (if CPU0 can be offlined) as
1734 soon as possible and boots up userspace with CPU0 offlined. User 1773 soon as possible and boots up userspace with CPU0 offlined. User
@@ -1912,6 +1951,7 @@ config APM_DO_ENABLE
1912 this feature. 1951 this feature.
1913 1952
1914config APM_CPU_IDLE 1953config APM_CPU_IDLE
1954 depends on CPU_IDLE
1915 bool "Make CPU Idle calls when idle" 1955 bool "Make CPU Idle calls when idle"
1916 ---help--- 1956 ---help---
1917 Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. 1957 Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
@@ -2037,7 +2077,7 @@ config PCI_MMCONFIG
2037 2077
2038config PCI_CNB20LE_QUIRK 2078config PCI_CNB20LE_QUIRK
2039 bool "Read CNB20LE Host Bridge Windows" if EXPERT 2079 bool "Read CNB20LE Host Bridge Windows" if EXPERT
2040 depends on PCI && EXPERIMENTAL 2080 depends on PCI
2041 help 2081 help
2042 Read the PCI windows out of the CNB20LE host bridge. This allows 2082 Read the PCI windows out of the CNB20LE host bridge. This allows
2043 PCI hotplug to work on systems with the CNB20LE chipset which do 2083 PCI hotplug to work on systems with the CNB20LE chipset which do
@@ -2138,6 +2178,7 @@ config OLPC_XO1_RTC
2138config OLPC_XO1_SCI 2178config OLPC_XO1_SCI
2139 bool "OLPC XO-1 SCI extras" 2179 bool "OLPC XO-1 SCI extras"
2140 depends on OLPC && OLPC_XO1_PM 2180 depends on OLPC && OLPC_XO1_PM
2181 depends on INPUT=y
2141 select POWER_SUPPLY 2182 select POWER_SUPPLY
2142 select GPIO_CS5535 2183 select GPIO_CS5535
2143 select MFD_CORE 2184 select MFD_CORE
@@ -2187,6 +2228,15 @@ config GEOS
2187 ---help--- 2228 ---help---
2188 This option enables system support for the Traverse Technologies GEOS. 2229 This option enables system support for the Traverse Technologies GEOS.
2189 2230
2231config TS5500
2232 bool "Technologic Systems TS-5500 platform support"
2233 depends on MELAN
2234 select CHECK_SIGNATURE
2235 select NEW_LEDS
2236 select LEDS_CLASS
2237 ---help---
2238 This option enables system support for the Technologic Systems TS-5500.
2239
2190endif # X86_32 2240endif # X86_32
2191 2241
2192config AMD_NB 2242config AMD_NB
@@ -2231,8 +2281,8 @@ config IA32_AOUT
2231 Support old a.out binaries in the 32bit emulation. 2281 Support old a.out binaries in the 32bit emulation.
2232 2282
2233config X86_X32 2283config X86_X32
2234 bool "x32 ABI for 64-bit mode (EXPERIMENTAL)" 2284 bool "x32 ABI for 64-bit mode"
2235 depends on X86_64 && IA32_EMULATION && EXPERIMENTAL 2285 depends on X86_64 && IA32_EMULATION
2236 ---help--- 2286 ---help---
2237 Include code to run binaries for the x32 native 32-bit ABI 2287 Include code to run binaries for the x32 native 32-bit ABI
2238 for 64-bit processors. An x32 process gets access to the 2288 for 64-bit processors. An x32 process gets access to the
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e71fc4279aab..5c477260294f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -2,7 +2,11 @@
2 2
3# select defconfig based on actual architecture 3# select defconfig based on actual architecture
4ifeq ($(ARCH),x86) 4ifeq ($(ARCH),x86)
5 ifeq ($(shell uname -m),x86_64)
6 KBUILD_DEFCONFIG := x86_64_defconfig
7 else
5 KBUILD_DEFCONFIG := i386_defconfig 8 KBUILD_DEFCONFIG := i386_defconfig
9 endif
6else 10else
7 KBUILD_DEFCONFIG := $(ARCH)_defconfig 11 KBUILD_DEFCONFIG := $(ARCH)_defconfig
8endif 12endif
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index ccce0ed67dde..379814bc41e3 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,7 +71,7 @@ GCOV_PROFILE := n
71$(obj)/bzImage: asflags-y := $(SVGA_MODE) 71$(obj)/bzImage: asflags-y := $(SVGA_MODE)
72 72
73quiet_cmd_image = BUILD $@ 73quiet_cmd_image = BUILD $@
74cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin > $@ 74cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/zoffset.h > $@
75 75
76$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE 76$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
77 $(call if_changed,image) 77 $(call if_changed,image)
@@ -92,7 +92,7 @@ targets += voffset.h
92$(obj)/voffset.h: vmlinux FORCE 92$(obj)/voffset.h: vmlinux FORCE
93 $(call if_changed,voffset) 93 $(call if_changed,voffset)
94 94
95sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' 95sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi_pe_entry\|efi_stub_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
96 96
97quiet_cmd_zoffset = ZOFFSET $@ 97quiet_cmd_zoffset = ZOFFSET $@
98 cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ 98 cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 18997e5a1053..5b7531966b84 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -285,16 +285,26 @@ struct biosregs {
285void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); 285void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
286 286
287/* cmdline.c */ 287/* cmdline.c */
288int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize); 288int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
289int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option); 289int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
290static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) 290static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
291{ 291{
292 return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize); 292 unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
293
294 if (cmd_line_ptr >= 0x100000)
295 return -1; /* inaccessible */
296
297 return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
293} 298}
294 299
295static inline int cmdline_find_option_bool(const char *option) 300static inline int cmdline_find_option_bool(const char *option)
296{ 301{
297 return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option); 302 unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
303
304 if (cmd_line_ptr >= 0x100000)
305 return -1; /* inaccessible */
306
307 return __cmdline_find_option_bool(cmd_line_ptr, option);
298} 308}
299 309
300 310
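
The boot.h change above widens the command-line pointer to unsigned long and moves the 1 MiB accessibility check into the inline wrappers: the real-mode parser reaches the command line through a segment:offset pair built from cmdline_ptr >> 4 and cmdline_ptr & 0xf, so anything at or above 0x100000 simply cannot be addressed from it. A minimal standalone C sketch of that constraint follows; the helper name is invented for illustration and is not part of the patch.

#include <stdint.h>

/*
 * Illustration only: mirrors the segment:offset arithmetic used by
 * __cmdline_find_option() (cptr = ptr & 0xf; set_fs(ptr >> 4)) and the
 * new >= 0x100000 check done by the boot.h wrappers.
 */
static int cmdline_reachable_from_real_mode(uint32_t cmd_line_ptr)
{
	if (!cmd_line_ptr || cmd_line_ptr >= 0x100000)
		return 0;	/* missing, or above 1 MiB: inaccessible */

	uint16_t seg = cmd_line_ptr >> 4;	/* fs segment */
	uint16_t off = cmd_line_ptr & 0xf;	/* offset within it */

	/* seg:off reconstructs the same linear address: seg * 16 + off */
	return ((uint32_t)seg << 4) + off == cmd_line_ptr;
}
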
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 6b3b6f708c04..625d21b0cd3f 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -27,7 +27,7 @@ static inline int myisspace(u8 c)
27 * Returns the length of the argument (regardless of if it was 27 * Returns the length of the argument (regardless of if it was
28 * truncated to fit in the buffer), or -1 on not found. 28 * truncated to fit in the buffer), or -1 on not found.
29 */ 29 */
30int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize) 30int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
31{ 31{
32 addr_t cptr; 32 addr_t cptr;
33 char c; 33 char c;
@@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
41 st_bufcpy /* Copying this to buffer */ 41 st_bufcpy /* Copying this to buffer */
42 } state = st_wordstart; 42 } state = st_wordstart;
43 43
44 if (!cmdline_ptr || cmdline_ptr >= 0x100000) 44 if (!cmdline_ptr)
45 return -1; /* No command line, or inaccessible */ 45 return -1; /* No command line */
46 46
47 cptr = cmdline_ptr & 0xf; 47 cptr = cmdline_ptr & 0xf;
48 set_fs(cmdline_ptr >> 4); 48 set_fs(cmdline_ptr >> 4);
@@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
99 * Returns the position of that option (starts counting with 1) 99 * Returns the position of that option (starts counting with 1)
100 * or 0 on not found 100 * or 0 on not found
101 */ 101 */
102int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option) 102int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
103{ 103{
104 addr_t cptr; 104 addr_t cptr;
105 char c; 105 char c;
@@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
111 st_wordskip, /* Miscompare, skip */ 111 st_wordskip, /* Miscompare, skip */
112 } state = st_wordstart; 112 } state = st_wordstart;
113 113
114 if (!cmdline_ptr || cmdline_ptr >= 0x100000) 114 if (!cmdline_ptr)
115 return -1; /* No command line, or inaccessible */ 115 return -1; /* No command line */
116 116
117 cptr = cmdline_ptr & 0xf; 117 cptr = cmdline_ptr & 0xf;
118 set_fs(cmdline_ptr >> 4); 118 set_fs(cmdline_ptr >> 4);
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 10f6b1178c68..bffd73b45b1f 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -13,13 +13,21 @@ static inline char rdfs8(addr_t addr)
13 return *((char *)(fs + addr)); 13 return *((char *)(fs + addr));
14} 14}
15#include "../cmdline.c" 15#include "../cmdline.c"
16static unsigned long get_cmd_line_ptr(void)
17{
18 unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
19
20 cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
21
22 return cmd_line_ptr;
23}
16int cmdline_find_option(const char *option, char *buffer, int bufsize) 24int cmdline_find_option(const char *option, char *buffer, int bufsize)
17{ 25{
18 return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize); 26 return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
19} 27}
20int cmdline_find_option_bool(const char *option) 28int cmdline_find_option_bool(const char *option)
21{ 29{
22 return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option); 30 return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
23} 31}
24 32
25#endif 33#endif
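
In the compressed-kernel cmdline.c, get_cmd_line_ptr() now ORs ext_cmd_line_ptr into the upper 32 bits of hdr.cmd_line_ptr, so a loader that fills in the extension field can place the command line above 4 GiB. A small self-contained sketch of that combination follows, using a trimmed-down stand-in for boot_params; the field names follow the patch, the values are invented.

#include <stdint.h>
#include <stdio.h>

struct fake_boot_params {
	uint32_t cmd_line_ptr;		/* hdr.cmd_line_ptr: low 32 bits */
	uint32_t ext_cmd_line_ptr;	/* extension field: high 32 bits */
};

static unsigned long long get_cmd_line_ptr(const struct fake_boot_params *bp)
{
	unsigned long long ptr = bp->cmd_line_ptr;

	ptr |= (unsigned long long)bp->ext_cmd_line_ptr << 32;
	return ptr;
}

int main(void)
{
	struct fake_boot_params bp = {
		.cmd_line_ptr     = 0x02000000,
		.ext_cmd_line_ptr = 0x00000001,
	};

	/* prints 0x102000000: a command line placed above 4 GiB */
	printf("0x%llx\n", get_cmd_line_ptr(&bp));
	return 0;
}
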
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index b1942e222768..f8fa41190c35 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -256,10 +256,10 @@ static efi_status_t setup_efi_pci(struct boot_params *params)
256 int i; 256 int i;
257 struct setup_data *data; 257 struct setup_data *data;
258 258
259 data = (struct setup_data *)params->hdr.setup_data; 259 data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
260 260
261 while (data && data->next) 261 while (data && data->next)
262 data = (struct setup_data *)data->next; 262 data = (struct setup_data *)(unsigned long)data->next;
263 263
264 status = efi_call_phys5(sys_table->boottime->locate_handle, 264 status = efi_call_phys5(sys_table->boottime->locate_handle,
265 EFI_LOCATE_BY_PROTOCOL, &pci_proto, 265 EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -295,16 +295,18 @@ static efi_status_t setup_efi_pci(struct boot_params *params)
295 if (!pci) 295 if (!pci)
296 continue; 296 continue;
297 297
298#ifdef CONFIG_X86_64
298 status = efi_call_phys4(pci->attributes, pci, 299 status = efi_call_phys4(pci->attributes, pci,
299 EfiPciIoAttributeOperationGet, 0, 300 EfiPciIoAttributeOperationGet, 0,
300 &attributes); 301 &attributes);
301 302#else
303 status = efi_call_phys5(pci->attributes, pci,
304 EfiPciIoAttributeOperationGet, 0, 0,
305 &attributes);
306#endif
302 if (status != EFI_SUCCESS) 307 if (status != EFI_SUCCESS)
303 continue; 308 continue;
304 309
305 if (!attributes & EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM)
306 continue;
307
308 if (!pci->romimage || !pci->romsize) 310 if (!pci->romimage || !pci->romsize)
309 continue; 311 continue;
310 312
@@ -345,9 +347,9 @@ static efi_status_t setup_efi_pci(struct boot_params *params)
345 memcpy(rom->romdata, pci->romimage, pci->romsize); 347 memcpy(rom->romdata, pci->romimage, pci->romsize);
346 348
347 if (data) 349 if (data)
348 data->next = (uint64_t)rom; 350 data->next = (unsigned long)rom;
349 else 351 else
350 params->hdr.setup_data = (uint64_t)rom; 352 params->hdr.setup_data = (unsigned long)rom;
351 353
352 data = (struct setup_data *)rom; 354 data = (struct setup_data *)rom;
353 355
@@ -432,10 +434,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
432 * Once we've found a GOP supporting ConOut, 434 * Once we've found a GOP supporting ConOut,
433 * don't bother looking any further. 435 * don't bother looking any further.
434 */ 436 */
437 first_gop = gop;
435 if (conout_found) 438 if (conout_found)
436 break; 439 break;
437
438 first_gop = gop;
439 } 440 }
440 } 441 }
441 442
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index aa4aaf1b2380..1e3184f6072f 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -35,11 +35,11 @@ ENTRY(startup_32)
35#ifdef CONFIG_EFI_STUB 35#ifdef CONFIG_EFI_STUB
36 jmp preferred_addr 36 jmp preferred_addr
37 37
38 .balign 0x10
39 /* 38 /*
40 * We don't need the return address, so set up the stack so 39 * We don't need the return address, so set up the stack so
41 * efi_main() can find its arugments. 40 * efi_main() can find its arguments.
42 */ 41 */
42ENTRY(efi_pe_entry)
43 add $0x4, %esp 43 add $0x4, %esp
44 44
45 call make_boot_params 45 call make_boot_params
@@ -50,8 +50,10 @@ ENTRY(startup_32)
50 pushl %eax 50 pushl %eax
51 pushl %esi 51 pushl %esi
52 pushl %ecx 52 pushl %ecx
53 sub $0x4, %esp
53 54
54 .org 0x30,0x90 55ENTRY(efi_stub_entry)
56 add $0x4, %esp
55 call efi_main 57 call efi_main
56 cmpl $0, %eax 58 cmpl $0, %eax
57 movl %eax, %esi 59 movl %eax, %esi
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2c4b171eec33..c1d383d1fb7e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -37,6 +37,12 @@
37 __HEAD 37 __HEAD
38 .code32 38 .code32
39ENTRY(startup_32) 39ENTRY(startup_32)
40 /*
41 * 32bit entry is 0 and it is ABI so immutable!
42 * If we come here directly from a bootloader,
43 * kernel(text+data+bss+brk) ramdisk, zero_page, command line
44 * all need to be under the 4G limit.
45 */
40 cld 46 cld
41 /* 47 /*
42 * Test KEEP_SEGMENTS flag to see if the bootloader is asking 48 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -154,6 +160,12 @@ ENTRY(startup_32)
154 btsl $_EFER_LME, %eax 160 btsl $_EFER_LME, %eax
155 wrmsr 161 wrmsr
156 162
163 /* After gdt is loaded */
164 xorl %eax, %eax
165 lldt %ax
166 movl $0x20, %eax
167 ltr %ax
168
157 /* 169 /*
158 * Setup for the jump to 64bit mode 170 * Setup for the jump to 64bit mode
159 * 171 *
@@ -176,37 +188,27 @@ ENTRY(startup_32)
176 lret 188 lret
177ENDPROC(startup_32) 189ENDPROC(startup_32)
178 190
179no_longmode:
180 /* This isn't an x86-64 CPU so hang */
1811:
182 hlt
183 jmp 1b
184
185#include "../../kernel/verify_cpu.S"
186
187 /*
188 * Be careful here startup_64 needs to be at a predictable
189 * address so I can export it in an ELF header. Bootloaders
190 * should look at the ELF header to find this address, as
191 * it may change in the future.
192 */
193 .code64 191 .code64
194 .org 0x200 192 .org 0x200
195ENTRY(startup_64) 193ENTRY(startup_64)
196 /* 194 /*
195 * 64bit entry is 0x200 and it is ABI so immutable!
197 * We come here either from startup_32 or directly from a 196 * We come here either from startup_32 or directly from a
198 * 64bit bootloader. If we come here from a bootloader we depend on 197 * 64bit bootloader.
199 * an identity mapped page table being provied that maps our 198 * If we come here from a bootloader, kernel(text+data+bss+brk),
200 * entire text+data+bss and hopefully all of memory. 199 * ramdisk, zero_page, command line could be above 4G.
200 * We depend on an identity mapped page table being provided
201 * that maps our entire kernel(text+data+bss+brk), zero page
202 * and command line.
201 */ 203 */
202#ifdef CONFIG_EFI_STUB 204#ifdef CONFIG_EFI_STUB
203 /* 205 /*
204 * The entry point for the PE/COFF executable is 0x210, so only 206 * The entry point for the PE/COFF executable is efi_pe_entry, so
205 * legacy boot loaders will execute this jmp. 207 * only legacy boot loaders will execute this jmp.
206 */ 208 */
207 jmp preferred_addr 209 jmp preferred_addr
208 210
209 .org 0x210 211ENTRY(efi_pe_entry)
210 mov %rcx, %rdi 212 mov %rcx, %rdi
211 mov %rdx, %rsi 213 mov %rdx, %rsi
212 pushq %rdi 214 pushq %rdi
@@ -218,7 +220,7 @@ ENTRY(startup_64)
218 popq %rsi 220 popq %rsi
219 popq %rdi 221 popq %rdi
220 222
221 .org 0x230,0x90 223ENTRY(efi_stub_entry)
222 call efi_main 224 call efi_main
223 movq %rax,%rsi 225 movq %rax,%rsi
224 cmpq $0,%rax 226 cmpq $0,%rax
@@ -247,9 +249,6 @@ preferred_addr:
247 movl %eax, %ss 249 movl %eax, %ss
248 movl %eax, %fs 250 movl %eax, %fs
249 movl %eax, %gs 251 movl %eax, %gs
250 lldt %ax
251 movl $0x20, %eax
252 ltr %ax
253 252
254 /* 253 /*
255 * Compute the decompressed kernel start address. It is where 254 * Compute the decompressed kernel start address. It is where
@@ -349,6 +348,15 @@ relocated:
349 */ 348 */
350 jmp *%rbp 349 jmp *%rbp
351 350
351 .code32
352no_longmode:
353 /* This isn't an x86-64 CPU so hang */
3541:
355 hlt
356 jmp 1b
357
358#include "../../kernel/verify_cpu.S"
359
352 .data 360 .data
353gdt: 361gdt:
354 .word gdt_end - gdt 362 .word gdt_end - gdt
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 88f7ff6da404..7cb56c6ca351 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
325{ 325{
326 real_mode = rmode; 326 real_mode = rmode;
327 327
328 sanitize_boot_params(real_mode);
329
328 if (real_mode->screen_info.orig_video_mode == 7) { 330 if (real_mode->screen_info.orig_video_mode == 7) {
329 vidmem = (char *) 0xb0000; 331 vidmem = (char *) 0xb0000;
330 vidport = 0x3b4; 332 vidport = 0x3b4;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 0e6dc0ee0eea..674019d8e235 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -18,6 +18,7 @@
18#include <asm/page.h> 18#include <asm/page.h>
19#include <asm/boot.h> 19#include <asm/boot.h>
20#include <asm/bootparam.h> 20#include <asm/bootparam.h>
21#include <asm/bootparam_utils.h>
21 22
22#define BOOT_BOOT_H 23#define BOOT_BOOT_H
23#include "../ctype.h" 24#include "../ctype.h"
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 8c132a625b94..9ec06a1f6d61 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -21,6 +21,7 @@
21#include <asm/e820.h> 21#include <asm/e820.h>
22#include <asm/page_types.h> 22#include <asm/page_types.h>
23#include <asm/setup.h> 23#include <asm/setup.h>
24#include <asm/bootparam.h>
24#include "boot.h" 25#include "boot.h"
25#include "voffset.h" 26#include "voffset.h"
26#include "zoffset.h" 27#include "zoffset.h"
@@ -255,6 +256,9 @@ section_table:
255 # header, from the old boot sector. 256 # header, from the old boot sector.
256 257
257 .section ".header", "a" 258 .section ".header", "a"
259 .globl sentinel
260sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */
261
258 .globl hdr 262 .globl hdr
259hdr: 263hdr:
260setup_sects: .byte 0 /* Filled in by build.c */ 264setup_sects: .byte 0 /* Filled in by build.c */
@@ -279,7 +283,7 @@ _start:
279 # Part 2 of the header, from the old setup.S 283 # Part 2 of the header, from the old setup.S
280 284
281 .ascii "HdrS" # header signature 285 .ascii "HdrS" # header signature
282 .word 0x020b # header version number (>= 0x0105) 286 .word 0x020c # header version number (>= 0x0105)
283 # or else old loadlin-1.5 will fail) 287 # or else old loadlin-1.5 will fail)
284 .globl realmode_swtch 288 .globl realmode_swtch
285realmode_swtch: .word 0, 0 # default_switch, SETUPSEG 289realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -297,13 +301,7 @@ type_of_loader: .byte 0 # 0 means ancient bootloader, newer
297 301
298# flags, unused bits must be zero (RFU) bit within loadflags 302# flags, unused bits must be zero (RFU) bit within loadflags
299loadflags: 303loadflags:
300LOADED_HIGH = 1 # If set, the kernel is loaded high 304 .byte LOADED_HIGH # The kernel is to be loaded high
301CAN_USE_HEAP = 0x80 # If set, the loader also has set
302 # heap_end_ptr to tell how much
303 # space behind setup.S can be used for
304 # heap purposes.
305 # Only the loader knows what is free
306 .byte LOADED_HIGH
307 305
308setup_move_size: .word 0x8000 # size to move, when setup is not 306setup_move_size: .word 0x8000 # size to move, when setup is not
309 # loaded at 0x90000. We will move setup 307 # loaded at 0x90000. We will move setup
@@ -369,7 +367,31 @@ relocatable_kernel: .byte 1
369relocatable_kernel: .byte 0 367relocatable_kernel: .byte 0
370#endif 368#endif
371min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment 369min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
372pad3: .word 0 370
371xloadflags:
372#ifdef CONFIG_X86_64
373# define XLF0 XLF_KERNEL_64 /* 64-bit kernel */
374#else
375# define XLF0 0
376#endif
377
378#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
379 /* kernel/boot_param/ramdisk could be loaded above 4g */
380# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
381#else
382# define XLF1 0
383#endif
384
385#ifdef CONFIG_EFI_STUB
386# ifdef CONFIG_X86_64
387# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
388# else
389# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
390# endif
391#else
392# define XLF23 0
393#endif
394 .word XLF0 | XLF1 | XLF23
373 395
374cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, 396cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
375 #added with boot protocol 397 #added with boot protocol
@@ -397,8 +419,13 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
397#define INIT_SIZE VO_INIT_SIZE 419#define INIT_SIZE VO_INIT_SIZE
398#endif 420#endif
399init_size: .long INIT_SIZE # kernel initialization size 421init_size: .long INIT_SIZE # kernel initialization size
400handover_offset: .long 0x30 # offset to the handover 422handover_offset:
423#ifdef CONFIG_EFI_STUB
424 .long 0x30 # offset to the handover
401 # protocol entry point 425 # protocol entry point
426#else
427 .long 0
428#endif
402 429
403# End of setup header ##################################################### 430# End of setup header #####################################################
404 431
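
The new xloadflags word in the setup header advertises what this kernel image supports: a 64-bit entry point, loading above 4 GiB, and 32- or 64-bit EFI handover. A sketch of how a boot loader might test it is below; the XLF_* bit values follow the definitions added to bootparam.h by this series, and the 0x206/0x236 offsets are the boot-protocol locations of the version and xloadflags fields (xloadflags exists from protocol 2.12 on), but treat the function itself as illustrative rather than real loader code.

#include <stdint.h>

#define XLF_KERNEL_64			(1 << 0)
#define XLF_CAN_BE_LOADED_ABOVE_4G	(1 << 1)
#define XLF_EFI_HANDOVER_32		(1 << 2)
#define XLF_EFI_HANDOVER_64		(1 << 3)

/* 'image' points at the start of the bzImage (boot sector + setup). */
static int kernel_loadable_above_4g(const uint8_t *image)
{
	uint16_t version = image[0x206] | (uint16_t)image[0x207] << 8;

	if (version < 0x020c)	/* header 2.12 introduced xloadflags */
		return 0;

	uint16_t xlf = image[0x236] | (uint16_t)image[0x237] << 8;

	return (xlf & XLF_KERNEL_64) &&
	       (xlf & XLF_CAN_BE_LOADED_ABOVE_4G);
}
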
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 03c0683636b6..96a6c7563538 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -13,7 +13,7 @@ SECTIONS
13 .bstext : { *(.bstext) } 13 .bstext : { *(.bstext) }
14 .bsdata : { *(.bsdata) } 14 .bsdata : { *(.bsdata) }
15 15
16 . = 497; 16 . = 495;
17 .header : { *(.header) } 17 .header : { *(.header) }
18 .entrytext : { *(.entrytext) } 18 .entrytext : { *(.entrytext) }
19 .inittext : { *(.inittext) } 19 .inittext : { *(.inittext) }
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 4b8e165ee572..94c544650020 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -52,6 +52,10 @@ int is_big_kernel;
52 52
53#define PECOFF_RELOC_RESERVE 0x20 53#define PECOFF_RELOC_RESERVE 0x20
54 54
55unsigned long efi_stub_entry;
56unsigned long efi_pe_entry;
57unsigned long startup_64;
58
55/*----------------------------------------------------------------------*/ 59/*----------------------------------------------------------------------*/
56 60
57static const u32 crctab32[] = { 61static const u32 crctab32[] = {
@@ -132,7 +136,7 @@ static void die(const char * str, ...)
132 136
133static void usage(void) 137static void usage(void)
134{ 138{
135 die("Usage: build setup system [> image]"); 139 die("Usage: build setup system [zoffset.h] [> image]");
136} 140}
137 141
138#ifdef CONFIG_EFI_STUB 142#ifdef CONFIG_EFI_STUB
@@ -206,30 +210,54 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
206 */ 210 */
207 put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]); 211 put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
208 212
209#ifdef CONFIG_X86_32
210 /* 213 /*
211 * Address of entry point. 214 * Address of entry point for PE/COFF executable
212 *
213 * The EFI stub entry point is +16 bytes from the start of
214 * the .text section.
215 */ 215 */
216 put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]); 216 put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
217#else
218 /*
219 * Address of entry point. startup_32 is at the beginning and
220 * the 64-bit entry point (startup_64) is always 512 bytes
221 * after. The EFI stub entry point is 16 bytes after that, as
222 * the first instruction allows legacy loaders to jump over
223 * the EFI stub initialisation
224 */
225 put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]);
226#endif /* CONFIG_X86_32 */
227 217
228 update_pecoff_section_header(".text", text_start, text_sz); 218 update_pecoff_section_header(".text", text_start, text_sz);
229} 219}
230 220
231#endif /* CONFIG_EFI_STUB */ 221#endif /* CONFIG_EFI_STUB */
232 222
223
224/*
225 * Parse zoffset.h and find the entry points. We could just #include zoffset.h
226 * but that would mean tools/build would have to be rebuilt every time. It's
227 * not as if parsing it is hard...
228 */
229#define PARSE_ZOFS(p, sym) do { \
230 if (!strncmp(p, "#define ZO_" #sym " ", 11+sizeof(#sym))) \
231 sym = strtoul(p + 11 + sizeof(#sym), NULL, 16); \
232} while (0)
233
234static void parse_zoffset(char *fname)
235{
236 FILE *file;
237 char *p;
238 int c;
239
240 file = fopen(fname, "r");
241 if (!file)
242 die("Unable to open `%s': %m", fname);
243 c = fread(buf, 1, sizeof(buf) - 1, file);
244 if (ferror(file))
245 die("read-error on `zoffset.h'");
246 buf[c] = 0;
247
248 p = (char *)buf;
249
250 while (p && *p) {
251 PARSE_ZOFS(p, efi_stub_entry);
252 PARSE_ZOFS(p, efi_pe_entry);
253 PARSE_ZOFS(p, startup_64);
254
255 p = strchr(p, '\n');
256 while (p && (*p == '\r' || *p == '\n'))
257 p++;
258 }
259}
260
233int main(int argc, char ** argv) 261int main(int argc, char ** argv)
234{ 262{
235 unsigned int i, sz, setup_sectors; 263 unsigned int i, sz, setup_sectors;
@@ -241,7 +269,19 @@ int main(int argc, char ** argv)
241 void *kernel; 269 void *kernel;
242 u32 crc = 0xffffffffUL; 270 u32 crc = 0xffffffffUL;
243 271
244 if (argc != 3) 272 /* Defaults for old kernel */
273#ifdef CONFIG_X86_32
274 efi_pe_entry = 0x10;
275 efi_stub_entry = 0x30;
276#else
277 efi_pe_entry = 0x210;
278 efi_stub_entry = 0x230;
279 startup_64 = 0x200;
280#endif
281
282 if (argc == 4)
283 parse_zoffset(argv[3]);
284 else if (argc != 3)
245 usage(); 285 usage();
246 286
247 /* Copy the setup code */ 287 /* Copy the setup code */
@@ -299,6 +339,11 @@ int main(int argc, char ** argv)
299 339
300#ifdef CONFIG_EFI_STUB 340#ifdef CONFIG_EFI_STUB
301 update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); 341 update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz));
342
343#ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */
344 efi_stub_entry -= 0x200;
345#endif
346 put_unaligned_le32(efi_stub_entry, &buf[0x264]);
302#endif 347#endif
303 348
304 crc = partial_crc32(buf, i, crc); 349 crc = partial_crc32(buf, i, crc);
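
tools/build.c now reads the entry-point offsets out of zoffset.h instead of hard-coding them, falling back to the old constants when no third argument is given. The sketch below shows the shape of the line that the sed-zoffset rule in arch/x86/boot/Makefile emits and how the PARSE_ZOFS macro from the patch picks the hex value out of it; the sample value is invented.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned long efi_pe_entry, efi_stub_entry;

/*
 * Same matching trick as the patch: "#define ZO_" is 11 characters and
 * sizeof(#sym) is the symbol length plus one, so 11 + sizeof(#sym) spans
 * the whole "#define ZO_<sym> " prefix including the trailing space, and
 * strtoul starts right after it.
 */
#define PARSE_ZOFS(p, sym) do {						\
	if (!strncmp(p, "#define ZO_" #sym " ", 11 + sizeof(#sym)))	\
		sym = strtoul(p + 11 + sizeof(#sym), NULL, 16);		\
} while (0)

int main(void)
{
	/* what a generated zoffset.h line looks like (value invented) */
	char line[] = "#define ZO_efi_pe_entry 0x00000210";

	PARSE_ZOFS(line, efi_pe_entry);
	PARSE_ZOFS(line, efi_stub_entry);	/* no match, stays 0 */

	printf("efi_pe_entry = %#lx\n", efi_pe_entry);	/* 0x210 */
	return 0;
}
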
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 5598547281a7..94447086e551 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,3 +1,4 @@
1# CONFIG_64BIT is not set
1CONFIG_EXPERIMENTAL=y 2CONFIG_EXPERIMENTAL=y
2# CONFIG_LOCALVERSION_AUTO is not set 3# CONFIG_LOCALVERSION_AUTO is not set
3CONFIG_SYSVIPC=y 4CONFIG_SYSVIPC=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e0ca7c9ac383..63947a8f9f0f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
27 27
28obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o 28obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
29obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o 29obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
30obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
30 31
31aes-i586-y := aes-i586-asm_32.o aes_glue.o 32aes-i586-y := aes-i586-asm_32.o aes_glue.o
32twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o 33twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
@@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
52sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o 53sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
53crc32c-intel-y := crc32c-intel_glue.o 54crc32c-intel-y := crc32c-intel_glue.o
54crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o 55crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
56crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index b949ec2f9af4..2849dbc59e11 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -36,6 +36,7 @@
36.file "aes-i586-asm.S" 36.file "aes-i586-asm.S"
37.text 37.text
38 38
39#include <linux/linkage.h>
39#include <asm/asm-offsets.h> 40#include <asm/asm-offsets.h>
40 41
41#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) 42#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
@@ -219,14 +220,10 @@
219// AES (Rijndael) Encryption Subroutine 220// AES (Rijndael) Encryption Subroutine
220/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ 221/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
221 222
222.global aes_enc_blk
223
224.extern crypto_ft_tab 223.extern crypto_ft_tab
225.extern crypto_fl_tab 224.extern crypto_fl_tab
226 225
227.align 4 226ENTRY(aes_enc_blk)
228
229aes_enc_blk:
230 push %ebp 227 push %ebp
231 mov ctx(%esp),%ebp 228 mov ctx(%esp),%ebp
232 229
@@ -290,18 +287,15 @@ aes_enc_blk:
290 mov %r0,(%ebp) 287 mov %r0,(%ebp)
291 pop %ebp 288 pop %ebp
292 ret 289 ret
290ENDPROC(aes_enc_blk)
293 291
294// AES (Rijndael) Decryption Subroutine 292// AES (Rijndael) Decryption Subroutine
295/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ 293/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
296 294
297.global aes_dec_blk
298
299.extern crypto_it_tab 295.extern crypto_it_tab
300.extern crypto_il_tab 296.extern crypto_il_tab
301 297
302.align 4 298ENTRY(aes_dec_blk)
303
304aes_dec_blk:
305 push %ebp 299 push %ebp
306 mov ctx(%esp),%ebp 300 mov ctx(%esp),%ebp
307 301
@@ -365,3 +359,4 @@ aes_dec_blk:
365 mov %r0,(%ebp) 359 mov %r0,(%ebp)
366 pop %ebp 360 pop %ebp
367 ret 361 ret
362ENDPROC(aes_dec_blk)
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 5b577d5a059b..910565547163 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -15,6 +15,7 @@
15 15
16.text 16.text
17 17
18#include <linux/linkage.h>
18#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
19 20
20#define R1 %rax 21#define R1 %rax
@@ -49,10 +50,8 @@
49#define R11 %r11 50#define R11 %r11
50 51
51#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ 52#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
52 .global FUNC; \ 53 ENTRY(FUNC); \
53 .type FUNC,@function; \ 54 movq r1,r2; \
54 .align 8; \
55FUNC: movq r1,r2; \
56 movq r3,r4; \ 55 movq r3,r4; \
57 leaq KEY+48(r8),r9; \ 56 leaq KEY+48(r8),r9; \
58 movq r10,r11; \ 57 movq r10,r11; \
@@ -71,14 +70,15 @@ FUNC: movq r1,r2; \
71 je B192; \ 70 je B192; \
72 leaq 32(r9),r9; 71 leaq 32(r9),r9;
73 72
74#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ 73#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
75 movq r1,r2; \ 74 movq r1,r2; \
76 movq r3,r4; \ 75 movq r3,r4; \
77 movl r5 ## E,(r9); \ 76 movl r5 ## E,(r9); \
78 movl r6 ## E,4(r9); \ 77 movl r6 ## E,4(r9); \
79 movl r7 ## E,8(r9); \ 78 movl r7 ## E,8(r9); \
80 movl r8 ## E,12(r9); \ 79 movl r8 ## E,12(r9); \
81 ret; 80 ret; \
81 ENDPROC(FUNC);
82 82
83#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ 83#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
84 movzbl r2 ## H,r5 ## E; \ 84 movzbl r2 ## H,r5 ## E; \
@@ -133,7 +133,7 @@ FUNC: movq r1,r2; \
133#define entry(FUNC,KEY,B128,B192) \ 133#define entry(FUNC,KEY,B128,B192) \
134 prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) 134 prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
135 135
136#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) 136#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
137 137
138#define encrypt_round(TAB,OFFSET) \ 138#define encrypt_round(TAB,OFFSET) \
139 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ 139 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
@@ -151,12 +151,12 @@ FUNC: movq r1,r2; \
151 151
152/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ 152/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
153 153
154 entry(aes_enc_blk,0,enc128,enc192) 154 entry(aes_enc_blk,0,.Le128,.Le192)
155 encrypt_round(crypto_ft_tab,-96) 155 encrypt_round(crypto_ft_tab,-96)
156 encrypt_round(crypto_ft_tab,-80) 156 encrypt_round(crypto_ft_tab,-80)
157enc192: encrypt_round(crypto_ft_tab,-64) 157.Le192: encrypt_round(crypto_ft_tab,-64)
158 encrypt_round(crypto_ft_tab,-48) 158 encrypt_round(crypto_ft_tab,-48)
159enc128: encrypt_round(crypto_ft_tab,-32) 159.Le128: encrypt_round(crypto_ft_tab,-32)
160 encrypt_round(crypto_ft_tab,-16) 160 encrypt_round(crypto_ft_tab,-16)
161 encrypt_round(crypto_ft_tab, 0) 161 encrypt_round(crypto_ft_tab, 0)
162 encrypt_round(crypto_ft_tab, 16) 162 encrypt_round(crypto_ft_tab, 16)
@@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32)
166 encrypt_round(crypto_ft_tab, 80) 166 encrypt_round(crypto_ft_tab, 80)
167 encrypt_round(crypto_ft_tab, 96) 167 encrypt_round(crypto_ft_tab, 96)
168 encrypt_final(crypto_fl_tab,112) 168 encrypt_final(crypto_fl_tab,112)
169 return 169 return(aes_enc_blk)
170 170
171/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ 171/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
172 172
173 entry(aes_dec_blk,240,dec128,dec192) 173 entry(aes_dec_blk,240,.Ld128,.Ld192)
174 decrypt_round(crypto_it_tab,-96) 174 decrypt_round(crypto_it_tab,-96)
175 decrypt_round(crypto_it_tab,-80) 175 decrypt_round(crypto_it_tab,-80)
176dec192: decrypt_round(crypto_it_tab,-64) 176.Ld192: decrypt_round(crypto_it_tab,-64)
177 decrypt_round(crypto_it_tab,-48) 177 decrypt_round(crypto_it_tab,-48)
178dec128: decrypt_round(crypto_it_tab,-32) 178.Ld128: decrypt_round(crypto_it_tab,-32)
179 decrypt_round(crypto_it_tab,-16) 179 decrypt_round(crypto_it_tab,-16)
180 decrypt_round(crypto_it_tab, 0) 180 decrypt_round(crypto_it_tab, 0)
181 decrypt_round(crypto_it_tab, 16) 181 decrypt_round(crypto_it_tab, 16)
@@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32)
185 decrypt_round(crypto_it_tab, 80) 185 decrypt_round(crypto_it_tab, 80)
186 decrypt_round(crypto_it_tab, 96) 186 decrypt_round(crypto_it_tab, 96)
187 decrypt_final(crypto_il_tab,112) 187 decrypt_final(crypto_il_tab,112)
188 return 188 return(aes_dec_blk)
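The conversion in this file is mechanical but worth spelling out, since the same pattern repeats in every assembler file below: open-coded ".global FUNC; .type FUNC,@function; .align" prologues become ENTRY(FUNC), every function-ending "ret" gains a matching ENDPROC(FUNC) so the symbol gets a proper ELF type and size, and jump targets that are not entry points (enc128, dec192, __enc_xor and friends) are renamed with a ".L" prefix so the assembler treats them as local labels rather than exporting new symbols. As a rough sketch only, not a verbatim copy, the macros pulled in via <linux/linkage.h> expand approximately as follows (ALIGN is an architecture-defined alignment directive):

#ifndef ENTRY
#define ENTRY(name)	\
	.globl name;	\
	ALIGN;		\
	name:
#endif

#ifndef END
#define END(name)	\
	.size name, .-name
#endif

#ifndef ENDPROC
#define ENDPROC(name)		\
	.type name, @function;	\
	END(name)
#endif

With the type and size recorded by ENDPROC(), tools such as objdump and perf can attribute the assembler code to the correct function instead of lumping it into whatever symbol happens to precede it.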
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3470624d7835..04b797767b9e 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
1262* poly = x^128 + x^127 + x^126 + x^121 + 1 1262* poly = x^128 + x^127 + x^126 + x^121 + 1
1263* 1263*
1264*****************************************************************************/ 1264*****************************************************************************/
1265
1266ENTRY(aesni_gcm_dec) 1265ENTRY(aesni_gcm_dec)
1267 push %r12 1266 push %r12
1268 push %r13 1267 push %r13
@@ -1437,6 +1436,7 @@ _return_T_done_decrypt:
1437 pop %r13 1436 pop %r13
1438 pop %r12 1437 pop %r12
1439 ret 1438 ret
1439ENDPROC(aesni_gcm_dec)
1440 1440
1441 1441
1442/***************************************************************************** 1442/*****************************************************************************
@@ -1700,10 +1700,12 @@ _return_T_done_encrypt:
1700 pop %r13 1700 pop %r13
1701 pop %r12 1701 pop %r12
1702 ret 1702 ret
1703ENDPROC(aesni_gcm_enc)
1703 1704
1704#endif 1705#endif
1705 1706
1706 1707
1708.align 4
1707_key_expansion_128: 1709_key_expansion_128:
1708_key_expansion_256a: 1710_key_expansion_256a:
1709 pshufd $0b11111111, %xmm1, %xmm1 1711 pshufd $0b11111111, %xmm1, %xmm1
@@ -1715,6 +1717,8 @@ _key_expansion_256a:
1715 movaps %xmm0, (TKEYP) 1717 movaps %xmm0, (TKEYP)
1716 add $0x10, TKEYP 1718 add $0x10, TKEYP
1717 ret 1719 ret
1720ENDPROC(_key_expansion_128)
1721ENDPROC(_key_expansion_256a)
1718 1722
1719.align 4 1723.align 4
1720_key_expansion_192a: 1724_key_expansion_192a:
@@ -1739,6 +1743,7 @@ _key_expansion_192a:
1739 movaps %xmm1, 0x10(TKEYP) 1743 movaps %xmm1, 0x10(TKEYP)
1740 add $0x20, TKEYP 1744 add $0x20, TKEYP
1741 ret 1745 ret
1746ENDPROC(_key_expansion_192a)
1742 1747
1743.align 4 1748.align 4
1744_key_expansion_192b: 1749_key_expansion_192b:
@@ -1758,6 +1763,7 @@ _key_expansion_192b:
1758 movaps %xmm0, (TKEYP) 1763 movaps %xmm0, (TKEYP)
1759 add $0x10, TKEYP 1764 add $0x10, TKEYP
1760 ret 1765 ret
1766ENDPROC(_key_expansion_192b)
1761 1767
1762.align 4 1768.align 4
1763_key_expansion_256b: 1769_key_expansion_256b:
@@ -1770,6 +1776,7 @@ _key_expansion_256b:
1770 movaps %xmm2, (TKEYP) 1776 movaps %xmm2, (TKEYP)
1771 add $0x10, TKEYP 1777 add $0x10, TKEYP
1772 ret 1778 ret
1779ENDPROC(_key_expansion_256b)
1773 1780
1774/* 1781/*
1775 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, 1782 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key)
1882 popl KEYP 1889 popl KEYP
1883#endif 1890#endif
1884 ret 1891 ret
1892ENDPROC(aesni_set_key)
1885 1893
1886/* 1894/*
1887 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) 1895 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -1903,6 +1911,7 @@ ENTRY(aesni_enc)
1903 popl KEYP 1911 popl KEYP
1904#endif 1912#endif
1905 ret 1913 ret
1914ENDPROC(aesni_enc)
1906 1915
1907/* 1916/*
1908 * _aesni_enc1: internal ABI 1917 * _aesni_enc1: internal ABI
@@ -1960,6 +1969,7 @@ _aesni_enc1:
1960 movaps 0x70(TKEYP), KEY 1969 movaps 0x70(TKEYP), KEY
1961 AESENCLAST KEY STATE 1970 AESENCLAST KEY STATE
1962 ret 1971 ret
1972ENDPROC(_aesni_enc1)
1963 1973
1964/* 1974/*
1965 * _aesni_enc4: internal ABI 1975 * _aesni_enc4: internal ABI
@@ -2068,6 +2078,7 @@ _aesni_enc4:
2068 AESENCLAST KEY STATE3 2078 AESENCLAST KEY STATE3
2069 AESENCLAST KEY STATE4 2079 AESENCLAST KEY STATE4
2070 ret 2080 ret
2081ENDPROC(_aesni_enc4)
2071 2082
2072/* 2083/*
2073 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) 2084 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -2090,6 +2101,7 @@ ENTRY(aesni_dec)
2090 popl KEYP 2101 popl KEYP
2091#endif 2102#endif
2092 ret 2103 ret
2104ENDPROC(aesni_dec)
2093 2105
2094/* 2106/*
2095 * _aesni_dec1: internal ABI 2107 * _aesni_dec1: internal ABI
@@ -2147,6 +2159,7 @@ _aesni_dec1:
2147 movaps 0x70(TKEYP), KEY 2159 movaps 0x70(TKEYP), KEY
2148 AESDECLAST KEY STATE 2160 AESDECLAST KEY STATE
2149 ret 2161 ret
2162ENDPROC(_aesni_dec1)
2150 2163
2151/* 2164/*
2152 * _aesni_dec4: internal ABI 2165 * _aesni_dec4: internal ABI
@@ -2255,6 +2268,7 @@ _aesni_dec4:
2255 AESDECLAST KEY STATE3 2268 AESDECLAST KEY STATE3
2256 AESDECLAST KEY STATE4 2269 AESDECLAST KEY STATE4
2257 ret 2270 ret
2271ENDPROC(_aesni_dec4)
2258 2272
2259/* 2273/*
2260 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2274 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc)
2312 popl LEN 2326 popl LEN
2313#endif 2327#endif
2314 ret 2328 ret
2329ENDPROC(aesni_ecb_enc)
2315 2330
2316/* 2331/*
2317 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2332 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec)
2370 popl LEN 2385 popl LEN
2371#endif 2386#endif
2372 ret 2387 ret
2388ENDPROC(aesni_ecb_dec)
2373 2389
2374/* 2390/*
2375 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2391 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc)
2411 popl IVP 2427 popl IVP
2412#endif 2428#endif
2413 ret 2429 ret
2430ENDPROC(aesni_cbc_enc)
2414 2431
2415/* 2432/*
2416 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2433 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec)
2501 popl IVP 2518 popl IVP
2502#endif 2519#endif
2503 ret 2520 ret
2521ENDPROC(aesni_cbc_dec)
2504 2522
2505#ifdef __x86_64__ 2523#ifdef __x86_64__
2506.align 16 2524.align 16
@@ -2527,6 +2545,7 @@ _aesni_inc_init:
2527 MOVQ_R64_XMM TCTR_LOW INC 2545 MOVQ_R64_XMM TCTR_LOW INC
2528 MOVQ_R64_XMM CTR TCTR_LOW 2546 MOVQ_R64_XMM CTR TCTR_LOW
2529 ret 2547 ret
2548ENDPROC(_aesni_inc_init)
2530 2549
2531/* 2550/*
2532 * _aesni_inc: internal ABI 2551 * _aesni_inc: internal ABI
@@ -2555,6 +2574,7 @@ _aesni_inc:
2555 movaps CTR, IV 2574 movaps CTR, IV
2556 PSHUFB_XMM BSWAP_MASK IV 2575 PSHUFB_XMM BSWAP_MASK IV
2557 ret 2576 ret
2577ENDPROC(_aesni_inc)
2558 2578
2559/* 2579/*
2560 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2580 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc)
2615 movups IV, (IVP) 2635 movups IV, (IVP)
2616.Lctr_enc_just_ret: 2636.Lctr_enc_just_ret:
2617 ret 2637 ret
2638ENDPROC(aesni_ctr_enc)
2618#endif 2639#endif
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 1b9c22bea8a7..a0795da22c02 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -40,10 +40,6 @@
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/spinlock.h> 41#include <linux/spinlock.h>
42 42
43#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
44#define HAS_CTR
45#endif
46
47#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) 43#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
48#define HAS_PCBC 44#define HAS_PCBC
49#endif 45#endif
@@ -395,12 +391,6 @@ static int ablk_ctr_init(struct crypto_tfm *tfm)
395 return ablk_init_common(tfm, "__driver-ctr-aes-aesni"); 391 return ablk_init_common(tfm, "__driver-ctr-aes-aesni");
396} 392}
397 393
398#ifdef HAS_CTR
399static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
400{
401 return ablk_init_common(tfm, "rfc3686(__driver-ctr-aes-aesni)");
402}
403#endif
404#endif 394#endif
405 395
406#ifdef HAS_PCBC 396#ifdef HAS_PCBC
@@ -1158,33 +1148,6 @@ static struct crypto_alg aesni_algs[] = { {
1158 .maxauthsize = 16, 1148 .maxauthsize = 16,
1159 }, 1149 },
1160 }, 1150 },
1161#ifdef HAS_CTR
1162}, {
1163 .cra_name = "rfc3686(ctr(aes))",
1164 .cra_driver_name = "rfc3686-ctr-aes-aesni",
1165 .cra_priority = 400,
1166 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
1167 .cra_blocksize = 1,
1168 .cra_ctxsize = sizeof(struct async_helper_ctx),
1169 .cra_alignmask = 0,
1170 .cra_type = &crypto_ablkcipher_type,
1171 .cra_module = THIS_MODULE,
1172 .cra_init = ablk_rfc3686_ctr_init,
1173 .cra_exit = ablk_exit,
1174 .cra_u = {
1175 .ablkcipher = {
1176 .min_keysize = AES_MIN_KEY_SIZE +
1177 CTR_RFC3686_NONCE_SIZE,
1178 .max_keysize = AES_MAX_KEY_SIZE +
1179 CTR_RFC3686_NONCE_SIZE,
1180 .ivsize = CTR_RFC3686_IV_SIZE,
1181 .setkey = ablk_set_key,
1182 .encrypt = ablk_encrypt,
1183 .decrypt = ablk_decrypt,
1184 .geniv = "seqiv",
1185 },
1186 },
1187#endif
1188#endif 1151#endif
1189#ifdef HAS_PCBC 1152#ifdef HAS_PCBC
1190}, { 1153}, {
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 391d245dc086..246c67006ed0 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "blowfish-x86_64-asm.S" 25.file "blowfish-x86_64-asm.S"
24.text 26.text
25 27
@@ -116,11 +118,7 @@
116 bswapq RX0; \ 118 bswapq RX0; \
117 xorq RX0, (RIO); 119 xorq RX0, (RIO);
118 120
119.align 8 121ENTRY(__blowfish_enc_blk)
120.global __blowfish_enc_blk
121.type __blowfish_enc_blk,@function;
122
123__blowfish_enc_blk:
124 /* input: 122 /* input:
125 * %rdi: ctx, CTX 123 * %rdi: ctx, CTX
126 * %rsi: dst 124 * %rsi: dst
@@ -148,19 +146,16 @@ __blowfish_enc_blk:
148 146
149 movq %r10, RIO; 147 movq %r10, RIO;
150 test %cl, %cl; 148 test %cl, %cl;
151 jnz __enc_xor; 149 jnz .L__enc_xor;
152 150
153 write_block(); 151 write_block();
154 ret; 152 ret;
155__enc_xor: 153.L__enc_xor:
156 xor_block(); 154 xor_block();
157 ret; 155 ret;
156ENDPROC(__blowfish_enc_blk)
158 157
159.align 8 158ENTRY(blowfish_dec_blk)
160.global blowfish_dec_blk
161.type blowfish_dec_blk,@function;
162
163blowfish_dec_blk:
164 /* input: 159 /* input:
165 * %rdi: ctx, CTX 160 * %rdi: ctx, CTX
166 * %rsi: dst 161 * %rsi: dst
@@ -189,6 +184,7 @@ blowfish_dec_blk:
189 movq %r11, %rbp; 184 movq %r11, %rbp;
190 185
191 ret; 186 ret;
187ENDPROC(blowfish_dec_blk)
192 188
193/********************************************************************** 189/**********************************************************************
194 4-way blowfish, four blocks parallel 190 4-way blowfish, four blocks parallel
@@ -300,11 +296,7 @@ blowfish_dec_blk:
300 bswapq RX3; \ 296 bswapq RX3; \
301 xorq RX3, 24(RIO); 297 xorq RX3, 24(RIO);
302 298
303.align 8 299ENTRY(__blowfish_enc_blk_4way)
304.global __blowfish_enc_blk_4way
305.type __blowfish_enc_blk_4way,@function;
306
307__blowfish_enc_blk_4way:
308 /* input: 300 /* input:
309 * %rdi: ctx, CTX 301 * %rdi: ctx, CTX
310 * %rsi: dst 302 * %rsi: dst
@@ -336,7 +328,7 @@ __blowfish_enc_blk_4way:
336 movq %r11, RIO; 328 movq %r11, RIO;
337 329
338 test %bpl, %bpl; 330 test %bpl, %bpl;
339 jnz __enc_xor4; 331 jnz .L__enc_xor4;
340 332
341 write_block4(); 333 write_block4();
342 334
@@ -344,18 +336,15 @@ __blowfish_enc_blk_4way:
344 popq %rbp; 336 popq %rbp;
345 ret; 337 ret;
346 338
347__enc_xor4: 339.L__enc_xor4:
348 xor_block4(); 340 xor_block4();
349 341
350 popq %rbx; 342 popq %rbx;
351 popq %rbp; 343 popq %rbp;
352 ret; 344 ret;
345ENDPROC(__blowfish_enc_blk_4way)
353 346
354.align 8 347ENTRY(blowfish_dec_blk_4way)
355.global blowfish_dec_blk_4way
356.type blowfish_dec_blk_4way,@function;
357
358blowfish_dec_blk_4way:
359 /* input: 348 /* input:
360 * %rdi: ctx, CTX 349 * %rdi: ctx, CTX
361 * %rsi: dst 350 * %rsi: dst
@@ -387,4 +376,4 @@ blowfish_dec_blk_4way:
387 popq %rbp; 376 popq %rbp;
388 377
389 ret; 378 ret;
390 379ENDPROC(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index 2306d2e4816f..cfc163469c71 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -15,6 +15,8 @@
15 * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz 15 * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
16 */ 16 */
17 17
18#include <linux/linkage.h>
19
18#define CAMELLIA_TABLE_BYTE_LEN 272 20#define CAMELLIA_TABLE_BYTE_LEN 272
19 21
20/* struct camellia_ctx: */ 22/* struct camellia_ctx: */
@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
190 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, 192 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
191 %rcx, (%r9)); 193 %rcx, (%r9));
192 ret; 194 ret;
195ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
193 196
194.align 8 197.align 8
195roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: 198roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
197 %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, 200 %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
198 %rax, (%r9)); 201 %rax, (%r9));
199 ret; 202 ret;
203ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
200 204
201/* 205/*
202 * IN/OUT: 206 * IN/OUT:
@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
709.text 713.text
710 714
711.align 8 715.align 8
712.type __camellia_enc_blk16,@function;
713
714__camellia_enc_blk16: 716__camellia_enc_blk16:
715 /* input: 717 /* input:
716 * %rdi: ctx, CTX 718 * %rdi: ctx, CTX
@@ -793,10 +795,9 @@ __camellia_enc_blk16:
793 %xmm15, %rax, %rcx, 24); 795 %xmm15, %rax, %rcx, 24);
794 796
795 jmp .Lenc_done; 797 jmp .Lenc_done;
798ENDPROC(__camellia_enc_blk16)
796 799
797.align 8 800.align 8
798.type __camellia_dec_blk16,@function;
799
800__camellia_dec_blk16: 801__camellia_dec_blk16:
801 /* input: 802 /* input:
802 * %rdi: ctx, CTX 803 * %rdi: ctx, CTX
@@ -877,12 +878,9 @@ __camellia_dec_blk16:
877 ((key_table + (24) * 8) + 4)(CTX)); 878 ((key_table + (24) * 8) + 4)(CTX));
878 879
879 jmp .Ldec_max24; 880 jmp .Ldec_max24;
881ENDPROC(__camellia_dec_blk16)
880 882
881.align 8 883ENTRY(camellia_ecb_enc_16way)
882.global camellia_ecb_enc_16way
883.type camellia_ecb_enc_16way,@function;
884
885camellia_ecb_enc_16way:
886 /* input: 884 /* input:
887 * %rdi: ctx, CTX 885 * %rdi: ctx, CTX
888 * %rsi: dst (16 blocks) 886 * %rsi: dst (16 blocks)
@@ -903,12 +901,9 @@ camellia_ecb_enc_16way:
903 %xmm8, %rsi); 901 %xmm8, %rsi);
904 902
905 ret; 903 ret;
904ENDPROC(camellia_ecb_enc_16way)
906 905
907.align 8 906ENTRY(camellia_ecb_dec_16way)
908.global camellia_ecb_dec_16way
909.type camellia_ecb_dec_16way,@function;
910
911camellia_ecb_dec_16way:
912 /* input: 907 /* input:
913 * %rdi: ctx, CTX 908 * %rdi: ctx, CTX
914 * %rsi: dst (16 blocks) 909 * %rsi: dst (16 blocks)
@@ -934,12 +929,9 @@ camellia_ecb_dec_16way:
934 %xmm8, %rsi); 929 %xmm8, %rsi);
935 930
936 ret; 931 ret;
932ENDPROC(camellia_ecb_dec_16way)
937 933
938.align 8 934ENTRY(camellia_cbc_dec_16way)
939.global camellia_cbc_dec_16way
940.type camellia_cbc_dec_16way,@function;
941
942camellia_cbc_dec_16way:
943 /* input: 935 /* input:
944 * %rdi: ctx, CTX 936 * %rdi: ctx, CTX
945 * %rsi: dst (16 blocks) 937 * %rsi: dst (16 blocks)
@@ -986,6 +978,7 @@ camellia_cbc_dec_16way:
986 %xmm8, %rsi); 978 %xmm8, %rsi);
987 979
988 ret; 980 ret;
981ENDPROC(camellia_cbc_dec_16way)
989 982
990#define inc_le128(x, minus_one, tmp) \ 983#define inc_le128(x, minus_one, tmp) \
991 vpcmpeqq minus_one, x, tmp; \ 984 vpcmpeqq minus_one, x, tmp; \
@@ -993,11 +986,7 @@ camellia_cbc_dec_16way:
993 vpslldq $8, tmp, tmp; \ 986 vpslldq $8, tmp, tmp; \
994 vpsubq tmp, x, x; 987 vpsubq tmp, x, x;
995 988
996.align 8 989ENTRY(camellia_ctr_16way)
997.global camellia_ctr_16way
998.type camellia_ctr_16way,@function;
999
1000camellia_ctr_16way:
1001 /* input: 990 /* input:
1002 * %rdi: ctx, CTX 991 * %rdi: ctx, CTX
1003 * %rsi: dst (16 blocks) 992 * %rsi: dst (16 blocks)
@@ -1100,3 +1089,4 @@ camellia_ctr_16way:
1100 %xmm8, %rsi); 1089 %xmm8, %rsi);
1101 1090
1102 ret; 1091 ret;
1092ENDPROC(camellia_ctr_16way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 0b3374335fdc..310319c601ed 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "camellia-x86_64-asm_64.S" 25.file "camellia-x86_64-asm_64.S"
24.text 26.text
25 27
@@ -188,10 +190,7 @@
188 bswapq RAB0; \ 190 bswapq RAB0; \
189 movq RAB0, 4*2(RIO); 191 movq RAB0, 4*2(RIO);
190 192
191.global __camellia_enc_blk; 193ENTRY(__camellia_enc_blk)
192.type __camellia_enc_blk,@function;
193
194__camellia_enc_blk:
195 /* input: 194 /* input:
196 * %rdi: ctx, CTX 195 * %rdi: ctx, CTX
197 * %rsi: dst 196 * %rsi: dst
@@ -214,33 +213,31 @@ __camellia_enc_blk:
214 movl $24, RT1d; /* max */ 213 movl $24, RT1d; /* max */
215 214
216 cmpb $16, key_length(CTX); 215 cmpb $16, key_length(CTX);
217 je __enc_done; 216 je .L__enc_done;
218 217
219 enc_fls(24); 218 enc_fls(24);
220 enc_rounds(24); 219 enc_rounds(24);
221 movl $32, RT1d; /* max */ 220 movl $32, RT1d; /* max */
222 221
223__enc_done: 222.L__enc_done:
224 testb RXORbl, RXORbl; 223 testb RXORbl, RXORbl;
225 movq RDST, RIO; 224 movq RDST, RIO;
226 225
227 jnz __enc_xor; 226 jnz .L__enc_xor;
228 227
229 enc_outunpack(mov, RT1); 228 enc_outunpack(mov, RT1);
230 229
231 movq RRBP, %rbp; 230 movq RRBP, %rbp;
232 ret; 231 ret;
233 232
234__enc_xor: 233.L__enc_xor:
235 enc_outunpack(xor, RT1); 234 enc_outunpack(xor, RT1);
236 235
237 movq RRBP, %rbp; 236 movq RRBP, %rbp;
238 ret; 237 ret;
238ENDPROC(__camellia_enc_blk)
239 239
240.global camellia_dec_blk; 240ENTRY(camellia_dec_blk)
241.type camellia_dec_blk,@function;
242
243camellia_dec_blk:
244 /* input: 241 /* input:
245 * %rdi: ctx, CTX 242 * %rdi: ctx, CTX
246 * %rsi: dst 243 * %rsi: dst
@@ -258,12 +255,12 @@ camellia_dec_blk:
258 dec_inpack(RT2); 255 dec_inpack(RT2);
259 256
260 cmpb $24, RT2bl; 257 cmpb $24, RT2bl;
261 je __dec_rounds16; 258 je .L__dec_rounds16;
262 259
263 dec_rounds(24); 260 dec_rounds(24);
264 dec_fls(24); 261 dec_fls(24);
265 262
266__dec_rounds16: 263.L__dec_rounds16:
267 dec_rounds(16); 264 dec_rounds(16);
268 dec_fls(16); 265 dec_fls(16);
269 dec_rounds(8); 266 dec_rounds(8);
@@ -276,6 +273,7 @@ __dec_rounds16:
276 273
277 movq RRBP, %rbp; 274 movq RRBP, %rbp;
278 ret; 275 ret;
276ENDPROC(camellia_dec_blk)
279 277
280/********************************************************************** 278/**********************************************************************
281 2-way camellia 279 2-way camellia
@@ -426,10 +424,7 @@ __dec_rounds16:
426 bswapq RAB1; \ 424 bswapq RAB1; \
427 movq RAB1, 12*2(RIO); 425 movq RAB1, 12*2(RIO);
428 426
429.global __camellia_enc_blk_2way; 427ENTRY(__camellia_enc_blk_2way)
430.type __camellia_enc_blk_2way,@function;
431
432__camellia_enc_blk_2way:
433 /* input: 428 /* input:
434 * %rdi: ctx, CTX 429 * %rdi: ctx, CTX
435 * %rsi: dst 430 * %rsi: dst
@@ -453,16 +448,16 @@ __camellia_enc_blk_2way:
453 movl $24, RT2d; /* max */ 448 movl $24, RT2d; /* max */
454 449
455 cmpb $16, key_length(CTX); 450 cmpb $16, key_length(CTX);
456 je __enc2_done; 451 je .L__enc2_done;
457 452
458 enc_fls2(24); 453 enc_fls2(24);
459 enc_rounds2(24); 454 enc_rounds2(24);
460 movl $32, RT2d; /* max */ 455 movl $32, RT2d; /* max */
461 456
462__enc2_done: 457.L__enc2_done:
463 test RXORbl, RXORbl; 458 test RXORbl, RXORbl;
464 movq RDST, RIO; 459 movq RDST, RIO;
465 jnz __enc2_xor; 460 jnz .L__enc2_xor;
466 461
467 enc_outunpack2(mov, RT2); 462 enc_outunpack2(mov, RT2);
468 463
@@ -470,17 +465,15 @@ __enc2_done:
470 popq %rbx; 465 popq %rbx;
471 ret; 466 ret;
472 467
473__enc2_xor: 468.L__enc2_xor:
474 enc_outunpack2(xor, RT2); 469 enc_outunpack2(xor, RT2);
475 470
476 movq RRBP, %rbp; 471 movq RRBP, %rbp;
477 popq %rbx; 472 popq %rbx;
478 ret; 473 ret;
474ENDPROC(__camellia_enc_blk_2way)
479 475
480.global camellia_dec_blk_2way; 476ENTRY(camellia_dec_blk_2way)
481.type camellia_dec_blk_2way,@function;
482
483camellia_dec_blk_2way:
484 /* input: 477 /* input:
485 * %rdi: ctx, CTX 478 * %rdi: ctx, CTX
486 * %rsi: dst 479 * %rsi: dst
@@ -499,12 +492,12 @@ camellia_dec_blk_2way:
499 dec_inpack2(RT2); 492 dec_inpack2(RT2);
500 493
501 cmpb $24, RT2bl; 494 cmpb $24, RT2bl;
502 je __dec2_rounds16; 495 je .L__dec2_rounds16;
503 496
504 dec_rounds2(24); 497 dec_rounds2(24);
505 dec_fls2(24); 498 dec_fls2(24);
506 499
507__dec2_rounds16: 500.L__dec2_rounds16:
508 dec_rounds2(16); 501 dec_rounds2(16);
509 dec_fls2(16); 502 dec_fls2(16);
510 dec_rounds2(8); 503 dec_rounds2(8);
@@ -518,3 +511,4 @@ __dec2_rounds16:
518 movq RRBP, %rbp; 511 movq RRBP, %rbp;
519 movq RXOR, %rbx; 512 movq RXOR, %rbx;
520 ret; 513 ret;
514ENDPROC(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 15b00ac7cbd3..c35fd5d6ecd2 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -23,6 +23,8 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
27
26.file "cast5-avx-x86_64-asm_64.S" 28.file "cast5-avx-x86_64-asm_64.S"
27 29
28.extern cast_s1 30.extern cast_s1
@@ -211,8 +213,6 @@
211.text 213.text
212 214
213.align 16 215.align 16
214.type __cast5_enc_blk16,@function;
215
216__cast5_enc_blk16: 216__cast5_enc_blk16:
217 /* input: 217 /* input:
218 * %rdi: ctx, CTX 218 * %rdi: ctx, CTX
@@ -263,14 +263,14 @@ __cast5_enc_blk16:
263 263
264 movzbl rr(CTX), %eax; 264 movzbl rr(CTX), %eax;
265 testl %eax, %eax; 265 testl %eax, %eax;
266 jnz __skip_enc; 266 jnz .L__skip_enc;
267 267
268 round(RL, RR, 12, 1); 268 round(RL, RR, 12, 1);
269 round(RR, RL, 13, 2); 269 round(RR, RL, 13, 2);
270 round(RL, RR, 14, 3); 270 round(RL, RR, 14, 3);
271 round(RR, RL, 15, 1); 271 round(RR, RL, 15, 1);
272 272
273__skip_enc: 273.L__skip_enc:
274 popq %rbx; 274 popq %rbx;
275 popq %rbp; 275 popq %rbp;
276 276
@@ -282,10 +282,9 @@ __skip_enc:
282 outunpack_blocks(RR4, RL4, RTMP, RX, RKM); 282 outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
283 283
284 ret; 284 ret;
285ENDPROC(__cast5_enc_blk16)
285 286
286.align 16 287.align 16
287.type __cast5_dec_blk16,@function;
288
289__cast5_dec_blk16: 288__cast5_dec_blk16:
290 /* input: 289 /* input:
291 * %rdi: ctx, CTX 290 * %rdi: ctx, CTX
@@ -323,14 +322,14 @@ __cast5_dec_blk16:
323 322
324 movzbl rr(CTX), %eax; 323 movzbl rr(CTX), %eax;
325 testl %eax, %eax; 324 testl %eax, %eax;
326 jnz __skip_dec; 325 jnz .L__skip_dec;
327 326
328 round(RL, RR, 15, 1); 327 round(RL, RR, 15, 1);
329 round(RR, RL, 14, 3); 328 round(RR, RL, 14, 3);
330 round(RL, RR, 13, 2); 329 round(RL, RR, 13, 2);
331 round(RR, RL, 12, 1); 330 round(RR, RL, 12, 1);
332 331
333__dec_tail: 332.L__dec_tail:
334 round(RL, RR, 11, 3); 333 round(RL, RR, 11, 3);
335 round(RR, RL, 10, 2); 334 round(RR, RL, 10, 2);
336 round(RL, RR, 9, 1); 335 round(RL, RR, 9, 1);
@@ -355,15 +354,12 @@ __dec_tail:
355 354
356 ret; 355 ret;
357 356
358__skip_dec: 357.L__skip_dec:
359 vpsrldq $4, RKR, RKR; 358 vpsrldq $4, RKR, RKR;
360 jmp __dec_tail; 359 jmp .L__dec_tail;
360ENDPROC(__cast5_dec_blk16)
361 361
362.align 16 362ENTRY(cast5_ecb_enc_16way)
363.global cast5_ecb_enc_16way
364.type cast5_ecb_enc_16way,@function;
365
366cast5_ecb_enc_16way:
367 /* input: 363 /* input:
368 * %rdi: ctx, CTX 364 * %rdi: ctx, CTX
369 * %rsi: dst 365 * %rsi: dst
@@ -393,12 +389,9 @@ cast5_ecb_enc_16way:
393 vmovdqu RL4, (7*4*4)(%r11); 389 vmovdqu RL4, (7*4*4)(%r11);
394 390
395 ret; 391 ret;
392ENDPROC(cast5_ecb_enc_16way)
396 393
397.align 16 394ENTRY(cast5_ecb_dec_16way)
398.global cast5_ecb_dec_16way
399.type cast5_ecb_dec_16way,@function;
400
401cast5_ecb_dec_16way:
402 /* input: 395 /* input:
403 * %rdi: ctx, CTX 396 * %rdi: ctx, CTX
404 * %rsi: dst 397 * %rsi: dst
@@ -428,12 +421,9 @@ cast5_ecb_dec_16way:
428 vmovdqu RL4, (7*4*4)(%r11); 421 vmovdqu RL4, (7*4*4)(%r11);
429 422
430 ret; 423 ret;
424ENDPROC(cast5_ecb_dec_16way)
431 425
432.align 16 426ENTRY(cast5_cbc_dec_16way)
433.global cast5_cbc_dec_16way
434.type cast5_cbc_dec_16way,@function;
435
436cast5_cbc_dec_16way:
437 /* input: 427 /* input:
438 * %rdi: ctx, CTX 428 * %rdi: ctx, CTX
439 * %rsi: dst 429 * %rsi: dst
@@ -480,12 +470,9 @@ cast5_cbc_dec_16way:
480 popq %r12; 470 popq %r12;
481 471
482 ret; 472 ret;
473ENDPROC(cast5_cbc_dec_16way)
483 474
484.align 16 475ENTRY(cast5_ctr_16way)
485.global cast5_ctr_16way
486.type cast5_ctr_16way,@function;
487
488cast5_ctr_16way:
489 /* input: 476 /* input:
490 * %rdi: ctx, CTX 477 * %rdi: ctx, CTX
491 * %rsi: dst 478 * %rsi: dst
@@ -556,3 +543,4 @@ cast5_ctr_16way:
556 popq %r12; 543 popq %r12;
557 544
558 ret; 545 ret;
546ENDPROC(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 2569d0da841f..f93b6105a0ce 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -23,6 +23,7 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
26#include "glue_helper-asm-avx.S" 27#include "glue_helper-asm-avx.S"
27 28
28.file "cast6-avx-x86_64-asm_64.S" 29.file "cast6-avx-x86_64-asm_64.S"
@@ -250,8 +251,6 @@
250.text 251.text
251 252
252.align 8 253.align 8
253.type __cast6_enc_blk8,@function;
254
255__cast6_enc_blk8: 254__cast6_enc_blk8:
256 /* input: 255 /* input:
257 * %rdi: ctx, CTX 256 * %rdi: ctx, CTX
@@ -295,10 +294,9 @@ __cast6_enc_blk8:
295 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); 294 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
296 295
297 ret; 296 ret;
297ENDPROC(__cast6_enc_blk8)
298 298
299.align 8 299.align 8
300.type __cast6_dec_blk8,@function;
301
302__cast6_dec_blk8: 300__cast6_dec_blk8:
303 /* input: 301 /* input:
304 * %rdi: ctx, CTX 302 * %rdi: ctx, CTX
@@ -341,12 +339,9 @@ __cast6_dec_blk8:
341 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); 339 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
342 340
343 ret; 341 ret;
342ENDPROC(__cast6_dec_blk8)
344 343
345.align 8 344ENTRY(cast6_ecb_enc_8way)
346.global cast6_ecb_enc_8way
347.type cast6_ecb_enc_8way,@function;
348
349cast6_ecb_enc_8way:
350 /* input: 345 /* input:
351 * %rdi: ctx, CTX 346 * %rdi: ctx, CTX
352 * %rsi: dst 347 * %rsi: dst
@@ -362,12 +357,9 @@ cast6_ecb_enc_8way:
362 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 357 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
363 358
364 ret; 359 ret;
360ENDPROC(cast6_ecb_enc_8way)
365 361
366.align 8 362ENTRY(cast6_ecb_dec_8way)
367.global cast6_ecb_dec_8way
368.type cast6_ecb_dec_8way,@function;
369
370cast6_ecb_dec_8way:
371 /* input: 363 /* input:
372 * %rdi: ctx, CTX 364 * %rdi: ctx, CTX
373 * %rsi: dst 365 * %rsi: dst
@@ -383,12 +375,9 @@ cast6_ecb_dec_8way:
383 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 375 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
384 376
385 ret; 377 ret;
378ENDPROC(cast6_ecb_dec_8way)
386 379
387.align 8 380ENTRY(cast6_cbc_dec_8way)
388.global cast6_cbc_dec_8way
389.type cast6_cbc_dec_8way,@function;
390
391cast6_cbc_dec_8way:
392 /* input: 381 /* input:
393 * %rdi: ctx, CTX 382 * %rdi: ctx, CTX
394 * %rsi: dst 383 * %rsi: dst
@@ -409,12 +398,9 @@ cast6_cbc_dec_8way:
409 popq %r12; 398 popq %r12;
410 399
411 ret; 400 ret;
401ENDPROC(cast6_cbc_dec_8way)
412 402
413.align 8 403ENTRY(cast6_ctr_8way)
414.global cast6_ctr_8way
415.type cast6_ctr_8way,@function;
416
417cast6_ctr_8way:
418 /* input: 404 /* input:
419 * %rdi: ctx, CTX 405 * %rdi: ctx, CTX
420 * %rsi: dst 406 * %rsi: dst
@@ -437,3 +423,4 @@ cast6_ctr_8way:
437 popq %r12; 423 popq %r12;
438 424
439 ret; 425 ret;
426ENDPROC(cast6_ctr_8way)
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
new file mode 100644
index 000000000000..c8335014a044
--- /dev/null
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -0,0 +1,246 @@
1/* GPL HEADER START
2 *
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License version 2 for more details (a copy is included
13 * in the LICENSE file that accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see http://www.gnu.org/licenses
17 *
18 * Please visit http://www.xyratex.com/contact if you need additional
19 * information or have any questions.
20 *
21 * GPL HEADER END
22 */
23
24/*
25 * Copyright 2012 Xyratex Technology Limited
26 *
27 * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
28 * calculation.
29 * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
30 * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
31 * at:
32 * http://www.intel.com/products/processor/manuals/
33 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
34 * Volume 2B: Instruction Set Reference, N-Z
35 *
36 * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
37 * Alexander Boyko <Alexander_Boyko@xyratex.com>
38 */
39
40#include <linux/linkage.h>
41#include <asm/inst.h>
42
43
44.align 16
45/*
46 * [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
47 * #define CONSTANT_R1 0x154442bd4LL
48 *
49 * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
50 * #define CONSTANT_R2 0x1c6e41596LL
51 */
52.Lconstant_R2R1:
53 .octa 0x00000001c6e415960000000154442bd4
54/*
55 * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
56 * #define CONSTANT_R3 0x1751997d0LL
57 *
58 * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
59 * #define CONSTANT_R4 0x0ccaa009eLL
60 */
61.Lconstant_R4R3:
62 .octa 0x00000000ccaa009e00000001751997d0
63/*
64 * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
65 * #define CONSTANT_R5 0x163cd6124LL
66 */
67.Lconstant_R5:
68 .octa 0x00000000000000000000000163cd6124
69.Lconstant_mask32:
70 .octa 0x000000000000000000000000FFFFFFFF
71/*
72 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
73 *
74 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
75 * #define CONSTANT_RU 0x1F7011641LL
76 */
77.Lconstant_RUpoly:
78 .octa 0x00000001F701164100000001DB710641
79
80#define CONSTANT %xmm0
81
82#ifdef __x86_64__
83#define BUF %rdi
84#define LEN %rsi
85#define CRC %edx
86#else
87#define BUF %eax
88#define LEN %edx
89#define CRC %ecx
90#endif
91
92
93
94.text
95/**
96 * Calculate crc32
97 * BUF - buffer (16 bytes aligned)
98 * LEN - size of the buffer (16 bytes aligned), LEN should be greater than 63
99 * CRC - initial crc32
100 * return %eax crc32
101 * uint crc32_pclmul_le_16(unsigned char const *buffer,
102 * size_t len, uint crc32)
103 */
104.globl crc32_pclmul_le_16
105.align 4, 0x90
106crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
107 movdqa (BUF), %xmm1
108 movdqa 0x10(BUF), %xmm2
109 movdqa 0x20(BUF), %xmm3
110 movdqa 0x30(BUF), %xmm4
111 movd CRC, CONSTANT
112 pxor CONSTANT, %xmm1
113 sub $0x40, LEN
114 add $0x40, BUF
115#ifndef __x86_64__
116 /* This is for position independent code(-fPIC) support for 32bit */
117 call delta
118delta:
119 pop %ecx
120#endif
121 cmp $0x40, LEN
122 jb less_64
123
124#ifdef __x86_64__
125 movdqa .Lconstant_R2R1(%rip), CONSTANT
126#else
127 movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
128#endif
129
130loop_64:/* 64 bytes Full cache line folding */
131 prefetchnta 0x40(BUF)
132 movdqa %xmm1, %xmm5
133 movdqa %xmm2, %xmm6
134 movdqa %xmm3, %xmm7
135#ifdef __x86_64__
136 movdqa %xmm4, %xmm8
137#endif
138 PCLMULQDQ 00, CONSTANT, %xmm1
139 PCLMULQDQ 00, CONSTANT, %xmm2
140 PCLMULQDQ 00, CONSTANT, %xmm3
141#ifdef __x86_64__
142 PCLMULQDQ 00, CONSTANT, %xmm4
143#endif
144 PCLMULQDQ 0x11, CONSTANT, %xmm5
145 PCLMULQDQ 0x11, CONSTANT, %xmm6
146 PCLMULQDQ 0x11, CONSTANT, %xmm7
147#ifdef __x86_64__
148 PCLMULQDQ 0x11, CONSTANT, %xmm8
149#endif
150 pxor %xmm5, %xmm1
151 pxor %xmm6, %xmm2
152 pxor %xmm7, %xmm3
153#ifdef __x86_64__
154 pxor %xmm8, %xmm4
155#else
156 /* xmm8 unsupported for x32 */
157 movdqa %xmm4, %xmm5
158 PCLMULQDQ 00, CONSTANT, %xmm4
159 PCLMULQDQ 0x11, CONSTANT, %xmm5
160 pxor %xmm5, %xmm4
161#endif
162
163 pxor (BUF), %xmm1
164 pxor 0x10(BUF), %xmm2
165 pxor 0x20(BUF), %xmm3
166 pxor 0x30(BUF), %xmm4
167
168 sub $0x40, LEN
169 add $0x40, BUF
170 cmp $0x40, LEN
171 jge loop_64
172less_64:/* Folding cache line into 128bit */
173#ifdef __x86_64__
174 movdqa .Lconstant_R4R3(%rip), CONSTANT
175#else
176 movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT
177#endif
178 prefetchnta (BUF)
179
180 movdqa %xmm1, %xmm5
181 PCLMULQDQ 0x00, CONSTANT, %xmm1
182 PCLMULQDQ 0x11, CONSTANT, %xmm5
183 pxor %xmm5, %xmm1
184 pxor %xmm2, %xmm1
185
186 movdqa %xmm1, %xmm5
187 PCLMULQDQ 0x00, CONSTANT, %xmm1
188 PCLMULQDQ 0x11, CONSTANT, %xmm5
189 pxor %xmm5, %xmm1
190 pxor %xmm3, %xmm1
191
192 movdqa %xmm1, %xmm5
193 PCLMULQDQ 0x00, CONSTANT, %xmm1
194 PCLMULQDQ 0x11, CONSTANT, %xmm5
195 pxor %xmm5, %xmm1
196 pxor %xmm4, %xmm1
197
198 cmp $0x10, LEN
199 jb fold_64
200loop_16:/* Folding rest buffer into 128bit */
201 movdqa %xmm1, %xmm5
202 PCLMULQDQ 0x00, CONSTANT, %xmm1
203 PCLMULQDQ 0x11, CONSTANT, %xmm5
204 pxor %xmm5, %xmm1
205 pxor (BUF), %xmm1
206 sub $0x10, LEN
207 add $0x10, BUF
208 cmp $0x10, LEN
209 jge loop_16
210
211fold_64:
212 /* perform the last 64 bit fold, also adds 32 zeroes
213 * to the input stream */
214 PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
215 psrldq $0x08, %xmm1
216 pxor CONSTANT, %xmm1
217
218 /* final 32-bit fold */
219 movdqa %xmm1, %xmm2
220#ifdef __x86_64__
221 movdqa .Lconstant_R5(%rip), CONSTANT
222 movdqa .Lconstant_mask32(%rip), %xmm3
223#else
224 movdqa .Lconstant_R5 - delta(%ecx), CONSTANT
225 movdqa .Lconstant_mask32 - delta(%ecx), %xmm3
226#endif
227 psrldq $0x04, %xmm2
228 pand %xmm3, %xmm1
229 PCLMULQDQ 0x00, CONSTANT, %xmm1
230 pxor %xmm2, %xmm1
231
232 /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
233#ifdef __x86_64__
234 movdqa .Lconstant_RUpoly(%rip), CONSTANT
235#else
236 movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT
237#endif
238 movdqa %xmm1, %xmm2
239 pand %xmm3, %xmm1
240 PCLMULQDQ 0x10, CONSTANT, %xmm1
241 pand %xmm3, %xmm1
242 PCLMULQDQ 0x00, CONSTANT, %xmm1
243 pxor %xmm2, %xmm1
244 pextrd $0x01, %xmm1, %eax
245
246 ret
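The comments in this new file describe the constants R1..R4 as residues of the form x^k mod P(x), bit-reflected and shifted left by one (hence the trailing ' and "<< 1"): each 64-byte loop iteration folds four 128-bit registers forward across the stride with one PCLMULQDQ per 64-bit half, the 16-byte loop does the same with R4/R3, and R5 plus the Barrett constant RU reduce the remaining 64 bits down to the 32-bit CRC. The result is bit-for-bit the same value as the kernel's generic crc32_le(). As an illustrative reference only, not code from this patch, the scalar equivalent looks roughly like this:

/*
 * Illustrative bit-at-a-time CRC-32 (reflected, polynomial 0xEDB88320),
 * matching the crc32_le() convention this driver assumes: no pre- or
 * post-inversion, the caller supplies the seed.
 */
#include <stdint.h>
#include <stddef.h>

static uint32_t crc32_le_ref(uint32_t crc, const unsigned char *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1u));
	}
	return crc;
}

The PCLMULQDQ path only takes over for 16-byte-aligned data and lengths of at least 64 bytes; everything else falls back to this kind of table- or bit-driven computation, as the glue code below shows.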
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
new file mode 100644
index 000000000000..9d014a74ef96
--- /dev/null
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -0,0 +1,201 @@
1/* GPL HEADER START
2 *
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License version 2 for more details (a copy is included
13 * in the LICENSE file that accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see http://www.gnu.org/licenses
17 *
18 * Please visit http://www.xyratex.com/contact if you need additional
19 * information or have any questions.
20 *
21 * GPL HEADER END
22 */
23
24/*
25 * Copyright 2012 Xyratex Technology Limited
26 *
27 * Wrappers for the kernel crypto shash API around the PCLMULQDQ crc32 implementation.
28 */
29#include <linux/init.h>
30#include <linux/module.h>
31#include <linux/string.h>
32#include <linux/kernel.h>
33#include <linux/crc32.h>
34#include <crypto/internal/hash.h>
35
36#include <asm/cpufeature.h>
37#include <asm/cpu_device_id.h>
38#include <asm/i387.h>
39
40#define CHKSUM_BLOCK_SIZE 1
41#define CHKSUM_DIGEST_SIZE 4
42
43#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
44 * for crc32_pclmul_le_16 */
45#define SCALE_F 16L /* size of xmm register */
46#define SCALE_F_MASK (SCALE_F - 1)
47
48u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
49
50static u32 __attribute__((pure))
51 crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
52{
53 unsigned int iquotient;
54 unsigned int iremainder;
55 unsigned int prealign;
56
57 if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
58 return crc32_le(crc, p, len);
59
60 if ((long)p & SCALE_F_MASK) {
61 /* align p to 16 byte */
62 prealign = SCALE_F - ((long)p & SCALE_F_MASK);
63
64 crc = crc32_le(crc, p, prealign);
65 len -= prealign;
66 p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
67 ~SCALE_F_MASK);
68 }
69 iquotient = len & (~SCALE_F_MASK);
70 iremainder = len & SCALE_F_MASK;
71
72 kernel_fpu_begin();
73 crc = crc32_pclmul_le_16(p, iquotient, crc);
74 kernel_fpu_end();
75
76 if (iremainder)
77 crc = crc32_le(crc, p + iquotient, iremainder);
78
79 return crc;
80}
81
82static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
83{
84 u32 *key = crypto_tfm_ctx(tfm);
85
86 *key = 0;
87
88 return 0;
89}
90
91static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
92 unsigned int keylen)
93{
94 u32 *mctx = crypto_shash_ctx(hash);
95
96 if (keylen != sizeof(u32)) {
97 crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
98 return -EINVAL;
99 }
100 *mctx = le32_to_cpup((__le32 *)key);
101 return 0;
102}
103
104static int crc32_pclmul_init(struct shash_desc *desc)
105{
106 u32 *mctx = crypto_shash_ctx(desc->tfm);
107 u32 *crcp = shash_desc_ctx(desc);
108
109 *crcp = *mctx;
110
111 return 0;
112}
113
114static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
115 unsigned int len)
116{
117 u32 *crcp = shash_desc_ctx(desc);
118
119 *crcp = crc32_pclmul_le(*crcp, data, len);
120 return 0;
121}
122
123/* No final XOR 0xFFFFFFFF, like crc32_le */
124static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
125 u8 *out)
126{
127 *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
128 return 0;
129}
130
131static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
132 unsigned int len, u8 *out)
133{
134 return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
135}
136
137static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
138{
139 u32 *crcp = shash_desc_ctx(desc);
140
141 *(__le32 *)out = cpu_to_le32p(crcp);
142 return 0;
143}
144
145static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
146 unsigned int len, u8 *out)
147{
148 return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
149 out);
150}
151
152static struct shash_alg alg = {
153 .setkey = crc32_pclmul_setkey,
154 .init = crc32_pclmul_init,
155 .update = crc32_pclmul_update,
156 .final = crc32_pclmul_final,
157 .finup = crc32_pclmul_finup,
158 .digest = crc32_pclmul_digest,
159 .descsize = sizeof(u32),
160 .digestsize = CHKSUM_DIGEST_SIZE,
161 .base = {
162 .cra_name = "crc32",
163 .cra_driver_name = "crc32-pclmul",
164 .cra_priority = 200,
165 .cra_blocksize = CHKSUM_BLOCK_SIZE,
166 .cra_ctxsize = sizeof(u32),
167 .cra_module = THIS_MODULE,
168 .cra_init = crc32_pclmul_cra_init,
169 }
170};
171
172static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
173 X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
174 {}
175};
176MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
177
178
179static int __init crc32_pclmul_mod_init(void)
180{
181
182 if (!x86_match_cpu(crc32pclmul_cpu_id)) {
183 pr_info("PCLMULQDQ-NI instructions are not detected.\n");
184 return -ENODEV;
185 }
186 return crypto_register_shash(&alg);
187}
188
189static void __exit crc32_pclmul_mod_fini(void)
190{
191 crypto_unregister_shash(&alg);
192}
193
194module_init(crc32_pclmul_mod_init);
195module_exit(crc32_pclmul_mod_fini);
196
197MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
198MODULE_LICENSE("GPL");
199
200MODULE_ALIAS("crc32");
201MODULE_ALIAS("crc32-pclmul");
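For context, a hedged sketch of how a kernel user might drive the "crc32" shash registered above through the generic crypto API; the helper name my_crc32() and the ~0 seed are illustrative assumptions, not part of this patch, and error handling is kept minimal:

/*
 * Usage sketch (illustrative only): compute a CRC32 via the "crc32"
 * shash. The 4-byte key is the initial CRC value, little-endian, as
 * crc32_pclmul_setkey() above expects.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <crypto/hash.h>

static int my_crc32(const u8 *data, unsigned int len, u32 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	__le32 seed = cpu_to_le32(~0u);	/* conventional CRC32 starting value */
	int err;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	err = crypto_shash_setkey(tfm, (const u8 *)&seed, sizeof(seed));
	if (!err)
		err = crypto_shash_digest(desc, data, len, (u8 *)out);

	kfree(desc);
	crypto_free_shash(tfm);
	return err;
}

Note that, matching __crc32_pclmul_finup() above, the 4-byte digest is stored little-endian and no final XOR with 0xFFFFFFFF is applied, so a caller expecting the usual "inverted" CRC-32 has to seed with ~0 and invert the result itself. The cra_priority of 200 lets this driver take precedence over lower-priority software implementations of "crc32" whenever the CPU advertises PCLMULQDQ.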
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 93c6d39237ac..cf1a7ec4cc3a 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -42,6 +42,8 @@
42 * SOFTWARE. 42 * SOFTWARE.
43 */ 43 */
44 44
45#include <linux/linkage.h>
46
45## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction 47## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
46 48
47.macro LABEL prefix n 49.macro LABEL prefix n
@@ -68,8 +70,7 @@
68 70
69# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); 71# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
70 72
71.global crc_pcl 73ENTRY(crc_pcl)
72crc_pcl:
73#define bufp %rdi 74#define bufp %rdi
74#define bufp_dw %edi 75#define bufp_dw %edi
75#define bufp_w %di 76#define bufp_w %di
@@ -323,6 +324,9 @@ JMPTBL_ENTRY %i
323.noaltmacro 324.noaltmacro
324 i=i+1 325 i=i+1
325.endr 326.endr
327
328ENDPROC(crc_pcl)
329
326 ################################################################ 330 ################################################################
327 ## PCLMULQDQ tables 331 ## PCLMULQDQ tables
328 ## Table is 128 entries x 2 quad words each 332 ## Table is 128 entries x 2 quad words each
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 1eb7f90cb7b9..586f41aac361 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -94,6 +94,7 @@ __clmul_gf128mul_ble:
94 pxor T2, T1 94 pxor T2, T1
95 pxor T1, DATA 95 pxor T1, DATA
96 ret 96 ret
97ENDPROC(__clmul_gf128mul_ble)
97 98
98/* void clmul_ghash_mul(char *dst, const be128 *shash) */ 99/* void clmul_ghash_mul(char *dst, const be128 *shash) */
99ENTRY(clmul_ghash_mul) 100ENTRY(clmul_ghash_mul)
@@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul)
105 PSHUFB_XMM BSWAP DATA 106 PSHUFB_XMM BSWAP DATA
106 movups DATA, (%rdi) 107 movups DATA, (%rdi)
107 ret 108 ret
109ENDPROC(clmul_ghash_mul)
108 110
109/* 111/*
110 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, 112 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
@@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update)
131 movups DATA, (%rdi) 133 movups DATA, (%rdi)
132.Lupdate_just_ret: 134.Lupdate_just_ret:
133 ret 135 ret
136ENDPROC(clmul_ghash_update)
134 137
135/* 138/*
136 * void clmul_ghash_setkey(be128 *shash, const u8 *key); 139 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
@@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey)
155 pxor %xmm1, %xmm0 158 pxor %xmm1, %xmm0
156 movups %xmm0, (%rdi) 159 movups %xmm0, (%rdi)
157 ret 160 ret
161ENDPROC(clmul_ghash_setkey)
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
index 72eb306680b2..329452b8f794 100644
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -2,11 +2,12 @@
2# D. J. Bernstein 2# D. J. Bernstein
3# Public domain. 3# Public domain.
4 4
5# enter ECRYPT_encrypt_bytes 5#include <linux/linkage.h>
6
6.text 7.text
7.p2align 5 8
8.globl ECRYPT_encrypt_bytes 9# enter salsa20_encrypt_bytes
9ECRYPT_encrypt_bytes: 10ENTRY(salsa20_encrypt_bytes)
10 mov %esp,%eax 11 mov %esp,%eax
11 and $31,%eax 12 and $31,%eax
12 add $256,%eax 13 add $256,%eax
@@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes:
933 add $64,%esi 934 add $64,%esi
934 # goto bytesatleast1 935 # goto bytesatleast1
935 jmp ._bytesatleast1 936 jmp ._bytesatleast1
936# enter ECRYPT_keysetup 937ENDPROC(salsa20_encrypt_bytes)
937.text 938
938.p2align 5 939# enter salsa20_keysetup
939.globl ECRYPT_keysetup 940ENTRY(salsa20_keysetup)
940ECRYPT_keysetup:
941 mov %esp,%eax 941 mov %esp,%eax
942 and $31,%eax 942 and $31,%eax
943 add $256,%eax 943 add $256,%eax
@@ -1060,11 +1060,10 @@ ECRYPT_keysetup:
1060 # leave 1060 # leave
1061 add %eax,%esp 1061 add %eax,%esp
1062 ret 1062 ret
1063# enter ECRYPT_ivsetup 1063ENDPROC(salsa20_keysetup)
1064.text 1064
1065.p2align 5 1065# enter salsa20_ivsetup
1066.globl ECRYPT_ivsetup 1066ENTRY(salsa20_ivsetup)
1067ECRYPT_ivsetup:
1068 mov %esp,%eax 1067 mov %esp,%eax
1069 and $31,%eax 1068 and $31,%eax
1070 add $256,%eax 1069 add $256,%eax
@@ -1112,3 +1111,4 @@ ECRYPT_ivsetup:
1112 # leave 1111 # leave
1113 add %eax,%esp 1112 add %eax,%esp
1114 ret 1113 ret
1114ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
index 6214a9b09706..9279e0b2d60e 100644
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
@@ -1,8 +1,7 @@
1# enter ECRYPT_encrypt_bytes 1#include <linux/linkage.h>
2.text 2
3.p2align 5 3# enter salsa20_encrypt_bytes
4.globl ECRYPT_encrypt_bytes 4ENTRY(salsa20_encrypt_bytes)
5ECRYPT_encrypt_bytes:
6 mov %rsp,%r11 5 mov %rsp,%r11
7 and $31,%r11 6 and $31,%r11
8 add $256,%r11 7 add $256,%r11
@@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes:
802 # comment:fp stack unchanged by jump 801 # comment:fp stack unchanged by jump
803 # goto bytesatleast1 802 # goto bytesatleast1
804 jmp ._bytesatleast1 803 jmp ._bytesatleast1
805# enter ECRYPT_keysetup 804ENDPROC(salsa20_encrypt_bytes)
806.text 805
807.p2align 5 806# enter salsa20_keysetup
808.globl ECRYPT_keysetup 807ENTRY(salsa20_keysetup)
809ECRYPT_keysetup:
810 mov %rsp,%r11 808 mov %rsp,%r11
811 and $31,%r11 809 and $31,%r11
812 add $256,%r11 810 add $256,%r11
@@ -892,11 +890,10 @@ ECRYPT_keysetup:
892 mov %rdi,%rax 890 mov %rdi,%rax
893 mov %rsi,%rdx 891 mov %rsi,%rdx
894 ret 892 ret
895# enter ECRYPT_ivsetup 893ENDPROC(salsa20_keysetup)
896.text 894
897.p2align 5 895# enter salsa20_ivsetup
898.globl ECRYPT_ivsetup 896ENTRY(salsa20_ivsetup)
899ECRYPT_ivsetup:
900 mov %rsp,%r11 897 mov %rsp,%r11
901 and $31,%r11 898 and $31,%r11
902 add $256,%r11 899 add $256,%r11
@@ -918,3 +915,4 @@ ECRYPT_ivsetup:
918 mov %rdi,%rax 915 mov %rdi,%rax
919 mov %rsi,%rdx 916 mov %rsi,%rdx
920 ret 917 ret
918ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index a3a3c0205c16..5e8e67739bb5 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -26,11 +26,6 @@
26#define SALSA20_MIN_KEY_SIZE 16U 26#define SALSA20_MIN_KEY_SIZE 16U
27#define SALSA20_MAX_KEY_SIZE 32U 27#define SALSA20_MAX_KEY_SIZE 32U
28 28
29// use the ECRYPT_* function names
30#define salsa20_keysetup ECRYPT_keysetup
31#define salsa20_ivsetup ECRYPT_ivsetup
32#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
33
34struct salsa20_ctx 29struct salsa20_ctx
35{ 30{
36 u32 input[16]; 31 u32 input[16];
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 02b0e9fe997c..43c938612b74 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -24,6 +24,7 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
27#include "glue_helper-asm-avx.S" 28#include "glue_helper-asm-avx.S"
28 29
29.file "serpent-avx-x86_64-asm_64.S" 30.file "serpent-avx-x86_64-asm_64.S"
@@ -566,8 +567,6 @@
566 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) 567 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
567 568
568.align 8 569.align 8
569.type __serpent_enc_blk8_avx,@function;
570
571__serpent_enc_blk8_avx: 570__serpent_enc_blk8_avx:
572 /* input: 571 /* input:
573 * %rdi: ctx, CTX 572 * %rdi: ctx, CTX
@@ -619,10 +618,9 @@ __serpent_enc_blk8_avx:
619 write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); 618 write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
620 619
621 ret; 620 ret;
621ENDPROC(__serpent_enc_blk8_avx)
622 622
623.align 8 623.align 8
624.type __serpent_dec_blk8_avx,@function;
625
626__serpent_dec_blk8_avx: 624__serpent_dec_blk8_avx:
627 /* input: 625 /* input:
628 * %rdi: ctx, CTX 626 * %rdi: ctx, CTX
@@ -674,12 +672,9 @@ __serpent_dec_blk8_avx:
674 write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); 672 write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
675 673
676 ret; 674 ret;
675ENDPROC(__serpent_dec_blk8_avx)
677 676
678.align 8 677ENTRY(serpent_ecb_enc_8way_avx)
679.global serpent_ecb_enc_8way_avx
680.type serpent_ecb_enc_8way_avx,@function;
681
682serpent_ecb_enc_8way_avx:
683 /* input: 678 /* input:
684 * %rdi: ctx, CTX 679 * %rdi: ctx, CTX
685 * %rsi: dst 680 * %rsi: dst
@@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx:
693 store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 688 store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
694 689
695 ret; 690 ret;
691ENDPROC(serpent_ecb_enc_8way_avx)
696 692
697.align 8 693ENTRY(serpent_ecb_dec_8way_avx)
698.global serpent_ecb_dec_8way_avx
699.type serpent_ecb_dec_8way_avx,@function;
700
701serpent_ecb_dec_8way_avx:
702 /* input: 694 /* input:
703 * %rdi: ctx, CTX 695 * %rdi: ctx, CTX
704 * %rsi: dst 696 * %rsi: dst
@@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx:
712 store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 704 store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
713 705
714 ret; 706 ret;
707ENDPROC(serpent_ecb_dec_8way_avx)
715 708
716.align 8 709ENTRY(serpent_cbc_dec_8way_avx)
717.global serpent_cbc_dec_8way_avx
718.type serpent_cbc_dec_8way_avx,@function;
719
720serpent_cbc_dec_8way_avx:
721 /* input: 710 /* input:
722 * %rdi: ctx, CTX 711 * %rdi: ctx, CTX
723 * %rsi: dst 712 * %rsi: dst
@@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx:
731 store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 720 store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
732 721
733 ret; 722 ret;
723ENDPROC(serpent_cbc_dec_8way_avx)
734 724
735.align 8 725ENTRY(serpent_ctr_8way_avx)
736.global serpent_ctr_8way_avx
737.type serpent_ctr_8way_avx,@function;
738
739serpent_ctr_8way_avx:
740 /* input: 726 /* input:
741 * %rdi: ctx, CTX 727 * %rdi: ctx, CTX
742 * %rsi: dst 728 * %rsi: dst
@@ -752,3 +738,4 @@ serpent_ctr_8way_avx:
752 store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 738 store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
753 739
754 ret; 740 ret;
741ENDPROC(serpent_ctr_8way_avx)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index c00053d42f99..d348f1553a79 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -24,6 +24,8 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
28
27.file "serpent-sse2-i586-asm_32.S" 29.file "serpent-sse2-i586-asm_32.S"
28.text 30.text
29 31
@@ -510,11 +512,7 @@
510 pxor t0, x3; \ 512 pxor t0, x3; \
511 movdqu x3, (3*4*4)(out); 513 movdqu x3, (3*4*4)(out);
512 514
513.align 8 515ENTRY(__serpent_enc_blk_4way)
514.global __serpent_enc_blk_4way
515.type __serpent_enc_blk_4way,@function;
516
517__serpent_enc_blk_4way:
518 /* input: 516 /* input:
519 * arg_ctx(%esp): ctx, CTX 517 * arg_ctx(%esp): ctx, CTX
520 * arg_dst(%esp): dst 518 * arg_dst(%esp): dst
@@ -566,22 +564,19 @@ __serpent_enc_blk_4way:
566 movl arg_dst(%esp), %eax; 564 movl arg_dst(%esp), %eax;
567 565
568 cmpb $0, arg_xor(%esp); 566 cmpb $0, arg_xor(%esp);
569 jnz __enc_xor4; 567 jnz .L__enc_xor4;
570 568
571 write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); 569 write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
572 570
573 ret; 571 ret;
574 572
575__enc_xor4: 573.L__enc_xor4:
576 xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); 574 xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
577 575
578 ret; 576 ret;
577ENDPROC(__serpent_enc_blk_4way)
579 578
580.align 8 579ENTRY(serpent_dec_blk_4way)
581.global serpent_dec_blk_4way
582.type serpent_dec_blk_4way,@function;
583
584serpent_dec_blk_4way:
585 /* input: 580 /* input:
586 * arg_ctx(%esp): ctx, CTX 581 * arg_ctx(%esp): ctx, CTX
587 * arg_dst(%esp): dst 582 * arg_dst(%esp): dst
@@ -633,3 +628,4 @@ serpent_dec_blk_4way:
633 write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); 628 write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
634 629
635 ret; 630 ret;
631ENDPROC(serpent_dec_blk_4way)
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 3ee1ff04d3e9..acc066c7c6b2 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -24,6 +24,8 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
28
27.file "serpent-sse2-x86_64-asm_64.S" 29.file "serpent-sse2-x86_64-asm_64.S"
28.text 30.text
29 31
@@ -632,11 +634,7 @@
632 pxor t0, x3; \ 634 pxor t0, x3; \
633 movdqu x3, (3*4*4)(out); 635 movdqu x3, (3*4*4)(out);
634 636
635.align 8 637ENTRY(__serpent_enc_blk_8way)
636.global __serpent_enc_blk_8way
637.type __serpent_enc_blk_8way,@function;
638
639__serpent_enc_blk_8way:
640 /* input: 638 /* input:
641 * %rdi: ctx, CTX 639 * %rdi: ctx, CTX
642 * %rsi: dst 640 * %rsi: dst
@@ -687,24 +685,21 @@ __serpent_enc_blk_8way:
687 leaq (4*4*4)(%rsi), %rax; 685 leaq (4*4*4)(%rsi), %rax;
688 686
689 testb %cl, %cl; 687 testb %cl, %cl;
690 jnz __enc_xor8; 688 jnz .L__enc_xor8;
691 689
692 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); 690 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
693 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); 691 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
694 692
695 ret; 693 ret;
696 694
697__enc_xor8: 695.L__enc_xor8:
698 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); 696 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
699 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); 697 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
700 698
701 ret; 699 ret;
700ENDPROC(__serpent_enc_blk_8way)
702 701
703.align 8 702ENTRY(serpent_dec_blk_8way)
704.global serpent_dec_blk_8way
705.type serpent_dec_blk_8way,@function;
706
707serpent_dec_blk_8way:
708 /* input: 703 /* input:
709 * %rdi: ctx, CTX 704 * %rdi: ctx, CTX
710 * %rsi: dst 705 * %rsi: dst
@@ -756,3 +751,4 @@ serpent_dec_blk_8way:
756 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); 751 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
757 752
758 ret; 753 ret;
754ENDPROC(serpent_dec_blk_8way)
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 49d6987a73d9..a4109506a5e8 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -28,6 +28,8 @@
28 * (at your option) any later version. 28 * (at your option) any later version.
29 */ 29 */
30 30
31#include <linux/linkage.h>
32
31#define CTX %rdi // arg1 33#define CTX %rdi // arg1
32#define BUF %rsi // arg2 34#define BUF %rsi // arg2
33#define CNT %rdx // arg3 35#define CNT %rdx // arg3
@@ -69,10 +71,8 @@
69 * param: function's name 71 * param: function's name
70 */ 72 */
71.macro SHA1_VECTOR_ASM name 73.macro SHA1_VECTOR_ASM name
72 .global \name 74 ENTRY(\name)
73 .type \name, @function 75
74 .align 32
75\name:
76 push %rbx 76 push %rbx
77 push %rbp 77 push %rbp
78 push %r12 78 push %r12
@@ -106,7 +106,7 @@
106 pop %rbx 106 pop %rbx
107 ret 107 ret
108 108
109 .size \name, .-\name 109 ENDPROC(\name)
110.endm 110.endm
111 111
112/* 112/*
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index ebac16bfa830..8d3e113b2c95 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -23,6 +23,7 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
26#include "glue_helper-asm-avx.S" 27#include "glue_helper-asm-avx.S"
27 28
28.file "twofish-avx-x86_64-asm_64.S" 29.file "twofish-avx-x86_64-asm_64.S"
@@ -243,8 +244,6 @@
243 vpxor x3, wkey, x3; 244 vpxor x3, wkey, x3;
244 245
245.align 8 246.align 8
246.type __twofish_enc_blk8,@function;
247
248__twofish_enc_blk8: 247__twofish_enc_blk8:
249 /* input: 248 /* input:
250 * %rdi: ctx, CTX 249 * %rdi: ctx, CTX
@@ -284,10 +283,9 @@ __twofish_enc_blk8:
284 outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); 283 outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
285 284
286 ret; 285 ret;
286ENDPROC(__twofish_enc_blk8)
287 287
288.align 8 288.align 8
289.type __twofish_dec_blk8,@function;
290
291__twofish_dec_blk8: 289__twofish_dec_blk8:
292 /* input: 290 /* input:
293 * %rdi: ctx, CTX 291 * %rdi: ctx, CTX
@@ -325,12 +323,9 @@ __twofish_dec_blk8:
325 outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); 323 outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
326 324
327 ret; 325 ret;
326ENDPROC(__twofish_dec_blk8)
328 327
329.align 8 328ENTRY(twofish_ecb_enc_8way)
330.global twofish_ecb_enc_8way
331.type twofish_ecb_enc_8way,@function;
332
333twofish_ecb_enc_8way:
334 /* input: 329 /* input:
335 * %rdi: ctx, CTX 330 * %rdi: ctx, CTX
336 * %rsi: dst 331 * %rsi: dst
@@ -346,12 +341,9 @@ twofish_ecb_enc_8way:
346 store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); 341 store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
347 342
348 ret; 343 ret;
344ENDPROC(twofish_ecb_enc_8way)
349 345
350.align 8 346ENTRY(twofish_ecb_dec_8way)
351.global twofish_ecb_dec_8way
352.type twofish_ecb_dec_8way,@function;
353
354twofish_ecb_dec_8way:
355 /* input: 347 /* input:
356 * %rdi: ctx, CTX 348 * %rdi: ctx, CTX
357 * %rsi: dst 349 * %rsi: dst
@@ -367,12 +359,9 @@ twofish_ecb_dec_8way:
367 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 359 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
368 360
369 ret; 361 ret;
362ENDPROC(twofish_ecb_dec_8way)
370 363
371.align 8 364ENTRY(twofish_cbc_dec_8way)
372.global twofish_cbc_dec_8way
373.type twofish_cbc_dec_8way,@function;
374
375twofish_cbc_dec_8way:
376 /* input: 365 /* input:
377 * %rdi: ctx, CTX 366 * %rdi: ctx, CTX
378 * %rsi: dst 367 * %rsi: dst
@@ -393,12 +382,9 @@ twofish_cbc_dec_8way:
393 popq %r12; 382 popq %r12;
394 383
395 ret; 384 ret;
385ENDPROC(twofish_cbc_dec_8way)
396 386
397.align 8 387ENTRY(twofish_ctr_8way)
398.global twofish_ctr_8way
399.type twofish_ctr_8way,@function;
400
401twofish_ctr_8way:
402 /* input: 388 /* input:
403 * %rdi: ctx, CTX 389 * %rdi: ctx, CTX
404 * %rsi: dst 390 * %rsi: dst
@@ -421,3 +407,4 @@ twofish_ctr_8way:
421 popq %r12; 407 popq %r12;
422 408
423 ret; 409 ret;
410ENDPROC(twofish_ctr_8way)
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 658af4bb35c9..694ea4587ba7 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -20,6 +20,7 @@
20.file "twofish-i586-asm.S" 20.file "twofish-i586-asm.S"
21.text 21.text
22 22
23#include <linux/linkage.h>
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
24 25
25/* return address at 0 */ 26/* return address at 0 */
@@ -219,11 +220,7 @@
219 xor %esi, d ## D;\ 220 xor %esi, d ## D;\
220 ror $1, d ## D; 221 ror $1, d ## D;
221 222
222.align 4 223ENTRY(twofish_enc_blk)
223.global twofish_enc_blk
224.global twofish_dec_blk
225
226twofish_enc_blk:
227 push %ebp /* save registers according to calling convention*/ 224 push %ebp /* save registers according to calling convention*/
228 push %ebx 225 push %ebx
229 push %esi 226 push %esi
@@ -277,8 +274,9 @@ twofish_enc_blk:
277 pop %ebp 274 pop %ebp
278 mov $1, %eax 275 mov $1, %eax
279 ret 276 ret
277ENDPROC(twofish_enc_blk)
280 278
281twofish_dec_blk: 279ENTRY(twofish_dec_blk)
282 push %ebp /* save registers according to calling convention*/ 280 push %ebp /* save registers according to calling convention*/
283 push %ebx 281 push %ebx
284 push %esi 282 push %esi
@@ -333,3 +331,4 @@ twofish_dec_blk:
333 pop %ebp 331 pop %ebp
334 mov $1, %eax 332 mov $1, %eax
335 ret 333 ret
334ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index 5b012a2c5119..1c3b7ceb36d2 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "twofish-x86_64-asm-3way.S" 25.file "twofish-x86_64-asm-3way.S"
24.text 26.text
25 27
@@ -214,11 +216,7 @@
214 rorq $32, RAB2; \ 216 rorq $32, RAB2; \
215 outunpack3(mov, RIO, 2, RAB, 2); 217 outunpack3(mov, RIO, 2, RAB, 2);
216 218
217.align 8 219ENTRY(__twofish_enc_blk_3way)
218.global __twofish_enc_blk_3way
219.type __twofish_enc_blk_3way,@function;
220
221__twofish_enc_blk_3way:
222 /* input: 220 /* input:
223 * %rdi: ctx, CTX 221 * %rdi: ctx, CTX
224 * %rsi: dst 222 * %rsi: dst
@@ -250,7 +248,7 @@ __twofish_enc_blk_3way:
250 popq %rbp; /* bool xor */ 248 popq %rbp; /* bool xor */
251 249
252 testb %bpl, %bpl; 250 testb %bpl, %bpl;
253 jnz __enc_xor3; 251 jnz .L__enc_xor3;
254 252
255 outunpack_enc3(mov); 253 outunpack_enc3(mov);
256 254
@@ -262,7 +260,7 @@ __twofish_enc_blk_3way:
262 popq %r15; 260 popq %r15;
263 ret; 261 ret;
264 262
265__enc_xor3: 263.L__enc_xor3:
266 outunpack_enc3(xor); 264 outunpack_enc3(xor);
267 265
268 popq %rbx; 266 popq %rbx;
@@ -272,11 +270,9 @@ __enc_xor3:
272 popq %r14; 270 popq %r14;
273 popq %r15; 271 popq %r15;
274 ret; 272 ret;
273ENDPROC(__twofish_enc_blk_3way)
275 274
276.global twofish_dec_blk_3way 275ENTRY(twofish_dec_blk_3way)
277.type twofish_dec_blk_3way,@function;
278
279twofish_dec_blk_3way:
280 /* input: 276 /* input:
281 * %rdi: ctx, CTX 277 * %rdi: ctx, CTX
282 * %rsi: dst 278 * %rsi: dst
@@ -313,4 +309,4 @@ twofish_dec_blk_3way:
313 popq %r14; 309 popq %r14;
314 popq %r15; 310 popq %r15;
315 ret; 311 ret;
316 312ENDPROC(twofish_dec_blk_3way)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 7bcf3fcc3668..a039d21986a2 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -20,6 +20,7 @@
20.file "twofish-x86_64-asm.S" 20.file "twofish-x86_64-asm.S"
21.text 21.text
22 22
23#include <linux/linkage.h>
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
24 25
25#define a_offset 0 26#define a_offset 0
@@ -214,11 +215,7 @@
214 xor %r8d, d ## D;\ 215 xor %r8d, d ## D;\
215 ror $1, d ## D; 216 ror $1, d ## D;
216 217
217.align 8 218ENTRY(twofish_enc_blk)
218.global twofish_enc_blk
219.global twofish_dec_blk
220
221twofish_enc_blk:
222 pushq R1 219 pushq R1
223 220
224 /* %rdi contains the ctx address */ 221 /* %rdi contains the ctx address */
@@ -269,8 +266,9 @@ twofish_enc_blk:
269 popq R1 266 popq R1
270 movq $1,%rax 267 movq $1,%rax
271 ret 268 ret
269ENDPROC(twofish_enc_blk)
272 270
273twofish_dec_blk: 271ENTRY(twofish_dec_blk)
274 pushq R1 272 pushq R1
275 273
276 /* %rdi contains the ctx address */ 274 /* %rdi contains the ctx address */
@@ -320,3 +318,4 @@ twofish_dec_blk:
320 popq R1 318 popq R1
321 movq $1,%rax 319 movq $1,%rax
322 ret 320 ret
321ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a1daf4a65009..cf1a471a18a2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -129,13 +129,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
129 return err; 129 return err;
130} 130}
131 131
132asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
133{
134 sigset_t blocked;
135 siginitset(&blocked, mask);
136 return sigsuspend(&blocked);
137}
138
139/* 132/*
140 * Do a signal return; undo the signal stack. 133 * Do a signal return; undo the signal stack.
141 */ 134 */
@@ -215,8 +208,9 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
215 return err; 208 return err;
216} 209}
217 210
218asmlinkage long sys32_sigreturn(struct pt_regs *regs) 211asmlinkage long sys32_sigreturn(void)
219{ 212{
213 struct pt_regs *regs = current_pt_regs();
220 struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); 214 struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
221 sigset_t set; 215 sigset_t set;
222 unsigned int ax; 216 unsigned int ax;
@@ -241,8 +235,9 @@ badframe:
241 return 0; 235 return 0;
242} 236}
243 237
244asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) 238asmlinkage long sys32_rt_sigreturn(void)
245{ 239{
240 struct pt_regs *regs = current_pt_regs();
246 struct rt_sigframe_ia32 __user *frame; 241 struct rt_sigframe_ia32 __user *frame;
247 sigset_t set; 242 sigset_t set;
248 unsigned int ax; 243 unsigned int ax;
@@ -314,7 +309,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
314/* 309/*
315 * Determine which stack to use.. 310 * Determine which stack to use..
316 */ 311 */
317static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, 312static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
318 size_t frame_size, 313 size_t frame_size,
319 void __user **fpstate) 314 void __user **fpstate)
320{ 315{
@@ -324,16 +319,13 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
324 sp = regs->sp; 319 sp = regs->sp;
325 320
326 /* This is the X/Open sanctioned signal stack switching. */ 321 /* This is the X/Open sanctioned signal stack switching. */
327 if (ka->sa.sa_flags & SA_ONSTACK) { 322 if (ksig->ka.sa.sa_flags & SA_ONSTACK)
328 if (sas_ss_flags(sp) == 0) 323 sp = sigsp(sp, ksig);
329 sp = current->sas_ss_sp + current->sas_ss_size;
330 }
331
332 /* This is the legacy signal stack switching. */ 324 /* This is the legacy signal stack switching. */
333 else if ((regs->ss & 0xffff) != __USER32_DS && 325 else if ((regs->ss & 0xffff) != __USER32_DS &&
334 !(ka->sa.sa_flags & SA_RESTORER) && 326 !(ksig->ka.sa.sa_flags & SA_RESTORER) &&
335 ka->sa.sa_restorer) 327 ksig->ka.sa.sa_restorer)
336 sp = (unsigned long) ka->sa.sa_restorer; 328 sp = (unsigned long) ksig->ka.sa.sa_restorer;
337 329
338 if (used_math()) { 330 if (used_math()) {
339 unsigned long fx_aligned, math_size; 331 unsigned long fx_aligned, math_size;
@@ -352,7 +344,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
352 return (void __user *) sp; 344 return (void __user *) sp;
353} 345}
354 346
355int ia32_setup_frame(int sig, struct k_sigaction *ka, 347int ia32_setup_frame(int sig, struct ksignal *ksig,
356 compat_sigset_t *set, struct pt_regs *regs) 348 compat_sigset_t *set, struct pt_regs *regs)
357{ 349{
358 struct sigframe_ia32 __user *frame; 350 struct sigframe_ia32 __user *frame;
@@ -371,7 +363,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
371 0x80cd, /* int $0x80 */ 363 0x80cd, /* int $0x80 */
372 }; 364 };
373 365
374 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 366 frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
375 367
376 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 368 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
377 return -EFAULT; 369 return -EFAULT;
@@ -388,8 +380,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
388 return -EFAULT; 380 return -EFAULT;
389 } 381 }
390 382
391 if (ka->sa.sa_flags & SA_RESTORER) { 383 if (ksig->ka.sa.sa_flags & SA_RESTORER) {
392 restorer = ka->sa.sa_restorer; 384 restorer = ksig->ka.sa.sa_restorer;
393 } else { 385 } else {
394 /* Return stub is in 32bit vsyscall page */ 386 /* Return stub is in 32bit vsyscall page */
395 if (current->mm->context.vdso) 387 if (current->mm->context.vdso)
@@ -414,7 +406,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
414 406
415 /* Set up registers for signal handler */ 407 /* Set up registers for signal handler */
416 regs->sp = (unsigned long) frame; 408 regs->sp = (unsigned long) frame;
417 regs->ip = (unsigned long) ka->sa.sa_handler; 409 regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
418 410
419 /* Make -mregparm=3 work */ 411 /* Make -mregparm=3 work */
420 regs->ax = sig; 412 regs->ax = sig;
@@ -430,7 +422,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
430 return 0; 422 return 0;
431} 423}
432 424
433int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 425int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
434 compat_sigset_t *set, struct pt_regs *regs) 426 compat_sigset_t *set, struct pt_regs *regs)
435{ 427{
436 struct rt_sigframe_ia32 __user *frame; 428 struct rt_sigframe_ia32 __user *frame;
@@ -451,7 +443,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
451 0, 443 0,
452 }; 444 };
453 445
454 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 446 frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
455 447
456 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 448 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
457 return -EFAULT; 449 return -EFAULT;
@@ -469,8 +461,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
469 put_user_ex(0, &frame->uc.uc_link); 461 put_user_ex(0, &frame->uc.uc_link);
470 err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); 462 err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
471 463
472 if (ka->sa.sa_flags & SA_RESTORER) 464 if (ksig->ka.sa.sa_flags & SA_RESTORER)
473 restorer = ka->sa.sa_restorer; 465 restorer = ksig->ka.sa.sa_restorer;
474 else 466 else
475 restorer = VDSO32_SYMBOL(current->mm->context.vdso, 467 restorer = VDSO32_SYMBOL(current->mm->context.vdso,
476 rt_sigreturn); 468 rt_sigreturn);
@@ -483,7 +475,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
483 put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); 475 put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
484 } put_user_catch(err); 476 } put_user_catch(err);
485 477
486 err |= copy_siginfo_to_user32(&frame->info, info); 478 err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
487 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, 479 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
488 regs, set->sig[0]); 480 regs, set->sig[0]);
489 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 481 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -493,7 +485,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
493 485
494 /* Set up registers for signal handler */ 486 /* Set up registers for signal handler */
495 regs->sp = (unsigned long) frame; 487 regs->sp = (unsigned long) frame;
496 regs->ip = (unsigned long) ka->sa.sa_handler; 488 regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
497 489
498 /* Make -mregparm=3 work */ 490 /* Make -mregparm=3 work */
499 regs->ax = sig; 491 regs->ax = sig;
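For reference while reading the ia32_setup_frame()/ia32_setup_rt_frame() changes above: the separate k_sigaction pointer and siginfo_t argument are folded into a single struct ksignal. A minimal sketch of that container, assuming the generic <linux/signal.h> definition of this era (not part of this hunk):

	struct ksignal {
		struct k_sigaction ka;	/* handler, flags, restorer, mask */
		siginfo_t info;		/* queued siginfo for this signal */
		int sig;		/* signal number */
	};

so ka->sa.sa_flags becomes ksig->ka.sa.sa_flags and the old info argument is reached as &ksig->info.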
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 102ff7cb3e41..474dc1b59f72 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -207,7 +207,7 @@ sysexit_from_sys_call:
207 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 207 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
208 jnz ia32_ret_from_sys_call 208 jnz ia32_ret_from_sys_call
209 TRACE_IRQS_ON 209 TRACE_IRQS_ON
210 sti 210 ENABLE_INTERRUPTS(CLBR_NONE)
211 movl %eax,%esi /* second arg, syscall return value */ 211 movl %eax,%esi /* second arg, syscall return value */
212 cmpl $-MAX_ERRNO,%eax /* is it an error ? */ 212 cmpl $-MAX_ERRNO,%eax /* is it an error ? */
213 jbe 1f 213 jbe 1f
@@ -217,7 +217,7 @@ sysexit_from_sys_call:
217 call __audit_syscall_exit 217 call __audit_syscall_exit
218 movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ 218 movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
219 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 219 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
220 cli 220 DISABLE_INTERRUPTS(CLBR_NONE)
221 TRACE_IRQS_OFF 221 TRACE_IRQS_OFF
222 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 222 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
223 jz \exit 223 jz \exit
@@ -456,18 +456,16 @@ ia32_badsys:
456 ALIGN 456 ALIGN
457GLOBAL(\label) 457GLOBAL(\label)
458 leaq \func(%rip),%rax 458 leaq \func(%rip),%rax
459 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
460 jmp ia32_ptregs_common 459 jmp ia32_ptregs_common
461 .endm 460 .endm
462 461
463 CFI_STARTPROC32 462 CFI_STARTPROC32
464 463
465 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi 464 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
466 PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi 465 PTREGSCALL stub32_sigreturn, sys32_sigreturn
467 PTREGSCALL stub32_execve, compat_sys_execve, %rcx 466 PTREGSCALL stub32_execve, compat_sys_execve
468 PTREGSCALL stub32_fork, sys_fork, %rdi 467 PTREGSCALL stub32_fork, sys_fork
469 PTREGSCALL stub32_vfork, sys_vfork, %rdi 468 PTREGSCALL stub32_vfork, sys_vfork
470 PTREGSCALL stub32_iopl, sys_iopl, %rsi
471 469
472 ALIGN 470 ALIGN
473GLOBAL(stub32_clone) 471GLOBAL(stub32_clone)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index d0b689ba7be2..592f5a9a9c0e 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -172,183 +172,12 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len,
172 return sys_mprotect(start, len, prot); 172 return sys_mprotect(start, len, prot);
173} 173}
174 174
175asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
176 struct sigaction32 __user *oact,
177 unsigned int sigsetsize)
178{
179 struct k_sigaction new_ka, old_ka;
180 int ret;
181 compat_sigset_t set32;
182
183 /* XXX: Don't preclude handling different sized sigset_t's. */
184 if (sigsetsize != sizeof(compat_sigset_t))
185 return -EINVAL;
186
187 if (act) {
188 compat_uptr_t handler, restorer;
189
190 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
191 __get_user(handler, &act->sa_handler) ||
192 __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
193 __get_user(restorer, &act->sa_restorer) ||
194 __copy_from_user(&set32, &act->sa_mask,
195 sizeof(compat_sigset_t)))
196 return -EFAULT;
197 new_ka.sa.sa_handler = compat_ptr(handler);
198 new_ka.sa.sa_restorer = compat_ptr(restorer);
199
200 /*
201 * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
202 * than _NSIG_WORDS << 1
203 */
204 switch (_NSIG_WORDS) {
205 case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
206 | (((long)set32.sig[7]) << 32);
207 case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
208 | (((long)set32.sig[5]) << 32);
209 case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
210 | (((long)set32.sig[3]) << 32);
211 case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
212 | (((long)set32.sig[1]) << 32);
213 }
214 }
215
216 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
217
218 if (!ret && oact) {
219 /*
220 * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
221 * than _NSIG_WORDS << 1
222 */
223 switch (_NSIG_WORDS) {
224 case 4:
225 set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
226 set32.sig[6] = old_ka.sa.sa_mask.sig[3];
227 case 3:
228 set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
229 set32.sig[4] = old_ka.sa.sa_mask.sig[2];
230 case 2:
231 set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
232 set32.sig[2] = old_ka.sa.sa_mask.sig[1];
233 case 1:
234 set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
235 set32.sig[0] = old_ka.sa.sa_mask.sig[0];
236 }
237 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
238 __put_user(ptr_to_compat(old_ka.sa.sa_handler),
239 &oact->sa_handler) ||
240 __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
241 &oact->sa_restorer) ||
242 __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
243 __copy_to_user(&oact->sa_mask, &set32,
244 sizeof(compat_sigset_t)))
245 return -EFAULT;
246 }
247
248 return ret;
249}
250
251asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
252 struct old_sigaction32 __user *oact)
253{
254 struct k_sigaction new_ka, old_ka;
255 int ret;
256
257 if (act) {
258 compat_old_sigset_t mask;
259 compat_uptr_t handler, restorer;
260
261 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
262 __get_user(handler, &act->sa_handler) ||
263 __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
264 __get_user(restorer, &act->sa_restorer) ||
265 __get_user(mask, &act->sa_mask))
266 return -EFAULT;
267
268 new_ka.sa.sa_handler = compat_ptr(handler);
269 new_ka.sa.sa_restorer = compat_ptr(restorer);
270
271 siginitset(&new_ka.sa.sa_mask, mask);
272 }
273
274 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
275
276 if (!ret && oact) {
277 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
278 __put_user(ptr_to_compat(old_ka.sa.sa_handler),
279 &oact->sa_handler) ||
280 __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
281 &oact->sa_restorer) ||
282 __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
283 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
284 return -EFAULT;
285 }
286
287 return ret;
288}
289
290asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, 175asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
291 int options) 176 int options)
292{ 177{
293 return compat_sys_wait4(pid, stat_addr, options, NULL); 178 return compat_sys_wait4(pid, stat_addr, options, NULL);
294} 179}
295 180
296/* 32-bit timeval and related flotsam. */
297
298asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
299 struct compat_timespec __user *interval)
300{
301 struct timespec t;
302 int ret;
303 mm_segment_t old_fs = get_fs();
304
305 set_fs(KERNEL_DS);
306 ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
307 set_fs(old_fs);
308 if (put_compat_timespec(&t, interval))
309 return -EFAULT;
310 return ret;
311}
312
313asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
314 compat_size_t sigsetsize)
315{
316 sigset_t s;
317 compat_sigset_t s32;
318 int ret;
319 mm_segment_t old_fs = get_fs();
320
321 set_fs(KERNEL_DS);
322 ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
323 set_fs(old_fs);
324 if (!ret) {
325 switch (_NSIG_WORDS) {
326 case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
327 case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
328 case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
329 case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
330 }
331 if (copy_to_user(set, &s32, sizeof(compat_sigset_t)))
332 return -EFAULT;
333 }
334 return ret;
335}
336
337asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
338 compat_siginfo_t __user *uinfo)
339{
340 siginfo_t info;
341 int ret;
342 mm_segment_t old_fs = get_fs();
343
344 if (copy_siginfo_from_user32(&info, uinfo))
345 return -EFAULT;
346 set_fs(KERNEL_DS);
347 ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
348 set_fs(old_fs);
349 return ret;
350}
351
352/* warning: next two assume little endian */ 181/* warning: next two assume little endian */
353asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, 182asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
354 u32 poslo, u32 poshi) 183 u32 poslo, u32 poshi)
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 0c44630d1789..b31bf97775fc 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -49,10 +49,6 @@
49 49
50/* Asm macros */ 50/* Asm macros */
51 51
52#define ACPI_ASM_MACROS
53#define BREAKPOINT3
54#define ACPI_DISABLE_IRQS() local_irq_disable()
55#define ACPI_ENABLE_IRQS() local_irq_enable()
56#define ACPI_FLUSH_CPU_CACHE() wbinvd() 52#define ACPI_FLUSH_CPU_CACHE() wbinvd()
57 53
58int __acpi_acquire_global_lock(unsigned int *lock); 54int __acpi_acquire_global_lock(unsigned int *lock);
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9cd8fd..a54ee1d054d9 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
81 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; 81 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
82} 82}
83 83
84static inline u16 amd_get_node_id(struct pci_dev *pdev)
85{
86 struct pci_dev *misc;
87 int i;
88
89 for (i = 0; i != amd_nb_num(); i++) {
90 misc = node_to_amd_nb(i)->misc;
91
92 if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
93 PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
94 return i;
95 }
96
97 WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev));
98 return 0;
99}
100
84#else 101#else
85 102
86#define amd_nb_num(x) 0 103#define amd_nb_num(x) 0
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
new file mode 100644
index 000000000000..5b5e9cb774b5
--- /dev/null
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -0,0 +1,38 @@
1#ifndef _ASM_X86_BOOTPARAM_UTILS_H
2#define _ASM_X86_BOOTPARAM_UTILS_H
3
4#include <asm/bootparam.h>
5
6/*
7 * This file is included from multiple environments. Do not
8 * add completing #includes to make it standalone.
9 */
10
11/*
12 * Deal with bootloaders which fail to initialize unknown fields in
 13 * boot_params to zero. The fields in this list are taken from
14 * analysis of kexec-tools; if other broken bootloaders initialize a
15 * different set of fields we will need to figure out how to disambiguate.
16 *
17 */
18static void sanitize_boot_params(struct boot_params *boot_params)
19{
20 if (boot_params->sentinel) {
 21		/* fields in boot_params are not valid, clear them */
22 memset(&boot_params->olpc_ofw_header, 0,
23 (char *)&boot_params->alt_mem_k -
24 (char *)&boot_params->olpc_ofw_header);
25 memset(&boot_params->kbd_status, 0,
26 (char *)&boot_params->hdr -
27 (char *)&boot_params->kbd_status);
28 memset(&boot_params->_pad7[0], 0,
29 (char *)&boot_params->edd_mbr_sig_buffer[0] -
30 (char *)&boot_params->_pad7[0]);
31 memset(&boot_params->_pad8[0], 0,
32 (char *)&boot_params->eddbuf[0] -
33 (char *)&boot_params->_pad8[0]);
34 memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
35 }
36}
37
38#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
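The new header is meant to be consumed by the decompressor and the early 32/64-bit startup code touched elsewhere in this series. A hypothetical call-site sketch, only to show where in the flow the scrubbing has to happen; the function name below is illustrative and not taken from the patch:

	#include <asm/bootparam.h>
	#include <asm/bootparam_utils.h>

	/* Hypothetical early-boot hook: must run before any of the
	 * possibly-uninitialized boot_params fields are consumed. */
	static void __init early_scrub_boot_params(struct boot_params *bp)
	{
		sanitize_boot_params(bp);
	}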
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 2d9075e863a0..93fe929d1cee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
167#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ 167#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
168#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ 168#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
169#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ 169#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
170#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
170 171
171/* 172/*
172 * Auxiliary flags: Linux defined - For features scattered in various 173 * Auxiliary flags: Linux defined - For features scattered in various
@@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32];
309#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 310#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
310#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) 311#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
311#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) 312#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
313#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
312#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) 314#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
313#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) 315#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
314#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) 316#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 6e8fdf5ad113..28677c55113f 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -94,6 +94,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
94#endif /* CONFIG_X86_32 */ 94#endif /* CONFIG_X86_32 */
95 95
96extern int add_efi_memmap; 96extern int add_efi_memmap;
97extern unsigned long x86_efi_facility;
97extern void efi_set_executable(efi_memory_desc_t *md, bool executable); 98extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
98extern int efi_memblock_x86_reserve_range(void); 99extern int efi_memblock_x86_reserve_range(void);
99extern void efi_call_phys_prelog(void); 100extern void efi_call_phys_prelog(void);
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 41ab26ea6564..e25cc33ec54d 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -26,9 +26,10 @@
26#ifdef CONFIG_X86_64 26#ifdef CONFIG_X86_64
27# include <asm/sigcontext32.h> 27# include <asm/sigcontext32.h>
28# include <asm/user32.h> 28# include <asm/user32.h>
29int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 29struct ksignal;
30int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
30 compat_sigset_t *set, struct pt_regs *regs); 31 compat_sigset_t *set, struct pt_regs *regs);
31int ia32_setup_frame(int sig, struct k_sigaction *ka, 32int ia32_setup_frame(int sig, struct ksignal *ksig,
32 compat_sigset_t *set, struct pt_regs *regs); 33 compat_sigset_t *set, struct pt_regs *regs);
33#else 34#else
34# define user_i387_ia32_struct user_i387_struct 35# define user_i387_ia32_struct user_i387_struct
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b522d377..86cb51e1ca96 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
44 44
45#ifdef CONFIG_DYNAMIC_FTRACE 45#ifdef CONFIG_DYNAMIC_FTRACE
46#define ARCH_SUPPORTS_FTRACE_OPS 1 46#define ARCH_SUPPORTS_FTRACE_OPS 1
47#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
48#endif 47#endif
49 48
50#ifndef __ASSEMBLY__ 49#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 434e2106cc87..b18df579c0e9 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
80extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); 80extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
81 81
82#ifdef CONFIG_PCI_MSI 82#ifdef CONFIG_PCI_MSI
83extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); 83extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
84#else 84#else
85static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 85static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
86{ 86{
87 return -EINVAL; 87 return -EINVAL;
88} 88}
@@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
111static inline int hpet_enable(void) { return 0; } 111static inline int hpet_enable(void) { return 0; }
112static inline int is_hpet_enabled(void) { return 0; } 112static inline int is_hpet_enabled(void) { return 0; }
113#define hpet_readl(a) 0 113#define hpet_readl(a) 0
114#define default_setup_hpet_msi NULL
114 115
115#endif 116#endif
116#endif /* _ASM_X86_HPET_H */ 117#endif /* _ASM_X86_HPET_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6ed2be7..10a78c3d3d5a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
101 irq_attr->polarity = polarity; 101 irq_attr->polarity = polarity;
102} 102}
103 103
104/* Intel specific interrupt remapping information */
104struct irq_2_iommu { 105struct irq_2_iommu {
105 struct intel_iommu *iommu; 106 struct intel_iommu *iommu;
106 u16 irte_index; 107 u16 irte_index;
@@ -108,6 +109,12 @@ struct irq_2_iommu {
108 u8 irte_mask; 109 u8 irte_mask;
109}; 110};
110 111
112/* AMD specific interrupt remapping information */
113struct irq_2_irte {
114 u16 devid; /* Device ID for IRTE table */
115 u16 index; /* Index into IRTE table*/
116};
117
111/* 118/*
112 * This is performance-critical, we want to do it O(1) 119 * This is performance-critical, we want to do it O(1)
113 * 120 *
@@ -120,7 +127,11 @@ struct irq_cfg {
120 u8 vector; 127 u8 vector;
121 u8 move_in_progress : 1; 128 u8 move_in_progress : 1;
122#ifdef CONFIG_IRQ_REMAP 129#ifdef CONFIG_IRQ_REMAP
123 struct irq_2_iommu irq_2_iommu; 130 u8 remapped : 1;
131 union {
132 struct irq_2_iommu irq_2_iommu;
133 struct irq_2_irte irq_2_irte;
134 };
124#endif 135#endif
125}; 136};
126 137
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index b518c7509933..86095ed14135 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -25,6 +25,7 @@
25 25
26extern void init_hypervisor(struct cpuinfo_x86 *c); 26extern void init_hypervisor(struct cpuinfo_x86 *c);
27extern void init_hypervisor_platform(void); 27extern void init_hypervisor_platform(void);
28extern bool hypervisor_x2apic_available(void);
28 29
29/* 30/*
30 * x86 hypervisor information 31 * x86 hypervisor information
@@ -41,6 +42,9 @@ struct hypervisor_x86 {
41 42
42 /* Platform setup (run once per boot) */ 43 /* Platform setup (run once per boot) */
43 void (*init_platform)(void); 44 void (*init_platform)(void);
45
46 /* X2APIC detection (run once per boot) */
47 bool (*x2apic_available)(void);
44}; 48};
45 49
46extern const struct hypervisor_x86 *x86_hyper; 50extern const struct hypervisor_x86 *x86_hyper;
@@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
51extern const struct hypervisor_x86 x86_hyper_xen_hvm; 55extern const struct hypervisor_x86 x86_hyper_xen_hvm;
52extern const struct hypervisor_x86 x86_hyper_kvm; 56extern const struct hypervisor_x86 x86_hyper_kvm;
53 57
54static inline bool hypervisor_x2apic_available(void)
55{
56 if (kvm_para_available())
57 return true;
58 if (xen_x2apic_para_available())
59 return true;
60 return false;
61}
62
63#endif 58#endif
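With the inline helper removed, x2APIC availability is reported through the new per-hypervisor ->x2apic_available() callback instead of hard-coded KVM/Xen checks. A minimal sketch of the expected out-of-line replacement, assuming it sits with the other x86_hyper glue (e.g. arch/x86/kernel/cpu/hypervisor.c); placement and wording here are assumptions, not part of this hunk:

	bool hypervisor_x2apic_available(void)
	{
		return x86_hyper && x86_hyper->x2apic_available &&
		       x86_hyper->x2apic_available();
	}

Each guest driver (x86_hyper_kvm, x86_hyper_xen_hvm, ...) would then fill .x2apic_available with its own probe, e.g. kvm_para_available() on the KVM side.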
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 4c6da2e4bb1d..d0e8e0141041 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -13,21 +13,6 @@
13#include <asm/sigcontext32.h> 13#include <asm/sigcontext32.h>
14 14
15/* signal.h */ 15/* signal.h */
16struct sigaction32 {
17 unsigned int sa_handler; /* Really a pointer, but need to deal
18 with 32 bits */
19 unsigned int sa_flags;
20 unsigned int sa_restorer; /* Another 32 bit pointer */
21 compat_sigset_t sa_mask; /* A 32 bit mask */
22};
23
24struct old_sigaction32 {
25 unsigned int sa_handler; /* Really a pointer, but need to deal
26 with 32 bits */
27 compat_old_sigset_t sa_mask; /* A 32 bit mask */
28 unsigned int sa_flags;
29 unsigned int sa_restorer; /* Another 32 bit pointer */
30};
31 16
32struct ucontext_ia32 { 17struct ucontext_ia32 {
33 unsigned int uc_flags; 18 unsigned int uc_flags;
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index adcc0ae73d09..223042086f4e 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -1,20 +1,14 @@
1#ifndef _ASM_X86_INIT_32_H 1#ifndef _ASM_X86_INIT_H
2#define _ASM_X86_INIT_32_H 2#define _ASM_X86_INIT_H
3 3
4#ifdef CONFIG_X86_32 4struct x86_mapping_info {
5extern void __init early_ioremap_page_table_range_init(void); 5 void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
6#endif 6 void *context; /* context for alloc_pgt_page */
7 unsigned long pmd_flag; /* page flag for PMD entry */
8 bool kernel_mapping; /* kernel mapping or ident mapping */
9};
7 10
8extern void __init zone_sizes_init(void); 11int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
12 unsigned long addr, unsigned long end);
9 13
10extern unsigned long __init 14#endif /* _ASM_X86_INIT_H */
11kernel_physical_mapping_init(unsigned long start,
12 unsigned long end,
13 unsigned long page_size_mask);
14
15
16extern unsigned long __initdata pgt_buf_start;
17extern unsigned long __meminitdata pgt_buf_end;
18extern unsigned long __meminitdata pgt_buf_top;
19
20#endif /* _ASM_X86_INIT_32_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 73d8c5398ea9..459e50a424d1 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -144,11 +144,24 @@ extern int timer_through_8259;
144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) 144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
145 145
146struct io_apic_irq_attr; 146struct io_apic_irq_attr;
147struct irq_cfg;
147extern int io_apic_set_pci_routing(struct device *dev, int irq, 148extern int io_apic_set_pci_routing(struct device *dev, int irq,
148 struct io_apic_irq_attr *irq_attr); 149 struct io_apic_irq_attr *irq_attr);
149void setup_IO_APIC_irq_extra(u32 gsi); 150void setup_IO_APIC_irq_extra(u32 gsi);
150extern void ioapic_insert_resources(void); 151extern void ioapic_insert_resources(void);
151 152
153extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
154 unsigned int, int,
155 struct io_apic_irq_attr *);
156extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
157 unsigned int, int,
158 struct io_apic_irq_attr *);
159extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
160
161extern void native_compose_msi_msg(struct pci_dev *pdev,
162 unsigned int irq, unsigned int dest,
163 struct msi_msg *msg, u8 hpet_id);
164extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
152int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); 165int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
153 166
154extern int save_ioapic_entries(void); 167extern int save_ioapic_entries(void);
@@ -179,6 +192,12 @@ extern void __init native_io_apic_init_mappings(void);
179extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); 192extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
180extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); 193extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
181extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); 194extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
195extern void native_disable_io_apic(void);
196extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
197extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
198extern int native_ioapic_set_affinity(struct irq_data *,
199 const struct cpumask *,
200 bool);
182 201
183static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 202static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
184{ 203{
@@ -193,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
193{ 212{
194 x86_io_apic_ops.modify(apic, reg, value); 213 x86_io_apic_ops.modify(apic, reg, value);
195} 214}
215
216extern void io_apic_eoi(unsigned int apic, unsigned int vector);
217
196#else /* !CONFIG_X86_IO_APIC */ 218#else /* !CONFIG_X86_IO_APIC */
197 219
198#define io_apic_assign_pci_irqs 0 220#define io_apic_assign_pci_irqs 0
@@ -223,6 +245,12 @@ static inline void disable_ioapic_support(void) { }
223#define native_io_apic_read NULL 245#define native_io_apic_read NULL
224#define native_io_apic_write NULL 246#define native_io_apic_write NULL
225#define native_io_apic_modify NULL 247#define native_io_apic_modify NULL
248#define native_disable_io_apic NULL
249#define native_io_apic_print_entries NULL
250#define native_ioapic_set_affinity NULL
251#define native_setup_ioapic_entry NULL
252#define native_compose_msi_msg NULL
253#define native_eoi_ioapic_pin NULL
226#endif 254#endif
227 255
228#endif /* _ASM_X86_IO_APIC_H */ 256#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f14..95fd3527f632 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,8 +26,6 @@
26 26
27#ifdef CONFIG_IRQ_REMAP 27#ifdef CONFIG_IRQ_REMAP
28 28
29extern int irq_remapping_enabled;
30
31extern void setup_irq_remapping_ops(void); 29extern void setup_irq_remapping_ops(void);
32extern int irq_remapping_supported(void); 30extern int irq_remapping_supported(void);
33extern int irq_remapping_prepare(void); 31extern int irq_remapping_prepare(void);
@@ -40,21 +38,19 @@ extern int setup_ioapic_remapped_entry(int irq,
40 unsigned int destination, 38 unsigned int destination,
41 int vector, 39 int vector,
42 struct io_apic_irq_attr *attr); 40 struct io_apic_irq_attr *attr);
43extern int set_remapped_irq_affinity(struct irq_data *data,
44 const struct cpumask *mask,
45 bool force);
46extern void free_remapped_irq(int irq); 41extern void free_remapped_irq(int irq);
47extern void compose_remapped_msi_msg(struct pci_dev *pdev, 42extern void compose_remapped_msi_msg(struct pci_dev *pdev,
48 unsigned int irq, unsigned int dest, 43 unsigned int irq, unsigned int dest,
49 struct msi_msg *msg, u8 hpet_id); 44 struct msi_msg *msg, u8 hpet_id);
50extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
51extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
52 int index, int sub_handle);
53extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); 45extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
46extern void panic_if_irq_remap(const char *msg);
47extern bool setup_remapped_irq(int irq,
48 struct irq_cfg *cfg,
49 struct irq_chip *chip);
54 50
55#else /* CONFIG_IRQ_REMAP */ 51void irq_remap_modify_chip_defaults(struct irq_chip *chip);
56 52
57#define irq_remapping_enabled 0 53#else /* CONFIG_IRQ_REMAP */
58 54
59static inline void setup_irq_remapping_ops(void) { } 55static inline void setup_irq_remapping_ops(void) { }
60static inline int irq_remapping_supported(void) { return 0; } 56static inline int irq_remapping_supported(void) { return 0; }
@@ -71,30 +67,30 @@ static inline int setup_ioapic_remapped_entry(int irq,
71{ 67{
72 return -ENODEV; 68 return -ENODEV;
73} 69}
74static inline int set_remapped_irq_affinity(struct irq_data *data,
75 const struct cpumask *mask,
76 bool force)
77{
78 return 0;
79}
80static inline void free_remapped_irq(int irq) { } 70static inline void free_remapped_irq(int irq) { }
81static inline void compose_remapped_msi_msg(struct pci_dev *pdev, 71static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
82 unsigned int irq, unsigned int dest, 72 unsigned int irq, unsigned int dest,
83 struct msi_msg *msg, u8 hpet_id) 73 struct msi_msg *msg, u8 hpet_id)
84{ 74{
85} 75}
86static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) 76static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
87{ 77{
88 return -ENODEV; 78 return -ENODEV;
89} 79}
90static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, 80
91 int index, int sub_handle) 81static inline void panic_if_irq_remap(const char *msg)
82{
83}
84
85static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
92{ 86{
93 return -ENODEV;
94} 87}
95static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) 88
89static inline bool setup_remapped_irq(int irq,
90 struct irq_cfg *cfg,
91 struct irq_chip *chip)
96{ 92{
97 return -ENODEV; 93 return false;
98} 94}
99#endif /* CONFIG_IRQ_REMAP */ 95#endif /* CONFIG_IRQ_REMAP */
100 96
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 1508e518c7e3..aac5fa62a86c 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,8 +109,8 @@
109 109
110#define UV_BAU_MESSAGE 0xf5 110#define UV_BAU_MESSAGE 0xf5
111 111
112/* Xen vector callback to receive events in a HVM domain */ 112/* Vector on which hypervisor callbacks will be delivered */
113#define XEN_HVM_EVTCHN_CALLBACK 0xf3 113#define HYPERVISOR_CALLBACK_VECTOR 0xf3
114 114
115/* 115/*
116 * Local APIC timer IRQ vector is on a different priority level, 116 * Local APIC timer IRQ vector is on a different priority level,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 6080d2694bad..17483a492f18 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -48,11 +48,11 @@
48# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) 48# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
49#else 49#else
50/* Maximum physical address we can use pages from */ 50/* Maximum physical address we can use pages from */
51# define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) 51# define KEXEC_SOURCE_MEMORY_LIMIT (MAXMEM-1)
52/* Maximum address we can reach in physical address mode */ 52/* Maximum address we can reach in physical address mode */
53# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) 53# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1)
54/* Maximum address we can use for the control pages */ 54/* Maximum address we can use for the control pages */
55# define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) 55# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
56 56
57/* Allocate one page for the pdp and the second for the code */ 57/* Allocate one page for the pdp and the second for the code */
58# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) 58# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dc87b65e9c3a..635a74d22409 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,10 +33,10 @@
33 33
34#define KVM_MAX_VCPUS 254 34#define KVM_MAX_VCPUS 254
35#define KVM_SOFT_MAX_VCPUS 160 35#define KVM_SOFT_MAX_VCPUS 160
36#define KVM_MEMORY_SLOTS 32 36#define KVM_USER_MEM_SLOTS 125
37/* memory slots that does not exposed to userspace */ 37/* memory slots that are not exposed to userspace */
38#define KVM_PRIVATE_MEM_SLOTS 4 38#define KVM_PRIVATE_MEM_SLOTS 3
39#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 39#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
40 40
41#define KVM_MMIO_SIZE 16 41#define KVM_MMIO_SIZE 16
42 42
@@ -219,11 +219,6 @@ struct kvm_mmu_page {
219 u64 *spt; 219 u64 *spt;
220 /* hold the gfn of each spte inside spt */ 220 /* hold the gfn of each spte inside spt */
221 gfn_t *gfns; 221 gfn_t *gfns;
222 /*
223 * One bit set per slot which has memory
224 * in this shadow page.
225 */
226 DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM);
227 bool unsync; 222 bool unsync;
228 int root_count; /* Currently serving as active root */ 223 int root_count; /* Currently serving as active root */
229 unsigned int unsync_children; 224 unsigned int unsync_children;
@@ -502,6 +497,13 @@ struct kvm_vcpu_arch {
502 u64 msr_val; 497 u64 msr_val;
503 struct gfn_to_hva_cache data; 498 struct gfn_to_hva_cache data;
504 } pv_eoi; 499 } pv_eoi;
500
501 /*
 502	 * Indicates whether the access faulted on its page table in the
 503	 * guest; set when fixing up a page fault and used to detect
 504	 * unhandleable instructions.
505 */
506 bool write_fault_to_shadow_pgtable;
505}; 507};
506 508
507struct kvm_lpage_info { 509struct kvm_lpage_info {
@@ -697,6 +699,11 @@ struct kvm_x86_ops {
697 void (*enable_nmi_window)(struct kvm_vcpu *vcpu); 699 void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
698 void (*enable_irq_window)(struct kvm_vcpu *vcpu); 700 void (*enable_irq_window)(struct kvm_vcpu *vcpu);
699 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); 701 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
702 int (*vm_has_apicv)(struct kvm *kvm);
703 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
704 void (*hwapic_isr_update)(struct kvm *kvm, int isr);
705 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
706 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
700 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 707 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
701 int (*get_tdp_level)(void); 708 int (*get_tdp_level)(void);
702 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 709 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -991,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva);
991int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); 998int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
992void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); 999void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
993int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 1000int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
1001int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
994int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 1002int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
995int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); 1003int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
996int kvm_cpu_get_interrupt(struct kvm_vcpu *v); 1004int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed1f16187be..695399f2d5eb 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -27,7 +27,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
27 * 27 *
28 * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. 28 * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
29 * The hypercall number should be placed in rax and the return value will be 29 * The hypercall number should be placed in rax and the return value will be
30 * placed in rax. No other registers will be clobbered unless explicited 30 * placed in rax. No other registers will be clobbered unless explicitly
31 * noted by the particular hypercall. 31 * noted by the particular hypercall.
32 */ 32 */
33 33
@@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
85 return ret; 85 return ret;
86} 86}
87 87
88static inline int kvm_para_available(void) 88static inline bool kvm_para_available(void)
89{ 89{
90 unsigned int eax, ebx, ecx, edx; 90 unsigned int eax, ebx, ecx, edx;
91 char signature[13]; 91 char signature[13];
92 92
93 if (boot_cpu_data.cpuid_level < 0) 93 if (boot_cpu_data.cpuid_level < 0)
94 return 0; /* So we don't blow up on old processors */ 94 return false; /* So we don't blow up on old processors */
95 95
96 if (cpu_has_hypervisor) { 96 if (cpu_has_hypervisor) {
97 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); 97 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
@@ -101,10 +101,10 @@ static inline int kvm_para_available(void)
101 signature[12] = 0; 101 signature[12] = 0;
102 102
103 if (strcmp(signature, "KVMKVMKVM") == 0) 103 if (strcmp(signature, "KVMKVMKVM") == 0)
104 return 1; 104 return true;
105 } 105 }
106 106
107 return 0; 107 return false;
108} 108}
109 109
110static inline unsigned int kvm_arch_para_features(void) 110static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 48142971b25d..79327e9483a3 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -27,20 +27,20 @@
27#define __asmlinkage_protect0(ret) \ 27#define __asmlinkage_protect0(ret) \
28 __asmlinkage_protect_n(ret) 28 __asmlinkage_protect_n(ret)
29#define __asmlinkage_protect1(ret, arg1) \ 29#define __asmlinkage_protect1(ret, arg1) \
30 __asmlinkage_protect_n(ret, "g" (arg1)) 30 __asmlinkage_protect_n(ret, "m" (arg1))
31#define __asmlinkage_protect2(ret, arg1, arg2) \ 31#define __asmlinkage_protect2(ret, arg1, arg2) \
32 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) 32 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
33#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ 33#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
34 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) 34 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
35#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ 35#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
36 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 36 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
37 "g" (arg4)) 37 "m" (arg4))
38#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ 38#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
39 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 39 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
40 "g" (arg4), "g" (arg5)) 40 "m" (arg4), "m" (arg5))
41#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ 41#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
42 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 42 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
43 "g" (arg4), "g" (arg5), "g" (arg6)) 43 "m" (arg4), "m" (arg5), "m" (arg6))
44 44
45#endif /* CONFIG_X86_32 */ 45#endif /* CONFIG_X86_32 */
46 46
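The "g" to "m" constraint switch above matters because these macros exist only to convince the compiler that an asmlinkage callee may read its arguments from their stack slots: "g" lets GCC satisfy the operand with a register or an immediate, which does nothing to keep the stack slot alive, while "m" forces the argument's memory location itself to be treated as used. For reference, a sketch of the helper the per-arity macros feed into, assumed from the same header (it is not shown in the hunk):

	#define __asmlinkage_protect_n(ret, args...) \
		__asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args)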
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ecdfee60ee4a..f4076af1f4ed 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -3,6 +3,90 @@
3 3
4#include <uapi/asm/mce.h> 4#include <uapi/asm/mce.h>
5 5
6/*
7 * Machine Check support for x86
8 */
9
10/* MCG_CAP register defines */
11#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
12#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
13#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
14#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
16#define MCG_EXT_CNT_SHIFT 16
17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
18#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
19
20/* MCG_STATUS register defines */
21#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
22#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
23#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
24
25/* MCi_STATUS register defines */
26#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
27#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
28#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
29#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
30#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
31#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
32#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
33#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
34#define MCI_STATUS_AR (1ULL<<55) /* Action required */
35#define MCACOD 0xffff /* MCA Error Code */
36
37/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
38#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
39#define MCACOD_SCRUBMSK 0xfff0
40#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
41#define MCACOD_DATA 0x0134 /* Data Load */
42#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
43
44/* MCi_MISC register defines */
45#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
46#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
47#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
48#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
49#define MCI_MISC_ADDR_PHYS 2 /* physical address */
50#define MCI_MISC_ADDR_MEM 3 /* memory address */
51#define MCI_MISC_ADDR_GENERIC 7 /* generic */
52
53/* CTL2 register defines */
54#define MCI_CTL2_CMCI_EN (1ULL << 30)
55#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
56
57#define MCJ_CTX_MASK 3
58#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
59#define MCJ_CTX_RANDOM 0 /* inject context: random */
60#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
61#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
62#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
63#define MCJ_EXCEPTION 0x8 /* raise as exception */
64#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
65
66#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
67
68/* Software defined banks */
69#define MCE_EXTENDED_BANK 128
70#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
71#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
72
73#define MCE_LOG_LEN 32
74#define MCE_LOG_SIGNATURE "MACHINECHECK"
75
76/*
77 * This structure contains all data related to the MCE log. Also
78 * carries a signature to make it easier to find from external
79 * debugging tools. Each entry is only valid when its finished flag
80 * is set.
81 */
82struct mce_log {
83 char signature[12]; /* "MACHINECHECK" */
84 unsigned len; /* = MCE_LOG_LEN */
85 unsigned next;
86 unsigned flags;
87 unsigned recordlen; /* length of struct mce */
88 struct mce entry[MCE_LOG_LEN];
89};
6 90
7struct mca_config { 91struct mca_config {
8 bool dont_log_ce; 92 bool dont_log_ce;
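
Not part of the patch: a small stand-alone sketch of how the MCG_CAP and MCi_STATUS masks moved into this header are typically decoded. The register values below are invented for illustration only.

/* Illustrative decode of MCG_CAP / MCi_STATUS using the masks above;
 * the MSR values are made up. */
#include <stdio.h>
#include <stdint.h>

#define MCG_BANKCNT_MASK   0xff
#define MCG_EXT_CNT_MASK   0xff0000
#define MCG_EXT_CNT_SHIFT  16
#define MCG_EXT_CNT(c)     (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
#define MCG_SER_P          (1ULL << 24)

#define MCI_STATUS_VAL     (1ULL << 63)
#define MCI_STATUS_UC      (1ULL << 61)
#define MCACOD             0xffff
#define MCACOD_SCRUB       0x00C0
#define MCACOD_SCRUBMSK    0xfff0

int main(void)
{
	uint64_t mcg_cap = 0x0000000001090c09ULL;	/* hypothetical MSR value */
	uint64_t status  = MCI_STATUS_VAL | MCI_STATUS_UC | 0x00c5;

	printf("banks: %llu, ext regs: %llu, SER: %s\n",
	       (unsigned long long)(mcg_cap & MCG_BANKCNT_MASK),
	       (unsigned long long)MCG_EXT_CNT(mcg_cap),
	       (mcg_cap & MCG_SER_P) ? "yes" : "no");

	if ((status & MCI_STATUS_VAL) &&
	    (status & MCACOD_SCRUBMSK) == MCACOD_SCRUB)
		printf("memory scrubbing error, code 0x%llx\n",
		       (unsigned long long)(status & MCACOD));
	return 0;
}
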
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 43d921b4752c..6825e2efd1b4 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -57,4 +57,18 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
57static inline void __exit exit_amd_microcode(void) {} 57static inline void __exit exit_amd_microcode(void) {}
58#endif 58#endif
59 59
60#ifdef CONFIG_MICROCODE_EARLY
61#define MAX_UCODE_COUNT 128
62extern void __init load_ucode_bsp(void);
63extern __init void load_ucode_ap(void);
64extern int __init save_microcode_in_initrd(void);
65#else
66static inline void __init load_ucode_bsp(void) {}
67static inline __init void load_ucode_ap(void) {}
68static inline int __init save_microcode_in_initrd(void)
69{
70 return 0;
71}
72#endif
73
60#endif /* _ASM_X86_MICROCODE_H */ 74#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
new file mode 100644
index 000000000000..5356f927d411
--- /dev/null
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -0,0 +1,85 @@
1#ifndef _ASM_X86_MICROCODE_INTEL_H
2#define _ASM_X86_MICROCODE_INTEL_H
3
4#include <asm/microcode.h>
5
6struct microcode_header_intel {
7 unsigned int hdrver;
8 unsigned int rev;
9 unsigned int date;
10 unsigned int sig;
11 unsigned int cksum;
12 unsigned int ldrver;
13 unsigned int pf;
14 unsigned int datasize;
15 unsigned int totalsize;
16 unsigned int reserved[3];
17};
18
19struct microcode_intel {
20 struct microcode_header_intel hdr;
21 unsigned int bits[0];
22};
23
24/* microcode format is extended from prescott processors */
25struct extended_signature {
26 unsigned int sig;
27 unsigned int pf;
28 unsigned int cksum;
29};
30
31struct extended_sigtable {
32 unsigned int count;
33 unsigned int cksum;
34 unsigned int reserved[3];
35 struct extended_signature sigs[0];
36};
37
38#define DEFAULT_UCODE_DATASIZE (2000)
39#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
40#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
41#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
42#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
43#define DWSIZE (sizeof(u32))
44
45#define get_totalsize(mc) \
46 (((struct microcode_intel *)mc)->hdr.totalsize ? \
47 ((struct microcode_intel *)mc)->hdr.totalsize : \
48 DEFAULT_UCODE_TOTALSIZE)
49
50#define get_datasize(mc) \
51 (((struct microcode_intel *)mc)->hdr.datasize ? \
52 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
53
54#define sigmatch(s1, s2, p1, p2) \
55 (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
56
57#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
58
59extern int
60get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
61extern int microcode_sanity_check(void *mc, int print_err);
62extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
63extern int
64update_match_revision(struct microcode_header_intel *mc_header, int rev);
65
66#ifdef CONFIG_MICROCODE_INTEL_EARLY
67extern void __init load_ucode_intel_bsp(void);
68extern void __cpuinit load_ucode_intel_ap(void);
69extern void show_ucode_info_early(void);
70#else
71static inline __init void load_ucode_intel_bsp(void) {}
72static inline __cpuinit void load_ucode_intel_ap(void) {}
73static inline void show_ucode_info_early(void) {}
74#endif
75
76#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
77extern int save_mc_for_early(u8 *mc);
78#else
79static inline int save_mc_for_early(u8 *mc)
80{
81 return 0;
82}
83#endif
84
85#endif /* _ASM_X86_MICROCODE_INTEL_H */
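
For illustration only (not part of the patch): the size and signature helpers in the new header fall back to pre-Prescott defaults when the header fields are zero. The sketch below mirrors those macros, slightly simplified to take the header pointer directly instead of a void * cast; the signature and platform-ID values are arbitrary.

/* Sketch of get_totalsize()/get_datasize()/sigmatch() behaviour;
 * the header contents are synthetic, not a real microcode image. */
#include <stdio.h>

struct microcode_header_intel {
	unsigned int hdrver, rev, date, sig, cksum, ldrver, pf;
	unsigned int datasize, totalsize, reserved[3];
};

#define DEFAULT_UCODE_DATASIZE	(2000)
#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel))
#define DEFAULT_UCODE_TOTALSIZE	(DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)

#define get_totalsize(mc) \
	((mc)->totalsize ? (mc)->totalsize : DEFAULT_UCODE_TOTALSIZE)
#define get_datasize(mc) \
	((mc)->datasize ? (mc)->datasize : DEFAULT_UCODE_DATASIZE)
#define sigmatch(s1, s2, p1, p2) \
	(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))

int main(void)
{
	struct microcode_header_intel hdr = { .sig = 0x000306a9, .pf = 0x12 };

	/* datasize/totalsize of 0 mean "use the default sizes" */
	printf("data %zu total %zu\n",
	       (size_t)get_datasize(&hdr), (size_t)get_totalsize(&hdr));

	/* CPU signature must match and the platform-ID masks must overlap */
	printf("match: %d\n", sigmatch(0x000306a9, hdr.sig, 0x10, hdr.pf));
	return 0;
}
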
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index eb05fb3b02fb..8a9b3e288cb4 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[];
14 14
15#include <asm/numaq.h> 15#include <asm/numaq.h>
16 16
17extern void resume_map_numa_kva(pgd_t *pgd);
18
19#else /* !CONFIG_NUMA */
20
21static inline void resume_map_numa_kva(pgd_t *pgd) {}
22
23#endif /* CONFIG_NUMA */ 17#endif /* CONFIG_NUMA */
24 18
25#ifdef CONFIG_DISCONTIGMEM 19#ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 79ce5685ab64..c2934be2446a 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -11,4 +11,8 @@ struct ms_hyperv_info {
11 11
12extern struct ms_hyperv_info ms_hyperv; 12extern struct ms_hyperv_info ms_hyperv;
13 13
14void hyperv_callback_vector(void);
15void hyperv_vector_handler(struct pt_regs *regs);
16void hv_register_vmbus_handler(int irq, irq_handler_t handler);
17
14#endif 18#endif
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index bcdff997668c..2f366d0ac6b4 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -4,7 +4,8 @@
4#define MWAIT_SUBSTATE_MASK 0xf 4#define MWAIT_SUBSTATE_MASK 0xf
5#define MWAIT_CSTATE_MASK 0xf 5#define MWAIT_CSTATE_MASK 0xf
6#define MWAIT_SUBSTATE_SIZE 4 6#define MWAIT_SUBSTATE_SIZE 4
7#define MWAIT_MAX_NUM_CSTATES 8 7#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
8#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
8 9
9#define CPUID_MWAIT_LEAF 5 10#define CPUID_MWAIT_LEAF 5
10#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 11#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
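
Purely for illustration (not part of the patch): the two new macros split an MWAIT hint into its C-state and sub-state nibbles. The hint value below is arbitrary.

/* Mechanical demonstration of the new MWAIT hint-decoding macros. */
#include <stdio.h>

#define MWAIT_SUBSTATE_MASK	0xf
#define MWAIT_CSTATE_MASK	0xf
#define MWAIT_SUBSTATE_SIZE	4
#define MWAIT_HINT2CSTATE(hint)	(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)

int main(void)
{
	unsigned int hint = 0x21;	/* arbitrary example hint */

	printf("hint 0x%02x -> cstate field %u, substate field %u\n",
	       hint, MWAIT_HINT2CSTATE(hint), MWAIT_HINT2SUBSTATE(hint));
	return 0;
}
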
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 49119fcea2dc..1b99ee5c9f00 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -54,13 +54,11 @@ static inline int numa_cpu_node(int cpu)
54 54
55#ifdef CONFIG_X86_32 55#ifdef CONFIG_X86_32
56# include <asm/numa_32.h> 56# include <asm/numa_32.h>
57#else
58# include <asm/numa_64.h>
59#endif 57#endif
60 58
61#ifdef CONFIG_NUMA 59#ifdef CONFIG_NUMA
62extern void __cpuinit numa_set_node(int cpu, int node); 60extern void numa_set_node(int cpu, int node);
63extern void __cpuinit numa_clear_node(int cpu); 61extern void numa_clear_node(int cpu);
64extern void __init init_cpu_to_node(void); 62extern void __init init_cpu_to_node(void);
65extern void __cpuinit numa_add_cpu(int cpu); 63extern void __cpuinit numa_add_cpu(int cpu);
66extern void __cpuinit numa_remove_cpu(int cpu); 64extern void __cpuinit numa_remove_cpu(int cpu);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
deleted file mode 100644
index 0c05f7ae46e8..000000000000
--- a/arch/x86/include/asm/numa_64.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_X86_NUMA_64_H
2#define _ASM_X86_NUMA_64_H
3
4extern unsigned long numa_free_all_bootmem(void);
5
6#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 8ca82839288a..c87892442e53 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -17,6 +17,10 @@
17 17
18struct page; 18struct page;
19 19
20#include <linux/range.h>
21extern struct range pfn_mapped[];
22extern int nr_pfn_mapped;
23
20static inline void clear_user_page(void *page, unsigned long vaddr, 24static inline void clear_user_page(void *page, unsigned long vaddr,
21 struct page *pg) 25 struct page *pg)
22{ 26{
@@ -44,7 +48,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
44 * case properly. Once all supported versions of gcc understand it, we can 48 * case properly. Once all supported versions of gcc understand it, we can
45 * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) 49 * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated)
46 */ 50 */
47#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) 51#define __pa_symbol(x) \
52 __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
48 53
49#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) 54#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
50 55
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index da4e762406f7..4d550d04b609 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long);
15#else 15#else
16#define __phys_addr(x) __phys_addr_nodebug(x) 16#define __phys_addr(x) __phys_addr_nodebug(x)
17#endif 17#endif
18#define __phys_addr_symbol(x) __phys_addr(x)
18#define __phys_reloc_hide(x) RELOC_HIDE((x), 0) 19#define __phys_reloc_hide(x) RELOC_HIDE((x), 0)
19 20
20#ifdef CONFIG_FLATMEM 21#ifdef CONFIG_FLATMEM
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 072694ed81a5..0f1ddee6a0ce 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -3,4 +3,40 @@
3 3
4#include <asm/page_64_types.h> 4#include <asm/page_64_types.h>
5 5
6#ifndef __ASSEMBLY__
7
8/* duplicated to the one in bootmem.h */
9extern unsigned long max_pfn;
10extern unsigned long phys_base;
11
12static inline unsigned long __phys_addr_nodebug(unsigned long x)
13{
14 unsigned long y = x - __START_KERNEL_map;
15
16 /* use the carry flag to determine if x was < __START_KERNEL_map */
17 x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
18
19 return x;
20}
21
22#ifdef CONFIG_DEBUG_VIRTUAL
23extern unsigned long __phys_addr(unsigned long);
24extern unsigned long __phys_addr_symbol(unsigned long);
25#else
26#define __phys_addr(x) __phys_addr_nodebug(x)
27#define __phys_addr_symbol(x) \
28 ((unsigned long)(x) - __START_KERNEL_map + phys_base)
29#endif
30
31#define __phys_reloc_hide(x) (x)
32
33#ifdef CONFIG_FLATMEM
34#define pfn_valid(pfn) ((pfn) < max_pfn)
35#endif
36
37void clear_page(void *page);
38void copy_page(void *to, void *from);
39
40#endif /* !__ASSEMBLY__ */
41
6#endif /* _ASM_X86_PAGE_64_H */ 42#endif /* _ASM_X86_PAGE_64_H */
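
As an aside (not part of the patch): the new __phys_addr_nodebug() avoids a branch on which mapping a virtual address came from by exploiting the wrap-around of the subtraction. The userspace sketch below reproduces that logic; the __START_KERNEL_map and direct-map constants are the usual x86_64 values of this era, and phys_base here is a hypothetical relocation offset.

/* Userspace model of the branch-free __phys_addr_nodebug() above. */
#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000UL
#define PAGE_OFFSET_DM   0xffff880000000000UL	/* direct-map base (assumed) */

static unsigned long phys_base = 0x1000000;	/* hypothetical relocation */

static unsigned long phys_addr_nodebug(unsigned long x)
{
	unsigned long y = x - START_KERNEL_MAP;

	/* y only stays below x when x was >= START_KERNEL_MAP */
	return y + ((x > y) ? phys_base : (START_KERNEL_MAP - PAGE_OFFSET_DM));
}

int main(void)
{
	unsigned long text = START_KERNEL_MAP + 0x12345;	/* kernel text */
	unsigned long dmap = PAGE_OFFSET_DM + 0x200000;		/* direct map */

	printf("text va -> pa 0x%lx\n", phys_addr_nodebug(text));	/* 0x1012345 */
	printf("dmap va -> pa 0x%lx\n", phys_addr_nodebug(dmap));	/* 0x200000  */
	return 0;
}
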
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 320f7bb95f76..8b491e66eaa8 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -50,26 +50,4 @@
50#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) 50#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
51#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) 51#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
52 52
53#ifndef __ASSEMBLY__
54void clear_page(void *page);
55void copy_page(void *to, void *from);
56
57/* duplicated to the one in bootmem.h */
58extern unsigned long max_pfn;
59extern unsigned long phys_base;
60
61extern unsigned long __phys_addr(unsigned long);
62#define __phys_reloc_hide(x) (x)
63
64#define vmemmap ((struct page *)VMEMMAP_START)
65
66extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
67extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
68
69#endif /* !__ASSEMBLY__ */
70
71#ifdef CONFIG_FLATMEM
72#define pfn_valid(pfn) ((pfn) < max_pfn)
73#endif
74
75#endif /* _ASM_X86_PAGE_64_DEFS_H */ 53#endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index e21fdd10479f..54c97879195e 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ static inline phys_addr_t get_max_mapped(void)
51 return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; 51 return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
52} 52}
53 53
54bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
55
54extern unsigned long init_memory_mapping(unsigned long start, 56extern unsigned long init_memory_mapping(unsigned long start,
55 unsigned long end); 57 unsigned long end);
56 58
diff --git a/arch/x86/include/asm/parport.h b/arch/x86/include/asm/parport.h
index 3c4ffeb467e9..0d2d3b29118f 100644
--- a/arch/x86/include/asm/parport.h
+++ b/arch/x86/include/asm/parport.h
@@ -1,8 +1,8 @@
1#ifndef _ASM_X86_PARPORT_H 1#ifndef _ASM_X86_PARPORT_H
2#define _ASM_X86_PARPORT_H 2#define _ASM_X86_PARPORT_H
3 3
4static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); 4static int parport_pc_find_isa_ports(int autoirq, int autodma);
5static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) 5static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
6{ 6{
7 return parport_pc_find_isa_ports(autoirq, autodma); 7 return parport_pc_find_isa_ports(autoirq, autodma);
8} 8}
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 9f437e97e9e8..d9e9e6c7ed32 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -124,9 +124,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
124#define arch_teardown_msi_irq x86_teardown_msi_irq 124#define arch_teardown_msi_irq x86_teardown_msi_irq
125#define arch_restore_msi_irqs x86_restore_msi_irqs 125#define arch_restore_msi_irqs x86_restore_msi_irqs
126/* implemented in arch/x86/kernel/apic/io_apic. */ 126/* implemented in arch/x86/kernel/apic/io_apic. */
127struct msi_desc;
127int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); 128int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
128void native_teardown_msi_irq(unsigned int irq); 129void native_teardown_msi_irq(unsigned int irq);
129void native_restore_msi_irqs(struct pci_dev *dev, int irq); 130void native_restore_msi_irqs(struct pci_dev *dev, int irq);
131int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
132 unsigned int irq_base, unsigned int irq_offset);
130/* default to the implementation in drivers/lib/msi.c */ 133/* default to the implementation in drivers/lib/msi.c */
131#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS 134#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
132#define HAVE_DEFAULT_MSI_RESTORE_IRQS 135#define HAVE_DEFAULT_MSI_RESTORE_IRQS
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 0126f104f0a5..fa1195dae425 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -139,11 +139,10 @@ struct pci_mmcfg_region {
139 139
140extern int __init pci_mmcfg_arch_init(void); 140extern int __init pci_mmcfg_arch_init(void);
141extern void __init pci_mmcfg_arch_free(void); 141extern void __init pci_mmcfg_arch_free(void);
142extern int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg); 142extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
143extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg); 143extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
144extern int __devinit pci_mmconfig_insert(struct device *dev, 144extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
145 u16 seg, u8 start, 145 phys_addr_t addr);
146 u8 end, phys_addr_t addr);
147extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end); 146extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
148extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); 147extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
149 148
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf1cfa7..57cb63402213 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,8 +29,13 @@
29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) 29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL 30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
31 31
32#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) 32#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
33#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) 33#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
34#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
35
36#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
37#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
38 (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
34 39
35#define AMD64_EVENTSEL_EVENT \ 40#define AMD64_EVENTSEL_EVENT \
36 (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) 41 (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -46,8 +51,12 @@
46#define AMD64_RAW_EVENT_MASK \ 51#define AMD64_RAW_EVENT_MASK \
47 (X86_RAW_EVENT_MASK | \ 52 (X86_RAW_EVENT_MASK | \
48 AMD64_EVENTSEL_EVENT) 53 AMD64_EVENTSEL_EVENT)
54#define AMD64_RAW_EVENT_MASK_NB \
55 (AMD64_EVENTSEL_EVENT | \
56 ARCH_PERFMON_EVENTSEL_UMASK)
49#define AMD64_NUM_COUNTERS 4 57#define AMD64_NUM_COUNTERS 4
50#define AMD64_NUM_COUNTERS_CORE 6 58#define AMD64_NUM_COUNTERS_CORE 6
59#define AMD64_NUM_COUNTERS_NB 4
51 60
52#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c 61#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
53#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 62#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
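
Illustration only (not part of the patch): a sketch of how the new AMD northbridge event-select bits can be composed with the existing event/umask fields. The event code and core number are arbitrary examples, and the interrupt-routing interpretation of the INT_CORE_* bits is an assumption stated for illustration.

/* Composing an NB event selector from the masks above. */
#include <stdio.h>
#include <stdint.h>

#define ARCH_PERFMON_EVENTSEL_EVENT	0x000000FFULL
#define ARCH_PERFMON_EVENTSEL_UMASK	0x0000FF00ULL
#define AMD64_EVENTSEL_EVENT	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
#define AMD64_EVENTSEL_INT_CORE_ENABLE		(1ULL << 36)
#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT	37
#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
	(0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
#define AMD64_RAW_EVENT_MASK_NB \
	(AMD64_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK)

int main(void)
{
	uint64_t config = 0xe0ULL | (0x07ULL << 8);	/* event code, unit mask */
	unsigned int core = 2;

	/* only the event code and unit mask survive for NB counters ... */
	config &= AMD64_RAW_EVENT_MASK_NB;
	/* ... then select a core for the counter interrupt (assumed meaning) */
	config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
	config |= ((uint64_t)core << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT) &
		  AMD64_EVENTSEL_INT_CORE_SEL_MASK;

	printf("NB event select: 0x%016llx\n", (unsigned long long)config);
	return 0;
}
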
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5199db2923d3..1e672234c4ff 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
142 return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; 142 return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
143} 143}
144 144
145static inline unsigned long pud_pfn(pud_t pud)
146{
147 return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
148}
149
145#define pte_page(pte) pfn_to_page(pte_pfn(pte)) 150#define pte_page(pte) pfn_to_page(pte_pfn(pte))
146 151
147static inline int pmd_large(pmd_t pte) 152static inline int pmd_large(pmd_t pte)
@@ -390,6 +395,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
390 395
391#ifndef __ASSEMBLY__ 396#ifndef __ASSEMBLY__
392#include <linux/mm_types.h> 397#include <linux/mm_types.h>
398#include <linux/log2.h>
393 399
394static inline int pte_none(pte_t pte) 400static inline int pte_none(pte_t pte)
395{ 401{
@@ -615,6 +621,8 @@ static inline int pgd_none(pgd_t pgd)
615#ifndef __ASSEMBLY__ 621#ifndef __ASSEMBLY__
616 622
617extern int direct_gbpages; 623extern int direct_gbpages;
624void init_mem_mapping(void);
625void early_alloc_pgt_buf(void);
618 626
619/* local pte updates need not use xchg for locking */ 627/* local pte updates need not use xchg for locking */
620static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) 628static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
@@ -781,6 +789,32 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
781 memcpy(dst, src, count * sizeof(pgd_t)); 789 memcpy(dst, src, count * sizeof(pgd_t));
782} 790}
783 791
792#define PTE_SHIFT ilog2(PTRS_PER_PTE)
793static inline int page_level_shift(enum pg_level level)
794{
795 return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
796}
797static inline unsigned long page_level_size(enum pg_level level)
798{
799 return 1UL << page_level_shift(level);
800}
801static inline unsigned long page_level_mask(enum pg_level level)
802{
803 return ~(page_level_size(level) - 1);
804}
805
806/*
807 * The x86 doesn't have any external MMU info: the kernel page
808 * tables contain all the necessary information.
809 */
810static inline void update_mmu_cache(struct vm_area_struct *vma,
811 unsigned long addr, pte_t *ptep)
812{
813}
814static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
815 unsigned long addr, pmd_t *pmd)
816{
817}
784 818
785#include <asm-generic/pgtable.h> 819#include <asm-generic/pgtable.h>
786#endif /* __ASSEMBLY__ */ 820#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8faa215a503e..9ee322103c6d 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,13 +66,6 @@ do { \
66 __flush_tlb_one((vaddr)); \ 66 __flush_tlb_one((vaddr)); \
67} while (0) 67} while (0)
68 68
69/*
70 * The i386 doesn't have any external MMU info: the kernel page
71 * tables contain all the necessary information.
72 */
73#define update_mmu_cache(vma, address, ptep) do { } while (0)
74#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
75
76#endif /* !__ASSEMBLY__ */ 69#endif /* !__ASSEMBLY__ */
77 70
78/* 71/*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9df82e..e22c1dbf7feb 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
142#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) 142#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
143#define pte_unmap(pte) ((void)(pte))/* NOP */ 143#define pte_unmap(pte) ((void)(pte))/* NOP */
144 144
145#define update_mmu_cache(vma, address, ptep) do { } while (0)
146#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
147
148/* Encode and de-code a swap entry */ 145/* Encode and de-code a swap entry */
149#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE 146#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
150#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) 147#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
@@ -183,6 +180,11 @@ extern void cleanup_highmap(void);
183 180
184#define __HAVE_ARCH_PTE_SAME 181#define __HAVE_ARCH_PTE_SAME
185 182
183#define vmemmap ((struct page *)VMEMMAP_START)
184
185extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
186extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
187
186#endif /* !__ASSEMBLY__ */ 188#endif /* !__ASSEMBLY__ */
187 189
188#endif /* _ASM_X86_PGTABLE_64_H */ 190#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 766ea16fbbbd..2d883440cb9a 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_PGTABLE_64_DEFS_H 1#ifndef _ASM_X86_PGTABLE_64_DEFS_H
2#define _ASM_X86_PGTABLE_64_DEFS_H 2#define _ASM_X86_PGTABLE_64_DEFS_H
3 3
4#include <asm/sparsemem.h>
5
4#ifndef __ASSEMBLY__ 6#ifndef __ASSEMBLY__
5#include <linux/types.h> 7#include <linux/types.h>
6 8
@@ -60,4 +62,6 @@ typedef struct { pteval_t pte; } pte_t;
60#define MODULES_END _AC(0xffffffffff000000, UL) 62#define MODULES_END _AC(0xffffffffff000000, UL)
61#define MODULES_LEN (MODULES_END - MODULES_VADDR) 63#define MODULES_LEN (MODULES_END - MODULES_VADDR)
62 64
65#define EARLY_DYNAMIC_PAGE_TABLES 64
66
63#endif /* _ASM_X86_PGTABLE_64_DEFS_H */ 67#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3c32db8c539d..567b5d0632b2 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -321,7 +321,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
321/* Install a pte for a particular vaddr in kernel space. */ 321/* Install a pte for a particular vaddr in kernel space. */
322void set_pte_vaddr(unsigned long vaddr, pte_t pte); 322void set_pte_vaddr(unsigned long vaddr, pte_t pte);
323 323
324extern void native_pagetable_reserve(u64 start, u64 end);
325#ifdef CONFIG_X86_32 324#ifdef CONFIG_X86_32
326extern void native_pagetable_init(void); 325extern void native_pagetable_init(void);
327#else 326#else
@@ -331,7 +330,7 @@ extern void native_pagetable_init(void);
331struct seq_file; 330struct seq_file;
332extern void arch_report_meminfo(struct seq_file *m); 331extern void arch_report_meminfo(struct seq_file *m);
333 332
334enum { 333enum pg_level {
335 PG_LEVEL_NONE, 334 PG_LEVEL_NONE,
336 PG_LEVEL_4K, 335 PG_LEVEL_4K,
337 PG_LEVEL_2M, 336 PG_LEVEL_2M,
@@ -352,6 +351,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
352 * as a pte too. 351 * as a pte too.
353 */ 352 */
354extern pte_t *lookup_address(unsigned long address, unsigned int *level); 353extern pte_t *lookup_address(unsigned long address, unsigned int *level);
354extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase);
355extern phys_addr_t slow_virt_to_phys(void *__address);
355 356
356#endif /* !__ASSEMBLY__ */ 357#endif /* !__ASSEMBLY__ */
357 358
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 888184b2fc85..3270116b1488 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -89,7 +89,6 @@ struct cpuinfo_x86 {
89 char wp_works_ok; /* It doesn't on 386's */ 89 char wp_works_ok; /* It doesn't on 386's */
90 90
91 /* Problems on some 486Dx4's and old 386's: */ 91 /* Problems on some 486Dx4's and old 386's: */
92 char hlt_works_ok;
93 char hard_math; 92 char hard_math;
94 char rfu; 93 char rfu;
95 char fdiv_bug; 94 char fdiv_bug;
@@ -165,15 +164,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
165 164
166extern const struct seq_operations cpuinfo_op; 165extern const struct seq_operations cpuinfo_op;
167 166
168static inline int hlt_works(int cpu)
169{
170#ifdef CONFIG_X86_32
171 return cpu_data(cpu).hlt_works_ok;
172#else
173 return 1;
174#endif
175}
176
177#define cache_line_size() (boot_cpu_data.x86_cache_alignment) 167#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
178 168
179extern void cpu_detect(struct cpuinfo_x86 *c); 169extern void cpu_detect(struct cpuinfo_x86 *c);
@@ -190,6 +180,14 @@ extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
190extern void detect_extended_topology(struct cpuinfo_x86 *c); 180extern void detect_extended_topology(struct cpuinfo_x86 *c);
191extern void detect_ht(struct cpuinfo_x86 *c); 181extern void detect_ht(struct cpuinfo_x86 *c);
192 182
183#ifdef CONFIG_X86_32
184extern int have_cpuid_p(void);
185#else
186static inline int have_cpuid_p(void)
187{
188 return 1;
189}
190#endif
193static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, 191static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
194 unsigned int *ecx, unsigned int *edx) 192 unsigned int *ecx, unsigned int *edx)
195{ 193{
@@ -725,12 +723,13 @@ extern unsigned long boot_option_idle_override;
725extern bool amd_e400_c1e_detected; 723extern bool amd_e400_c1e_detected;
726 724
727enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, 725enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
728 IDLE_POLL, IDLE_FORCE_MWAIT}; 726 IDLE_POLL};
729 727
730extern void enable_sep_cpu(void); 728extern void enable_sep_cpu(void);
731extern int sysenter_setup(void); 729extern int sysenter_setup(void);
732 730
733extern void early_trap_init(void); 731extern void early_trap_init(void);
732void early_trap_pf_init(void);
734 733
735/* Defined in head.S */ 734/* Defined in head.S */
736extern struct desc_ptr early_gdt_descr; 735extern struct desc_ptr early_gdt_descr;
@@ -943,7 +942,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
943extern int get_tsc_mode(unsigned long adr); 942extern int get_tsc_mode(unsigned long adr);
944extern int set_tsc_mode(unsigned int val); 943extern int set_tsc_mode(unsigned int val);
945 944
946extern int amd_get_nb_id(int cpu); 945extern u16 amd_get_nb_id(int cpu);
947 946
948struct aperfmperf { 947struct aperfmperf {
949 u64 aperf, mperf; 948 u64 aperf, mperf;
@@ -998,7 +997,11 @@ extern unsigned long arch_align_stack(unsigned long sp);
998extern void free_init_pages(char *what, unsigned long begin, unsigned long end); 997extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
999 998
1000void default_idle(void); 999void default_idle(void);
1001bool set_pm_idle_to_default(void); 1000#ifdef CONFIG_XEN
1001bool xen_set_default_idle(void);
1002#else
1003#define xen_set_default_idle 0
1004#endif
1002 1005
1003void stop_this_cpu(void *dummy); 1006void stop_this_cpu(void *dummy);
1004 1007
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6f414ed88620..6fd3fd769796 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -5,8 +5,6 @@
5 5
6/* misc architecture specific prototypes */ 6/* misc architecture specific prototypes */
7 7
8void early_idt_handler(void);
9
10void system_call(void); 8void system_call(void);
11void syscall_init(void); 9void syscall_init(void);
12 10
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index fe1ec5bcd846..9c6b890d5e7a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -58,6 +58,7 @@ extern unsigned char boot_gdt[];
58extern unsigned char secondary_startup_64[]; 58extern unsigned char secondary_startup_64[];
59#endif 59#endif
60 60
61extern void __init setup_real_mode(void); 61void reserve_real_mode(void);
62void setup_real_mode(void);
62 63
63#endif /* _ARCH_X86_REALMODE_H */ 64#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6c7fc25f2c34..5c6e4fb370f5 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -47,6 +47,12 @@
47# define NEED_NOPL 0 47# define NEED_NOPL 0
48#endif 48#endif
49 49
50#ifdef CONFIG_MATOM
51# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
52#else
53# define NEED_MOVBE 0
54#endif
55
50#ifdef CONFIG_X86_64 56#ifdef CONFIG_X86_64
51#ifdef CONFIG_PARAVIRT 57#ifdef CONFIG_PARAVIRT
52/* Paravirtualized systems may not have PSE or PGE available */ 58/* Paravirtualized systems may not have PSE or PGE available */
@@ -80,7 +86,7 @@
80 86
81#define REQUIRED_MASK2 0 87#define REQUIRED_MASK2 0
82#define REQUIRED_MASK3 (NEED_NOPL) 88#define REQUIRED_MASK3 (NEED_NOPL)
83#define REQUIRED_MASK4 0 89#define REQUIRED_MASK4 (NEED_MOVBE)
84#define REQUIRED_MASK5 0 90#define REQUIRED_MASK5 0
85#define REQUIRED_MASK6 0 91#define REQUIRED_MASK6 0
86#define REQUIRED_MASK7 0 92#define REQUIRED_MASK7 0
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 216bf364a7e7..35e67a457182 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,27 +31,9 @@ typedef sigset_t compat_sigset_t;
31#include <uapi/asm/signal.h> 31#include <uapi/asm/signal.h>
32#ifndef __ASSEMBLY__ 32#ifndef __ASSEMBLY__
33extern void do_notify_resume(struct pt_regs *, void *, __u32); 33extern void do_notify_resume(struct pt_regs *, void *, __u32);
34#ifdef __i386__
35struct old_sigaction {
36 __sighandler_t sa_handler;
37 old_sigset_t sa_mask;
38 unsigned long sa_flags;
39 __sigrestore_t sa_restorer;
40};
41
42struct sigaction {
43 __sighandler_t sa_handler;
44 unsigned long sa_flags;
45 __sigrestore_t sa_restorer;
46 sigset_t sa_mask; /* mask last for extensibility */
47};
48
49struct k_sigaction {
50 struct sigaction sa;
51};
52 34
53#else /* __i386__ */ 35#define __ARCH_HAS_SA_RESTORER
54#endif /* !__i386__ */ 36
55#include <asm/sigcontext.h> 37#include <asm/sigcontext.h>
56 38
57#ifdef __i386__ 39#ifdef __i386__
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 31f61f96e0fb..0218d917f509 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -32,22 +32,11 @@ struct mmap_arg_struct32;
32asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); 32asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
33asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); 33asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
34 34
35struct sigaction32;
36struct old_sigaction32;
37asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
38 struct sigaction32 __user *, unsigned int);
39asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
40 struct old_sigaction32 __user *);
41asmlinkage long sys32_alarm(unsigned int); 35asmlinkage long sys32_alarm(unsigned int);
42 36
43asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); 37asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
44asmlinkage long sys32_sysfs(int, u32, u32); 38asmlinkage long sys32_sysfs(int, u32, u32);
45 39
46asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
47 struct compat_timespec __user *);
48asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
49asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
50
51asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); 40asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
52asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); 41asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
53 42
@@ -68,9 +57,8 @@ asmlinkage long sys32_fallocate(int, int, unsigned,
68 unsigned, unsigned, unsigned); 57 unsigned, unsigned, unsigned);
69 58
70/* ia32/ia32_signal.c */ 59/* ia32/ia32_signal.c */
71asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); 60asmlinkage long sys32_sigreturn(void);
72asmlinkage long sys32_sigreturn(struct pt_regs *); 61asmlinkage long sys32_rt_sigreturn(void);
73asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
74 62
75/* ia32/ipc32.c */ 63/* ia32/ipc32.c */
76asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); 64asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 58b7e3eac0ae..6cf0a9cc60cd 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -18,13 +18,13 @@
18/* Common in X86_32 and X86_64 */ 18/* Common in X86_32 and X86_64 */
19/* kernel/ioport.c */ 19/* kernel/ioport.c */
20asmlinkage long sys_ioperm(unsigned long, unsigned long, int); 20asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
21long sys_iopl(unsigned int, struct pt_regs *); 21asmlinkage long sys_iopl(unsigned int);
22 22
23/* kernel/ldt.c */ 23/* kernel/ldt.c */
24asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); 24asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
25 25
26/* kernel/signal.c */ 26/* kernel/signal.c */
27long sys_rt_sigreturn(struct pt_regs *); 27long sys_rt_sigreturn(void);
28 28
29/* kernel/tls.c */ 29/* kernel/tls.c */
30asmlinkage int sys_set_thread_area(struct user_desc __user *); 30asmlinkage int sys_set_thread_area(struct user_desc __user *);
@@ -34,14 +34,11 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
34#ifdef CONFIG_X86_32 34#ifdef CONFIG_X86_32
35 35
36/* kernel/signal.c */ 36/* kernel/signal.c */
37asmlinkage int sys_sigsuspend(int, int, old_sigset_t); 37unsigned long sys_sigreturn(void);
38asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
39 struct old_sigaction __user *);
40unsigned long sys_sigreturn(struct pt_regs *);
41 38
42/* kernel/vm86_32.c */ 39/* kernel/vm86_32.c */
43int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); 40int sys_vm86old(struct vm86_struct __user *);
44int sys_vm86(unsigned long, unsigned long, struct pt_regs *); 41int sys_vm86(unsigned long, unsigned long);
45 42
46#else /* CONFIG_X86_32 */ 43#else /* CONFIG_X86_32 */
47 44
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0fee48e279cc..50a7fc0f824a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -20,10 +20,20 @@ static inline void __native_flush_tlb(void)
20 native_write_cr3(native_read_cr3()); 20 native_write_cr3(native_read_cr3());
21} 21}
22 22
23static inline void __native_flush_tlb_global_irq_disabled(void)
24{
25 unsigned long cr4;
26
27 cr4 = native_read_cr4();
28 /* clear PGE */
29 native_write_cr4(cr4 & ~X86_CR4_PGE);
30 /* write old PGE again and flush TLBs */
31 native_write_cr4(cr4);
32}
33
23static inline void __native_flush_tlb_global(void) 34static inline void __native_flush_tlb_global(void)
24{ 35{
25 unsigned long flags; 36 unsigned long flags;
26 unsigned long cr4;
27 37
28 /* 38 /*
29 * Read-modify-write to CR4 - protect it from preemption and 39 * Read-modify-write to CR4 - protect it from preemption and
@@ -32,11 +42,7 @@ static inline void __native_flush_tlb_global(void)
32 */ 42 */
33 raw_local_irq_save(flags); 43 raw_local_irq_save(flags);
34 44
35 cr4 = native_read_cr4(); 45 __native_flush_tlb_global_irq_disabled();
36 /* clear PGE */
37 native_write_cr4(cr4 & ~X86_CR4_PGE);
38 /* write old PGE again and flush TLBs */
39 native_write_cr4(cr4);
40 46
41 raw_local_irq_restore(flags); 47 raw_local_irq_restore(flags);
42} 48}
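
Side note (not part of the patch): the factored-out helper performs the classic CR4.PGE read-modify-write, which momentarily clears PGE and thereby flushes global TLB entries before restoring the original value. The toy model below only mimics that sequence with an ordinary variable standing in for CR4; the initial value is made up.

/* Toy model of the CR4.PGE toggle used by the TLB-flush helper above. */
#include <stdio.h>

#define X86_CR4_PGE (1UL << 7)

static unsigned long fake_cr4 = 0x000006f0UL | X86_CR4_PGE;	/* made-up */

static void write_cr4(unsigned long v)
{
	fake_cr4 = v;
	printf("cr4 <- 0x%lx (PGE %s)\n", v, (v & X86_CR4_PGE) ? "on" : "off");
}

int main(void)
{
	unsigned long cr4 = fake_cr4;

	write_cr4(cr4 & ~X86_CR4_PGE);	/* clearing PGE flushes global TLBs */
	write_cr4(cr4);			/* restore the original value */
	return 0;
}
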
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1709801d18ec..5ee26875baea 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -125,13 +125,12 @@ extern int __get_user_4(void);
125extern int __get_user_8(void); 125extern int __get_user_8(void);
126extern int __get_user_bad(void); 126extern int __get_user_bad(void);
127 127
128#define __get_user_x(size, ret, x, ptr) \ 128/*
129 asm volatile("call __get_user_" #size \ 129 * This is a type: either unsigned long, if the argument fits into
130 : "=a" (ret), "=d" (x) \ 130 * that type, or otherwise unsigned long long.
131 : "0" (ptr)) \ 131 */
132 132#define __inttype(x) \
133/* Careful: we have to cast the result to the type of the pointer 133__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
134 * for sign reasons */
135 134
136/** 135/**
137 * get_user: - Get a simple variable from user space. 136 * get_user: - Get a simple variable from user space.
@@ -150,38 +149,26 @@ extern int __get_user_bad(void);
150 * Returns zero on success, or -EFAULT on error. 149 * Returns zero on success, or -EFAULT on error.
151 * On error, the variable @x is set to zero. 150 * On error, the variable @x is set to zero.
152 */ 151 */
153#ifdef CONFIG_X86_32 152/*
154#define __get_user_8(__ret_gu, __val_gu, ptr) \ 153 * Careful: we have to cast the result to the type of the pointer
155 __get_user_x(X, __ret_gu, __val_gu, ptr) 154 * for sign reasons.
156#else 155 *
157#define __get_user_8(__ret_gu, __val_gu, ptr) \ 156 * The use of %edx as the register specifier is a bit of a
158 __get_user_x(8, __ret_gu, __val_gu, ptr) 157 * simplification, as gcc only cares about it as the starting point
159#endif 158 * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits
160 159 * (%ecx being the next register in gcc's x86 register sequence), and
160 * %rdx on 64 bits.
161 */
161#define get_user(x, ptr) \ 162#define get_user(x, ptr) \
162({ \ 163({ \
163 int __ret_gu; \ 164 int __ret_gu; \
164 unsigned long __val_gu; \ 165 register __inttype(*(ptr)) __val_gu asm("%edx"); \
165 __chk_user_ptr(ptr); \ 166 __chk_user_ptr(ptr); \
166 might_fault(); \ 167 might_fault(); \
167 switch (sizeof(*(ptr))) { \ 168 asm volatile("call __get_user_%P3" \
168 case 1: \ 169 : "=a" (__ret_gu), "=r" (__val_gu) \
169 __get_user_x(1, __ret_gu, __val_gu, ptr); \ 170 : "0" (ptr), "i" (sizeof(*(ptr)))); \
170 break; \ 171 (x) = (__typeof__(*(ptr))) __val_gu; \
171 case 2: \
172 __get_user_x(2, __ret_gu, __val_gu, ptr); \
173 break; \
174 case 4: \
175 __get_user_x(4, __ret_gu, __val_gu, ptr); \
176 break; \
177 case 8: \
178 __get_user_8(__ret_gu, __val_gu, ptr); \
179 break; \
180 default: \
181 __get_user_x(X, __ret_gu, __val_gu, ptr); \
182 break; \
183 } \
184 (x) = (__typeof__(*(ptr)))__val_gu; \
185 __ret_gu; \ 172 __ret_gu; \
186}) 173})
187 174
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index a0790e07ba65..3d5df1c4447f 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -38,8 +38,6 @@
38# define __ARCH_WANT_SYS_OLD_GETRLIMIT 38# define __ARCH_WANT_SYS_OLD_GETRLIMIT
39# define __ARCH_WANT_SYS_OLD_UNAME 39# define __ARCH_WANT_SYS_OLD_UNAME
40# define __ARCH_WANT_SYS_PAUSE 40# define __ARCH_WANT_SYS_PAUSE
41# define __ARCH_WANT_SYS_RT_SIGACTION
42# define __ARCH_WANT_SYS_RT_SIGSUSPEND
43# define __ARCH_WANT_SYS_SGETMASK 41# define __ARCH_WANT_SYS_SGETMASK
44# define __ARCH_WANT_SYS_SIGNAL 42# define __ARCH_WANT_SYS_SIGNAL
45# define __ARCH_WANT_SYS_SIGPENDING 43# define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index b47c2a82ff15..062921ef34e9 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -16,7 +16,7 @@ extern void uv_system_init(void);
16extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 16extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
17 struct mm_struct *mm, 17 struct mm_struct *mm,
18 unsigned long start, 18 unsigned long start,
19 unsigned end, 19 unsigned long end,
20 unsigned int cpu); 20 unsigned int cpu);
21 21
22#else /* X86_UV */ 22#else /* X86_UV */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 21f7385badb8..2c32df95bb78 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV architectural definitions 6 * SGI UV architectural definitions
7 * 7 *
8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_HUB_H 11#ifndef _ASM_X86_UV_UV_HUB_H
@@ -175,6 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
175 */ 175 */
176#define UV1_HUB_REVISION_BASE 1 176#define UV1_HUB_REVISION_BASE 1
177#define UV2_HUB_REVISION_BASE 3 177#define UV2_HUB_REVISION_BASE 3
178#define UV3_HUB_REVISION_BASE 5
178 179
179static inline int is_uv1_hub(void) 180static inline int is_uv1_hub(void)
180{ 181{
@@ -183,6 +184,23 @@ static inline int is_uv1_hub(void)
183 184
184static inline int is_uv2_hub(void) 185static inline int is_uv2_hub(void)
185{ 186{
187 return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
188 (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
189}
190
191static inline int is_uv3_hub(void)
192{
193 return uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE;
194}
195
196static inline int is_uv_hub(void)
197{
198 return uv_hub_info->hub_revision;
199}
200
201/* code common to uv2 and uv3 only */
202static inline int is_uvx_hub(void)
203{
186 return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE; 204 return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
187} 205}
188 206
@@ -230,14 +248,23 @@ union uvh_apicid {
230#define UV2_LOCAL_MMR_SIZE (32UL * 1024 * 1024) 248#define UV2_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
231#define UV2_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024) 249#define UV2_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
232 250
233#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE \ 251#define UV3_LOCAL_MMR_BASE 0xfa000000UL
234 : UV2_LOCAL_MMR_BASE) 252#define UV3_GLOBAL_MMR32_BASE 0xfc000000UL
235#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE \ 253#define UV3_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
236 : UV2_GLOBAL_MMR32_BASE) 254#define UV3_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
237#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \ 255
238 UV2_LOCAL_MMR_SIZE) 256#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
257 (is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
258 UV3_LOCAL_MMR_BASE))
259#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE :\
260 (is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE :\
261 UV3_GLOBAL_MMR32_BASE))
262#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
263 (is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
264 UV3_LOCAL_MMR_SIZE))
239#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\ 265#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\
240 UV2_GLOBAL_MMR32_SIZE) 266 (is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE :\
267 UV3_GLOBAL_MMR32_SIZE))
241#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) 268#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base)
242 269
243#define UV_GLOBAL_GRU_MMR_BASE 0x4000000 270#define UV_GLOBAL_GRU_MMR_BASE 0x4000000
@@ -599,6 +626,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
599 * 1 - UV1 rev 1.0 initial silicon 626 * 1 - UV1 rev 1.0 initial silicon
600 * 2 - UV1 rev 2.0 production silicon 627 * 2 - UV1 rev 2.0 production silicon
601 * 3 - UV2 rev 1.0 initial silicon 628 * 3 - UV2 rev 1.0 initial silicon
629 * 5 - UV3 rev 1.0 initial silicon
602 */ 630 */
603static inline int uv_get_min_hub_revision_id(void) 631static inline int uv_get_min_hub_revision_id(void)
604{ 632{
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index cf1d73643f60..bd5f80e58a23 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,16 +5,25 @@
5 * 5 *
6 * SGI UV MMR definitions 6 * SGI UV MMR definitions
7 * 7 *
8 * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_MMRS_H 11#ifndef _ASM_X86_UV_UV_MMRS_H
12#define _ASM_X86_UV_UV_MMRS_H 12#define _ASM_X86_UV_UV_MMRS_H
13 13
14/* 14/*
15 * This file contains MMR definitions for both UV1 & UV2 hubs. 15 * This file contains MMR definitions for all UV hubs types.
16 * 16 *
17 * In general, MMR addresses and structures are identical on both hubs. 17 * To minimize coding differences between hub types, the symbols are
18 * grouped by architecture types.
19 *
20 * UVH - definitions common to all UV hub types.
21 * UVXH - definitions common to all UV eXtended hub types (currently 2 & 3).
22 * UV1H - definitions specific to UV type 1 hub.
23 * UV2H - definitions specific to UV type 2 hub.
24 * UV3H - definitions specific to UV type 3 hub.
25 *
26 * So in general, MMR addresses and structures are identical on all hubs types.
18 * These MMRs are identified as: 27 * These MMRs are identified as:
19 * #define UVH_xxx <address> 28 * #define UVH_xxx <address>
20 * union uvh_xxx { 29 * union uvh_xxx {
@@ -23,24 +32,36 @@
23 * } s; 32 * } s;
24 * }; 33 * };
25 * 34 *
26 * If the MMR exists on both hub type but has different addresses or 35 * If the MMR exists on all hub types but have different addresses:
27 * contents, the MMR definition is similar to: 36 * #define UV1Hxxx a
28 * #define UV1H_xxx <uv1 address> 37 * #define UV2Hxxx b
29 * #define UV2H_xxx <uv2address> 38 * #define UV3Hxxx c
30 * #define UVH_xxx (is_uv1_hub() ? UV1H_xxx : UV2H_xxx) 39 * #define UVHxxx (is_uv1_hub() ? UV1Hxxx :
40 * (is_uv2_hub() ? UV2Hxxx :
41 * UV3Hxxx))
42 *
43 * If the MMR exists on all hub types > 1 but have different addresses:
44 * #define UV2Hxxx b
45 * #define UV3Hxxx c
46 * #define UVXHxxx (is_uv2_hub() ? UV2Hxxx :
47 * UV3Hxxx))
48 *
31 * union uvh_xxx { 49 * union uvh_xxx {
32 * unsigned long v; 50 * unsigned long v;
33 * struct uv1h_int_cmpd_s { (Common fields only) 51 * struct uvh_xxx_s { # Common fields only
34 * } s; 52 * } s;
35 * struct uv1h_int_cmpd_s { (Full UV1 definition) 53 * struct uv1h_xxx_s { # Full UV1 definition (*)
36 * } s1; 54 * } s1;
37 * struct uv2h_int_cmpd_s { (Full UV2 definition) 55 * struct uv2h_xxx_s { # Full UV2 definition (*)
38 * } s2; 56 * } s2;
57 * struct uv3h_xxx_s { # Full UV3 definition (*)
58 * } s3;
39 * }; 59 * };
60 * (* - if present and different than the common struct)
40 * 61 *
41 * Only essential difference are enumerated. For example, if the address is 62 * Only essential differences are enumerated. For example, if the address is
42 * the same for both UV1 & UV2, only a single #define is generated. Likewise, 63 * the same for all UV's, only a single #define is generated. Likewise,
43 * if the contents is the same for both hubs, only the "s" structure is 64 * if the contents is the same for all hubs, only the "s" structure is
44 * generated. 65 * generated.
45 * 66 *
46 * If the MMR exists on ONLY 1 type of hub, no generic definition is 67 * If the MMR exists on ONLY 1 type of hub, no generic definition is
@@ -51,6 +72,8 @@
51 * struct uvh_int_cmpd_s { 72 * struct uvh_int_cmpd_s {
52 * } sn; 73 * } sn;
53 * }; 74 * };
75 *
76 * (GEN Flags: mflags_opt= undefs=0 UV23=UVXH)
54 */ 77 */
55 78
56#define UV_MMR_ENABLE (1UL << 63) 79#define UV_MMR_ENABLE (1UL << 63)
@@ -58,15 +81,18 @@
58#define UV1_HUB_PART_NUMBER 0x88a5 81#define UV1_HUB_PART_NUMBER 0x88a5
59#define UV2_HUB_PART_NUMBER 0x8eb8 82#define UV2_HUB_PART_NUMBER 0x8eb8
60#define UV2_HUB_PART_NUMBER_X 0x1111 83#define UV2_HUB_PART_NUMBER_X 0x1111
84#define UV3_HUB_PART_NUMBER 0x9578
85#define UV3_HUB_PART_NUMBER_X 0x4321
61 86
62/* Compat: if this #define is present, UV headers support UV2 */ 87/* Compat: Indicate which UV Hubs are supported. */
63#define UV2_HUB_IS_SUPPORTED 1 88#define UV2_HUB_IS_SUPPORTED 1
89#define UV3_HUB_IS_SUPPORTED 1
64 90
65/* ========================================================================= */ 91/* ========================================================================= */
66/* UVH_BAU_DATA_BROADCAST */ 92/* UVH_BAU_DATA_BROADCAST */
67/* ========================================================================= */ 93/* ========================================================================= */
68#define UVH_BAU_DATA_BROADCAST 0x61688UL 94#define UVH_BAU_DATA_BROADCAST 0x61688UL
69#define UVH_BAU_DATA_BROADCAST_32 0x440 95#define UVH_BAU_DATA_BROADCAST_32 0x440
70 96
71#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 97#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0
72#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL 98#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL
@@ -82,8 +108,8 @@ union uvh_bau_data_broadcast_u {
82/* ========================================================================= */ 108/* ========================================================================= */
83/* UVH_BAU_DATA_CONFIG */ 109/* UVH_BAU_DATA_CONFIG */
84/* ========================================================================= */ 110/* ========================================================================= */
85#define UVH_BAU_DATA_CONFIG 0x61680UL 111#define UVH_BAU_DATA_CONFIG 0x61680UL
86#define UVH_BAU_DATA_CONFIG_32 0x438 112#define UVH_BAU_DATA_CONFIG_32 0x438
87 113
88#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 114#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
89#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 115#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
@@ -121,10 +147,14 @@ union uvh_bau_data_config_u {
121/* ========================================================================= */ 147/* ========================================================================= */
122/* UVH_EVENT_OCCURRED0 */ 148/* UVH_EVENT_OCCURRED0 */
123/* ========================================================================= */ 149/* ========================================================================= */
124#define UVH_EVENT_OCCURRED0 0x70000UL 150#define UVH_EVENT_OCCURRED0 0x70000UL
125#define UVH_EVENT_OCCURRED0_32 0x5e8 151#define UVH_EVENT_OCCURRED0_32 0x5e8
152
153#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
154#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
155#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
156#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
126 157
127#define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0
128#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 158#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
129#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 159#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
130#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 160#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3
@@ -135,7 +165,6 @@ union uvh_bau_data_config_u {
135#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 165#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
136#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 166#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
137#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 167#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
138#define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
139#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 168#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
140#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 169#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
141#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 170#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
@@ -181,7 +210,6 @@ union uvh_bau_data_config_u {
181#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 210#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54
182#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 211#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55
183#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 212#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
184#define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
185#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL 213#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
186#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL 214#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
187#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL 215#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
@@ -192,7 +220,6 @@ union uvh_bau_data_config_u {
192#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL 220#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
193#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL 221#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
194#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL 222#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
195#define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
196#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL 223#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
197#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL 224#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
198#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL 225#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
@@ -239,188 +266,130 @@ union uvh_bau_data_config_u {
239#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL 266#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
240#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL 267#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
241 268
242#define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 269#define UVXH_EVENT_OCCURRED0_QP_HCERR_SHFT 1
243#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 270#define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2
244#define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 271#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
245#define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 272#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
246#define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 273#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
247#define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 274#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
248#define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 275#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
249#define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 276#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
250#define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 277#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
251#define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 278#define UVXH_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
252#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 279#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
253#define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 280#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
254#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 281#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
255#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 282#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
256#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 283#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
257#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 284#define UVXH_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
258#define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 285#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
259#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 286#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
260#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 287#define UVXH_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
261#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 288#define UVXH_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
262#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 289#define UVXH_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
263#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 290#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
264#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 291#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
265#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 292#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
266#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 293#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
267#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 294#define UVXH_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
268#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 295#define UVXH_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
269#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 296#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
270#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 297#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
271#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 298#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
272#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 299#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
273#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 300#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
274#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 301#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
275#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 302#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
276#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 303#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
277#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 304#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
278#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 305#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
279#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 306#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
280#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 307#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
281#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 308#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
282#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 309#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
283#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 310#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
284#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 311#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
285#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 312#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
286#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 313#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
287#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 314#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
288#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 315#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
289#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 316#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
290#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 317#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
291#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 318#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
292#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 319#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
293#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 320#define UVXH_EVENT_OCCURRED0_IPI_INT_SHFT 53
294#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 321#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
295#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 322#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
296#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 323#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
297#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 324#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
298#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 325#define UVXH_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
299#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 326#define UVXH_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
300#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 327#define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL
301#define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL 328#define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL
302#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL 329#define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL
303#define UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL 330#define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL
304#define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL 331#define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL
305#define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL 332#define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL
306#define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL 333#define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL
307#define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL 334#define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL
308#define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL 335#define UVXH_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
309#define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL 336#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL
310#define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL 337#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL
311#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL 338#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL
312#define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL 339#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL
313#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL 340#define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL
314#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL 341#define UVXH_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
315#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL 342#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
316#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL 343#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
317#define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL 344#define UVXH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
318#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL 345#define UVXH_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
319#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL 346#define UVXH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
320#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL 347#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
321#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL 348#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
322#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL 349#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
323#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL 350#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
324#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL 351#define UVXH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
325#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL 352#define UVXH_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
326#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL 353#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
327#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL 354#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
328#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL 355#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
329#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL 356#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
330#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL 357#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
331#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL 358#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
332#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL 359#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
333#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL 360#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
334#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL 361#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
335#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL 362#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
336#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL 363#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
337#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL 364#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
338#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL 365#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
339#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL 366#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
340#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL 367#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
341#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL 368#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
342#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL 369#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
343#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL 370#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
344#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL 371#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
345#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL 372#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
346#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL 373#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
347#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL 374#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
348#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL 375#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
349#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL 376#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
350#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL 377#define UVXH_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
351#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL 378#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
352#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL 379#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
353#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL 380#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
354#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL 381#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
355#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL 382#define UVXH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
356#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
357#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
358#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
359#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
360 383
361union uvh_event_occurred0_u { 384union uvh_event_occurred0_u {
362 unsigned long v; 385 unsigned long v;
363 struct uv1h_event_occurred0_s { 386 struct uvh_event_occurred0_s {
364 unsigned long lb_hcerr:1; /* RW, W1C */ 387 unsigned long lb_hcerr:1; /* RW, W1C */
365 unsigned long gr0_hcerr:1; /* RW, W1C */ 388 unsigned long rsvd_1_10:10;
366 unsigned long gr1_hcerr:1; /* RW, W1C */
367 unsigned long lh_hcerr:1; /* RW, W1C */
368 unsigned long rh_hcerr:1; /* RW, W1C */
369 unsigned long xn_hcerr:1; /* RW, W1C */
370 unsigned long si_hcerr:1; /* RW, W1C */
371 unsigned long lb_aoerr0:1; /* RW, W1C */
372 unsigned long gr0_aoerr0:1; /* RW, W1C */
373 unsigned long gr1_aoerr0:1; /* RW, W1C */
374 unsigned long lh_aoerr0:1; /* RW, W1C */
375 unsigned long rh_aoerr0:1; /* RW, W1C */ 389 unsigned long rh_aoerr0:1; /* RW, W1C */
376 unsigned long xn_aoerr0:1; /* RW, W1C */ 390 unsigned long rsvd_12_63:52;
377 unsigned long si_aoerr0:1; /* RW, W1C */ 391 } s;
378 unsigned long lb_aoerr1:1; /* RW, W1C */ 392 struct uvxh_event_occurred0_s {
379 unsigned long gr0_aoerr1:1; /* RW, W1C */
380 unsigned long gr1_aoerr1:1; /* RW, W1C */
381 unsigned long lh_aoerr1:1; /* RW, W1C */
382 unsigned long rh_aoerr1:1; /* RW, W1C */
383 unsigned long xn_aoerr1:1; /* RW, W1C */
384 unsigned long si_aoerr1:1; /* RW, W1C */
385 unsigned long rh_vpi_int:1; /* RW, W1C */
386 unsigned long system_shutdown_int:1; /* RW, W1C */
387 unsigned long lb_irq_int_0:1; /* RW, W1C */
388 unsigned long lb_irq_int_1:1; /* RW, W1C */
389 unsigned long lb_irq_int_2:1; /* RW, W1C */
390 unsigned long lb_irq_int_3:1; /* RW, W1C */
391 unsigned long lb_irq_int_4:1; /* RW, W1C */
392 unsigned long lb_irq_int_5:1; /* RW, W1C */
393 unsigned long lb_irq_int_6:1; /* RW, W1C */
394 unsigned long lb_irq_int_7:1; /* RW, W1C */
395 unsigned long lb_irq_int_8:1; /* RW, W1C */
396 unsigned long lb_irq_int_9:1; /* RW, W1C */
397 unsigned long lb_irq_int_10:1; /* RW, W1C */
398 unsigned long lb_irq_int_11:1; /* RW, W1C */
399 unsigned long lb_irq_int_12:1; /* RW, W1C */
400 unsigned long lb_irq_int_13:1; /* RW, W1C */
401 unsigned long lb_irq_int_14:1; /* RW, W1C */
402 unsigned long lb_irq_int_15:1; /* RW, W1C */
403 unsigned long l1_nmi_int:1; /* RW, W1C */
404 unsigned long stop_clock:1; /* RW, W1C */
405 unsigned long asic_to_l1:1; /* RW, W1C */
406 unsigned long l1_to_asic:1; /* RW, W1C */
407 unsigned long ltc_int:1; /* RW, W1C */
408 unsigned long la_seq_trigger:1; /* RW, W1C */
409 unsigned long ipi_int:1; /* RW, W1C */
410 unsigned long extio_int0:1; /* RW, W1C */
411 unsigned long extio_int1:1; /* RW, W1C */
412 unsigned long extio_int2:1; /* RW, W1C */
413 unsigned long extio_int3:1; /* RW, W1C */
414 unsigned long profile_int:1; /* RW, W1C */
415 unsigned long rtc0:1; /* RW, W1C */
416 unsigned long rtc1:1; /* RW, W1C */
417 unsigned long rtc2:1; /* RW, W1C */
418 unsigned long rtc3:1; /* RW, W1C */
419 unsigned long bau_data:1; /* RW, W1C */
420 unsigned long power_management_req:1; /* RW, W1C */
421 unsigned long rsvd_57_63:7;
422 } s1;
423 struct uv2h_event_occurred0_s {
424 unsigned long lb_hcerr:1; /* RW */ 393 unsigned long lb_hcerr:1; /* RW */
425 unsigned long qp_hcerr:1; /* RW */ 394 unsigned long qp_hcerr:1; /* RW */
426 unsigned long rh_hcerr:1; /* RW */ 395 unsigned long rh_hcerr:1; /* RW */
@@ -481,19 +450,20 @@ union uvh_event_occurred0_u {
481 unsigned long extio_int3:1; /* RW */ 450 unsigned long extio_int3:1; /* RW */
482 unsigned long profile_int:1; /* RW */ 451 unsigned long profile_int:1; /* RW */
483 unsigned long rsvd_59_63:5; 452 unsigned long rsvd_59_63:5;
484 } s2; 453 } sx;
485}; 454};
486 455
487/* ========================================================================= */ 456/* ========================================================================= */
488/* UVH_EVENT_OCCURRED0_ALIAS */ 457/* UVH_EVENT_OCCURRED0_ALIAS */
489/* ========================================================================= */ 458/* ========================================================================= */
490#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL 459#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL
491#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 460#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0
461
492 462
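The UVXH_EVENT_OCCURRED0_*_MASK values above are single-bit masks over the 64-bit EVENT_OCCURRED0 register, and the W1C annotation on the s/s1 bitfields means a bit is cleared by writing a 1 back, typically through the _ALIAS register defined just above. A minimal sketch of testing and acknowledging one event bit; the uv_read_local_mmr()/uv_write_local_mmr() accessors and the UVH_EVENT_OCCURRED0 base define are assumed from <asm/uv/uv_hub.h> and from earlier in this header, not from the lines shown here.

#include <asm/uv/uv_hub.h>	/* assumed: uv_read_local_mmr()/uv_write_local_mmr() */
#include <asm/uv/uv_mmrs.h>	/* assumed path of the header being patched */

/* Sketch: check for a pending LB IRQ 0 event and write-1-to-clear it. */
static void uv_ack_lb_irq0(void)
{
	unsigned long ev = uv_read_local_mmr(UVH_EVENT_OCCURRED0);

	if (ev & UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK)
		uv_write_local_mmr(UVH_EVENT_OCCURRED0_ALIAS,
				   UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK);
}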
493/* ========================================================================= */ 463/* ========================================================================= */
494/* UVH_GR0_TLB_INT0_CONFIG */ 464/* UVH_GR0_TLB_INT0_CONFIG */
495/* ========================================================================= */ 465/* ========================================================================= */
496#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL 466#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
497 467
498#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 468#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
499#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 469#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
@@ -531,7 +501,7 @@ union uvh_gr0_tlb_int0_config_u {
531/* ========================================================================= */ 501/* ========================================================================= */
532/* UVH_GR0_TLB_INT1_CONFIG */ 502/* UVH_GR0_TLB_INT1_CONFIG */
533/* ========================================================================= */ 503/* ========================================================================= */
534#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL 504#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
535 505
536#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 506#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
537#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 507#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
@@ -571,9 +541,11 @@ union uvh_gr0_tlb_int1_config_u {
571/* ========================================================================= */ 541/* ========================================================================= */
572#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL 542#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL
573#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL 543#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL
574#define UVH_GR0_TLB_MMR_CONTROL (is_uv1_hub() ? \ 544#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL
575 UV1H_GR0_TLB_MMR_CONTROL : \ 545#define UVH_GR0_TLB_MMR_CONTROL \
576 UV2H_GR0_TLB_MMR_CONTROL) 546 (is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \
547 (is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \
548 UV3H_GR0_TLB_MMR_CONTROL))
577 549
578#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 550#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
579#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 551#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
@@ -611,6 +583,21 @@ union uvh_gr0_tlb_int1_config_u {
611#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL 583#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL
612#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL 584#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
613 585
586#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
587#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
588#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
589#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
590#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
591#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
592#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
593#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
594#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
595#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
596#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
597#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
598#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
599#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
600
614#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 601#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
615#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 602#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
616#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 603#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
@@ -630,6 +617,23 @@ union uvh_gr0_tlb_int1_config_u {
630#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL 617#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
631#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL 618#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
632 619
620#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
621#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
622#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
623#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
624#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
625#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
626#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
627#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
628#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
629#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
630#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
631#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
632#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
633#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
634#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
635#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
636
633union uvh_gr0_tlb_mmr_control_u { 637union uvh_gr0_tlb_mmr_control_u {
634 unsigned long v; 638 unsigned long v;
635 struct uvh_gr0_tlb_mmr_control_s { 639 struct uvh_gr0_tlb_mmr_control_s {
@@ -642,7 +646,9 @@ union uvh_gr0_tlb_mmr_control_u {
642 unsigned long rsvd_21_29:9; 646 unsigned long rsvd_21_29:9;
643 unsigned long mmr_write:1; /* WP */ 647 unsigned long mmr_write:1; /* WP */
644 unsigned long mmr_read:1; /* WP */ 648 unsigned long mmr_read:1; /* WP */
645 unsigned long rsvd_32_63:32; 649 unsigned long rsvd_32_48:17;
650 unsigned long rsvd_49_51:3;
651 unsigned long rsvd_52_63:12;
646 } s; 652 } s;
647 struct uv1h_gr0_tlb_mmr_control_s { 653 struct uv1h_gr0_tlb_mmr_control_s {
648 unsigned long index:12; /* RW */ 654 unsigned long index:12; /* RW */
@@ -666,6 +672,23 @@ union uvh_gr0_tlb_mmr_control_u {
666 unsigned long mmr_inj_tlblruv:1; /* RW */ 672 unsigned long mmr_inj_tlblruv:1; /* RW */
667 unsigned long rsvd_61_63:3; 673 unsigned long rsvd_61_63:3;
668 } s1; 674 } s1;
675 struct uvxh_gr0_tlb_mmr_control_s {
676 unsigned long index:12; /* RW */
677 unsigned long mem_sel:2; /* RW */
678 unsigned long rsvd_14_15:2;
679 unsigned long auto_valid_en:1; /* RW */
680 unsigned long rsvd_17_19:3;
681 unsigned long mmr_hash_index_en:1; /* RW */
682 unsigned long rsvd_21_29:9;
683 unsigned long mmr_write:1; /* WP */
684 unsigned long mmr_read:1; /* WP */
685 unsigned long mmr_op_done:1; /* RW */
686 unsigned long rsvd_33_47:15;
687 unsigned long rsvd_48:1;
688 unsigned long rsvd_49_51:3;
689 unsigned long rsvd_52:1;
690 unsigned long rsvd_53_63:11;
691 } sx;
669 struct uv2h_gr0_tlb_mmr_control_s { 692 struct uv2h_gr0_tlb_mmr_control_s {
670 unsigned long index:12; /* RW */ 693 unsigned long index:12; /* RW */
671 unsigned long mem_sel:2; /* RW */ 694 unsigned long mem_sel:2; /* RW */
@@ -683,6 +706,24 @@ union uvh_gr0_tlb_mmr_control_u {
683 unsigned long mmr_inj_tlbram:1; /* RW */ 706 unsigned long mmr_inj_tlbram:1; /* RW */
684 unsigned long rsvd_53_63:11; 707 unsigned long rsvd_53_63:11;
685 } s2; 708 } s2;
709 struct uv3h_gr0_tlb_mmr_control_s {
710 unsigned long index:12; /* RW */
711 unsigned long mem_sel:2; /* RW */
712 unsigned long rsvd_14_15:2;
713 unsigned long auto_valid_en:1; /* RW */
714 unsigned long rsvd_17_19:3;
715 unsigned long mmr_hash_index_en:1; /* RW */
716 unsigned long ecc_sel:1; /* RW */
717 unsigned long rsvd_22_29:8;
718 unsigned long mmr_write:1; /* WP */
719 unsigned long mmr_read:1; /* WP */
720 unsigned long mmr_op_done:1; /* RW */
721 unsigned long rsvd_33_47:15;
722 unsigned long undef_48:1; /* Undefined */
723 unsigned long rsvd_49_51:3;
724 unsigned long undef_52:1; /* Undefined */
725 unsigned long rsvd_53_63:11;
726 } s3;
686}; 727};
687 728
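Note that UVH_GR0_TLB_MMR_CONTROL is no longer a plain constant: it expands to a run-time expression built from is_uv1_hub()/is_uv2_hub(), so it still works as an argument to the MMR accessors but cannot appear where C requires a constant expression (case labels, static initializers, array sizes). A short illustration, with uv_read_local_mmr() assumed from <asm/uv/uv_hub.h>:

#include <asm/uv/uv_hub.h>	/* assumed: uv_read_local_mmr() */
#include <asm/uv/uv_mmrs.h>	/* assumed path of the header being patched */

/* Fine: the macro is an ordinary run-time expression in this position. */
static unsigned long uv_gr0_tlb_control_peek(void)
{
	return uv_read_local_mmr(UVH_GR0_TLB_MMR_CONTROL);
}

/*
 * No longer possible:
 *	static unsigned long addr = UVH_GR0_TLB_MMR_CONTROL;
 * at file scope, because the is_uv*_hub() calls make the expansion
 * a non-constant expression.
 */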
688/* ========================================================================= */ 729/* ========================================================================= */
@@ -690,9 +731,11 @@ union uvh_gr0_tlb_mmr_control_u {
690/* ========================================================================= */ 731/* ========================================================================= */
691#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL 732#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL
692#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL 733#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
693#define UVH_GR0_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \ 734#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
694 UV1H_GR0_TLB_MMR_READ_DATA_HI : \ 735#define UVH_GR0_TLB_MMR_READ_DATA_HI \
695 UV2H_GR0_TLB_MMR_READ_DATA_HI) 736 (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \
737 (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \
738 UV3H_GR0_TLB_MMR_READ_DATA_HI))
696 739
697#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 740#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
698#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 741#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -703,6 +746,46 @@ union uvh_gr0_tlb_mmr_control_u {
703#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL 746#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
704#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL 747#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
705 748
749#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
750#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
751#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
752#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
753#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
754#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
755#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
756#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
757
758#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
759#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
760#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
761#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
762#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
763#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
764#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
765#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
766
767#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
768#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
769#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
770#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
771#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
772#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
773#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
774#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
775
776#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
777#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
778#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
779#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
780#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
781#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
782#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
783#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
784#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
785#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
786#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
787#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
788
706union uvh_gr0_tlb_mmr_read_data_hi_u { 789union uvh_gr0_tlb_mmr_read_data_hi_u {
707 unsigned long v; 790 unsigned long v;
708 struct uvh_gr0_tlb_mmr_read_data_hi_s { 791 struct uvh_gr0_tlb_mmr_read_data_hi_s {
@@ -712,6 +795,36 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
712 unsigned long larger:1; /* RO */ 795 unsigned long larger:1; /* RO */
713 unsigned long rsvd_45_63:19; 796 unsigned long rsvd_45_63:19;
714 } s; 797 } s;
798 struct uv1h_gr0_tlb_mmr_read_data_hi_s {
799 unsigned long pfn:41; /* RO */
800 unsigned long gaa:2; /* RO */
801 unsigned long dirty:1; /* RO */
802 unsigned long larger:1; /* RO */
803 unsigned long rsvd_45_63:19;
804 } s1;
805 struct uvxh_gr0_tlb_mmr_read_data_hi_s {
806 unsigned long pfn:41; /* RO */
807 unsigned long gaa:2; /* RO */
808 unsigned long dirty:1; /* RO */
809 unsigned long larger:1; /* RO */
810 unsigned long rsvd_45_63:19;
811 } sx;
812 struct uv2h_gr0_tlb_mmr_read_data_hi_s {
813 unsigned long pfn:41; /* RO */
814 unsigned long gaa:2; /* RO */
815 unsigned long dirty:1; /* RO */
816 unsigned long larger:1; /* RO */
817 unsigned long rsvd_45_63:19;
818 } s2;
819 struct uv3h_gr0_tlb_mmr_read_data_hi_s {
820 unsigned long pfn:41; /* RO */
821 unsigned long gaa:2; /* RO */
822 unsigned long dirty:1; /* RO */
823 unsigned long larger:1; /* RO */
824 unsigned long aa_ext:1; /* RO */
825 unsigned long undef_46_54:9; /* Undefined */
826 unsigned long way_ecc:9; /* RO */
827 } s3;
715}; 828};
716 829
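READ_DATA_HI keeps the same pfn/gaa/dirty/larger layout on UV1, UV2 and UV3; UV3 only adds aa_ext and way_ecc above bit 44, which is why a common UVXH_* set can sit alongside the per-hub ones. Extracting a field through a union view or through the matching SHFT/MASK pair is equivalent; a small sketch using only symbols from this header:

#include <asm/uv/uv_mmrs.h>	/* assumed path of the header being patched */

/* Sketch: the bitfield view and the mask/shift form select the same bits. */
static unsigned long uv_data_hi_pfn(unsigned long raw)
{
	union uvh_gr0_tlb_mmr_read_data_hi_u hi = { .v = raw };

	/*
	 * Equivalent to:
	 *	(raw & UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK) >>
	 *		UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT
	 * both pick out bits 40:0.
	 */
	return hi.sx.pfn;
}

/* UV3-only fields need the s3 view (or the UV3H_* macros). */
static unsigned int uv3_data_hi_way_ecc(unsigned long raw)
{
	union uvh_gr0_tlb_mmr_read_data_hi_u hi = { .v = raw };

	return hi.s3.way_ecc;	/* bits 63:55, present on UV3 hubs only */
}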
717/* ========================================================================= */ 830/* ========================================================================= */
@@ -719,9 +832,11 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
719/* ========================================================================= */ 832/* ========================================================================= */
720#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL 833#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL
721#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL 834#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
722#define UVH_GR0_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \ 835#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
723 UV1H_GR0_TLB_MMR_READ_DATA_LO : \ 836#define UVH_GR0_TLB_MMR_READ_DATA_LO \
724 UV2H_GR0_TLB_MMR_READ_DATA_LO) 837 (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \
838 (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \
839 UV3H_GR0_TLB_MMR_READ_DATA_LO))
725 840
726#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 841#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
727#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 842#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
@@ -730,6 +845,34 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
730#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL 845#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
731#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL 846#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
732 847
848#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
849#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
850#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
851#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
852#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
853#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
854
855#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
856#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
857#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
858#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
859#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
860#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
861
862#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
863#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
864#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
865#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
866#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
867#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
868
869#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
870#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
871#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
872#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
873#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
874#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
875
733union uvh_gr0_tlb_mmr_read_data_lo_u { 876union uvh_gr0_tlb_mmr_read_data_lo_u {
734 unsigned long v; 877 unsigned long v;
735 struct uvh_gr0_tlb_mmr_read_data_lo_s { 878 struct uvh_gr0_tlb_mmr_read_data_lo_s {
@@ -737,12 +880,32 @@ union uvh_gr0_tlb_mmr_read_data_lo_u {
737 unsigned long asid:24; /* RO */ 880 unsigned long asid:24; /* RO */
738 unsigned long valid:1; /* RO */ 881 unsigned long valid:1; /* RO */
739 } s; 882 } s;
883 struct uv1h_gr0_tlb_mmr_read_data_lo_s {
884 unsigned long vpn:39; /* RO */
885 unsigned long asid:24; /* RO */
886 unsigned long valid:1; /* RO */
887 } s1;
888 struct uvxh_gr0_tlb_mmr_read_data_lo_s {
889 unsigned long vpn:39; /* RO */
890 unsigned long asid:24; /* RO */
891 unsigned long valid:1; /* RO */
892 } sx;
893 struct uv2h_gr0_tlb_mmr_read_data_lo_s {
894 unsigned long vpn:39; /* RO */
895 unsigned long asid:24; /* RO */
896 unsigned long valid:1; /* RO */
897 } s2;
898 struct uv3h_gr0_tlb_mmr_read_data_lo_s {
899 unsigned long vpn:39; /* RO */
900 unsigned long asid:24; /* RO */
901 unsigned long valid:1; /* RO */
902 } s3;
740}; 903};
741 904
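With the control register and both data registers defined, the field names suggest the read flow: program index (and mem_sel if needed) with mmr_read set, wait for mmr_op_done, then fetch the entry from READ_DATA_LO/HI. The sketch below is a hedged illustration of that flow for UV2/UV3 only (the UV1 view has no mmr_op_done bit); the struct is throwaway, the MMR accessors are assumed from <asm/uv/uv_hub.h>, and the real handshake should be taken from the UV hub documentation rather than inferred from this header alone.

#include <asm/uv/uv_hub.h>	/* assumed: uv_read_local_mmr()/uv_write_local_mmr() */
#include <asm/uv/uv_mmrs.h>	/* assumed path of the header being patched */

struct uv_tlb_entry_example {	/* illustrative only, not a kernel type */
	unsigned long vpn;
	unsigned long pfn;
	int valid;
};

/* Sketch: read back one GR0 TLB entry on a UV2/UV3 hub. */
static void uv_gr0_tlb_read_entry(unsigned int index,
				  struct uv_tlb_entry_example *e)
{
	union uvh_gr0_tlb_mmr_control_u ctl = { .v = 0 };
	union uvh_gr0_tlb_mmr_read_data_lo_u lo;
	union uvh_gr0_tlb_mmr_read_data_hi_u hi;

	ctl.sx.index = index;		/* which TLB entry (bits 11:0) */
	ctl.sx.mmr_read = 1;		/* request a read */
	uv_write_local_mmr(UVH_GR0_TLB_MMR_CONTROL, ctl.v);

	do {				/* wait until the hub latches the entry */
		ctl.v = uv_read_local_mmr(UVH_GR0_TLB_MMR_CONTROL);
	} while (!ctl.sx.mmr_op_done);

	lo.v = uv_read_local_mmr(UVH_GR0_TLB_MMR_READ_DATA_LO);
	hi.v = uv_read_local_mmr(UVH_GR0_TLB_MMR_READ_DATA_HI);

	e->vpn   = lo.sx.vpn;
	e->pfn   = hi.sx.pfn;
	e->valid = lo.sx.valid;
}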
742/* ========================================================================= */ 905/* ========================================================================= */
743/* UVH_GR1_TLB_INT0_CONFIG */ 906/* UVH_GR1_TLB_INT0_CONFIG */
744/* ========================================================================= */ 907/* ========================================================================= */
745#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL 908#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
746 909
747#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 910#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
748#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 911#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
@@ -780,7 +943,7 @@ union uvh_gr1_tlb_int0_config_u {
780/* ========================================================================= */ 943/* ========================================================================= */
781/* UVH_GR1_TLB_INT1_CONFIG */ 944/* UVH_GR1_TLB_INT1_CONFIG */
782/* ========================================================================= */ 945/* ========================================================================= */
783#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL 946#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
784 947
785#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 948#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
786#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 949#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
@@ -820,9 +983,11 @@ union uvh_gr1_tlb_int1_config_u {
820/* ========================================================================= */ 983/* ========================================================================= */
821#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL 984#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL
822#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL 985#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL
823#define UVH_GR1_TLB_MMR_CONTROL (is_uv1_hub() ? \ 986#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL
824 UV1H_GR1_TLB_MMR_CONTROL : \ 987#define UVH_GR1_TLB_MMR_CONTROL \
825 UV2H_GR1_TLB_MMR_CONTROL) 988 (is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \
989 (is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \
990 UV3H_GR1_TLB_MMR_CONTROL))
826 991
827#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 992#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
828#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 993#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
@@ -860,6 +1025,21 @@ union uvh_gr1_tlb_int1_config_u {
860#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL 1025#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL
861#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL 1026#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
862 1027
1028#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
1029#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
1030#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
1031#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
1032#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
1033#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
1034#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
1035#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
1036#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
1037#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
1038#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
1039#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
1040#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
1041#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
1042
863#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 1043#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
864#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 1044#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
865#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 1045#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
@@ -879,6 +1059,23 @@ union uvh_gr1_tlb_int1_config_u {
879#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL 1059#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
880#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL 1060#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
881 1061
1062#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
1063#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
1064#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
1065#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
1066#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
1067#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
1068#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
1069#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
1070#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
1071#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
1072#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
1073#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
1074#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
1075#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
1076#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
1077#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
1078
882union uvh_gr1_tlb_mmr_control_u { 1079union uvh_gr1_tlb_mmr_control_u {
883 unsigned long v; 1080 unsigned long v;
884 struct uvh_gr1_tlb_mmr_control_s { 1081 struct uvh_gr1_tlb_mmr_control_s {
@@ -891,7 +1088,9 @@ union uvh_gr1_tlb_mmr_control_u {
891 unsigned long rsvd_21_29:9; 1088 unsigned long rsvd_21_29:9;
892 unsigned long mmr_write:1; /* WP */ 1089 unsigned long mmr_write:1; /* WP */
893 unsigned long mmr_read:1; /* WP */ 1090 unsigned long mmr_read:1; /* WP */
894 unsigned long rsvd_32_63:32; 1091 unsigned long rsvd_32_48:17;
1092 unsigned long rsvd_49_51:3;
1093 unsigned long rsvd_52_63:12;
895 } s; 1094 } s;
896 struct uv1h_gr1_tlb_mmr_control_s { 1095 struct uv1h_gr1_tlb_mmr_control_s {
897 unsigned long index:12; /* RW */ 1096 unsigned long index:12; /* RW */
@@ -915,6 +1114,23 @@ union uvh_gr1_tlb_mmr_control_u {
915 unsigned long mmr_inj_tlblruv:1; /* RW */ 1114 unsigned long mmr_inj_tlblruv:1; /* RW */
916 unsigned long rsvd_61_63:3; 1115 unsigned long rsvd_61_63:3;
917 } s1; 1116 } s1;
1117 struct uvxh_gr1_tlb_mmr_control_s {
1118 unsigned long index:12; /* RW */
1119 unsigned long mem_sel:2; /* RW */
1120 unsigned long rsvd_14_15:2;
1121 unsigned long auto_valid_en:1; /* RW */
1122 unsigned long rsvd_17_19:3;
1123 unsigned long mmr_hash_index_en:1; /* RW */
1124 unsigned long rsvd_21_29:9;
1125 unsigned long mmr_write:1; /* WP */
1126 unsigned long mmr_read:1; /* WP */
1127 unsigned long mmr_op_done:1; /* RW */
1128 unsigned long rsvd_33_47:15;
1129 unsigned long rsvd_48:1;
1130 unsigned long rsvd_49_51:3;
1131 unsigned long rsvd_52:1;
1132 unsigned long rsvd_53_63:11;
1133 } sx;
918 struct uv2h_gr1_tlb_mmr_control_s { 1134 struct uv2h_gr1_tlb_mmr_control_s {
919 unsigned long index:12; /* RW */ 1135 unsigned long index:12; /* RW */
920 unsigned long mem_sel:2; /* RW */ 1136 unsigned long mem_sel:2; /* RW */
@@ -932,6 +1148,24 @@ union uvh_gr1_tlb_mmr_control_u {
932 unsigned long mmr_inj_tlbram:1; /* RW */ 1148 unsigned long mmr_inj_tlbram:1; /* RW */
933 unsigned long rsvd_53_63:11; 1149 unsigned long rsvd_53_63:11;
934 } s2; 1150 } s2;
1151 struct uv3h_gr1_tlb_mmr_control_s {
1152 unsigned long index:12; /* RW */
1153 unsigned long mem_sel:2; /* RW */
1154 unsigned long rsvd_14_15:2;
1155 unsigned long auto_valid_en:1; /* RW */
1156 unsigned long rsvd_17_19:3;
1157 unsigned long mmr_hash_index_en:1; /* RW */
1158 unsigned long ecc_sel:1; /* RW */
1159 unsigned long rsvd_22_29:8;
1160 unsigned long mmr_write:1; /* WP */
1161 unsigned long mmr_read:1; /* WP */
1162 unsigned long mmr_op_done:1; /* RW */
1163 unsigned long rsvd_33_47:15;
1164 unsigned long undef_48:1; /* Undefined */
1165 unsigned long rsvd_49_51:3;
1166 unsigned long undef_52:1; /* Undefined */
1167 unsigned long rsvd_53_63:11;
1168 } s3;
935}; 1169};
936 1170
937/* ========================================================================= */ 1171/* ========================================================================= */
@@ -939,9 +1173,11 @@ union uvh_gr1_tlb_mmr_control_u {
939/* ========================================================================= */ 1173/* ========================================================================= */
940#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL 1174#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL
941#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL 1175#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
942#define UVH_GR1_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \ 1176#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
943 UV1H_GR1_TLB_MMR_READ_DATA_HI : \ 1177#define UVH_GR1_TLB_MMR_READ_DATA_HI \
944 UV2H_GR1_TLB_MMR_READ_DATA_HI) 1178 (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \
1179 (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \
1180 UV3H_GR1_TLB_MMR_READ_DATA_HI))
945 1181
946#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 1182#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
947#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 1183#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -952,6 +1188,46 @@ union uvh_gr1_tlb_mmr_control_u {
952#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL 1188#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
953#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL 1189#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
954 1190
1191#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
1192#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
1193#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
1194#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
1195#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
1196#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
1197#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
1198#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
1199
1200#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
1201#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
1202#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
1203#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
1204#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
1205#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
1206#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
1207#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
1208
1209#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
1210#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
1211#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
1212#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
1213#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
1214#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
1215#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
1216#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
1217
1218#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
1219#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
1220#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
1221#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
1222#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
1223#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
1224#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
1225#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
1226#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
1227#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
1228#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
1229#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
1230
955union uvh_gr1_tlb_mmr_read_data_hi_u { 1231union uvh_gr1_tlb_mmr_read_data_hi_u {
956 unsigned long v; 1232 unsigned long v;
957 struct uvh_gr1_tlb_mmr_read_data_hi_s { 1233 struct uvh_gr1_tlb_mmr_read_data_hi_s {
@@ -961,6 +1237,36 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
961 unsigned long larger:1; /* RO */ 1237 unsigned long larger:1; /* RO */
962 unsigned long rsvd_45_63:19; 1238 unsigned long rsvd_45_63:19;
963 } s; 1239 } s;
1240 struct uv1h_gr1_tlb_mmr_read_data_hi_s {
1241 unsigned long pfn:41; /* RO */
1242 unsigned long gaa:2; /* RO */
1243 unsigned long dirty:1; /* RO */
1244 unsigned long larger:1; /* RO */
1245 unsigned long rsvd_45_63:19;
1246 } s1;
1247 struct uvxh_gr1_tlb_mmr_read_data_hi_s {
1248 unsigned long pfn:41; /* RO */
1249 unsigned long gaa:2; /* RO */
1250 unsigned long dirty:1; /* RO */
1251 unsigned long larger:1; /* RO */
1252 unsigned long rsvd_45_63:19;
1253 } sx;
1254 struct uv2h_gr1_tlb_mmr_read_data_hi_s {
1255 unsigned long pfn:41; /* RO */
1256 unsigned long gaa:2; /* RO */
1257 unsigned long dirty:1; /* RO */
1258 unsigned long larger:1; /* RO */
1259 unsigned long rsvd_45_63:19;
1260 } s2;
1261 struct uv3h_gr1_tlb_mmr_read_data_hi_s {
1262 unsigned long pfn:41; /* RO */
1263 unsigned long gaa:2; /* RO */
1264 unsigned long dirty:1; /* RO */
1265 unsigned long larger:1; /* RO */
1266 unsigned long aa_ext:1; /* RO */
1267 unsigned long undef_46_54:9; /* Undefined */
1268 unsigned long way_ecc:9; /* RO */
1269 } s3;
964}; 1270};
965 1271
966/* ========================================================================= */ 1272/* ========================================================================= */
@@ -968,9 +1274,11 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
968/* ========================================================================= */ 1274/* ========================================================================= */
969#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL 1275#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL
970#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL 1276#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
971#define UVH_GR1_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \ 1277#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
972 UV1H_GR1_TLB_MMR_READ_DATA_LO : \ 1278#define UVH_GR1_TLB_MMR_READ_DATA_LO \
973 UV2H_GR1_TLB_MMR_READ_DATA_LO) 1279 (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \
1280 (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \
1281 UV3H_GR1_TLB_MMR_READ_DATA_LO))
974 1282
975#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 1283#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
976#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 1284#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
@@ -979,6 +1287,34 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
979#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL 1287#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
980#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL 1288#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
981 1289
1290#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
1291#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
1292#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
1293#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
1294#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
1295#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
1296
1297#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
1298#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
1299#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
1300#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
1301#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
1302#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
1303
1304#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
1305#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
1306#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
1307#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
1308#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
1309#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
1310
1311#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
1312#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
1313#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
1314#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
1315#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
1316#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
1317
982union uvh_gr1_tlb_mmr_read_data_lo_u { 1318union uvh_gr1_tlb_mmr_read_data_lo_u {
983 unsigned long v; 1319 unsigned long v;
984 struct uvh_gr1_tlb_mmr_read_data_lo_s { 1320 struct uvh_gr1_tlb_mmr_read_data_lo_s {
@@ -986,12 +1322,32 @@ union uvh_gr1_tlb_mmr_read_data_lo_u {
986 unsigned long asid:24; /* RO */ 1322 unsigned long asid:24; /* RO */
987 unsigned long valid:1; /* RO */ 1323 unsigned long valid:1; /* RO */
988 } s; 1324 } s;
1325 struct uv1h_gr1_tlb_mmr_read_data_lo_s {
1326 unsigned long vpn:39; /* RO */
1327 unsigned long asid:24; /* RO */
1328 unsigned long valid:1; /* RO */
1329 } s1;
1330 struct uvxh_gr1_tlb_mmr_read_data_lo_s {
1331 unsigned long vpn:39; /* RO */
1332 unsigned long asid:24; /* RO */
1333 unsigned long valid:1; /* RO */
1334 } sx;
1335 struct uv2h_gr1_tlb_mmr_read_data_lo_s {
1336 unsigned long vpn:39; /* RO */
1337 unsigned long asid:24; /* RO */
1338 unsigned long valid:1; /* RO */
1339 } s2;
1340 struct uv3h_gr1_tlb_mmr_read_data_lo_s {
1341 unsigned long vpn:39; /* RO */
1342 unsigned long asid:24; /* RO */
1343 unsigned long valid:1; /* RO */
1344 } s3;
989}; 1345};
990 1346
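The GR1 block mirrors GR0 field for field at the GR1 offsets, so the GR0 sketches above apply unchanged here. For fields present in every view, such as vpn/asid/valid, the generic s view already covers all hub types; the per-hub s1/s2/s3 and common sx views only matter for hub-specific bits.

#include <asm/uv/uv_mmrs.h>	/* assumed path of the header being patched */

/* vpn/asid/valid are laid out identically in every view, so the generic
 * uvh view is sufficient regardless of hub type. */
static int uv_gr1_data_lo_valid(unsigned long raw)
{
	union uvh_gr1_tlb_mmr_read_data_lo_u lo = { .v = raw };

	return lo.s.valid;
}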
991/* ========================================================================= */ 1347/* ========================================================================= */
992/* UVH_INT_CMPB */ 1348/* UVH_INT_CMPB */
993/* ========================================================================= */ 1349/* ========================================================================= */
994#define UVH_INT_CMPB 0x22080UL 1350#define UVH_INT_CMPB 0x22080UL
995 1351
996#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 1352#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
997#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL 1353#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
@@ -1007,10 +1363,13 @@ union uvh_int_cmpb_u {
1007/* ========================================================================= */ 1363/* ========================================================================= */
1008/* UVH_INT_CMPC */ 1364/* UVH_INT_CMPC */
1009/* ========================================================================= */ 1365/* ========================================================================= */
1010#define UVH_INT_CMPC 0x22100UL 1366#define UVH_INT_CMPC 0x22100UL
1367
1368#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0
1369#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
1011 1370
1012#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 1371#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0
1013#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL 1372#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL
1014 1373
1015union uvh_int_cmpc_u { 1374union uvh_int_cmpc_u {
1016 unsigned long v; 1375 unsigned long v;
@@ -1023,10 +1382,13 @@ union uvh_int_cmpc_u {
1023/* ========================================================================= */ 1382/* ========================================================================= */
1024/* UVH_INT_CMPD */ 1383/* UVH_INT_CMPD */
1025/* ========================================================================= */ 1384/* ========================================================================= */
1026#define UVH_INT_CMPD 0x22180UL 1385#define UVH_INT_CMPD 0x22180UL
1027 1386
1028#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 1387#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0
1029#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL 1388#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
1389
1390#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0
1391#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL
1030 1392
1031union uvh_int_cmpd_u { 1393union uvh_int_cmpd_u {
1032 unsigned long v; 1394 unsigned long v;
@@ -1039,8 +1401,8 @@ union uvh_int_cmpd_u {
1039/* ========================================================================= */ 1401/* ========================================================================= */
1040/* UVH_IPI_INT */ 1402/* UVH_IPI_INT */
1041/* ========================================================================= */ 1403/* ========================================================================= */
1042#define UVH_IPI_INT 0x60500UL 1404#define UVH_IPI_INT 0x60500UL
1043#define UVH_IPI_INT_32 0x348 1405#define UVH_IPI_INT_32 0x348
1044 1406
1045#define UVH_IPI_INT_VECTOR_SHFT 0 1407#define UVH_IPI_INT_VECTOR_SHFT 0
1046#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 1408#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8
@@ -1069,8 +1431,8 @@ union uvh_ipi_int_u {
1069/* ========================================================================= */ 1431/* ========================================================================= */
1070/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ 1432/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */
1071/* ========================================================================= */ 1433/* ========================================================================= */
1072#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL 1434#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
1073#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 1435#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
1074 1436
1075#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 1437#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
1076#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 1438#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
@@ -1091,8 +1453,8 @@ union uvh_lb_bau_intd_payload_queue_first_u {
1091/* ========================================================================= */ 1453/* ========================================================================= */
1092/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ 1454/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */
1093/* ========================================================================= */ 1455/* ========================================================================= */
1094#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL 1456#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
1095#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 1457#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
1096 1458
1097#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 1459#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
1098#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL 1460#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -1109,8 +1471,8 @@ union uvh_lb_bau_intd_payload_queue_last_u {
/* ========================================================================= */
/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */
/* ========================================================================= */
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0

#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
@@ -1127,8 +1489,8 @@ union uvh_lb_bau_intd_payload_queue_tail_u {
/* ========================================================================= */
/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */
/* ========================================================================= */
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68

#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
@@ -1189,14 +1551,21 @@ union uvh_lb_bau_intd_software_acknowledge_u {
/* ========================================================================= */
/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
+

/* ========================================================================= */
/* UVH_LB_BAU_MISC_CONTROL */
/* ========================================================================= */
#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
-#define UVH_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV1H_LB_BAU_MISC_CONTROL 0x320170UL
+#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL
+#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL
+#define UVH_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV1H_LB_BAU_MISC_CONTROL_32 0x320170UL
+#define UV2H_LB_BAU_MISC_CONTROL_32 0x320170UL
+#define UV3H_LB_BAU_MISC_CONTROL_32 0x320170UL

#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
@@ -1213,6 +1582,7 @@ union uvh_lb_bau_intd_software_acknowledge_u {
#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
@@ -1228,6 +1598,7 @@ union uvh_lb_bau_intd_software_acknowledge_u {
#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL

#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
@@ -1262,6 +1633,53 @@ union uvh_lb_bau_intd_software_acknowledge_u {
#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL

+#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
+#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
+#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
+#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
+#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
+#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
+#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
+#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
+#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
+#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
+#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
+#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
+#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
+#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
+#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
+#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
+#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
+#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
+#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
+#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
+#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
+#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+
#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
@@ -1309,6 +1727,59 @@ union uvh_lb_bau_intd_software_acknowledge_u {
#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL

+#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
+#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
+#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
+#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
+#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
+#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
+#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
+#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
+#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
+#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
+#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
+#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37
+#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
+#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
+#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
+#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
+#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
+#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
+#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
+#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
+#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
+#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
+#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+
union uvh_lb_bau_misc_control_u {
	unsigned long v;
	struct uvh_lb_bau_misc_control_s {
@@ -1327,7 +1798,8 @@ union uvh_lb_bau_misc_control_u {
		unsigned long programmed_initial_priority:3; /* RW */
		unsigned long use_incoming_priority:1; /* RW */
		unsigned long enable_programmed_initial_priority:1;/* RW */
-		unsigned long rsvd_29_63:35;
+		unsigned long rsvd_29_47:19;
+		unsigned long fun:16; /* RW */
	} s;
	struct uv1h_lb_bau_misc_control_s {
		unsigned long rejection_delay:8; /* RW */
@@ -1348,6 +1820,32 @@ union uvh_lb_bau_misc_control_u {
		unsigned long rsvd_29_47:19;
		unsigned long fun:16; /* RW */
	} s1;
+	struct uvxh_lb_bau_misc_control_s {
+		unsigned long rejection_delay:8; /* RW */
+		unsigned long apic_mode:1; /* RW */
+		unsigned long force_broadcast:1; /* RW */
+		unsigned long force_lock_nop:1; /* RW */
+		unsigned long qpi_agent_presence_vector:3; /* RW */
+		unsigned long descriptor_fetch_mode:1; /* RW */
+		unsigned long enable_intd_soft_ack_mode:1; /* RW */
+		unsigned long intd_soft_ack_timeout_period:4; /* RW */
+		unsigned long enable_dual_mapping_mode:1; /* RW */
+		unsigned long vga_io_port_decode_enable:1; /* RW */
+		unsigned long vga_io_port_16_bit_decode:1; /* RW */
+		unsigned long suppress_dest_registration:1; /* RW */
+		unsigned long programmed_initial_priority:3; /* RW */
+		unsigned long use_incoming_priority:1; /* RW */
+		unsigned long enable_programmed_initial_priority:1;/* RW */
+		unsigned long enable_automatic_apic_mode_selection:1;/* RW */
+		unsigned long apic_mode_status:1; /* RO */
+		unsigned long suppress_interrupts_to_self:1; /* RW */
+		unsigned long enable_lock_based_system_flush:1;/* RW */
+		unsigned long enable_extended_sb_status:1; /* RW */
+		unsigned long suppress_int_prio_udt_to_self:1;/* RW */
+		unsigned long use_legacy_descriptor_formats:1;/* RW */
+		unsigned long rsvd_36_47:12;
+		unsigned long fun:16; /* RW */
+	} sx;
	struct uv2h_lb_bau_misc_control_s {
		unsigned long rejection_delay:8; /* RW */
		unsigned long apic_mode:1; /* RW */
@@ -1374,13 +1872,42 @@ union uvh_lb_bau_misc_control_u {
		unsigned long rsvd_36_47:12;
		unsigned long fun:16; /* RW */
	} s2;
+	struct uv3h_lb_bau_misc_control_s {
+		unsigned long rejection_delay:8; /* RW */
+		unsigned long apic_mode:1; /* RW */
+		unsigned long force_broadcast:1; /* RW */
+		unsigned long force_lock_nop:1; /* RW */
+		unsigned long qpi_agent_presence_vector:3; /* RW */
+		unsigned long descriptor_fetch_mode:1; /* RW */
+		unsigned long enable_intd_soft_ack_mode:1; /* RW */
+		unsigned long intd_soft_ack_timeout_period:4; /* RW */
+		unsigned long enable_dual_mapping_mode:1; /* RW */
+		unsigned long vga_io_port_decode_enable:1; /* RW */
+		unsigned long vga_io_port_16_bit_decode:1; /* RW */
+		unsigned long suppress_dest_registration:1; /* RW */
+		unsigned long programmed_initial_priority:3; /* RW */
+		unsigned long use_incoming_priority:1; /* RW */
+		unsigned long enable_programmed_initial_priority:1;/* RW */
+		unsigned long enable_automatic_apic_mode_selection:1;/* RW */
+		unsigned long apic_mode_status:1; /* RO */
+		unsigned long suppress_interrupts_to_self:1; /* RW */
+		unsigned long enable_lock_based_system_flush:1;/* RW */
+		unsigned long enable_extended_sb_status:1; /* RW */
+		unsigned long suppress_int_prio_udt_to_self:1;/* RW */
+		unsigned long use_legacy_descriptor_formats:1;/* RW */
+		unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */
+		unsigned long enable_intd_prefetch_hint:1; /* RW */
+		unsigned long thread_kill_timebase:8; /* RW */
+		unsigned long rsvd_46_47:2;
+		unsigned long fun:16; /* RW */
+	} s3;
};

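The per-revision *_SHFT/*_MASK pairs and the bit-field views inside union uvh_lb_bau_misc_control_u are two ways of decoding the same 64-bit MMR value. A minimal user-space sketch of the mask-and-shift style follows; it is illustrative only and not part of the patch, the macro values are copied from the definitions above, and the sample register value is made up:

#include <stdio.h>

#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL

int main(void)
{
	/* Stand-in for a value read from the LB_BAU_MISC_CONTROL MMR. */
	unsigned long misc_control = 0x0000000000090000UL;
	unsigned long period;

	period = (misc_control & UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK)
			>> UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT;
	printf("intd_soft_ack_timeout_period = %lu\n", period);	/* prints 9 */
	return 0;
}

Kernel users can equally read the same field through the sx/s2/s3 bit-field view that matches the hub revision.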
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */
/* ========================================================================= */
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8

#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
@@ -1402,8 +1929,8 @@ union uvh_lb_bau_sb_activation_control_u {
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */
/* ========================================================================= */
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0

#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
@@ -1418,8 +1945,8 @@ union uvh_lb_bau_sb_activation_status_0_u {
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */
/* ========================================================================= */
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8

#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
@@ -1434,8 +1961,8 @@ union uvh_lb_bau_sb_activation_status_1_u {
/* ========================================================================= */
/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */
/* ========================================================================= */
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0

#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
@@ -1456,7 +1983,10 @@ union uvh_lb_bau_sb_descriptor_base_u {
/* ========================================================================= */
/* UVH_NODE_ID */
/* ========================================================================= */
#define UVH_NODE_ID 0x0UL
+#define UV1H_NODE_ID 0x0UL
+#define UV2H_NODE_ID 0x0UL
+#define UV3H_NODE_ID 0x0UL

#define UVH_NODE_ID_FORCE1_SHFT 0
#define UVH_NODE_ID_MANUFACTURER_SHFT 1
@@ -1484,6 +2014,21 @@ union uvh_lb_bau_sb_descriptor_base_u {
#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL
#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL

+#define UVXH_NODE_ID_FORCE1_SHFT 0
+#define UVXH_NODE_ID_MANUFACTURER_SHFT 1
+#define UVXH_NODE_ID_PART_NUMBER_SHFT 12
+#define UVXH_NODE_ID_REVISION_SHFT 28
+#define UVXH_NODE_ID_NODE_ID_SHFT 32
+#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50
+#define UVXH_NODE_ID_NI_PORT_SHFT 57
+#define UVXH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UVXH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UVXH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UVXH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
+#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+
#define UV2H_NODE_ID_FORCE1_SHFT 0
#define UV2H_NODE_ID_MANUFACTURER_SHFT 1
#define UV2H_NODE_ID_PART_NUMBER_SHFT 12
@@ -1499,6 +2044,25 @@ union uvh_lb_bau_sb_descriptor_base_u {
#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL

+#define UV3H_NODE_ID_FORCE1_SHFT 0
+#define UV3H_NODE_ID_MANUFACTURER_SHFT 1
+#define UV3H_NODE_ID_PART_NUMBER_SHFT 12
+#define UV3H_NODE_ID_REVISION_SHFT 28
+#define UV3H_NODE_ID_NODE_ID_SHFT 32
+#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48
+#define UV3H_NODE_ID_RESERVED_2_SHFT 49
+#define UV3H_NODE_ID_NODES_PER_BIT_SHFT 50
+#define UV3H_NODE_ID_NI_PORT_SHFT 57
+#define UV3H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UV3H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UV3H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UV3H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UV3H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
+#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
+#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
+#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+
union uvh_node_id_u {
	unsigned long v;
	struct uvh_node_id_s {
@@ -1521,6 +2085,17 @@ union uvh_node_id_u {
		unsigned long ni_port:4; /* RO */
		unsigned long rsvd_60_63:4;
	} s1;
+	struct uvxh_node_id_s {
+		unsigned long force1:1; /* RO */
+		unsigned long manufacturer:11; /* RO */
+		unsigned long part_number:16; /* RO */
+		unsigned long revision:4; /* RO */
+		unsigned long node_id:15; /* RW */
+		unsigned long rsvd_47_49:3;
+		unsigned long nodes_per_bit:7; /* RO */
+		unsigned long ni_port:5; /* RO */
+		unsigned long rsvd_62_63:2;
+	} sx;
	struct uv2h_node_id_s {
		unsigned long force1:1; /* RO */
		unsigned long manufacturer:11; /* RO */
@@ -1532,13 +2107,26 @@ union uvh_node_id_u {
		unsigned long ni_port:5; /* RO */
		unsigned long rsvd_62_63:2;
	} s2;
+	struct uv3h_node_id_s {
+		unsigned long force1:1; /* RO */
+		unsigned long manufacturer:11; /* RO */
+		unsigned long part_number:16; /* RO */
+		unsigned long revision:4; /* RO */
+		unsigned long node_id:15; /* RW */
+		unsigned long rsvd_47:1;
+		unsigned long router_select:1; /* RO */
+		unsigned long rsvd_49:1;
+		unsigned long nodes_per_bit:7; /* RO */
+		unsigned long ni_port:5; /* RO */
+		unsigned long rsvd_62_63:2;
+	} s3;
};

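As an illustration of how the per-revision NODE_ID variants are meant to be picked apart, here is a pair of hypothetical helpers (not from this patch; the mask and shift values are copied from the UVXH_/UV3H_NODE_ID definitions above):

#include <stdbool.h>

#define UVXH_NODE_ID_NODE_ID_SHFT 32
#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL

/* node_id sits in the same bits on UV2 and UV3, so the UVXH macros apply. */
static unsigned long node_id_from_mmr(unsigned long v)
{
	return (v & UVXH_NODE_ID_NODE_ID_MASK) >> UVXH_NODE_ID_NODE_ID_SHFT;
}

/* router_select only exists on UV3; the caller must know the hub type. */
static bool router_select_from_mmr(unsigned long v)
{
	return (v & UV3H_NODE_ID_ROUTER_SELECT_MASK) != 0;
}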
/* ========================================================================= */
/* UVH_NODE_PRESENT_TABLE */
/* ========================================================================= */
#define UVH_NODE_PRESENT_TABLE 0x1400UL
#define UVH_NODE_PRESENT_TABLE_DEPTH 16

#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
@@ -1553,7 +2141,7 @@ union uvh_node_present_table_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL

#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
@@ -1577,7 +2165,7 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL

#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
@@ -1601,7 +2189,7 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL

#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
@@ -1625,7 +2213,7 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL

#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1642,7 +2230,7 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL

#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1659,7 +2247,7 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL

#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1676,7 +2264,10 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_CONFIG_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL

#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
@@ -1690,11 +2281,21 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL

+#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
+#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
+#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
+#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+
#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL

+#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
+#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
+#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
+#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+
union uvh_rh_gam_config_mmr_u {
	unsigned long v;
	struct uvh_rh_gam_config_mmr_s {
@@ -1709,20 +2310,37 @@ union uvh_rh_gam_config_mmr_u {
		unsigned long mmiol_cfg:1; /* RW */
		unsigned long rsvd_13_63:51;
	} s1;
+	struct uvxh_rh_gam_config_mmr_s {
+		unsigned long m_skt:6; /* RW */
+		unsigned long n_skt:4; /* RW */
+		unsigned long rsvd_10_63:54;
+	} sx;
	struct uv2h_rh_gam_config_mmr_s {
		unsigned long m_skt:6; /* RW */
		unsigned long n_skt:4; /* RW */
		unsigned long rsvd_10_63:54;
	} s2;
+	struct uv3h_rh_gam_config_mmr_s {
+		unsigned long m_skt:6; /* RW */
+		unsigned long n_skt:4; /* RW */
+		unsigned long rsvd_10_63:54;
+	} s3;
};

/* ========================================================================= */
/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL

#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL

#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
@@ -1733,6 +2351,13 @@ union uvh_rh_gam_config_mmr_u {
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL

+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
@@ -1740,12 +2365,23 @@ union uvh_rh_gam_config_mmr_u {
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL

+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT 62
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_gru_overlay_config_mmr_u {
	unsigned long v;
	struct uvh_rh_gam_gru_overlay_config_mmr_s {
		unsigned long rsvd_0_27:28;
		unsigned long base:18; /* RW */
-		unsigned long rsvd_46_62:17;
+		unsigned long rsvd_46_51:6;
+		unsigned long n_gru:4; /* RW */
+		unsigned long rsvd_56_62:7;
		unsigned long enable:1; /* RW */
	} s;
	struct uv1h_rh_gam_gru_overlay_config_mmr_s {
@@ -1758,6 +2394,14 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
		unsigned long rsvd_56_62:7;
		unsigned long enable:1; /* RW */
	} s1;
+	struct uvxh_rh_gam_gru_overlay_config_mmr_s {
+		unsigned long rsvd_0_27:28;
+		unsigned long base:18; /* RW */
+		unsigned long rsvd_46_51:6;
+		unsigned long n_gru:4; /* RW */
+		unsigned long rsvd_56_62:7;
+		unsigned long enable:1; /* RW */
+	} sx;
	struct uv2h_rh_gam_gru_overlay_config_mmr_s {
		unsigned long rsvd_0_27:28;
		unsigned long base:18; /* RW */
@@ -1766,12 +2410,22 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
		unsigned long rsvd_56_62:7;
		unsigned long enable:1; /* RW */
	} s2;
+	struct uv3h_rh_gam_gru_overlay_config_mmr_s {
+		unsigned long rsvd_0_27:28;
+		unsigned long base:18; /* RW */
+		unsigned long rsvd_46_51:6;
+		unsigned long n_gru:4; /* RW */
+		unsigned long rsvd_56_61:6;
+		unsigned long mode:1; /* RW */
+		unsigned long enable:1; /* RW */
+	} s3;
};

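A small sketch, again illustrative only, of how a GRU overlay config value could be assembled from its base, n_gru and enable fields; the macros are the UVXH_ ones defined above and the field values are arbitrary:

#include <assert.h>

#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL

int main(void)
{
	unsigned long base = 0x1234UL;	/* arbitrary example base field */
	unsigned long n_gru = 3UL;	/* arbitrary example n_gru field */
	unsigned long v;

	v  = (base << UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT) &
	     UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK;
	v |= (n_gru << UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT) &
	     UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK;
	v |= 1UL << UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT;

	/* Each field must round-trip through its own mask and shift. */
	assert(((v & UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK)
		>> UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT) == n_gru);
	return 0;
}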
/* ========================================================================= */
/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL

#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
@@ -1814,10 +2468,15 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL

#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL

#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46
@@ -1826,11 +2485,21 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL

+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL

+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_mmr_overlay_config_mmr_u {
	unsigned long v;
	struct uvh_rh_gam_mmr_overlay_config_mmr_s {
@@ -1846,18 +2515,30 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u {
		unsigned long rsvd_47_62:16;
		unsigned long enable:1; /* RW */
	} s1;
+	struct uvxh_rh_gam_mmr_overlay_config_mmr_s {
+		unsigned long rsvd_0_25:26;
+		unsigned long base:20; /* RW */
+		unsigned long rsvd_46_62:17;
+		unsigned long enable:1; /* RW */
+	} sx;
	struct uv2h_rh_gam_mmr_overlay_config_mmr_s {
		unsigned long rsvd_0_25:26;
		unsigned long base:20; /* RW */
		unsigned long rsvd_46_62:17;
		unsigned long enable:1; /* RW */
	} s2;
+	struct uv3h_rh_gam_mmr_overlay_config_mmr_s {
+		unsigned long rsvd_0_25:26;
+		unsigned long base:20; /* RW */
+		unsigned long rsvd_46_62:17;
+		unsigned long enable:1; /* RW */
+	} s3;
};

/* ========================================================================= */
/* UVH_RTC */
/* ========================================================================= */
#define UVH_RTC 0x340000UL

#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
@@ -1873,7 +2554,7 @@ union uvh_rtc_u {
/* ========================================================================= */
/* UVH_RTC1_INT_CONFIG */
/* ========================================================================= */
#define UVH_RTC1_INT_CONFIG 0x615c0UL

#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
@@ -1911,8 +2592,8 @@ union uvh_rtc1_int_config_u {
/* ========================================================================= */
/* UVH_SCRATCH5 */
/* ========================================================================= */
#define UVH_SCRATCH5 0x2d0200UL
#define UVH_SCRATCH5_32 0x778

#define UVH_SCRATCH5_SCRATCH5_SHFT 0
#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
@@ -1925,79 +2606,79 @@ union uvh_scratch5_u {
};

/* ========================================================================= */
-/* UV2H_EVENT_OCCURRED2 */
+/* UVXH_EVENT_OCCURRED2 */
/* ========================================================================= */
-#define UV2H_EVENT_OCCURRED2 0x70100UL
-#define UV2H_EVENT_OCCURRED2_32 0xb68
+#define UVXH_EVENT_OCCURRED2 0x70100UL
+#define UVXH_EVENT_OCCURRED2_32 0xb68

-#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
-#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
-#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
-#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
-#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
-#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
-#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
-#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
-#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
-#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
-#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
-#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
-#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
-#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
-#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
-#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
-#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
-#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
-#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
-#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
-#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
-#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
-#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
-#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
-#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
-#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
-#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
-#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
-#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
-#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
-#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
-#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
-#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
-#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
-#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
-#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
-#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
-#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
-#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
-#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
-#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
-#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
-#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
-#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
-#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
-#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
-#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
-#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UVXH_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UVXH_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UVXH_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UVXH_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UVXH_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UVXH_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UVXH_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UVXH_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UVXH_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UVXH_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UVXH_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UVXH_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UVXH_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UVXH_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UVXH_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UVXH_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UVXH_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UVXH_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UVXH_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UVXH_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UVXH_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UVXH_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UVXH_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UVXH_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UVXH_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UVXH_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UVXH_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UVXH_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UVXH_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UVXH_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UVXH_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UVXH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UVXH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UVXH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UVXH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UVXH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UVXH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UVXH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UVXH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UVXH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UVXH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UVXH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UVXH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UVXH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UVXH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UVXH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UVXH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UVXH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UVXH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UVXH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UVXH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UVXH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UVXH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UVXH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UVXH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UVXH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL

-union uv2h_event_occurred2_u {
+union uvxh_event_occurred2_u {
	unsigned long v;
-	struct uv2h_event_occurred2_s {
+	struct uvxh_event_occurred2_s {
		unsigned long rtc_0:1; /* RW */
		unsigned long rtc_1:1; /* RW */
		unsigned long rtc_2:1; /* RW */
@@ -2031,29 +2712,46 @@ union uv2h_event_occurred2_u {
2031 unsigned long rtc_30:1; /* RW */ 2712 unsigned long rtc_30:1; /* RW */
2032 unsigned long rtc_31:1; /* RW */ 2713 unsigned long rtc_31:1; /* RW */
2033 unsigned long rsvd_32_63:32; 2714 unsigned long rsvd_32_63:32;
2034 } s1; 2715 } sx;
2035}; 2716};
2036 2717
2037/* ========================================================================= */ 2718/* ========================================================================= */
2038/* UV2H_EVENT_OCCURRED2_ALIAS */ 2719/* UVXH_EVENT_OCCURRED2_ALIAS */
2039/* ========================================================================= */ 2720/* ========================================================================= */
2040#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL 2721#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL
2041#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 2722#define UVXH_EVENT_OCCURRED2_ALIAS_32 0xb70
2723
2042 2724
2043/* ========================================================================= */ 2725/* ========================================================================= */
2044/* UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 */ 2726/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */
2045/* ========================================================================= */ 2727/* ========================================================================= */
2046#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL 2728#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
2047#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 2729#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
2730#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
2731#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
2732#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
2733#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
2734
2735#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
2736#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
2048 2737
2049#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 2738#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
2050#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL 2739#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
2051 2740
2052union uv2h_lb_bau_sb_activation_status_2_u { 2741#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
2742#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
2743
2744union uvxh_lb_bau_sb_activation_status_2_u {
2053 unsigned long v; 2745 unsigned long v;
2746 struct uvxh_lb_bau_sb_activation_status_2_s {
2747 unsigned long aux_error:64; /* RW */
2748 } sx;
2054 struct uv2h_lb_bau_sb_activation_status_2_s { 2749 struct uv2h_lb_bau_sb_activation_status_2_s {
2055 unsigned long aux_error:64; /* RW */ 2750 unsigned long aux_error:64; /* RW */
2056 } s1; 2751 } s2;
2752 struct uv3h_lb_bau_sb_activation_status_2_s {
2753 unsigned long aux_error:64; /* RW */
2754 } s3;
2057}; 2755};
2058 2756
2059/* ========================================================================= */ 2757/* ========================================================================= */
@@ -2073,5 +2771,87 @@ union uv1h_lb_target_physical_apic_id_mask_u {
2073 } s1; 2771 } s1;
2074}; 2772};
2075 2773
2774/* ========================================================================= */
2775/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */
2776/* ========================================================================= */
2777#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
2778
2779#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
2780#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
2781#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
2782#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
2783#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
2784#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
2785
2786union uv3h_rh_gam_mmioh_overlay_config0_mmr_u {
2787 unsigned long v;
2788 struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s {
2789 unsigned long rsvd_0_25:26;
2790 unsigned long base:20; /* RW */
2791 unsigned long m_io:6; /* RW */
2792 unsigned long n_io:4;
2793 unsigned long rsvd_56_62:7;
2794 unsigned long enable:1; /* RW */
2795 } s3;
2796};
2797
2798/* ========================================================================= */
2799/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */
2800/* ========================================================================= */
2801#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1604000UL
2802
2803#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
2804#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
2805#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
2806#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
2807#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
2808#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
2809
2810union uv3h_rh_gam_mmioh_overlay_config1_mmr_u {
2811 unsigned long v;
2812 struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
2813 unsigned long rsvd_0_25:26;
2814 unsigned long base:20; /* RW */
2815 unsigned long m_io:6; /* RW */
2816 unsigned long n_io:4;
2817 unsigned long rsvd_56_62:7;
2818 unsigned long enable:1; /* RW */
2819 } s3;
2820};
2821
2822/* ========================================================================= */
2823/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */
2824/* ========================================================================= */
2825#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL
2826#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
2827
2828#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
2829#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
2830
2831union uv3h_rh_gam_mmioh_redirect_config0_mmr_u {
2832 unsigned long v;
2833 struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
2834 unsigned long nasid:15; /* RW */
2835 unsigned long rsvd_15_63:49;
2836 } s3;
2837};
2838
2839/* ========================================================================= */
2840/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */
2841/* ========================================================================= */
2842#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL
2843#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
2844
2845#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
2846#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
2847
2848union uv3h_rh_gam_mmioh_redirect_config1_mmr_u {
2849 unsigned long v;
2850 struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
2851 unsigned long nasid:15; /* RW */
2852 unsigned long rsvd_15_63:49;
2853 } s3;
2854};
2855
2076 2856
2077#endif /* _ASM_X86_UV_UV_MMRS_H */ 2857#endif /* _ASM_X86_UV_UV_MMRS_H */
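
A minimal sketch of how one of the new UV3 MMIOH bitfield unions above is typically consumed: read the raw 64-bit MMR into .v, then pick fields out of the .s3 view. The uv_read_local_mmr() accessor is assumed to come from <asm/uv/uv_hub.h> and is not part of this diff.

        /* Sketch only: return the physical base of MMIOH overlay 0, or 0 if
         * the overlay is disabled.  base holds bits 45:26 of the address. */
        static unsigned long uv3_mmioh0_base(void)
        {
                union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;

                overlay.v = uv_read_local_mmr(UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR);
                if (!overlay.s3.enable)
                        return 0;
                return (unsigned long)overlay.s3.base <<
                        UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
        }
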
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 235b49fa554b..b6fbf860e398 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -57,9 +57,12 @@
57#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 57#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
58#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 58#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
59#define SECONDARY_EXEC_RDTSCP 0x00000008 59#define SECONDARY_EXEC_RDTSCP 0x00000008
60#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
60#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 61#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
61#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 62#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
62#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 63#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
64#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
65#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
63#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 66#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
64#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 67#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
65 68
@@ -97,6 +100,7 @@ enum vmcs_field {
97 GUEST_GS_SELECTOR = 0x0000080a, 100 GUEST_GS_SELECTOR = 0x0000080a,
98 GUEST_LDTR_SELECTOR = 0x0000080c, 101 GUEST_LDTR_SELECTOR = 0x0000080c,
99 GUEST_TR_SELECTOR = 0x0000080e, 102 GUEST_TR_SELECTOR = 0x0000080e,
103 GUEST_INTR_STATUS = 0x00000810,
100 HOST_ES_SELECTOR = 0x00000c00, 104 HOST_ES_SELECTOR = 0x00000c00,
101 HOST_CS_SELECTOR = 0x00000c02, 105 HOST_CS_SELECTOR = 0x00000c02,
102 HOST_SS_SELECTOR = 0x00000c04, 106 HOST_SS_SELECTOR = 0x00000c04,
@@ -124,6 +128,14 @@ enum vmcs_field {
124 APIC_ACCESS_ADDR_HIGH = 0x00002015, 128 APIC_ACCESS_ADDR_HIGH = 0x00002015,
125 EPT_POINTER = 0x0000201a, 129 EPT_POINTER = 0x0000201a,
126 EPT_POINTER_HIGH = 0x0000201b, 130 EPT_POINTER_HIGH = 0x0000201b,
131 EOI_EXIT_BITMAP0 = 0x0000201c,
132 EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
133 EOI_EXIT_BITMAP1 = 0x0000201e,
134 EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
135 EOI_EXIT_BITMAP2 = 0x00002020,
136 EOI_EXIT_BITMAP2_HIGH = 0x00002021,
137 EOI_EXIT_BITMAP3 = 0x00002022,
138 EOI_EXIT_BITMAP3_HIGH = 0x00002023,
127 GUEST_PHYSICAL_ADDRESS = 0x00002400, 139 GUEST_PHYSICAL_ADDRESS = 0x00002400,
128 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, 140 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
129 VMCS_LINK_POINTER = 0x00002800, 141 VMCS_LINK_POINTER = 0x00002800,
@@ -346,9 +358,9 @@ enum vmcs_field {
346 358
347#define AR_RESERVD_MASK 0xfffe0f00 359#define AR_RESERVD_MASK 0xfffe0f00
348 360
349#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) 361#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
350#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) 362#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1)
351#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) 363#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
352 364
353#define VMX_NR_VPIDS (1 << 16) 365#define VMX_NR_VPIDS (1 << 16)
354#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 366#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
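
The four EOI_EXIT_BITMAPn/..._HIGH pairs added above describe a single 256-bit bitmap indexed by interrupt vector, and GUEST_INTR_STATUS carries the RVI/SVI bytes used by virtual-interrupt delivery. Below is a hedged sketch of the bitmap arithmetic only; the helper name is illustrative, and KVM's real code (arch/x86/kvm/vmx.c) flushes the result into the VMCS with its own accessors.

        /* Mark one vector as requiring an EOI-induced VM exit
         * (EXIT_REASON_EOI_INDUCED). */
        static void set_eoi_exit_bit(u64 eoi_exit_bitmap[4], u8 vector)
        {
                /* 4 x 64 bits = one bit per possible vector (0..255) */
                eoi_exit_bitmap[vector / 64] |= 1ULL << (vector % 64);
        }
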
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 57693498519c..d8d99222b36a 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -69,17 +69,6 @@ struct x86_init_oem {
69}; 69};
70 70
71/** 71/**
72 * struct x86_init_mapping - platform specific initial kernel pagetable setup
73 * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
74 *
75 * For more details on the purpose of this hook, look in
76 * init_memory_mapping and the commit that added it.
77 */
78struct x86_init_mapping {
79 void (*pagetable_reserve)(u64 start, u64 end);
80};
81
82/**
83 * struct x86_init_paging - platform specific paging functions 72 * struct x86_init_paging - platform specific paging functions
84 * @pagetable_init: platform specific paging initialization call to setup 73 * @pagetable_init: platform specific paging initialization call to setup
85 * the kernel pagetables and prepare accessors functions. 74 * the kernel pagetables and prepare accessors functions.
@@ -136,7 +125,6 @@ struct x86_init_ops {
136 struct x86_init_mpparse mpparse; 125 struct x86_init_mpparse mpparse;
137 struct x86_init_irqs irqs; 126 struct x86_init_irqs irqs;
138 struct x86_init_oem oem; 127 struct x86_init_oem oem;
139 struct x86_init_mapping mapping;
140 struct x86_init_paging paging; 128 struct x86_init_paging paging;
141 struct x86_init_timers timers; 129 struct x86_init_timers timers;
142 struct x86_init_iommu iommu; 130 struct x86_init_iommu iommu;
@@ -181,19 +169,38 @@ struct x86_platform_ops {
181}; 169};
182 170
183struct pci_dev; 171struct pci_dev;
172struct msi_msg;
184 173
185struct x86_msi_ops { 174struct x86_msi_ops {
186 int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); 175 int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
176 void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
177 unsigned int dest, struct msi_msg *msg,
178 u8 hpet_id);
187 void (*teardown_msi_irq)(unsigned int irq); 179 void (*teardown_msi_irq)(unsigned int irq);
188 void (*teardown_msi_irqs)(struct pci_dev *dev); 180 void (*teardown_msi_irqs)(struct pci_dev *dev);
189 void (*restore_msi_irqs)(struct pci_dev *dev, int irq); 181 void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
182 int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
190}; 183};
191 184
185struct IO_APIC_route_entry;
186struct io_apic_irq_attr;
187struct irq_data;
188struct cpumask;
189
192struct x86_io_apic_ops { 190struct x86_io_apic_ops {
193 void (*init) (void); 191 void (*init) (void);
194 unsigned int (*read) (unsigned int apic, unsigned int reg); 192 unsigned int (*read) (unsigned int apic, unsigned int reg);
195 void (*write) (unsigned int apic, unsigned int reg, unsigned int value); 193 void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
196 void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); 194 void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
195 void (*disable)(void);
196 void (*print_entries)(unsigned int apic, unsigned int nr_entries);
197 int (*set_affinity)(struct irq_data *data,
198 const struct cpumask *mask,
199 bool force);
200 int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
201 unsigned int destination, int vector,
202 struct io_apic_irq_attr *attr);
203 void (*eoi_ioapic_pin)(int apic, int pin, int vector);
197}; 204};
198 205
199extern struct x86_init_ops x86_init; 206extern struct x86_init_ops x86_init;
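
The widened x86_msi_ops and x86_io_apic_ops let interrupt-remapping code interpose on MSI composition and IO-APIC programming instead of patching the native helpers. A sketch of how one hook would be swapped; x86_io_apic is assumed to be the global ops instance defined in arch/x86/kernel/x86_init.c, and the replacement function here is hypothetical.

        static void hypothetical_remapped_ioapic_disable(void)
        {
                /* platform/remapping-specific quiesce of the IO-APICs */
        }

        static void __init install_remap_io_apic_hooks(void)
        {
                x86_io_apic.disable = hypothetical_remapped_ioapic_disable;
        }
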
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index cc146d51449e..ca842f2769ef 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -16,4 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
16 return raw_irqs_disabled_flags(regs->flags); 16 return raw_irqs_disabled_flags(regs->flags);
17} 17}
18 18
19/* No need for a barrier -- XCHG is a barrier on x86. */
20#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
21
19#endif /* _ASM_X86_XEN_EVENTS_H */ 22#endif /* _ASM_X86_XEN_EVENTS_H */
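
xchg_xen_ulong() gives the shared event-channel code one atomic-exchange primitive per architecture; on x86 the XCHG instruction is already a full barrier, hence the comment. A minimal usage sketch with illustrative names:

        /* Atomically fetch and clear a word of pending event bits. */
        static inline xen_ulong_t claim_pending_word(xen_ulong_t *pending)
        {
                return xchg_xen_ulong(pending, 0);
        }
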
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 472b9b783019..6aef9fbc09b7 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -212,4 +212,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr);
212void make_lowmem_page_readonly(void *vaddr); 212void make_lowmem_page_readonly(void *vaddr);
213void make_lowmem_page_readwrite(void *vaddr); 213void make_lowmem_page_readwrite(void *vaddr);
214 214
215#define xen_remap(cookie, size) ioremap((cookie), (size));
216
215#endif /* _ASM_X86_XEN_PAGE_H */ 217#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index f8fde90bc45e..d8829751b3f8 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,10 +1,499 @@
1#ifdef CONFIG_KMEMCHECK 1#ifdef CONFIG_KMEMCHECK
2/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ 2/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
3# include <asm-generic/xor.h> 3# include <asm-generic/xor.h>
4#elif !defined(_ASM_X86_XOR_H)
5#define _ASM_X86_XOR_H
6
7/*
8 * Optimized RAID-5 checksumming functions for SSE.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2, or (at your option)
13 * any later version.
14 *
15 * You should have received a copy of the GNU General Public License
16 * (for example /usr/src/linux/COPYING); if not, write to the Free
17 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20/*
21 * Cache avoiding checksumming functions utilizing KNI instructions
22 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
23 */
24
25/*
26 * Based on
27 * High-speed RAID5 checksumming functions utilizing SSE instructions.
28 * Copyright (C) 1998 Ingo Molnar.
29 */
30
31/*
32 * x86-64 changes / gcc fixes from Andi Kleen.
33 * Copyright 2002 Andi Kleen, SuSE Labs.
34 *
35 * This hasn't been optimized for the hammer yet, but there are likely
36 * no advantages to be gotten from x86-64 here anyways.
37 */
38
39#include <asm/i387.h>
40
41#ifdef CONFIG_X86_32
42/* reduce register pressure */
43# define XOR_CONSTANT_CONSTRAINT "i"
4#else 44#else
45# define XOR_CONSTANT_CONSTRAINT "re"
46#endif
47
48#define OFFS(x) "16*("#x")"
49#define PF_OFFS(x) "256+16*("#x")"
50#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
51#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
52#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
53#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
54#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
55#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
56#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
57#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
58#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
59#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
60#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
61#define NOP(x)
62
63#define BLK64(pf, op, i) \
64 pf(i) \
65 op(i, 0) \
66 op(i + 1, 1) \
67 op(i + 2, 2) \
68 op(i + 3, 3)
69
70static void
71xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
72{
73 unsigned long lines = bytes >> 8;
74
75 kernel_fpu_begin();
76
77 asm volatile(
78#undef BLOCK
79#define BLOCK(i) \
80 LD(i, 0) \
81 LD(i + 1, 1) \
82 PF1(i) \
83 PF1(i + 2) \
84 LD(i + 2, 2) \
85 LD(i + 3, 3) \
86 PF0(i + 4) \
87 PF0(i + 6) \
88 XO1(i, 0) \
89 XO1(i + 1, 1) \
90 XO1(i + 2, 2) \
91 XO1(i + 3, 3) \
92 ST(i, 0) \
93 ST(i + 1, 1) \
94 ST(i + 2, 2) \
95 ST(i + 3, 3) \
96
97
98 PF0(0)
99 PF0(2)
100
101 " .align 32 ;\n"
102 " 1: ;\n"
103
104 BLOCK(0)
105 BLOCK(4)
106 BLOCK(8)
107 BLOCK(12)
108
109 " add %[inc], %[p1] ;\n"
110 " add %[inc], %[p2] ;\n"
111 " dec %[cnt] ;\n"
112 " jnz 1b ;\n"
113 : [cnt] "+r" (lines),
114 [p1] "+r" (p1), [p2] "+r" (p2)
115 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
116 : "memory");
117
118 kernel_fpu_end();
119}
120
121static void
122xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
123{
124 unsigned long lines = bytes >> 8;
125
126 kernel_fpu_begin();
127
128 asm volatile(
129#undef BLOCK
130#define BLOCK(i) \
131 BLK64(PF0, LD, i) \
132 BLK64(PF1, XO1, i) \
133 BLK64(NOP, ST, i) \
134
135 " .align 32 ;\n"
136 " 1: ;\n"
137
138 BLOCK(0)
139 BLOCK(4)
140 BLOCK(8)
141 BLOCK(12)
142
143 " add %[inc], %[p1] ;\n"
144 " add %[inc], %[p2] ;\n"
145 " dec %[cnt] ;\n"
146 " jnz 1b ;\n"
147 : [cnt] "+r" (lines),
148 [p1] "+r" (p1), [p2] "+r" (p2)
149 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
150 : "memory");
151
152 kernel_fpu_end();
153}
154
155static void
156xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
157 unsigned long *p3)
158{
159 unsigned long lines = bytes >> 8;
160
161 kernel_fpu_begin();
162
163 asm volatile(
164#undef BLOCK
165#define BLOCK(i) \
166 PF1(i) \
167 PF1(i + 2) \
168 LD(i, 0) \
169 LD(i + 1, 1) \
170 LD(i + 2, 2) \
171 LD(i + 3, 3) \
172 PF2(i) \
173 PF2(i + 2) \
174 PF0(i + 4) \
175 PF0(i + 6) \
176 XO1(i, 0) \
177 XO1(i + 1, 1) \
178 XO1(i + 2, 2) \
179 XO1(i + 3, 3) \
180 XO2(i, 0) \
181 XO2(i + 1, 1) \
182 XO2(i + 2, 2) \
183 XO2(i + 3, 3) \
184 ST(i, 0) \
185 ST(i + 1, 1) \
186 ST(i + 2, 2) \
187 ST(i + 3, 3) \
188
189
190 PF0(0)
191 PF0(2)
192
193 " .align 32 ;\n"
194 " 1: ;\n"
195
196 BLOCK(0)
197 BLOCK(4)
198 BLOCK(8)
199 BLOCK(12)
200
201 " add %[inc], %[p1] ;\n"
202 " add %[inc], %[p2] ;\n"
203 " add %[inc], %[p3] ;\n"
204 " dec %[cnt] ;\n"
205 " jnz 1b ;\n"
206 : [cnt] "+r" (lines),
207 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
208 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
209 : "memory");
210
211 kernel_fpu_end();
212}
213
214static void
215xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
216 unsigned long *p3)
217{
218 unsigned long lines = bytes >> 8;
219
220 kernel_fpu_begin();
221
222 asm volatile(
223#undef BLOCK
224#define BLOCK(i) \
225 BLK64(PF0, LD, i) \
226 BLK64(PF1, XO1, i) \
227 BLK64(PF2, XO2, i) \
228 BLK64(NOP, ST, i) \
229
230 " .align 32 ;\n"
231 " 1: ;\n"
232
233 BLOCK(0)
234 BLOCK(4)
235 BLOCK(8)
236 BLOCK(12)
237
238 " add %[inc], %[p1] ;\n"
239 " add %[inc], %[p2] ;\n"
240 " add %[inc], %[p3] ;\n"
241 " dec %[cnt] ;\n"
242 " jnz 1b ;\n"
243 : [cnt] "+r" (lines),
244 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
245 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
246 : "memory");
247
248 kernel_fpu_end();
249}
250
251static void
252xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
253 unsigned long *p3, unsigned long *p4)
254{
255 unsigned long lines = bytes >> 8;
256
257 kernel_fpu_begin();
258
259 asm volatile(
260#undef BLOCK
261#define BLOCK(i) \
262 PF1(i) \
263 PF1(i + 2) \
264 LD(i, 0) \
265 LD(i + 1, 1) \
266 LD(i + 2, 2) \
267 LD(i + 3, 3) \
268 PF2(i) \
269 PF2(i + 2) \
270 XO1(i, 0) \
271 XO1(i + 1, 1) \
272 XO1(i + 2, 2) \
273 XO1(i + 3, 3) \
274 PF3(i) \
275 PF3(i + 2) \
276 PF0(i + 4) \
277 PF0(i + 6) \
278 XO2(i, 0) \
279 XO2(i + 1, 1) \
280 XO2(i + 2, 2) \
281 XO2(i + 3, 3) \
282 XO3(i, 0) \
283 XO3(i + 1, 1) \
284 XO3(i + 2, 2) \
285 XO3(i + 3, 3) \
286 ST(i, 0) \
287 ST(i + 1, 1) \
288 ST(i + 2, 2) \
289 ST(i + 3, 3) \
290
291
292 PF0(0)
293 PF0(2)
294
295 " .align 32 ;\n"
296 " 1: ;\n"
297
298 BLOCK(0)
299 BLOCK(4)
300 BLOCK(8)
301 BLOCK(12)
302
303 " add %[inc], %[p1] ;\n"
304 " add %[inc], %[p2] ;\n"
305 " add %[inc], %[p3] ;\n"
306 " add %[inc], %[p4] ;\n"
307 " dec %[cnt] ;\n"
308 " jnz 1b ;\n"
309 : [cnt] "+r" (lines), [p1] "+r" (p1),
310 [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
311 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
312 : "memory");
313
314 kernel_fpu_end();
315}
316
317static void
318xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
319 unsigned long *p3, unsigned long *p4)
320{
321 unsigned long lines = bytes >> 8;
322
323 kernel_fpu_begin();
324
325 asm volatile(
326#undef BLOCK
327#define BLOCK(i) \
328 BLK64(PF0, LD, i) \
329 BLK64(PF1, XO1, i) \
330 BLK64(PF2, XO2, i) \
331 BLK64(PF3, XO3, i) \
332 BLK64(NOP, ST, i) \
333
334 " .align 32 ;\n"
335 " 1: ;\n"
336
337 BLOCK(0)
338 BLOCK(4)
339 BLOCK(8)
340 BLOCK(12)
341
342 " add %[inc], %[p1] ;\n"
343 " add %[inc], %[p2] ;\n"
344 " add %[inc], %[p3] ;\n"
345 " add %[inc], %[p4] ;\n"
346 " dec %[cnt] ;\n"
347 " jnz 1b ;\n"
348 : [cnt] "+r" (lines), [p1] "+r" (p1),
349 [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
350 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
351 : "memory");
352
353 kernel_fpu_end();
354}
355
356static void
357xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
358 unsigned long *p3, unsigned long *p4, unsigned long *p5)
359{
360 unsigned long lines = bytes >> 8;
361
362 kernel_fpu_begin();
363
364 asm volatile(
365#undef BLOCK
366#define BLOCK(i) \
367 PF1(i) \
368 PF1(i + 2) \
369 LD(i, 0) \
370 LD(i + 1, 1) \
371 LD(i + 2, 2) \
372 LD(i + 3, 3) \
373 PF2(i) \
374 PF2(i + 2) \
375 XO1(i, 0) \
376 XO1(i + 1, 1) \
377 XO1(i + 2, 2) \
378 XO1(i + 3, 3) \
379 PF3(i) \
380 PF3(i + 2) \
381 XO2(i, 0) \
382 XO2(i + 1, 1) \
383 XO2(i + 2, 2) \
384 XO2(i + 3, 3) \
385 PF4(i) \
386 PF4(i + 2) \
387 PF0(i + 4) \
388 PF0(i + 6) \
389 XO3(i, 0) \
390 XO3(i + 1, 1) \
391 XO3(i + 2, 2) \
392 XO3(i + 3, 3) \
393 XO4(i, 0) \
394 XO4(i + 1, 1) \
395 XO4(i + 2, 2) \
396 XO4(i + 3, 3) \
397 ST(i, 0) \
398 ST(i + 1, 1) \
399 ST(i + 2, 2) \
400 ST(i + 3, 3) \
401
402
403 PF0(0)
404 PF0(2)
405
406 " .align 32 ;\n"
407 " 1: ;\n"
408
409 BLOCK(0)
410 BLOCK(4)
411 BLOCK(8)
412 BLOCK(12)
413
414 " add %[inc], %[p1] ;\n"
415 " add %[inc], %[p2] ;\n"
416 " add %[inc], %[p3] ;\n"
417 " add %[inc], %[p4] ;\n"
418 " add %[inc], %[p5] ;\n"
419 " dec %[cnt] ;\n"
420 " jnz 1b ;\n"
421 : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
422 [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
423 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
424 : "memory");
425
426 kernel_fpu_end();
427}
428
429static void
430xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
431 unsigned long *p3, unsigned long *p4, unsigned long *p5)
432{
433 unsigned long lines = bytes >> 8;
434
435 kernel_fpu_begin();
436
437 asm volatile(
438#undef BLOCK
439#define BLOCK(i) \
440 BLK64(PF0, LD, i) \
441 BLK64(PF1, XO1, i) \
442 BLK64(PF2, XO2, i) \
443 BLK64(PF3, XO3, i) \
444 BLK64(PF4, XO4, i) \
445 BLK64(NOP, ST, i) \
446
447 " .align 32 ;\n"
448 " 1: ;\n"
449
450 BLOCK(0)
451 BLOCK(4)
452 BLOCK(8)
453 BLOCK(12)
454
455 " add %[inc], %[p1] ;\n"
456 " add %[inc], %[p2] ;\n"
457 " add %[inc], %[p3] ;\n"
458 " add %[inc], %[p4] ;\n"
459 " add %[inc], %[p5] ;\n"
460 " dec %[cnt] ;\n"
461 " jnz 1b ;\n"
462 : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
463 [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
464 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
465 : "memory");
466
467 kernel_fpu_end();
468}
469
470static struct xor_block_template xor_block_sse_pf64 = {
471 .name = "prefetch64-sse",
472 .do_2 = xor_sse_2_pf64,
473 .do_3 = xor_sse_3_pf64,
474 .do_4 = xor_sse_4_pf64,
475 .do_5 = xor_sse_5_pf64,
476};
477
478#undef LD
479#undef XO1
480#undef XO2
481#undef XO3
482#undef XO4
483#undef ST
484#undef NOP
485#undef BLK64
486#undef BLOCK
487
488#undef XOR_CONSTANT_CONSTRAINT
489
5#ifdef CONFIG_X86_32 490#ifdef CONFIG_X86_32
6# include <asm/xor_32.h> 491# include <asm/xor_32.h>
7#else 492#else
8# include <asm/xor_64.h> 493# include <asm/xor_64.h>
9#endif 494#endif
10#endif 495
496#define XOR_SELECT_TEMPLATE(FASTEST) \
497 AVX_SELECT(FASTEST)
498
499#endif /* _ASM_X86_XOR_H */
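
The prefetch64-sse template and the new XOR_SELECT_TEMPLATE definition above are consumed by the generic RAID xor code in crypto/xor.c; drivers do not call xor_sse_2_pf64() and friends directly. A hedged usage sketch through the public xor_blocks() helper (assumed to be declared in <linux/raid/xor.h>):

        #include <linux/raid/xor.h>

        /* dest ^= src over a buffer.  The SSE loops above work on 256-byte
         * chunks, so bytes is assumed to be a multiple of 256. */
        static void xor_one_source(void *dest, void *src, unsigned int bytes)
        {
                void *srcs[1] = { src };

                xor_blocks(1, bytes, dest, srcs);
        }
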
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index f79cb7ec0e06..ce05722e3c68 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -2,7 +2,7 @@
2#define _ASM_X86_XOR_32_H 2#define _ASM_X86_XOR_32_H
3 3
4/* 4/*
5 * Optimized RAID-5 checksumming functions for MMX and SSE. 5 * Optimized RAID-5 checksumming functions for MMX.
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = {
529 .do_5 = xor_p5_mmx_5, 529 .do_5 = xor_p5_mmx_5,
530}; 530};
531 531
532/*
533 * Cache avoiding checksumming functions utilizing KNI instructions
534 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
535 */
536
537#define OFFS(x) "16*("#x")"
538#define PF_OFFS(x) "256+16*("#x")"
539#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
540#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
541#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
542#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
543#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
544#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
545#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
546#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
547#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
548#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
549#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
550#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
551#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
552
553
554static void
555xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
556{
557 unsigned long lines = bytes >> 8;
558
559 kernel_fpu_begin();
560
561 asm volatile(
562#undef BLOCK
563#define BLOCK(i) \
564 LD(i, 0) \
565 LD(i + 1, 1) \
566 PF1(i) \
567 PF1(i + 2) \
568 LD(i + 2, 2) \
569 LD(i + 3, 3) \
570 PF0(i + 4) \
571 PF0(i + 6) \
572 XO1(i, 0) \
573 XO1(i + 1, 1) \
574 XO1(i + 2, 2) \
575 XO1(i + 3, 3) \
576 ST(i, 0) \
577 ST(i + 1, 1) \
578 ST(i + 2, 2) \
579 ST(i + 3, 3) \
580
581
582 PF0(0)
583 PF0(2)
584
585 " .align 32 ;\n"
586 " 1: ;\n"
587
588 BLOCK(0)
589 BLOCK(4)
590 BLOCK(8)
591 BLOCK(12)
592
593 " addl $256, %1 ;\n"
594 " addl $256, %2 ;\n"
595 " decl %0 ;\n"
596 " jnz 1b ;\n"
597 : "+r" (lines),
598 "+r" (p1), "+r" (p2)
599 :
600 : "memory");
601
602 kernel_fpu_end();
603}
604
605static void
606xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
607 unsigned long *p3)
608{
609 unsigned long lines = bytes >> 8;
610
611 kernel_fpu_begin();
612
613 asm volatile(
614#undef BLOCK
615#define BLOCK(i) \
616 PF1(i) \
617 PF1(i + 2) \
618 LD(i,0) \
619 LD(i + 1, 1) \
620 LD(i + 2, 2) \
621 LD(i + 3, 3) \
622 PF2(i) \
623 PF2(i + 2) \
624 PF0(i + 4) \
625 PF0(i + 6) \
626 XO1(i,0) \
627 XO1(i + 1, 1) \
628 XO1(i + 2, 2) \
629 XO1(i + 3, 3) \
630 XO2(i,0) \
631 XO2(i + 1, 1) \
632 XO2(i + 2, 2) \
633 XO2(i + 3, 3) \
634 ST(i,0) \
635 ST(i + 1, 1) \
636 ST(i + 2, 2) \
637 ST(i + 3, 3) \
638
639
640 PF0(0)
641 PF0(2)
642
643 " .align 32 ;\n"
644 " 1: ;\n"
645
646 BLOCK(0)
647 BLOCK(4)
648 BLOCK(8)
649 BLOCK(12)
650
651 " addl $256, %1 ;\n"
652 " addl $256, %2 ;\n"
653 " addl $256, %3 ;\n"
654 " decl %0 ;\n"
655 " jnz 1b ;\n"
656 : "+r" (lines),
657 "+r" (p1), "+r"(p2), "+r"(p3)
658 :
659 : "memory" );
660
661 kernel_fpu_end();
662}
663
664static void
665xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
666 unsigned long *p3, unsigned long *p4)
667{
668 unsigned long lines = bytes >> 8;
669
670 kernel_fpu_begin();
671
672 asm volatile(
673#undef BLOCK
674#define BLOCK(i) \
675 PF1(i) \
676 PF1(i + 2) \
677 LD(i,0) \
678 LD(i + 1, 1) \
679 LD(i + 2, 2) \
680 LD(i + 3, 3) \
681 PF2(i) \
682 PF2(i + 2) \
683 XO1(i,0) \
684 XO1(i + 1, 1) \
685 XO1(i + 2, 2) \
686 XO1(i + 3, 3) \
687 PF3(i) \
688 PF3(i + 2) \
689 PF0(i + 4) \
690 PF0(i + 6) \
691 XO2(i,0) \
692 XO2(i + 1, 1) \
693 XO2(i + 2, 2) \
694 XO2(i + 3, 3) \
695 XO3(i,0) \
696 XO3(i + 1, 1) \
697 XO3(i + 2, 2) \
698 XO3(i + 3, 3) \
699 ST(i,0) \
700 ST(i + 1, 1) \
701 ST(i + 2, 2) \
702 ST(i + 3, 3) \
703
704
705 PF0(0)
706 PF0(2)
707
708 " .align 32 ;\n"
709 " 1: ;\n"
710
711 BLOCK(0)
712 BLOCK(4)
713 BLOCK(8)
714 BLOCK(12)
715
716 " addl $256, %1 ;\n"
717 " addl $256, %2 ;\n"
718 " addl $256, %3 ;\n"
719 " addl $256, %4 ;\n"
720 " decl %0 ;\n"
721 " jnz 1b ;\n"
722 : "+r" (lines),
723 "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
724 :
725 : "memory" );
726
727 kernel_fpu_end();
728}
729
730static void
731xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
732 unsigned long *p3, unsigned long *p4, unsigned long *p5)
733{
734 unsigned long lines = bytes >> 8;
735
736 kernel_fpu_begin();
737
738 /* Make sure GCC forgets anything it knows about p4 or p5,
739 such that it won't pass to the asm volatile below a
740 register that is shared with any other variable. That's
741 because we modify p4 and p5 there, but we can't mark them
742 as read/write, otherwise we'd overflow the 10-asm-operands
743 limit of GCC < 3.1. */
744 asm("" : "+r" (p4), "+r" (p5));
745
746 asm volatile(
747#undef BLOCK
748#define BLOCK(i) \
749 PF1(i) \
750 PF1(i + 2) \
751 LD(i,0) \
752 LD(i + 1, 1) \
753 LD(i + 2, 2) \
754 LD(i + 3, 3) \
755 PF2(i) \
756 PF2(i + 2) \
757 XO1(i,0) \
758 XO1(i + 1, 1) \
759 XO1(i + 2, 2) \
760 XO1(i + 3, 3) \
761 PF3(i) \
762 PF3(i + 2) \
763 XO2(i,0) \
764 XO2(i + 1, 1) \
765 XO2(i + 2, 2) \
766 XO2(i + 3, 3) \
767 PF4(i) \
768 PF4(i + 2) \
769 PF0(i + 4) \
770 PF0(i + 6) \
771 XO3(i,0) \
772 XO3(i + 1, 1) \
773 XO3(i + 2, 2) \
774 XO3(i + 3, 3) \
775 XO4(i,0) \
776 XO4(i + 1, 1) \
777 XO4(i + 2, 2) \
778 XO4(i + 3, 3) \
779 ST(i,0) \
780 ST(i + 1, 1) \
781 ST(i + 2, 2) \
782 ST(i + 3, 3) \
783
784
785 PF0(0)
786 PF0(2)
787
788 " .align 32 ;\n"
789 " 1: ;\n"
790
791 BLOCK(0)
792 BLOCK(4)
793 BLOCK(8)
794 BLOCK(12)
795
796 " addl $256, %1 ;\n"
797 " addl $256, %2 ;\n"
798 " addl $256, %3 ;\n"
799 " addl $256, %4 ;\n"
800 " addl $256, %5 ;\n"
801 " decl %0 ;\n"
802 " jnz 1b ;\n"
803 : "+r" (lines),
804 "+r" (p1), "+r" (p2), "+r" (p3)
805 : "r" (p4), "r" (p5)
806 : "memory");
807
808 /* p4 and p5 were modified, and now the variables are dead.
809 Clobber them just to be sure nobody does something stupid
810 like assuming they have some legal value. */
811 asm("" : "=r" (p4), "=r" (p5));
812
813 kernel_fpu_end();
814}
815
816static struct xor_block_template xor_block_pIII_sse = { 532static struct xor_block_template xor_block_pIII_sse = {
817 .name = "pIII_sse", 533 .name = "pIII_sse",
818 .do_2 = xor_sse_2, 534 .do_2 = xor_sse_2,
@@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = {
827/* Also try the generic routines. */ 543/* Also try the generic routines. */
828#include <asm-generic/xor.h> 544#include <asm-generic/xor.h>
829 545
546/* We force the use of the SSE xor block because it can write around L2.
547 We may also be able to load into the L1 only depending on how the cpu
548 deals with a load to a line that is being prefetched. */
830#undef XOR_TRY_TEMPLATES 549#undef XOR_TRY_TEMPLATES
831#define XOR_TRY_TEMPLATES \ 550#define XOR_TRY_TEMPLATES \
832do { \ 551do { \
833 xor_speed(&xor_block_8regs); \
834 xor_speed(&xor_block_8regs_p); \
835 xor_speed(&xor_block_32regs); \
836 xor_speed(&xor_block_32regs_p); \
837 AVX_XOR_SPEED; \ 552 AVX_XOR_SPEED; \
838 if (cpu_has_xmm) \ 553 if (cpu_has_xmm) { \
839 xor_speed(&xor_block_pIII_sse); \ 554 xor_speed(&xor_block_pIII_sse); \
840 if (cpu_has_mmx) { \ 555 xor_speed(&xor_block_sse_pf64); \
556 } else if (cpu_has_mmx) { \
841 xor_speed(&xor_block_pII_mmx); \ 557 xor_speed(&xor_block_pII_mmx); \
842 xor_speed(&xor_block_p5_mmx); \ 558 xor_speed(&xor_block_p5_mmx); \
559 } else { \
560 xor_speed(&xor_block_8regs); \
561 xor_speed(&xor_block_8regs_p); \
562 xor_speed(&xor_block_32regs); \
563 xor_speed(&xor_block_32regs_p); \
843 } \ 564 } \
844} while (0) 565} while (0)
845 566
846/* We force the use of the SSE xor block because it can write around L2.
847 We may also be able to load into the L1 only depending on how the cpu
848 deals with a load to a line that is being prefetched. */
849#define XOR_SELECT_TEMPLATE(FASTEST) \
850 AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
851
852#endif /* _ASM_X86_XOR_32_H */ 567#endif /* _ASM_X86_XOR_32_H */
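
The reshuffled XOR_TRY_TEMPLATES above turns the old "benchmark everything" list into a ladder, so the slow integer templates are only timed on CPUs without SSE or MMX. Restated as plain C for readability (illustration of the ordering only; the real macro expands inside the calibration code in crypto/xor.c):

        static void candidate_ladder_sketch(void)
        {
                /* AVX_XOR_SPEED runs first and no-ops when AVX is absent */
                if (cpu_has_xmm) {
                        /* time pIII_sse and prefetch64-sse */
                } else if (cpu_has_mmx) {
                        /* time the two MMX templates */
                } else {
                        /* time the four generic integer templates */
                }
        }
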
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 87ac522c4af5..546f1e3b87cc 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -1,301 +1,6 @@
1#ifndef _ASM_X86_XOR_64_H 1#ifndef _ASM_X86_XOR_64_H
2#define _ASM_X86_XOR_64_H 2#define _ASM_X86_XOR_64_H
3 3
4/*
5 * Optimized RAID-5 checksumming functions for MMX and SSE.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * You should have received a copy of the GNU General Public License
13 * (for example /usr/src/linux/COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17
18/*
19 * Cache avoiding checksumming functions utilizing KNI instructions
20 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
21 */
22
23/*
24 * Based on
25 * High-speed RAID5 checksumming functions utilizing SSE instructions.
26 * Copyright (C) 1998 Ingo Molnar.
27 */
28
29/*
30 * x86-64 changes / gcc fixes from Andi Kleen.
31 * Copyright 2002 Andi Kleen, SuSE Labs.
32 *
33 * This hasn't been optimized for the hammer yet, but there are likely
34 * no advantages to be gotten from x86-64 here anyways.
35 */
36
37#include <asm/i387.h>
38
39#define OFFS(x) "16*("#x")"
40#define PF_OFFS(x) "256+16*("#x")"
41#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
42#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
43#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
44#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
45#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
46#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
47#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
48#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
49#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
50#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
51#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
52#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
53#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
54
55
56static void
57xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
58{
59 unsigned int lines = bytes >> 8;
60
61 kernel_fpu_begin();
62
63 asm volatile(
64#undef BLOCK
65#define BLOCK(i) \
66 LD(i, 0) \
67 LD(i + 1, 1) \
68 PF1(i) \
69 PF1(i + 2) \
70 LD(i + 2, 2) \
71 LD(i + 3, 3) \
72 PF0(i + 4) \
73 PF0(i + 6) \
74 XO1(i, 0) \
75 XO1(i + 1, 1) \
76 XO1(i + 2, 2) \
77 XO1(i + 3, 3) \
78 ST(i, 0) \
79 ST(i + 1, 1) \
80 ST(i + 2, 2) \
81 ST(i + 3, 3) \
82
83
84 PF0(0)
85 PF0(2)
86
87 " .align 32 ;\n"
88 " 1: ;\n"
89
90 BLOCK(0)
91 BLOCK(4)
92 BLOCK(8)
93 BLOCK(12)
94
95 " addq %[inc], %[p1] ;\n"
96 " addq %[inc], %[p2] ;\n"
97 " decl %[cnt] ; jnz 1b"
98 : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
99 : [inc] "r" (256UL)
100 : "memory");
101
102 kernel_fpu_end();
103}
104
105static void
106xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
107 unsigned long *p3)
108{
109 unsigned int lines = bytes >> 8;
110
111 kernel_fpu_begin();
112 asm volatile(
113#undef BLOCK
114#define BLOCK(i) \
115 PF1(i) \
116 PF1(i + 2) \
117 LD(i, 0) \
118 LD(i + 1, 1) \
119 LD(i + 2, 2) \
120 LD(i + 3, 3) \
121 PF2(i) \
122 PF2(i + 2) \
123 PF0(i + 4) \
124 PF0(i + 6) \
125 XO1(i, 0) \
126 XO1(i + 1, 1) \
127 XO1(i + 2, 2) \
128 XO1(i + 3, 3) \
129 XO2(i, 0) \
130 XO2(i + 1, 1) \
131 XO2(i + 2, 2) \
132 XO2(i + 3, 3) \
133 ST(i, 0) \
134 ST(i + 1, 1) \
135 ST(i + 2, 2) \
136 ST(i + 3, 3) \
137
138
139 PF0(0)
140 PF0(2)
141
142 " .align 32 ;\n"
143 " 1: ;\n"
144
145 BLOCK(0)
146 BLOCK(4)
147 BLOCK(8)
148 BLOCK(12)
149
150 " addq %[inc], %[p1] ;\n"
151 " addq %[inc], %[p2] ;\n"
152 " addq %[inc], %[p3] ;\n"
153 " decl %[cnt] ; jnz 1b"
154 : [cnt] "+r" (lines),
155 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
156 : [inc] "r" (256UL)
157 : "memory");
158 kernel_fpu_end();
159}
160
161static void
162xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
163 unsigned long *p3, unsigned long *p4)
164{
165 unsigned int lines = bytes >> 8;
166
167 kernel_fpu_begin();
168
169 asm volatile(
170#undef BLOCK
171#define BLOCK(i) \
172 PF1(i) \
173 PF1(i + 2) \
174 LD(i, 0) \
175 LD(i + 1, 1) \
176 LD(i + 2, 2) \
177 LD(i + 3, 3) \
178 PF2(i) \
179 PF2(i + 2) \
180 XO1(i, 0) \
181 XO1(i + 1, 1) \
182 XO1(i + 2, 2) \
183 XO1(i + 3, 3) \
184 PF3(i) \
185 PF3(i + 2) \
186 PF0(i + 4) \
187 PF0(i + 6) \
188 XO2(i, 0) \
189 XO2(i + 1, 1) \
190 XO2(i + 2, 2) \
191 XO2(i + 3, 3) \
192 XO3(i, 0) \
193 XO3(i + 1, 1) \
194 XO3(i + 2, 2) \
195 XO3(i + 3, 3) \
196 ST(i, 0) \
197 ST(i + 1, 1) \
198 ST(i + 2, 2) \
199 ST(i + 3, 3) \
200
201
202 PF0(0)
203 PF0(2)
204
205 " .align 32 ;\n"
206 " 1: ;\n"
207
208 BLOCK(0)
209 BLOCK(4)
210 BLOCK(8)
211 BLOCK(12)
212
213 " addq %[inc], %[p1] ;\n"
214 " addq %[inc], %[p2] ;\n"
215 " addq %[inc], %[p3] ;\n"
216 " addq %[inc], %[p4] ;\n"
217 " decl %[cnt] ; jnz 1b"
218 : [cnt] "+c" (lines),
219 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
220 : [inc] "r" (256UL)
221 : "memory" );
222
223 kernel_fpu_end();
224}
225
226static void
227xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
228 unsigned long *p3, unsigned long *p4, unsigned long *p5)
229{
230 unsigned int lines = bytes >> 8;
231
232 kernel_fpu_begin();
233
234 asm volatile(
235#undef BLOCK
236#define BLOCK(i) \
237 PF1(i) \
238 PF1(i + 2) \
239 LD(i, 0) \
240 LD(i + 1, 1) \
241 LD(i + 2, 2) \
242 LD(i + 3, 3) \
243 PF2(i) \
244 PF2(i + 2) \
245 XO1(i, 0) \
246 XO1(i + 1, 1) \
247 XO1(i + 2, 2) \
248 XO1(i + 3, 3) \
249 PF3(i) \
250 PF3(i + 2) \
251 XO2(i, 0) \
252 XO2(i + 1, 1) \
253 XO2(i + 2, 2) \
254 XO2(i + 3, 3) \
255 PF4(i) \
256 PF4(i + 2) \
257 PF0(i + 4) \
258 PF0(i + 6) \
259 XO3(i, 0) \
260 XO3(i + 1, 1) \
261 XO3(i + 2, 2) \
262 XO3(i + 3, 3) \
263 XO4(i, 0) \
264 XO4(i + 1, 1) \
265 XO4(i + 2, 2) \
266 XO4(i + 3, 3) \
267 ST(i, 0) \
268 ST(i + 1, 1) \
269 ST(i + 2, 2) \
270 ST(i + 3, 3) \
271
272
273 PF0(0)
274 PF0(2)
275
276 " .align 32 ;\n"
277 " 1: ;\n"
278
279 BLOCK(0)
280 BLOCK(4)
281 BLOCK(8)
282 BLOCK(12)
283
284 " addq %[inc], %[p1] ;\n"
285 " addq %[inc], %[p2] ;\n"
286 " addq %[inc], %[p3] ;\n"
287 " addq %[inc], %[p4] ;\n"
288 " addq %[inc], %[p5] ;\n"
289 " decl %[cnt] ; jnz 1b"
290 : [cnt] "+c" (lines),
291 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
292 [p5] "+r" (p5)
293 : [inc] "r" (256UL)
294 : "memory");
295
296 kernel_fpu_end();
297}
298
299static struct xor_block_template xor_block_sse = { 4static struct xor_block_template xor_block_sse = {
300 .name = "generic_sse", 5 .name = "generic_sse",
301 .do_2 = xor_sse_2, 6 .do_2 = xor_sse_2,
@@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
308/* Also try the AVX routines */ 13/* Also try the AVX routines */
309#include <asm/xor_avx.h> 14#include <asm/xor_avx.h>
310 15
16/* We force the use of the SSE xor block because it can write around L2.
17 We may also be able to load into the L1 only depending on how the cpu
18 deals with a load to a line that is being prefetched. */
311#undef XOR_TRY_TEMPLATES 19#undef XOR_TRY_TEMPLATES
312#define XOR_TRY_TEMPLATES \ 20#define XOR_TRY_TEMPLATES \
313do { \ 21do { \
314 AVX_XOR_SPEED; \ 22 AVX_XOR_SPEED; \
23 xor_speed(&xor_block_sse_pf64); \
315 xor_speed(&xor_block_sse); \ 24 xor_speed(&xor_block_sse); \
316} while (0) 25} while (0)
317 26
318/* We force the use of the SSE xor block because it can write around L2.
319 We may also be able to load into the L1 only depending on how the cpu
320 deals with a load to a line that is being prefetched. */
321#define XOR_SELECT_TEMPLATE(FASTEST) \
322 AVX_SELECT(&xor_block_sse)
323
324#endif /* _ASM_X86_XOR_64_H */ 27#endif /* _ASM_X86_XOR_64_H */
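
With the SSE routines moved into xor.h, the 64-bit XOR_SELECT_TEMPLATE now passes the measured winner through AVX_SELECT() instead of unconditionally forcing generic_sse. A paraphrase of how the generic calibration code applies that macro (crypto/xor.c is the real home of this logic; the function below is a sketch, not its literal source):

        static struct xor_block_template *
        apply_arch_override(struct xor_block_template *fastest)
        {
        #ifdef XOR_SELECT_TEMPLATE
                fastest = XOR_SELECT_TEMPLATE(fastest);
        #endif
                return fastest;
        }
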
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 92862cd90201..c15ddaf90710 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -1,6 +1,31 @@
1#ifndef _ASM_X86_BOOTPARAM_H 1#ifndef _ASM_X86_BOOTPARAM_H
2#define _ASM_X86_BOOTPARAM_H 2#define _ASM_X86_BOOTPARAM_H
3 3
4/* setup_data types */
5#define SETUP_NONE 0
6#define SETUP_E820_EXT 1
7#define SETUP_DTB 2
8#define SETUP_PCI 3
9
10/* ram_size flags */
11#define RAMDISK_IMAGE_START_MASK 0x07FF
12#define RAMDISK_PROMPT_FLAG 0x8000
13#define RAMDISK_LOAD_FLAG 0x4000
14
15/* loadflags */
16#define LOADED_HIGH (1<<0)
17#define QUIET_FLAG (1<<5)
18#define KEEP_SEGMENTS (1<<6)
19#define CAN_USE_HEAP (1<<7)
20
21/* xloadflags */
22#define XLF_KERNEL_64 (1<<0)
23#define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1)
24#define XLF_EFI_HANDOVER_32 (1<<2)
25#define XLF_EFI_HANDOVER_64 (1<<3)
26
27#ifndef __ASSEMBLY__
28
4#include <linux/types.h> 29#include <linux/types.h>
5#include <linux/screen_info.h> 30#include <linux/screen_info.h>
6#include <linux/apm_bios.h> 31#include <linux/apm_bios.h>
@@ -9,12 +34,6 @@
9#include <asm/ist.h> 34#include <asm/ist.h>
10#include <video/edid.h> 35#include <video/edid.h>
11 36
12/* setup data types */
13#define SETUP_NONE 0
14#define SETUP_E820_EXT 1
15#define SETUP_DTB 2
16#define SETUP_PCI 3
17
18/* extensible setup data list node */ 37/* extensible setup data list node */
19struct setup_data { 38struct setup_data {
20 __u64 next; 39 __u64 next;
@@ -28,9 +47,6 @@ struct setup_header {
28 __u16 root_flags; 47 __u16 root_flags;
29 __u32 syssize; 48 __u32 syssize;
30 __u16 ram_size; 49 __u16 ram_size;
31#define RAMDISK_IMAGE_START_MASK 0x07FF
32#define RAMDISK_PROMPT_FLAG 0x8000
33#define RAMDISK_LOAD_FLAG 0x4000
34 __u16 vid_mode; 50 __u16 vid_mode;
35 __u16 root_dev; 51 __u16 root_dev;
36 __u16 boot_flag; 52 __u16 boot_flag;
@@ -42,10 +58,6 @@ struct setup_header {
42 __u16 kernel_version; 58 __u16 kernel_version;
43 __u8 type_of_loader; 59 __u8 type_of_loader;
44 __u8 loadflags; 60 __u8 loadflags;
45#define LOADED_HIGH (1<<0)
46#define QUIET_FLAG (1<<5)
47#define KEEP_SEGMENTS (1<<6)
48#define CAN_USE_HEAP (1<<7)
49 __u16 setup_move_size; 61 __u16 setup_move_size;
50 __u32 code32_start; 62 __u32 code32_start;
51 __u32 ramdisk_image; 63 __u32 ramdisk_image;
@@ -58,7 +70,8 @@ struct setup_header {
58 __u32 initrd_addr_max; 70 __u32 initrd_addr_max;
59 __u32 kernel_alignment; 71 __u32 kernel_alignment;
60 __u8 relocatable_kernel; 72 __u8 relocatable_kernel;
61 __u8 _pad2[3]; 73 __u8 min_alignment;
74 __u16 xloadflags;
62 __u32 cmdline_size; 75 __u32 cmdline_size;
63 __u32 hardware_subarch; 76 __u32 hardware_subarch;
64 __u64 hardware_subarch_data; 77 __u64 hardware_subarch_data;
@@ -106,7 +119,10 @@ struct boot_params {
106 __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */ 119 __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */
107 struct sys_desc_table sys_desc_table; /* 0x0a0 */ 120 struct sys_desc_table sys_desc_table; /* 0x0a0 */
108 struct olpc_ofw_header olpc_ofw_header; /* 0x0b0 */ 121 struct olpc_ofw_header olpc_ofw_header; /* 0x0b0 */
109 __u8 _pad4[128]; /* 0x0c0 */ 122 __u32 ext_ramdisk_image; /* 0x0c0 */
123 __u32 ext_ramdisk_size; /* 0x0c4 */
124 __u32 ext_cmd_line_ptr; /* 0x0c8 */
125 __u8 _pad4[116]; /* 0x0cc */
110 struct edid_info edid_info; /* 0x140 */ 126 struct edid_info edid_info; /* 0x140 */
111 struct efi_info efi_info; /* 0x1c0 */ 127 struct efi_info efi_info; /* 0x1c0 */
112 __u32 alt_mem_k; /* 0x1e0 */ 128 __u32 alt_mem_k; /* 0x1e0 */
@@ -115,7 +131,20 @@ struct boot_params {
115 __u8 eddbuf_entries; /* 0x1e9 */ 131 __u8 eddbuf_entries; /* 0x1e9 */
116 __u8 edd_mbr_sig_buf_entries; /* 0x1ea */ 132 __u8 edd_mbr_sig_buf_entries; /* 0x1ea */
117 __u8 kbd_status; /* 0x1eb */ 133 __u8 kbd_status; /* 0x1eb */
118 __u8 _pad6[5]; /* 0x1ec */ 134 __u8 _pad5[3]; /* 0x1ec */
135 /*
136 * The sentinel is set to a nonzero value (0xff) in header.S.
137 *
138 * A bootloader is supposed to only take setup_header and put
139 * it into a clean boot_params buffer. If it turns out that
140 * it is clumsy or too generous with the buffer, it most
141 * probably will pick up the sentinel variable too. The fact
142 * that this variable then is still 0xff will let kernel
143 * know that some variables in boot_params are invalid and
144 * kernel should zero out certain portions of boot_params.
145 */
146 __u8 sentinel; /* 0x1ef */
147 __u8 _pad6[1]; /* 0x1f0 */
119 struct setup_header hdr; /* setup header */ /* 0x1f1 */ 148 struct setup_header hdr; /* setup header */ /* 0x1f1 */
120 __u8 _pad7[0x290-0x1f1-sizeof(struct setup_header)]; 149 __u8 _pad7[0x290-0x1f1-sizeof(struct setup_header)];
121 __u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX]; /* 0x290 */ 150 __u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX]; /* 0x290 */
@@ -134,6 +163,6 @@ enum {
134 X86_NR_SUBARCHS, 163 X86_NR_SUBARCHS,
135}; 164};
136 165
137 166#endif /* __ASSEMBLY__ */
138 167
139#endif /* _ASM_X86_BOOTPARAM_H */ 168#endif /* _ASM_X86_BOOTPARAM_H */
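
The sentinel byte and the relocation of the setup_data/loadflags/xloadflags constants outside __ASSEMBLY__ work together: header.S presets sentinel to 0xff, so a bootloader that copies the whole page instead of just setup_header leaves it nonzero, and the kernel can then scrub fields such a loader never meant to set. A hedged sketch of the kernel-side check; the real logic lives in the sanitize helper added alongside this change (asm/bootparam_utils.h), and the exact field range cleared here is illustrative.

        static void sanitize_boot_params_sketch(struct boot_params *bp)
        {
                if (bp->sentinel) {
                        /* loader copied stale junk: clear fields it does not
                         * own, e.g. the new ext_* words at 0x0c0 */
                        memset(&bp->ext_ramdisk_image, 0,
                               (char *)&bp->edid_info -
                               (char *)&bp->ext_ramdisk_image);
                }
        }
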
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 58c829871c31..a0eab85ce7b8 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -4,66 +4,6 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <asm/ioctls.h> 5#include <asm/ioctls.h>
6 6
7/*
8 * Machine Check support for x86
9 */
10
11/* MCG_CAP register defines */
12#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
13#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
14#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
15#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
16#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
17#define MCG_EXT_CNT_SHIFT 16
18#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
19#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
20
21/* MCG_STATUS register defines */
22#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
23#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
24#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
25
26/* MCi_STATUS register defines */
27#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
28#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
29#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
30#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
31#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
32#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
33#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
34#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
35#define MCI_STATUS_AR (1ULL<<55) /* Action required */
36#define MCACOD 0xffff /* MCA Error Code */
37
38/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
39#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
40#define MCACOD_SCRUBMSK 0xfff0
41#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
42#define MCACOD_DATA 0x0134 /* Data Load */
43#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
44
45/* MCi_MISC register defines */
46#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
47#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
48#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
49#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
50#define MCI_MISC_ADDR_PHYS 2 /* physical address */
51#define MCI_MISC_ADDR_MEM 3 /* memory address */
52#define MCI_MISC_ADDR_GENERIC 7 /* generic */
53
54/* CTL2 register defines */
55#define MCI_CTL2_CMCI_EN (1ULL << 30)
56#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
57
58#define MCJ_CTX_MASK 3
59#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
60#define MCJ_CTX_RANDOM 0 /* inject context: random */
61#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
62#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
63#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
64#define MCJ_EXCEPTION 0x8 /* raise as exception */
65#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
66
67/* Fields are zero when not available */ 7/* Fields are zero when not available */
68struct mce { 8struct mce {
69 __u64 status; 9 __u64 status;
@@ -87,35 +27,8 @@ struct mce {
87 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ 27 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
88}; 28};
89 29
90/*
91 * This structure contains all data related to the MCE log. Also
92 * carries a signature to make it easier to find from external
93 * debugging tools. Each entry is only valid when its finished flag
94 * is set.
95 */
96
97#define MCE_LOG_LEN 32
98
99struct mce_log {
100 char signature[12]; /* "MACHINECHECK" */
101 unsigned len; /* = MCE_LOG_LEN */
102 unsigned next;
103 unsigned flags;
104 unsigned recordlen; /* length of struct mce */
105 struct mce entry[MCE_LOG_LEN];
106};
107
108#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
109
110#define MCE_LOG_SIGNATURE "MACHINECHECK"
111
112#define MCE_GET_RECORD_LEN _IOR('M', 1, int) 30#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
113#define MCE_GET_LOG_LEN _IOR('M', 2, int) 31#define MCE_GET_LOG_LEN _IOR('M', 2, int)
114#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) 32#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
115 33
116/* Software defined banks */
117#define MCE_EXTENDED_BANK 128
118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
119#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
120
121#endif /* _UAPI_ASM_X86_MCE_H */ 34#endif /* _UAPI_ASM_X86_MCE_H */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 433a59fb1a74..892ce40a7470 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -103,6 +103,8 @@
103#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) 103#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
104#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) 104#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
105 105
106#define MSR_IA32_POWER_CTL 0x000001fc
107
106#define MSR_IA32_MC0_CTL 0x00000400 108#define MSR_IA32_MC0_CTL 0x00000400
107#define MSR_IA32_MC0_STATUS 0x00000401 109#define MSR_IA32_MC0_STATUS 0x00000401
108#define MSR_IA32_MC0_ADDR 0x00000402 110#define MSR_IA32_MC0_ADDR 0x00000402
@@ -173,6 +175,7 @@
173#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 175#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
174#define MSR_AMD64_OSVW_STATUS 0xc0010141 176#define MSR_AMD64_OSVW_STATUS 0xc0010141
175#define MSR_AMD64_DC_CFG 0xc0011022 177#define MSR_AMD64_DC_CFG 0xc0011022
178#define MSR_AMD64_BU_CFG2 0xc001102a
176#define MSR_AMD64_IBSFETCHCTL 0xc0011030 179#define MSR_AMD64_IBSFETCHCTL 0xc0011030
177#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 180#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
178#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 181#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
@@ -194,6 +197,8 @@
194/* Fam 15h MSRs */ 197/* Fam 15h MSRs */
195#define MSR_F15H_PERF_CTL 0xc0010200 198#define MSR_F15H_PERF_CTL 0xc0010200
196#define MSR_F15H_PERF_CTR 0xc0010201 199#define MSR_F15H_PERF_CTR 0xc0010201
200#define MSR_F15H_NB_PERF_CTL 0xc0010240
201#define MSR_F15H_NB_PERF_CTR 0xc0010241
197 202
198/* Fam 10h MSRs */ 203/* Fam 10h MSRs */
199#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 204#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
@@ -272,6 +277,7 @@
272#define MSR_IA32_PLATFORM_ID 0x00000017 277#define MSR_IA32_PLATFORM_ID 0x00000017
273#define MSR_IA32_EBL_CR_POWERON 0x0000002a 278#define MSR_IA32_EBL_CR_POWERON 0x0000002a
274#define MSR_EBC_FREQUENCY_ID 0x0000002c 279#define MSR_EBC_FREQUENCY_ID 0x0000002c
280#define MSR_SMI_COUNT 0x00000034
275#define MSR_IA32_FEATURE_CONTROL 0x0000003a 281#define MSR_IA32_FEATURE_CONTROL 0x0000003a
276#define MSR_IA32_TSC_ADJUST 0x0000003b 282#define MSR_IA32_TSC_ADJUST 0x0000003b
277 283
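
MSR_SMI_COUNT, MSR_IA32_POWER_CTL, MSR_AMD64_BU_CFG2 and the F15h northbridge perf counter pair are plain MSR numbers consumed through the usual rdmsrl()/wrmsrl() accessors. A minimal sketch (assumes <asm/msr.h>; the _safe variants and any CPU capability checks are omitted):

        #include <asm/msr.h>

        /* Read the running SMI count on CPUs that implement MSR 0x34. */
        static u64 read_smi_count(void)
        {
                u64 count;

                rdmsrl(MSR_SMI_COUNT, count);
                return count;
        }
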
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index aa7d6ae39e0e..8264f47cf53e 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -95,9 +95,9 @@ typedef unsigned long sigset_t;
95#ifndef __ASSEMBLY__ 95#ifndef __ASSEMBLY__
96 96
97 97
98#ifdef __i386__
99# ifndef __KERNEL__ 98# ifndef __KERNEL__
100/* Here we must cater to libcs that poke about in kernel headers. */ 99/* Here we must cater to libcs that poke about in kernel headers. */
100#ifdef __i386__
101 101
102struct sigaction { 102struct sigaction {
103 union { 103 union {
@@ -112,7 +112,6 @@ struct sigaction {
112#define sa_handler _u._sa_handler 112#define sa_handler _u._sa_handler
113#define sa_sigaction _u._sa_sigaction 113#define sa_sigaction _u._sa_sigaction
114 114
115# endif /* ! __KERNEL__ */
116#else /* __i386__ */ 115#else /* __i386__ */
117 116
118struct sigaction { 117struct sigaction {
@@ -122,11 +121,8 @@ struct sigaction {
122 sigset_t sa_mask; /* mask last for extensibility */ 121 sigset_t sa_mask; /* mask last for extensibility */
123}; 122};
124 123
125struct k_sigaction {
126 struct sigaction sa;
127};
128
129#endif /* !__i386__ */ 124#endif /* !__i386__ */
125# endif /* ! __KERNEL__ */
130 126
131typedef struct sigaltstack { 127typedef struct sigaltstack {
132 void __user *ss_sp; 128 void __user *ss_sp;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 979d03bce135..2871fccfee68 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -62,10 +62,12 @@
62#define EXIT_REASON_MCE_DURING_VMENTRY 41 62#define EXIT_REASON_MCE_DURING_VMENTRY 41
63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
64#define EXIT_REASON_APIC_ACCESS 44 64#define EXIT_REASON_APIC_ACCESS 44
65#define EXIT_REASON_EOI_INDUCED 45
65#define EXIT_REASON_EPT_VIOLATION 48 66#define EXIT_REASON_EPT_VIOLATION 48
66#define EXIT_REASON_EPT_MISCONFIG 49 67#define EXIT_REASON_EPT_MISCONFIG 49
67#define EXIT_REASON_WBINVD 54 68#define EXIT_REASON_WBINVD 54
68#define EXIT_REASON_XSETBV 55 69#define EXIT_REASON_XSETBV 55
70#define EXIT_REASON_APIC_WRITE 56
69#define EXIT_REASON_INVPCID 58 71#define EXIT_REASON_INVPCID 58
70 72
71#define VMX_EXIT_REASONS \ 73#define VMX_EXIT_REASONS \
@@ -103,7 +105,12 @@
103 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ 105 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
104 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ 106 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
105 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ 107 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
106 { EXIT_REASON_WBINVD, "WBINVD" } 108 { EXIT_REASON_WBINVD, "WBINVD" }, \
109 { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
110 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
111 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
112 { EXIT_REASON_INVD, "INVD" }, \
113 { EXIT_REASON_INVPCID, "INVPCID" }
107 114
108 115
109#endif /* _UAPIVMX_H */ 116#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a53762..7bd3bd310106 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o
65obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 65obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
66obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 66obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
68obj-$(CONFIG_KPROBES) += kprobes.o 68obj-y += kprobes/
69obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
70obj-$(CONFIG_MODULES) += module.o 69obj-$(CONFIG_MODULES) += module.o
71obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o 70obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
72obj-$(CONFIG_KGDB) += kgdb.o 71obj-$(CONFIG_KGDB) += kgdb.o
@@ -88,6 +87,9 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
88 87
89obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 88obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
90 89
90obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o
91obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o
92obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o
91microcode-y := microcode_core.o 93microcode-y := microcode_core.o
92microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o 94microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
93microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o 95microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bacf4b0d91f4..230c8ea878e5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled);
51 51
52#ifdef CONFIG_X86_64 52#ifdef CONFIG_X86_64
53# include <asm/proto.h> 53# include <asm/proto.h>
54# include <asm/numa_64.h>
55#endif /* X86 */ 54#endif /* X86 */
56 55
57#define BAD_MADT_ENTRY(entry, end) ( \ 56#define BAD_MADT_ENTRY(entry, end) ( \
@@ -697,6 +696,10 @@ EXPORT_SYMBOL(acpi_map_lsapic);
697 696
698int acpi_unmap_lsapic(int cpu) 697int acpi_unmap_lsapic(int cpu)
699{ 698{
699#ifdef CONFIG_ACPI_NUMA
700 set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE);
701#endif
702
700 per_cpu(x86_cpu_to_apicid, cpu) = -1; 703 per_cpu(x86_cpu_to_apicid, cpu) = -1;
701 set_cpu_present(cpu, false); 704 set_cpu_present(cpu, false);
702 num_processors--; 705 num_processors--;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index d5e0d717005a..0532f5d6e4ef 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void)
69 69
70#ifndef CONFIG_64BIT 70#ifndef CONFIG_64BIT
71 header->pmode_entry = (u32)&wakeup_pmode_return; 71 header->pmode_entry = (u32)&wakeup_pmode_return;
72 header->pmode_cr3 = (u32)__pa(&initial_page_table); 72 header->pmode_cr3 = (u32)__pa_symbol(initial_page_table);
73 saved_magic = 0x12345678; 73 saved_magic = 0x12345678;
74#else /* CONFIG_64BIT */ 74#else /* CONFIG_64BIT */
75#ifdef CONFIG_SMP 75#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index e66311200cbd..b574b295a2f9 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -768,10 +768,9 @@ int __init gart_iommu_init(void)
768 aper_base = info.aper_base; 768 aper_base = info.aper_base;
769 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); 769 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
770 770
771 if (end_pfn > max_low_pfn_mapped) { 771 start_pfn = PFN_DOWN(aper_base);
772 start_pfn = (aper_base>>PAGE_SHIFT); 772 if (!pfn_range_is_mapped(start_pfn, end_pfn))
773 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); 773 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
774 }
775 774
776 pr_info("PCI-DMA: using GART IOMMU.\n"); 775 pr_info("PCI-DMA: using GART IOMMU.\n");
777 iommu_size = check_iommu_size(info.aper_base, aper_size); 776 iommu_size = check_iommu_size(info.aper_base, aper_size);
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index afdc3f756dea..c9876efecafb 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -240,7 +240,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
240 dw_apb_clockevent_pause(adev->timer); 240 dw_apb_clockevent_pause(adev->timer);
241 if (system_state == SYSTEM_RUNNING) { 241 if (system_state == SYSTEM_RUNNING) {
242 pr_debug("skipping APBT CPU %lu offline\n", cpu); 242 pr_debug("skipping APBT CPU %lu offline\n", cpu);
243 } else if (adev) { 243 } else {
244 pr_debug("APBT clockevent for cpu %lu offline\n", cpu); 244 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
245 dw_apb_clockevent_stop(adev->timer); 245 dw_apb_clockevent_stop(adev->timer);
246 } 246 }
@@ -311,7 +311,6 @@ void __init apbt_time_init(void)
311#ifdef CONFIG_SMP 311#ifdef CONFIG_SMP
312 int i; 312 int i;
313 struct sfi_timer_table_entry *p_mtmr; 313 struct sfi_timer_table_entry *p_mtmr;
314 unsigned int percpu_timer;
315 struct apbt_dev *adev; 314 struct apbt_dev *adev;
316#endif 315#endif
317 316
@@ -346,13 +345,10 @@ void __init apbt_time_init(void)
346 return; 345 return;
347 } 346 }
348 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); 347 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
349 if (num_possible_cpus() <= sfi_mtimer_num) { 348 if (num_possible_cpus() <= sfi_mtimer_num)
350 percpu_timer = 1;
351 apbt_num_timers_used = num_possible_cpus(); 349 apbt_num_timers_used = num_possible_cpus();
352 } else { 350 else
353 percpu_timer = 0;
354 apbt_num_timers_used = 1; 351 apbt_num_timers_used = 1;
355 }
356 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); 352 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
357 353
358 /* here we set up per CPU timer data structure */ 354 /* here we set up per CPU timer data structure */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b994cc84aa7e..a5b4dce1b7ac 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void)
1477 * Now that local APIC setup is completed for BP, configure the fault 1477 * Now that local APIC setup is completed for BP, configure the fault
1478 * handling for interrupt remapping. 1478 * handling for interrupt remapping.
1479 */ 1479 */
1480 if (irq_remapping_enabled) 1480 irq_remap_enable_fault_handling();
1481 irq_remap_enable_fault_handling();
1482 1481
1483} 1482}
1484 1483
@@ -2251,8 +2250,7 @@ static int lapic_suspend(void)
2251 local_irq_save(flags); 2250 local_irq_save(flags);
2252 disable_local_APIC(); 2251 disable_local_APIC();
2253 2252
2254 if (irq_remapping_enabled) 2253 irq_remapping_disable();
2255 irq_remapping_disable();
2256 2254
2257 local_irq_restore(flags); 2255 local_irq_restore(flags);
2258 return 0; 2256 return 0;
@@ -2268,16 +2266,15 @@ static void lapic_resume(void)
2268 return; 2266 return;
2269 2267
2270 local_irq_save(flags); 2268 local_irq_save(flags);
2271 if (irq_remapping_enabled) { 2269
2272 /* 2270 /*
2273 * IO-APIC and PIC have their own resume routines. 2271 * IO-APIC and PIC have their own resume routines.
2274 * We just mask them here to make sure the interrupt 2272 * We just mask them here to make sure the interrupt
2275 * subsystem is completely quiet while we enable x2apic 2273 * subsystem is completely quiet while we enable x2apic
2276 * and interrupt-remapping. 2274 * and interrupt-remapping.
2277 */ 2275 */
2278 mask_ioapic_entries(); 2276 mask_ioapic_entries();
2279 legacy_pic->mask_all(); 2277 legacy_pic->mask_all();
2280 }
2281 2278
2282 if (x2apic_mode) 2279 if (x2apic_mode)
2283 enable_x2apic(); 2280 enable_x2apic();
@@ -2320,8 +2317,7 @@ static void lapic_resume(void)
2320 apic_write(APIC_ESR, 0); 2317 apic_write(APIC_ESR, 0);
2321 apic_read(APIC_ESR); 2318 apic_read(APIC_ESR);
2322 2319
2323 if (irq_remapping_enabled) 2320 irq_remapping_reenable(x2apic_mode);
2324 irq_remapping_reenable(x2apic_mode);
2325 2321
2326 local_irq_restore(flags); 2322 local_irq_restore(flags);
2327} 2323}
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 9c2aa89a11cb..9a9110918ca7 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -28,6 +28,7 @@
28#include <asm/apic.h> 28#include <asm/apic.h>
29#include <asm/ipi.h> 29#include <asm/ipi.h>
30#include <asm/apic_flat_64.h> 30#include <asm/apic_flat_64.h>
31#include <asm/pgtable.h>
31 32
32static int numachip_system __read_mostly; 33static int numachip_system __read_mostly;
33 34
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb29..9ed796ccc32c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,22 +68,6 @@
68#define for_each_irq_pin(entry, head) \ 68#define for_each_irq_pin(entry, head) \
69 for (entry = head; entry; entry = entry->next) 69 for (entry = head; entry; entry = entry->next)
70 70
71#ifdef CONFIG_IRQ_REMAP
72static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
73static inline bool irq_remapped(struct irq_cfg *cfg)
74{
75 return cfg->irq_2_iommu.iommu != NULL;
76}
77#else
78static inline bool irq_remapped(struct irq_cfg *cfg)
79{
80 return false;
81}
82static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
83{
84}
85#endif
86
87/* 71/*
88 * Is the SiS APIC rmw bug present ? 72 * Is the SiS APIC rmw bug present ?
89 * -1 = don't know, 0 = no, 1 = yes 73 * -1 = don't know, 0 = no, 1 = yes
@@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
300 return cfg; 284 return cfg;
301} 285}
302 286
303static int alloc_irq_from(unsigned int from, int node) 287static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
304{ 288{
305 return irq_alloc_desc_from(from, node); 289 return irq_alloc_descs_from(from, count, node);
306} 290}
307 291
308static void free_irq_at(unsigned int at, struct irq_cfg *cfg) 292static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
326 + (mpc_ioapic_addr(idx) & ~PAGE_MASK); 310 + (mpc_ioapic_addr(idx) & ~PAGE_MASK);
327} 311}
328 312
329static inline void io_apic_eoi(unsigned int apic, unsigned int vector) 313void io_apic_eoi(unsigned int apic, unsigned int vector)
330{ 314{
331 struct io_apic __iomem *io_apic = io_apic_base(apic); 315 struct io_apic __iomem *io_apic = io_apic_base(apic);
332 writel(vector, &io_apic->eoi); 316 writel(vector, &io_apic->eoi);
@@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data)
573 * Otherwise, we simulate the EOI message manually by changing the trigger 557 * Otherwise, we simulate the EOI message manually by changing the trigger
574 * mode to edge and then back to level, with RTE being masked during this. 558 * mode to edge and then back to level, with RTE being masked during this.
575 */ 559 */
576static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) 560void native_eoi_ioapic_pin(int apic, int pin, int vector)
577{ 561{
578 if (mpc_ioapic_ver(apic) >= 0x20) { 562 if (mpc_ioapic_ver(apic) >= 0x20) {
579 /* 563 io_apic_eoi(apic, vector);
580 * Intr-remapping uses pin number as the virtual vector
581 * in the RTE. Actual vector is programmed in
582 * intr-remapping table entry. Hence for the io-apic
583 * EOI we use the pin number.
584 */
585 if (cfg && irq_remapped(cfg))
586 io_apic_eoi(apic, pin);
587 else
588 io_apic_eoi(apic, vector);
589 } else { 564 } else {
590 struct IO_APIC_route_entry entry, entry1; 565 struct IO_APIC_route_entry entry, entry1;
591 566
@@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
606 } 581 }
607} 582}
608 583
609static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 584void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
610{ 585{
611 struct irq_pin_list *entry; 586 struct irq_pin_list *entry;
612 unsigned long flags; 587 unsigned long flags;
613 588
614 raw_spin_lock_irqsave(&ioapic_lock, flags); 589 raw_spin_lock_irqsave(&ioapic_lock, flags);
615 for_each_irq_pin(entry, cfg->irq_2_pin) 590 for_each_irq_pin(entry, cfg->irq_2_pin)
616 __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); 591 x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
592 cfg->vector);
617 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 593 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
618} 594}
619 595
@@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
650 } 626 }
651 627
652 raw_spin_lock_irqsave(&ioapic_lock, flags); 628 raw_spin_lock_irqsave(&ioapic_lock, flags);
653 __eoi_ioapic_pin(apic, pin, entry.vector, NULL); 629 x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
654 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 630 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
655 } 631 }
656 632
@@ -1304,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1304 fasteoi = false; 1280 fasteoi = false;
1305 } 1281 }
1306 1282
1307 if (irq_remapped(cfg)) { 1283 if (setup_remapped_irq(irq, cfg, chip))
1308 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1309 irq_remap_modify_chip_defaults(chip);
1310 fasteoi = trigger != 0; 1284 fasteoi = trigger != 0;
1311 }
1312 1285
1313 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; 1286 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
1314 irq_set_chip_and_handler_name(irq, chip, hdl, 1287 irq_set_chip_and_handler_name(irq, chip, hdl,
1315 fasteoi ? "fasteoi" : "edge"); 1288 fasteoi ? "fasteoi" : "edge");
1316} 1289}
1317 1290
1318static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, 1291int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
1319 unsigned int destination, int vector, 1292 unsigned int destination, int vector,
1320 struct io_apic_irq_attr *attr) 1293 struct io_apic_irq_attr *attr)
1321{ 1294{
1322 if (irq_remapping_enabled)
1323 return setup_ioapic_remapped_entry(irq, entry, destination,
1324 vector, attr);
1325
1326 memset(entry, 0, sizeof(*entry)); 1295 memset(entry, 0, sizeof(*entry));
1327 1296
1328 entry->delivery_mode = apic->irq_delivery_mode; 1297 entry->delivery_mode = apic->irq_delivery_mode;
@@ -1370,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
1370 attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, 1339 attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
1371 cfg->vector, irq, attr->trigger, attr->polarity, dest); 1340 cfg->vector, irq, attr->trigger, attr->polarity, dest);
1372 1341
1373 if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { 1342 if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
1374 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1343 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1375 mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); 1344 mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
1376 __clear_irq_vector(irq, cfg); 1345 __clear_irq_vector(irq, cfg);
1377 1346
@@ -1479,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1479 struct IO_APIC_route_entry entry; 1448 struct IO_APIC_route_entry entry;
1480 unsigned int dest; 1449 unsigned int dest;
1481 1450
1482 if (irq_remapping_enabled)
1483 return;
1484
1485 memset(&entry, 0, sizeof(entry)); 1451 memset(&entry, 0, sizeof(entry));
1486 1452
1487 /* 1453 /*
@@ -1513,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1513 ioapic_write_entry(ioapic_idx, pin, entry); 1479 ioapic_write_entry(ioapic_idx, pin, entry);
1514} 1480}
1515 1481
1516__apicdebuginit(void) print_IO_APIC(int ioapic_idx) 1482void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
1517{ 1483{
1518 int i; 1484 int i;
1485
1486 pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
1487
1488 for (i = 0; i <= nr_entries; i++) {
1489 struct IO_APIC_route_entry entry;
1490
1491 entry = ioapic_read_entry(apic, i);
1492
1493 pr_debug(" %02x %02X ", i, entry.dest);
1494 pr_cont("%1d %1d %1d %1d %1d "
1495 "%1d %1d %02X\n",
1496 entry.mask,
1497 entry.trigger,
1498 entry.irr,
1499 entry.polarity,
1500 entry.delivery_status,
1501 entry.dest_mode,
1502 entry.delivery_mode,
1503 entry.vector);
1504 }
1505}
1506
1507void intel_ir_io_apic_print_entries(unsigned int apic,
1508 unsigned int nr_entries)
1509{
1510 int i;
1511
1512 pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
1513
1514 for (i = 0; i <= nr_entries; i++) {
1515 struct IR_IO_APIC_route_entry *ir_entry;
1516 struct IO_APIC_route_entry entry;
1517
1518 entry = ioapic_read_entry(apic, i);
1519
1520 ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
1521
1522 pr_debug(" %02x %04X ", i, ir_entry->index);
1523 pr_cont("%1d %1d %1d %1d %1d "
1524 "%1d %1d %X %02X\n",
1525 ir_entry->format,
1526 ir_entry->mask,
1527 ir_entry->trigger,
1528 ir_entry->irr,
1529 ir_entry->polarity,
1530 ir_entry->delivery_status,
1531 ir_entry->index2,
1532 ir_entry->zero,
1533 ir_entry->vector);
1534 }
1535}
1536
1537__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1538{
1519 union IO_APIC_reg_00 reg_00; 1539 union IO_APIC_reg_00 reg_00;
1520 union IO_APIC_reg_01 reg_01; 1540 union IO_APIC_reg_01 reg_01;
1521 union IO_APIC_reg_02 reg_02; 1541 union IO_APIC_reg_02 reg_02;
@@ -1568,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1568 1588
1569 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1589 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1570 1590
1571 if (irq_remapping_enabled) { 1591 x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
1572 printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
1573 " Pol Stat Indx2 Zero Vect:\n");
1574 } else {
1575 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1576 " Stat Dmod Deli Vect:\n");
1577 }
1578
1579 for (i = 0; i <= reg_01.bits.entries; i++) {
1580 if (irq_remapping_enabled) {
1581 struct IO_APIC_route_entry entry;
1582 struct IR_IO_APIC_route_entry *ir_entry;
1583
1584 entry = ioapic_read_entry(ioapic_idx, i);
1585 ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
1586 printk(KERN_DEBUG " %02x %04X ",
1587 i,
1588 ir_entry->index
1589 );
1590 pr_cont("%1d %1d %1d %1d %1d "
1591 "%1d %1d %X %02X\n",
1592 ir_entry->format,
1593 ir_entry->mask,
1594 ir_entry->trigger,
1595 ir_entry->irr,
1596 ir_entry->polarity,
1597 ir_entry->delivery_status,
1598 ir_entry->index2,
1599 ir_entry->zero,
1600 ir_entry->vector
1601 );
1602 } else {
1603 struct IO_APIC_route_entry entry;
1604
1605 entry = ioapic_read_entry(ioapic_idx, i);
1606 printk(KERN_DEBUG " %02x %02X ",
1607 i,
1608 entry.dest
1609 );
1610 pr_cont("%1d %1d %1d %1d %1d "
1611 "%1d %1d %02X\n",
1612 entry.mask,
1613 entry.trigger,
1614 entry.irr,
1615 entry.polarity,
1616 entry.delivery_status,
1617 entry.dest_mode,
1618 entry.delivery_mode,
1619 entry.vector
1620 );
1621 }
1622 }
1623} 1592}
1624 1593
1625__apicdebuginit(void) print_IO_APICs(void) 1594__apicdebuginit(void) print_IO_APICs(void)
@@ -1921,30 +1890,14 @@ void __init enable_IO_APIC(void)
1921 clear_IO_APIC(); 1890 clear_IO_APIC();
1922} 1891}
1923 1892
1924/* 1893void native_disable_io_apic(void)
1925 * Not an __init, needed by the reboot code
1926 */
1927void disable_IO_APIC(void)
1928{ 1894{
1929 /* 1895 /*
1930 * Clear the IO-APIC before rebooting:
1931 */
1932 clear_IO_APIC();
1933
1934 if (!legacy_pic->nr_legacy_irqs)
1935 return;
1936
1937 /*
1938 * If the i8259 is routed through an IOAPIC 1896 * If the i8259 is routed through an IOAPIC
1939 * Put that IOAPIC in virtual wire mode 1897 * Put that IOAPIC in virtual wire mode
1940 * so legacy interrupts can be delivered. 1898 * so legacy interrupts can be delivered.
1941 *
1942 * With interrupt-remapping, for now we will use virtual wire A mode,
1943 * as virtual wire B is little complex (need to configure both
1944 * IOAPIC RTE as well as interrupt-remapping table entry).
1945 * As this gets called during crash dump, keep this simple for now.
1946 */ 1899 */
1947 if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { 1900 if (ioapic_i8259.pin != -1) {
1948 struct IO_APIC_route_entry entry; 1901 struct IO_APIC_route_entry entry;
1949 1902
1950 memset(&entry, 0, sizeof(entry)); 1903 memset(&entry, 0, sizeof(entry));
@@ -1964,12 +1917,25 @@ void disable_IO_APIC(void)
1964 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); 1917 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1965 } 1918 }
1966 1919
1920 if (cpu_has_apic || apic_from_smp_config())
1921 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1922
1923}
1924
1925/*
1926 * Not an __init, needed by the reboot code
1927 */
1928void disable_IO_APIC(void)
1929{
1967 /* 1930 /*
1968 * Use virtual wire A mode when interrupt remapping is enabled. 1931 * Clear the IO-APIC before rebooting:
1969 */ 1932 */
1970 if (cpu_has_apic || apic_from_smp_config()) 1933 clear_IO_APIC();
1971 disconnect_bsp_APIC(!irq_remapping_enabled && 1934
1972 ioapic_i8259.pin != -1); 1935 if (!legacy_pic->nr_legacy_irqs)
1936 return;
1937
1938 x86_io_apic_ops.disable();
1973} 1939}
1974 1940
1975#ifdef CONFIG_X86_32 1941#ifdef CONFIG_X86_32
@@ -2322,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2322 2288
2323 apic = entry->apic; 2289 apic = entry->apic;
2324 pin = entry->pin; 2290 pin = entry->pin;
2325 /* 2291
2326 * With interrupt-remapping, destination information comes 2292 io_apic_write(apic, 0x11 + pin*2, dest);
2327 * from interrupt-remapping table entry.
2328 */
2329 if (!irq_remapped(cfg))
2330 io_apic_write(apic, 0x11 + pin*2, dest);
2331 reg = io_apic_read(apic, 0x10 + pin*2); 2293 reg = io_apic_read(apic, 0x10 + pin*2);
2332 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2294 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2333 reg |= vector; 2295 reg |= vector;
@@ -2369,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2369 return 0; 2331 return 0;
2370} 2332}
2371 2333
2372static int 2334
2373ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, 2335int native_ioapic_set_affinity(struct irq_data *data,
2374 bool force) 2336 const struct cpumask *mask,
2337 bool force)
2375{ 2338{
2376 unsigned int dest, irq = data->irq; 2339 unsigned int dest, irq = data->irq;
2377 unsigned long flags; 2340 unsigned long flags;
@@ -2548,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data)
2548 ioapic_irqd_unmask(data, cfg, masked); 2511 ioapic_irqd_unmask(data, cfg, masked);
2549} 2512}
2550 2513
2551#ifdef CONFIG_IRQ_REMAP
2552static void ir_ack_apic_edge(struct irq_data *data)
2553{
2554 ack_APIC_irq();
2555}
2556
2557static void ir_ack_apic_level(struct irq_data *data)
2558{
2559 ack_APIC_irq();
2560 eoi_ioapic_irq(data->irq, data->chip_data);
2561}
2562
2563static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
2564{
2565 seq_printf(p, " IR-%s", data->chip->name);
2566}
2567
2568static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
2569{
2570 chip->irq_print_chip = ir_print_prefix;
2571 chip->irq_ack = ir_ack_apic_edge;
2572 chip->irq_eoi = ir_ack_apic_level;
2573
2574 chip->irq_set_affinity = set_remapped_irq_affinity;
2575}
2576#endif /* CONFIG_IRQ_REMAP */
2577
2578static struct irq_chip ioapic_chip __read_mostly = { 2514static struct irq_chip ioapic_chip __read_mostly = {
2579 .name = "IO-APIC", 2515 .name = "IO-APIC",
2580 .irq_startup = startup_ioapic_irq, 2516 .irq_startup = startup_ioapic_irq,
@@ -2582,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
2582 .irq_unmask = unmask_ioapic_irq, 2518 .irq_unmask = unmask_ioapic_irq,
2583 .irq_ack = ack_apic_edge, 2519 .irq_ack = ack_apic_edge,
2584 .irq_eoi = ack_apic_level, 2520 .irq_eoi = ack_apic_level,
2585 .irq_set_affinity = ioapic_set_affinity, 2521 .irq_set_affinity = native_ioapic_set_affinity,
2586 .irq_retrigger = ioapic_retrigger_irq, 2522 .irq_retrigger = ioapic_retrigger_irq,
2587}; 2523};
2588 2524
@@ -2781,8 +2717,7 @@ static inline void __init check_timer(void)
2781 * 8259A. 2717 * 8259A.
2782 */ 2718 */
2783 if (pin1 == -1) { 2719 if (pin1 == -1) {
2784 if (irq_remapping_enabled) 2720 panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
2785 panic("BIOS bug: timer not connected to IO-APIC");
2786 pin1 = pin2; 2721 pin1 = pin2;
2787 apic1 = apic2; 2722 apic1 = apic2;
2788 no_pin1 = 1; 2723 no_pin1 = 1;
@@ -2814,8 +2749,7 @@ static inline void __init check_timer(void)
2814 clear_IO_APIC_pin(0, pin1); 2749 clear_IO_APIC_pin(0, pin1);
2815 goto out; 2750 goto out;
2816 } 2751 }
2817 if (irq_remapping_enabled) 2752 panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
2818 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2819 local_irq_disable(); 2753 local_irq_disable();
2820 clear_IO_APIC_pin(apic1, pin1); 2754 clear_IO_APIC_pin(apic1, pin1);
2821 if (!no_pin1) 2755 if (!no_pin1)
@@ -2982,37 +2916,58 @@ device_initcall(ioapic_init_ops);
2982/* 2916/*
2983 * Dynamic irq allocate and deallocation 2917 * Dynamic irq allocate and deallocation
2984 */ 2918 */
2985unsigned int create_irq_nr(unsigned int from, int node) 2919unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
2986{ 2920{
2987 struct irq_cfg *cfg; 2921 struct irq_cfg **cfg;
2988 unsigned long flags; 2922 unsigned long flags;
2989 unsigned int ret = 0; 2923 int irq, i;
2990 int irq;
2991 2924
2992 if (from < nr_irqs_gsi) 2925 if (from < nr_irqs_gsi)
2993 from = nr_irqs_gsi; 2926 from = nr_irqs_gsi;
2994 2927
2995 irq = alloc_irq_from(from, node); 2928 cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
2996 if (irq < 0) 2929 if (!cfg)
2997 return 0;
2998 cfg = alloc_irq_cfg(irq, node);
2999 if (!cfg) {
3000 free_irq_at(irq, NULL);
3001 return 0; 2930 return 0;
2931
2932 irq = alloc_irqs_from(from, count, node);
2933 if (irq < 0)
2934 goto out_cfgs;
2935
2936 for (i = 0; i < count; i++) {
2937 cfg[i] = alloc_irq_cfg(irq + i, node);
2938 if (!cfg[i])
2939 goto out_irqs;
3002 } 2940 }
3003 2941
3004 raw_spin_lock_irqsave(&vector_lock, flags); 2942 raw_spin_lock_irqsave(&vector_lock, flags);
3005 if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) 2943 for (i = 0; i < count; i++)
3006 ret = irq; 2944 if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
2945 goto out_vecs;
3007 raw_spin_unlock_irqrestore(&vector_lock, flags); 2946 raw_spin_unlock_irqrestore(&vector_lock, flags);
3008 2947
3009 if (ret) { 2948 for (i = 0; i < count; i++) {
3010 irq_set_chip_data(irq, cfg); 2949 irq_set_chip_data(irq + i, cfg[i]);
3011 irq_clear_status_flags(irq, IRQ_NOREQUEST); 2950 irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
3012 } else {
3013 free_irq_at(irq, cfg);
3014 } 2951 }
3015 return ret; 2952
2953 kfree(cfg);
2954 return irq;
2955
2956out_vecs:
2957 for (i--; i >= 0; i--)
2958 __clear_irq_vector(irq + i, cfg[i]);
2959 raw_spin_unlock_irqrestore(&vector_lock, flags);
2960out_irqs:
2961 for (i = 0; i < count; i++)
2962 free_irq_at(irq + i, cfg[i]);
2963out_cfgs:
2964 kfree(cfg);
2965 return 0;
2966}
2967
2968unsigned int create_irq_nr(unsigned int from, int node)
2969{
2970 return __create_irqs(from, 1, node);
3016} 2971}
3017 2972
3018int create_irq(void) 2973int create_irq(void)
@@ -3037,48 +2992,35 @@ void destroy_irq(unsigned int irq)
3037 2992
3038 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 2993 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3039 2994
3040 if (irq_remapped(cfg)) 2995 free_remapped_irq(irq);
3041 free_remapped_irq(irq); 2996
3042 raw_spin_lock_irqsave(&vector_lock, flags); 2997 raw_spin_lock_irqsave(&vector_lock, flags);
3043 __clear_irq_vector(irq, cfg); 2998 __clear_irq_vector(irq, cfg);
3044 raw_spin_unlock_irqrestore(&vector_lock, flags); 2999 raw_spin_unlock_irqrestore(&vector_lock, flags);
3045 free_irq_at(irq, cfg); 3000 free_irq_at(irq, cfg);
3046} 3001}
3047 3002
3003void destroy_irqs(unsigned int irq, unsigned int count)
3004{
3005 unsigned int i;
3006
3007 for (i = 0; i < count; i++)
3008 destroy_irq(irq + i);
3009}
3010
3048/* 3011/*
3049 * MSI message composition 3012 * MSI message composition
3050 */ 3013 */
3051#ifdef CONFIG_PCI_MSI 3014void native_compose_msi_msg(struct pci_dev *pdev,
3052static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, 3015 unsigned int irq, unsigned int dest,
3053 struct msi_msg *msg, u8 hpet_id) 3016 struct msi_msg *msg, u8 hpet_id)
3054{ 3017{
3055 struct irq_cfg *cfg; 3018 struct irq_cfg *cfg = irq_cfg(irq);
3056 int err;
3057 unsigned dest;
3058
3059 if (disable_apic)
3060 return -ENXIO;
3061
3062 cfg = irq_cfg(irq);
3063 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3064 if (err)
3065 return err;
3066 3019
3067 err = apic->cpu_mask_to_apicid_and(cfg->domain, 3020 msg->address_hi = MSI_ADDR_BASE_HI;
3068 apic->target_cpus(), &dest);
3069 if (err)
3070 return err;
3071
3072 if (irq_remapped(cfg)) {
3073 compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
3074 return err;
3075 }
3076 3021
3077 if (x2apic_enabled()) 3022 if (x2apic_enabled())
3078 msg->address_hi = MSI_ADDR_BASE_HI | 3023 msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
3079 MSI_ADDR_EXT_DEST_ID(dest);
3080 else
3081 msg->address_hi = MSI_ADDR_BASE_HI;
3082 3024
3083 msg->address_lo = 3025 msg->address_lo =
3084 MSI_ADDR_BASE_LO | 3026 MSI_ADDR_BASE_LO |
@@ -3097,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3097 MSI_DATA_DELIVERY_FIXED: 3039 MSI_DATA_DELIVERY_FIXED:
3098 MSI_DATA_DELIVERY_LOWPRI) | 3040 MSI_DATA_DELIVERY_LOWPRI) |
3099 MSI_DATA_VECTOR(cfg->vector); 3041 MSI_DATA_VECTOR(cfg->vector);
3042}
3100 3043
3101 return err; 3044#ifdef CONFIG_PCI_MSI
3045static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3046 struct msi_msg *msg, u8 hpet_id)
3047{
3048 struct irq_cfg *cfg;
3049 int err;
3050 unsigned dest;
3051
3052 if (disable_apic)
3053 return -ENXIO;
3054
3055 cfg = irq_cfg(irq);
3056 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3057 if (err)
3058 return err;
3059
3060 err = apic->cpu_mask_to_apicid_and(cfg->domain,
3061 apic->target_cpus(), &dest);
3062 if (err)
3063 return err;
3064
3065 x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
3066
3067 return 0;
3102} 3068}
3103 3069
3104static int 3070static int
@@ -3136,23 +3102,28 @@ static struct irq_chip msi_chip = {
3136 .irq_retrigger = ioapic_retrigger_irq, 3102 .irq_retrigger = ioapic_retrigger_irq,
3137}; 3103};
3138 3104
3139static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3105int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
3106 unsigned int irq_base, unsigned int irq_offset)
3140{ 3107{
3141 struct irq_chip *chip = &msi_chip; 3108 struct irq_chip *chip = &msi_chip;
3142 struct msi_msg msg; 3109 struct msi_msg msg;
3110 unsigned int irq = irq_base + irq_offset;
3143 int ret; 3111 int ret;
3144 3112
3145 ret = msi_compose_msg(dev, irq, &msg, -1); 3113 ret = msi_compose_msg(dev, irq, &msg, -1);
3146 if (ret < 0) 3114 if (ret < 0)
3147 return ret; 3115 return ret;
3148 3116
3149 irq_set_msi_desc(irq, msidesc); 3117 irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
3150 write_msi_msg(irq, &msg);
3151 3118
3152 if (irq_remapped(irq_get_chip_data(irq))) { 3119 /*
3153 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3120 * MSI-X message is written per-IRQ, the offset is always 0.
3154 irq_remap_modify_chip_defaults(chip); 3121 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
3155 } 3122 */
3123 if (!irq_offset)
3124 write_msi_msg(irq, &msg);
3125
3126 setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
3156 3127
3157 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3128 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3158 3129
@@ -3163,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3163 3134
3164int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3135int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3165{ 3136{
3166 int node, ret, sub_handle, index = 0;
3167 unsigned int irq, irq_want; 3137 unsigned int irq, irq_want;
3168 struct msi_desc *msidesc; 3138 struct msi_desc *msidesc;
3139 int node, ret;
3169 3140
3170 /* x86 doesn't support multiple MSI yet */ 3141 /* Multiple MSI vectors only supported with interrupt remapping */
3171 if (type == PCI_CAP_ID_MSI && nvec > 1) 3142 if (type == PCI_CAP_ID_MSI && nvec > 1)
3172 return 1; 3143 return 1;
3173 3144
3174 node = dev_to_node(&dev->dev); 3145 node = dev_to_node(&dev->dev);
3175 irq_want = nr_irqs_gsi; 3146 irq_want = nr_irqs_gsi;
3176 sub_handle = 0;
3177 list_for_each_entry(msidesc, &dev->msi_list, list) { 3147 list_for_each_entry(msidesc, &dev->msi_list, list) {
3178 irq = create_irq_nr(irq_want, node); 3148 irq = create_irq_nr(irq_want, node);
3179 if (irq == 0) 3149 if (irq == 0)
3180 return -1; 3150 return -ENOSPC;
3151
3181 irq_want = irq + 1; 3152 irq_want = irq + 1;
3182 if (!irq_remapping_enabled)
3183 goto no_ir;
3184 3153
3185 if (!sub_handle) { 3154 ret = setup_msi_irq(dev, msidesc, irq, 0);
3186 /*
3187 * allocate the consecutive block of IRTE's
3188 * for 'nvec'
3189 */
3190 index = msi_alloc_remapped_irq(dev, irq, nvec);
3191 if (index < 0) {
3192 ret = index;
3193 goto error;
3194 }
3195 } else {
3196 ret = msi_setup_remapped_irq(dev, irq, index,
3197 sub_handle);
3198 if (ret < 0)
3199 goto error;
3200 }
3201no_ir:
3202 ret = setup_msi_irq(dev, msidesc, irq);
3203 if (ret < 0) 3155 if (ret < 0)
3204 goto error; 3156 goto error;
3205 sub_handle++;
3206 } 3157 }
3207 return 0; 3158 return 0;
3208 3159
@@ -3298,26 +3249,19 @@ static struct irq_chip hpet_msi_type = {
3298 .irq_retrigger = ioapic_retrigger_irq, 3249 .irq_retrigger = ioapic_retrigger_irq,
3299}; 3250};
3300 3251
3301int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3252int default_setup_hpet_msi(unsigned int irq, unsigned int id)
3302{ 3253{
3303 struct irq_chip *chip = &hpet_msi_type; 3254 struct irq_chip *chip = &hpet_msi_type;
3304 struct msi_msg msg; 3255 struct msi_msg msg;
3305 int ret; 3256 int ret;
3306 3257
3307 if (irq_remapping_enabled) {
3308 ret = setup_hpet_msi_remapped(irq, id);
3309 if (ret)
3310 return ret;
3311 }
3312
3313 ret = msi_compose_msg(NULL, irq, &msg, id); 3258 ret = msi_compose_msg(NULL, irq, &msg, id);
3314 if (ret < 0) 3259 if (ret < 0)
3315 return ret; 3260 return ret;
3316 3261
3317 hpet_msi_write(irq_get_handler_data(irq), &msg); 3262 hpet_msi_write(irq_get_handler_data(irq), &msg);
3318 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3263 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3319 if (irq_remapped(irq_get_chip_data(irq))) 3264 setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
3320 irq_remap_modify_chip_defaults(chip);
3321 3265
3322 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3266 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3323 return 0; 3267 return 0;
@@ -3683,10 +3627,7 @@ void __init setup_ioapic_dest(void)
3683 else 3627 else
3684 mask = apic->target_cpus(); 3628 mask = apic->target_cpus();
3685 3629
3686 if (irq_remapping_enabled) 3630 x86_io_apic_ops.set_affinity(idata, mask, false);
3687 set_remapped_irq_affinity(idata, mask, false);
3688 else
3689 ioapic_set_affinity(idata, mask, false);
3690 } 3631 }
3691 3632
3692} 3633}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf26676..7434d8556d09 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
106 unsigned long mask = cpumask_bits(cpumask)[0]; 106 unsigned long mask = cpumask_bits(cpumask)[0];
107 unsigned long flags; 107 unsigned long flags;
108 108
109 if (WARN_ONCE(!mask, "empty IPI mask")) 109 if (!mask)
110 return; 110 return;
111 111
112 local_irq_save(flags); 112 local_irq_save(flags);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e03a1e180e81..562a76d433c8 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
20} 20}
21early_param("x2apic_phys", set_x2apic_phys_mode); 21early_param("x2apic_phys", set_x2apic_phys_mode);
22 22
23static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 23static bool x2apic_fadt_phys(void)
24{ 24{
25 if (x2apic_phys) 25 if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
26 return x2apic_enabled(); 26 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
27 else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
28 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
29 x2apic_enabled()) {
30 printk(KERN_DEBUG "System requires x2apic physical mode\n"); 27 printk(KERN_DEBUG "System requires x2apic physical mode\n");
31 return 1; 28 return true;
32 } 29 }
33 else 30 return false;
34 return 0; 31}
32
33static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
35} 36}
36 37
37static void 38static void
@@ -82,7 +83,7 @@ static void init_x2apic_ldr(void)
82 83
83static int x2apic_phys_probe(void) 84static int x2apic_phys_probe(void)
84{ 85{
85 if (x2apic_mode && x2apic_phys) 86 if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
86 return 1; 87 return 1;
87 88
88 return apic == &apic_x2apic_phys; 89 return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8cfade9510a4..794f6eb54cd3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -91,10 +91,16 @@ static int __init early_get_pnodeid(void)
91 m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); 91 m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
92 uv_min_hub_revision_id = node_id.s.revision; 92 uv_min_hub_revision_id = node_id.s.revision;
93 93
94 if (node_id.s.part_number == UV2_HUB_PART_NUMBER) 94 switch (node_id.s.part_number) {
95 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; 95 case UV2_HUB_PART_NUMBER:
96 if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X) 96 case UV2_HUB_PART_NUMBER_X:
97 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; 97 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
98 break;
99 case UV3_HUB_PART_NUMBER:
100 case UV3_HUB_PART_NUMBER_X:
101 uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
102 break;
103 }
98 104
99 uv_hub_info->hub_revision = uv_min_hub_revision_id; 105 uv_hub_info->hub_revision = uv_min_hub_revision_id;
100 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); 106 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
@@ -130,13 +136,16 @@ static void __init uv_set_apicid_hibit(void)
130 136
131static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 137static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
132{ 138{
133 int pnodeid, is_uv1, is_uv2; 139 int pnodeid, is_uv1, is_uv2, is_uv3;
134 140
135 is_uv1 = !strcmp(oem_id, "SGI"); 141 is_uv1 = !strcmp(oem_id, "SGI");
136 is_uv2 = !strcmp(oem_id, "SGI2"); 142 is_uv2 = !strcmp(oem_id, "SGI2");
137 if (is_uv1 || is_uv2) { 143 is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */
144 if (is_uv1 || is_uv2 || is_uv3) {
138 uv_hub_info->hub_revision = 145 uv_hub_info->hub_revision =
139 is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE; 146 (is_uv1 ? UV1_HUB_REVISION_BASE :
147 (is_uv2 ? UV2_HUB_REVISION_BASE :
148 UV3_HUB_REVISION_BASE));
140 pnodeid = early_get_pnodeid(); 149 pnodeid = early_get_pnodeid();
141 early_get_apic_pnode_shift(); 150 early_get_apic_pnode_shift();
142 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 151 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
@@ -450,14 +459,17 @@ static __init void map_high(char *id, unsigned long base, int pshift,
450 459
451 paddr = base << pshift; 460 paddr = base << pshift;
452 bytes = (1UL << bshift) * (max_pnode + 1); 461 bytes = (1UL << bshift) * (max_pnode + 1);
453 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, 462 if (!paddr) {
454 paddr + bytes); 463 pr_info("UV: Map %s_HI base address NULL\n", id);
464 return;
465 }
466 pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
455 if (map_type == map_uc) 467 if (map_type == map_uc)
456 init_extra_mapping_uc(paddr, bytes); 468 init_extra_mapping_uc(paddr, bytes);
457 else 469 else
458 init_extra_mapping_wb(paddr, bytes); 470 init_extra_mapping_wb(paddr, bytes);
459
460} 471}
472
461static __init void map_gru_high(int max_pnode) 473static __init void map_gru_high(int max_pnode)
462{ 474{
463 union uvh_rh_gam_gru_overlay_config_mmr_u gru; 475 union uvh_rh_gam_gru_overlay_config_mmr_u gru;
@@ -468,7 +480,8 @@ static __init void map_gru_high(int max_pnode)
468 map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); 480 map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
469 gru_start_paddr = ((u64)gru.s.base << shift); 481 gru_start_paddr = ((u64)gru.s.base << shift);
470 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); 482 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
471 483 } else {
484 pr_info("UV: GRU disabled\n");
472 } 485 }
473} 486}
474 487
@@ -480,23 +493,146 @@ static __init void map_mmr_high(int max_pnode)
480 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); 493 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
481 if (mmr.s.enable) 494 if (mmr.s.enable)
482 map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); 495 map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
496 else
497 pr_info("UV: MMR disabled\n");
498}
499
500/*
501 * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY
502 * and REDIRECT MMR regs are exactly the same on UV3.
503 */
504struct mmioh_config {
505 unsigned long overlay;
506 unsigned long redirect;
507 char *id;
508};
509
510static __initdata struct mmioh_config mmiohs[] = {
511 {
512 UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR,
513 UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR,
514 "MMIOH0"
515 },
516 {
517 UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR,
518 UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR,
519 "MMIOH1"
520 },
521};
522
523static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
524{
525 union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
526 unsigned long mmr;
527 unsigned long base;
528 int i, n, shift, m_io, max_io;
529 int nasid, lnasid, fi, li;
530 char *id;
531
532 id = mmiohs[index].id;
533 overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
534 pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n",
535 id, overlay.v, overlay.s3.base, overlay.s3.m_io);
536 if (!overlay.s3.enable) {
537 pr_info("UV: %s disabled\n", id);
538 return;
539 }
540
541 shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
542 base = (unsigned long)overlay.s3.base;
543 m_io = overlay.s3.m_io;
544 mmr = mmiohs[index].redirect;
545 n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
546 min_pnode *= 2; /* convert to NASID */
547 max_pnode *= 2;
548 max_io = lnasid = fi = li = -1;
549
550 for (i = 0; i < n; i++) {
551 union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect;
552
553 redirect.v = uv_read_local_mmr(mmr + i * 8);
554 nasid = redirect.s3.nasid;
555 if (nasid < min_pnode || max_pnode < nasid)
556 nasid = -1; /* invalid NASID */
557
558 if (nasid == lnasid) {
559 li = i;
560 if (i != n-1) /* last entry check */
561 continue;
562 }
563
564 /* check if we have a cached (or last) redirect to print */
565 if (lnasid != -1 || (i == n-1 && nasid != -1)) {
566 unsigned long addr1, addr2;
567 int f, l;
568
569 if (lnasid == -1) {
570 f = l = i;
571 lnasid = nasid;
572 } else {
573 f = fi;
574 l = li;
575 }
576 addr1 = (base << shift) +
577 f * (unsigned long)(1 << m_io);
578 addr2 = (base << shift) +
579 (l + 1) * (unsigned long)(1 << m_io);
580 pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
581 id, fi, li, lnasid, addr1, addr2);
582 if (max_io < l)
583 max_io = l;
584 }
585 fi = li = i;
586 lnasid = nasid;
587 }
588
589 pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n",
590 id, base, shift, m_io, max_io);
591
592 if (max_io >= 0)
593 map_high(id, base, shift, m_io, max_io, map_uc);
483} 594}
484 595
485static __init void map_mmioh_high(int max_pnode) 596static __init void map_mmioh_high(int min_pnode, int max_pnode)
486{ 597{
487 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; 598 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
488 int shift; 599 unsigned long mmr, base;
600 int shift, enable, m_io, n_io;
489 601
490 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); 602 if (is_uv3_hub()) {
491 if (is_uv1_hub() && mmioh.s1.enable) { 603 /* Map both MMIOH Regions */
492 shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; 604 map_mmioh_high_uv3(0, min_pnode, max_pnode);
493 map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io, 605 map_mmioh_high_uv3(1, min_pnode, max_pnode);
494 max_pnode, map_uc); 606 return;
495 } 607 }
496 if (is_uv2_hub() && mmioh.s2.enable) { 608
609 if (is_uv1_hub()) {
610 mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
611 shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
612 mmioh.v = uv_read_local_mmr(mmr);
613 enable = !!mmioh.s1.enable;
614 base = mmioh.s1.base;
615 m_io = mmioh.s1.m_io;
616 n_io = mmioh.s1.n_io;
617 } else if (is_uv2_hub()) {
618 mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
497 shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; 619 shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
498 map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io, 620 mmioh.v = uv_read_local_mmr(mmr);
499 max_pnode, map_uc); 621 enable = !!mmioh.s2.enable;
622 base = mmioh.s2.base;
623 m_io = mmioh.s2.m_io;
624 n_io = mmioh.s2.n_io;
625 } else
626 return;
627
628 if (enable) {
629 max_pnode &= (1 << n_io) - 1;
630 pr_info(
631 "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
632 base, shift, m_io, n_io, max_pnode);
633 map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
634 } else {
635 pr_info("UV: MMIOH disabled\n");
500 } 636 }
501} 637}
502 638
@@ -724,42 +860,41 @@ void uv_nmi_init(void)
724void __init uv_system_init(void) 860void __init uv_system_init(void)
725{ 861{
726 union uvh_rh_gam_config_mmr_u m_n_config; 862 union uvh_rh_gam_config_mmr_u m_n_config;
727 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
728 union uvh_node_id_u node_id; 863 union uvh_node_id_u node_id;
729 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 864 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
730 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io; 865 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
731 int gnode_extra, max_pnode = 0; 866 int gnode_extra, min_pnode = 999999, max_pnode = -1;
732 unsigned long mmr_base, present, paddr; 867 unsigned long mmr_base, present, paddr;
733 unsigned short pnode_mask, pnode_io_mask; 868 unsigned short pnode_mask;
869 char *hub = (is_uv1_hub() ? "UV1" :
870 (is_uv2_hub() ? "UV2" :
871 "UV3"));
734 872
735 printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2"); 873 pr_info("UV: Found %s hub\n", hub);
736 map_low_mmrs(); 874 map_low_mmrs();
737 875
738 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); 876 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
739 m_val = m_n_config.s.m_skt; 877 m_val = m_n_config.s.m_skt;
740 n_val = m_n_config.s.n_skt; 878 n_val = m_n_config.s.n_skt;
741 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); 879 pnode_mask = (1 << n_val) - 1;
742 n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io;
743 mmr_base = 880 mmr_base =
744 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 881 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
745 ~UV_MMR_ENABLE; 882 ~UV_MMR_ENABLE;
746 pnode_mask = (1 << n_val) - 1;
747 pnode_io_mask = (1 << n_io) - 1;
748 883
749 node_id.v = uv_read_local_mmr(UVH_NODE_ID); 884 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
750 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; 885 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
751 gnode_upper = ((unsigned long)gnode_extra << m_val); 886 gnode_upper = ((unsigned long)gnode_extra << m_val);
752 printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", 887 pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x\n",
753 n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); 888 n_val, m_val, pnode_mask, gnode_upper, gnode_extra);
754 889
755 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); 890 pr_info("UV: global MMR base 0x%lx\n", mmr_base);
756 891
757 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) 892 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
758 uv_possible_blades += 893 uv_possible_blades +=
759 hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); 894 hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
760 895
761 /* uv_num_possible_blades() is really the hub count */ 896 /* uv_num_possible_blades() is really the hub count */
762 printk(KERN_INFO "UV: Found %d blades, %d hubs\n", 897 pr_info("UV: Found %d blades, %d hubs\n",
763 is_uv1_hub() ? uv_num_possible_blades() : 898 is_uv1_hub() ? uv_num_possible_blades() :
764 (uv_num_possible_blades() + 1) / 2, 899 (uv_num_possible_blades() + 1) / 2,
765 uv_num_possible_blades()); 900 uv_num_possible_blades());
@@ -794,6 +929,7 @@ void __init uv_system_init(void)
794 uv_blade_info[blade].nr_possible_cpus = 0; 929 uv_blade_info[blade].nr_possible_cpus = 0;
795 uv_blade_info[blade].nr_online_cpus = 0; 930 uv_blade_info[blade].nr_online_cpus = 0;
796 spin_lock_init(&uv_blade_info[blade].nmi_lock); 931 spin_lock_init(&uv_blade_info[blade].nmi_lock);
932 min_pnode = min(pnode, min_pnode);
797 max_pnode = max(pnode, max_pnode); 933 max_pnode = max(pnode, max_pnode);
798 blade++; 934 blade++;
799 } 935 }
@@ -856,7 +992,7 @@ void __init uv_system_init(void)
856 992
857 map_gru_high(max_pnode); 993 map_gru_high(max_pnode);
858 map_mmr_high(max_pnode); 994 map_mmr_high(max_pnode);
859 map_mmioh_high(max_pnode & pnode_io_mask); 995 map_mmioh_high(min_pnode, max_pnode);
860 996
861 uv_cpu_init(); 997 uv_cpu_init();
862 uv_scir_register_cpu_notifier(); 998 uv_scir_register_cpu_notifier();
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e43503..66b5faffe14a 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -232,6 +232,7 @@
232#include <linux/acpi.h> 232#include <linux/acpi.h>
233#include <linux/syscore_ops.h> 233#include <linux/syscore_ops.h>
234#include <linux/i8253.h> 234#include <linux/i8253.h>
235#include <linux/cpuidle.h>
235 236
236#include <asm/uaccess.h> 237#include <asm/uaccess.h>
237#include <asm/desc.h> 238#include <asm/desc.h>
@@ -360,13 +361,35 @@ struct apm_user {
360 * idle percentage above which bios idle calls are done 361 * idle percentage above which bios idle calls are done
361 */ 362 */
362#ifdef CONFIG_APM_CPU_IDLE 363#ifdef CONFIG_APM_CPU_IDLE
363#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
364#define DEFAULT_IDLE_THRESHOLD 95 364#define DEFAULT_IDLE_THRESHOLD 95
365#else 365#else
366#define DEFAULT_IDLE_THRESHOLD 100 366#define DEFAULT_IDLE_THRESHOLD 100
367#endif 367#endif
368#define DEFAULT_IDLE_PERIOD (100 / 3) 368#define DEFAULT_IDLE_PERIOD (100 / 3)
369 369
370static int apm_cpu_idle(struct cpuidle_device *dev,
371 struct cpuidle_driver *drv, int index);
372
373static struct cpuidle_driver apm_idle_driver = {
374 .name = "apm_idle",
375 .owner = THIS_MODULE,
376 .en_core_tk_irqen = 1,
377 .states = {
378 { /* entry 0 is for polling */ },
379 { /* entry 1 is for APM idle */
380 .name = "APM",
381 .desc = "APM idle",
382 .flags = CPUIDLE_FLAG_TIME_VALID,
383 .exit_latency = 250, /* WAG */
384 .target_residency = 500, /* WAG */
385 .enter = &apm_cpu_idle
386 },
387 },
388 .state_count = 2,
389};
390
391static struct cpuidle_device apm_cpuidle_device;
392
370/* 393/*
371 * Local variables 394 * Local variables
372 */ 395 */
@@ -377,7 +400,6 @@ static struct {
377static int clock_slowed; 400static int clock_slowed;
378static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; 401static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
379static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; 402static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
380static int set_pm_idle;
381static int suspends_pending; 403static int suspends_pending;
382static int standbys_pending; 404static int standbys_pending;
383static int ignore_sys_suspend; 405static int ignore_sys_suspend;
@@ -884,8 +906,6 @@ static void apm_do_busy(void)
884#define IDLE_CALC_LIMIT (HZ * 100) 906#define IDLE_CALC_LIMIT (HZ * 100)
885#define IDLE_LEAKY_MAX 16 907#define IDLE_LEAKY_MAX 16
886 908
887static void (*original_pm_idle)(void) __read_mostly;
888
889/** 909/**
890 * apm_cpu_idle - cpu idling for APM capable Linux 910 * apm_cpu_idle - cpu idling for APM capable Linux
891 * 911 *
@@ -894,35 +914,36 @@ static void (*original_pm_idle)(void) __read_mostly;
894 * Furthermore it calls the system default idle routine. 914 * Furthermore it calls the system default idle routine.
895 */ 915 */
896 916
897static void apm_cpu_idle(void) 917static int apm_cpu_idle(struct cpuidle_device *dev,
918 struct cpuidle_driver *drv, int index)
898{ 919{
899 static int use_apm_idle; /* = 0 */ 920 static int use_apm_idle; /* = 0 */
900 static unsigned int last_jiffies; /* = 0 */ 921 static unsigned int last_jiffies; /* = 0 */
901 static unsigned int last_stime; /* = 0 */ 922 static unsigned int last_stime; /* = 0 */
923 cputime_t stime;
902 924
903 int apm_idle_done = 0; 925 int apm_idle_done = 0;
904 unsigned int jiffies_since_last_check = jiffies - last_jiffies; 926 unsigned int jiffies_since_last_check = jiffies - last_jiffies;
905 unsigned int bucket; 927 unsigned int bucket;
906 928
907 WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
908recalc: 929recalc:
930 task_cputime(current, NULL, &stime);
909 if (jiffies_since_last_check > IDLE_CALC_LIMIT) { 931 if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
910 use_apm_idle = 0; 932 use_apm_idle = 0;
911 last_jiffies = jiffies;
912 last_stime = current->stime;
913 } else if (jiffies_since_last_check > idle_period) { 933 } else if (jiffies_since_last_check > idle_period) {
914 unsigned int idle_percentage; 934 unsigned int idle_percentage;
915 935
916 idle_percentage = current->stime - last_stime; 936 idle_percentage = stime - last_stime;
917 idle_percentage *= 100; 937 idle_percentage *= 100;
918 idle_percentage /= jiffies_since_last_check; 938 idle_percentage /= jiffies_since_last_check;
919 use_apm_idle = (idle_percentage > idle_threshold); 939 use_apm_idle = (idle_percentage > idle_threshold);
920 if (apm_info.forbid_idle) 940 if (apm_info.forbid_idle)
921 use_apm_idle = 0; 941 use_apm_idle = 0;
922 last_jiffies = jiffies;
923 last_stime = current->stime;
924 } 942 }
925 943
944 last_jiffies = jiffies;
945 last_stime = stime;
946
926 bucket = IDLE_LEAKY_MAX; 947 bucket = IDLE_LEAKY_MAX;
927 948
928 while (!need_resched()) { 949 while (!need_resched()) {
@@ -950,10 +971,7 @@ recalc:
950 break; 971 break;
951 } 972 }
952 } 973 }
953 if (original_pm_idle) 974 default_idle();
954 original_pm_idle();
955 else
956 default_idle();
957 local_irq_disable(); 975 local_irq_disable();
958 jiffies_since_last_check = jiffies - last_jiffies; 976 jiffies_since_last_check = jiffies - last_jiffies;
959 if (jiffies_since_last_check > idle_period) 977 if (jiffies_since_last_check > idle_period)
@@ -963,7 +981,7 @@ recalc:
963 if (apm_idle_done) 981 if (apm_idle_done)
964 apm_do_busy(); 982 apm_do_busy();
965 983
966 local_irq_enable(); 984 return index;
967} 985}
968 986
969/** 987/**
@@ -2381,9 +2399,9 @@ static int __init apm_init(void)
2381 if (HZ != 100) 2399 if (HZ != 100)
2382 idle_period = (idle_period * HZ) / 100; 2400 idle_period = (idle_period * HZ) / 100;
2383 if (idle_threshold < 100) { 2401 if (idle_threshold < 100) {
2384 original_pm_idle = pm_idle; 2402 if (!cpuidle_register_driver(&apm_idle_driver))
2385 pm_idle = apm_cpu_idle; 2403 if (cpuidle_register_device(&apm_cpuidle_device))
2386 set_pm_idle = 1; 2404 cpuidle_unregister_driver(&apm_idle_driver);
2387 } 2405 }
2388 2406
2389 return 0; 2407 return 0;
@@ -2393,15 +2411,9 @@ static void __exit apm_exit(void)
2393{ 2411{
2394 int error; 2412 int error;
2395 2413
2396 if (set_pm_idle) { 2414 cpuidle_unregister_device(&apm_cpuidle_device);
2397 pm_idle = original_pm_idle; 2415 cpuidle_unregister_driver(&apm_idle_driver);
2398 /* 2416
2399 * We are about to unload the current idle thread pm callback
2400 * (pm_idle), Wait for all processors to update cached/local
2401 * copies of pm_idle before proceeding.
2402 */
2403 kick_all_cpus_sync();
2404 }
2405 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) 2417 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
2406 && (apm_info.connection_version > 0x0100)) { 2418 && (apm_info.connection_version > 0x0100)) {
2407 error = apm_engage_power_management(APM_DEVICE_ALL, 0); 2419 error = apm_engage_power_management(APM_DEVICE_ALL, 0);
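
The apm_32.c hunks above move APM idle from the old pm_idle hook to a registered cpuidle driver, while keeping the idle-percentage heuristic that decides whether the BIOS idle call is worth making. Below is a minimal stand-alone sketch of that gating logic; the 95 threshold is DEFAULT_IDLE_THRESHOLD from the hunk, while the function name and the sample numbers are illustrative only.

#include <stdio.h>

#define IDLE_THRESHOLD 95    /* DEFAULT_IDLE_THRESHOLD with CONFIG_APM_CPU_IDLE=y */

/*
 * Mirror of the recalc logic: compare the share of the last interval spent
 * in system time against the threshold to decide whether the APM BIOS idle
 * call should be used on the next pass.
 */
static int want_apm_idle(unsigned int jiffies_since_last_check,
                         unsigned int stime_delta)
{
        unsigned int idle_percentage;

        if (!jiffies_since_last_check)
                return 0;

        idle_percentage = stime_delta * 100 / jiffies_since_last_check;
        return idle_percentage > IDLE_THRESHOLD;
}

int main(void)
{
        /* 32 of the last 33 ticks were system time: ~96%, above the threshold */
        printf("use APM idle: %d\n", want_apm_idle(33, 32));
        return 0;
}
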
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15239fffd6fe..fa96eb0d02fb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -12,7 +12,6 @@
12#include <asm/pci-direct.h> 12#include <asm/pci-direct.h>
13 13
14#ifdef CONFIG_X86_64 14#ifdef CONFIG_X86_64
15# include <asm/numa_64.h>
16# include <asm/mmconfig.h> 15# include <asm/mmconfig.h>
17# include <asm/cacheflush.h> 16# include <asm/cacheflush.h>
18#endif 17#endif
@@ -220,8 +219,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
220 */ 219 */
221 WARN_ONCE(1, "WARNING: This combination of AMD" 220 WARN_ONCE(1, "WARNING: This combination of AMD"
222 " processors is not suitable for SMP.\n"); 221 " processors is not suitable for SMP.\n");
223 if (!test_taint(TAINT_UNSAFE_SMP)) 222 add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
224 add_taint(TAINT_UNSAFE_SMP);
225 223
226valid_k7: 224valid_k7:
227 ; 225 ;
@@ -364,9 +362,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
364#endif 362#endif
365} 363}
366 364
367int amd_get_nb_id(int cpu) 365u16 amd_get_nb_id(int cpu)
368{ 366{
369 int id = 0; 367 u16 id = 0;
370#ifdef CONFIG_SMP 368#ifdef CONFIG_SMP
371 id = per_cpu(cpu_llc_id, cpu); 369 id = per_cpu(cpu_llc_id, cpu);
372#endif 370#endif
@@ -518,10 +516,9 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
518static void __cpuinit init_amd(struct cpuinfo_x86 *c) 516static void __cpuinit init_amd(struct cpuinfo_x86 *c)
519{ 517{
520 u32 dummy; 518 u32 dummy;
521
522#ifdef CONFIG_SMP
523 unsigned long long value; 519 unsigned long long value;
524 520
521#ifdef CONFIG_SMP
525 /* 522 /*
526 * Disable TLB flush filter by setting HWCR.FFDIS on K8 523 * Disable TLB flush filter by setting HWCR.FFDIS on K8
527 * bit 6 of msr C001_0015 524 * bit 6 of msr C001_0015
@@ -559,12 +556,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
559 * (AMD Erratum #110, docId: 25759). 556 * (AMD Erratum #110, docId: 25759).
560 */ 557 */
561 if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { 558 if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
562 u64 val;
563
564 clear_cpu_cap(c, X86_FEATURE_LAHF_LM); 559 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
565 if (!rdmsrl_amd_safe(0xc001100d, &val)) { 560 if (!rdmsrl_amd_safe(0xc001100d, &value)) {
566 val &= ~(1ULL << 32); 561 value &= ~(1ULL << 32);
567 wrmsrl_amd_safe(0xc001100d, val); 562 wrmsrl_amd_safe(0xc001100d, value);
568 } 563 }
569 } 564 }
570 565
@@ -617,13 +612,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
617 if ((c->x86 == 0x15) && 612 if ((c->x86 == 0x15) &&
618 (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && 613 (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
619 !cpu_has(c, X86_FEATURE_TOPOEXT)) { 614 !cpu_has(c, X86_FEATURE_TOPOEXT)) {
620 u64 val;
621 615
622 if (!rdmsrl_safe(0xc0011005, &val)) { 616 if (!rdmsrl_safe(0xc0011005, &value)) {
623 val |= 1ULL << 54; 617 value |= 1ULL << 54;
624 wrmsrl_safe(0xc0011005, val); 618 wrmsrl_safe(0xc0011005, value);
625 rdmsrl(0xc0011005, val); 619 rdmsrl(0xc0011005, value);
626 if (val & (1ULL << 54)) { 620 if (value & (1ULL << 54)) {
627 set_cpu_cap(c, X86_FEATURE_TOPOEXT); 621 set_cpu_cap(c, X86_FEATURE_TOPOEXT);
628 printk(KERN_INFO FW_INFO "CPU: Re-enabling " 622 printk(KERN_INFO FW_INFO "CPU: Re-enabling "
629 "disabled Topology Extensions Support\n"); 623 "disabled Topology Extensions Support\n");
@@ -637,11 +631,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
637 */ 631 */
638 if ((c->x86 == 0x15) && 632 if ((c->x86 == 0x15) &&
639 (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { 633 (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
640 u64 val;
641 634
642 if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { 635 if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
643 val |= 0x1E; 636 value |= 0x1E;
644 wrmsrl_safe(0xc0011021, val); 637 wrmsrl_safe(0xc0011021, value);
645 } 638 }
646 } 639 }
647 640
@@ -685,12 +678,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
685 * benefit in doing so. 678 * benefit in doing so.
686 */ 679 */
687 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { 680 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
681 unsigned long pfn = tseg >> PAGE_SHIFT;
682
688 printk(KERN_DEBUG "tseg: %010llx\n", tseg); 683 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
689 if ((tseg>>PMD_SHIFT) < 684 if (pfn_range_is_mapped(pfn, pfn + 1))
690 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
691 ((tseg>>PMD_SHIFT) <
692 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
693 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
694 set_memory_4k((unsigned long)__va(tseg), 1); 685 set_memory_4k((unsigned long)__va(tseg), 1);
695 } 686 }
696 } 687 }
@@ -703,13 +694,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
703 if (c->x86 > 0x11) 694 if (c->x86 > 0x11)
704 set_cpu_cap(c, X86_FEATURE_ARAT); 695 set_cpu_cap(c, X86_FEATURE_ARAT);
705 696
706 /*
707 * Disable GART TLB Walk Errors on Fam10h. We do this here
708 * because this is always needed when GART is enabled, even in a
709 * kernel which has no MCE support built in.
710 */
711 if (c->x86 == 0x10) { 697 if (c->x86 == 0x10) {
712 /* 698 /*
699 * Disable GART TLB Walk Errors on Fam10h. We do this here
700 * because this is always needed when GART is enabled, even in a
701 * kernel which has no MCE support built in.
713 * BIOS should disable GartTlbWlk Errors themself. If 702 * BIOS should disable GartTlbWlk Errors themself. If
714 * it doesn't do it here as suggested by the BKDG. 703 * it doesn't do it here as suggested by the BKDG.
715 * 704 *
@@ -723,6 +712,21 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
723 mask |= (1 << 10); 712 mask |= (1 << 10);
724 wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); 713 wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);
725 } 714 }
715
716 /*
717 * On family 10h BIOS may not have properly enabled WC+ support,
718 * causing it to be converted to CD memtype. This may result in
719 * performance degradation for certain nested-paging guests.
720 * Prevent this conversion by clearing bit 24 in
721 * MSR_AMD64_BU_CFG2.
722 *
723 * NOTE: we want to use the _safe accessors so as not to #GP kvm
724 * guests on older kvm hosts.
725 */
726
727 rdmsrl_safe(MSR_AMD64_BU_CFG2, &value);
728 value &= ~(1ULL << 24);
729 wrmsrl_safe(MSR_AMD64_BU_CFG2, value);
726 } 730 }
727 731
728 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); 732 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
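
The final amd.c hunk clears bit 24 of MSR_AMD64_BU_CFG2 through the _safe accessors so the access cannot #GP a guest running on an older KVM host. A hedged user-space sketch of that read-modify-write pattern follows, with the MSR access stubbed out; the stub names and the stored value are invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for rdmsrl_safe()/wrmsrl_safe(); a real MSR access needs ring 0. */
static uint64_t fake_bu_cfg2 = 1ULL << 24;      /* pretend BIOS left the WC+ bit set */

static int rdmsr_safe_stub(uint64_t *val) { *val = fake_bu_cfg2; return 0; }
static int wrmsr_safe_stub(uint64_t val)  { fake_bu_cfg2 = val;  return 0; }

int main(void)
{
        uint64_t value;

        /* Read-modify-write as in the hunk: only bit 24 is cleared. */
        if (!rdmsr_safe_stub(&value)) {
                value &= ~(1ULL << 24);
                wrmsr_safe_stub(value);
        }

        printf("BU_CFG2 after fixup: %#llx\n", (unsigned long long)fake_bu_cfg2);
        return 0;
}
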
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 92dfec986a48..af6455e3fcc9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,15 +17,6 @@
17#include <asm/paravirt.h> 17#include <asm/paravirt.h>
18#include <asm/alternative.h> 18#include <asm/alternative.h>
19 19
20static int __init no_halt(char *s)
21{
22 WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
23 boot_cpu_data.hlt_works_ok = 0;
24 return 1;
25}
26
27__setup("no-hlt", no_halt);
28
29static int __init no_387(char *s) 20static int __init no_387(char *s)
30{ 21{
31 boot_cpu_data.hard_math = 0; 22 boot_cpu_data.hard_math = 0;
@@ -89,23 +80,6 @@ static void __init check_fpu(void)
89 pr_warn("Hmm, FPU with FDIV bug\n"); 80 pr_warn("Hmm, FPU with FDIV bug\n");
90} 81}
91 82
92static void __init check_hlt(void)
93{
94 if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
95 return;
96
97 pr_info("Checking 'hlt' instruction... ");
98 if (!boot_cpu_data.hlt_works_ok) {
99 pr_cont("disabled\n");
100 return;
101 }
102 halt();
103 halt();
104 halt();
105 halt();
106 pr_cont("OK\n");
107}
108
109/* 83/*
110 * Check whether we are able to run this kernel safely on SMP. 84 * Check whether we are able to run this kernel safely on SMP.
111 * 85 *
@@ -129,7 +103,6 @@ void __init check_bugs(void)
129 print_cpu_info(&boot_cpu_data); 103 print_cpu_info(&boot_cpu_data);
130#endif 104#endif
131 check_config(); 105 check_config();
132 check_hlt();
133 init_utsname()->machine[1] = 106 init_utsname()->machine[1] =
134 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); 107 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
135 alternative_instructions(); 108 alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9c3ab43a6954..d814772c5bed 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,8 @@
37#include <asm/mce.h> 37#include <asm/mce.h>
38#include <asm/msr.h> 38#include <asm/msr.h>
39#include <asm/pat.h> 39#include <asm/pat.h>
40#include <asm/microcode.h>
41#include <asm/microcode_intel.h>
40 42
41#ifdef CONFIG_X86_LOCAL_APIC 43#ifdef CONFIG_X86_LOCAL_APIC
42#include <asm/uv/uv.h> 44#include <asm/uv/uv.h>
@@ -213,7 +215,7 @@ static inline int flag_is_changeable_p(u32 flag)
213} 215}
214 216
215/* Probe for the CPUID instruction */ 217/* Probe for the CPUID instruction */
216static int __cpuinit have_cpuid_p(void) 218int __cpuinit have_cpuid_p(void)
217{ 219{
218 return flag_is_changeable_p(X86_EFLAGS_ID); 220 return flag_is_changeable_p(X86_EFLAGS_ID);
219} 221}
@@ -249,11 +251,6 @@ static inline int flag_is_changeable_p(u32 flag)
249{ 251{
250 return 1; 252 return 1;
251} 253}
252/* Probe for the CPUID instruction */
253static inline int have_cpuid_p(void)
254{
255 return 1;
256}
257static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 254static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
258{ 255{
259} 256}
@@ -1223,6 +1220,12 @@ void __cpuinit cpu_init(void)
1223 int cpu; 1220 int cpu;
1224 int i; 1221 int i;
1225 1222
1223 /*
1224 * Load microcode on this cpu if a valid microcode is available.
1225 * This is early microcode loading procedure.
1226 */
1227 load_ucode_ap();
1228
1226 cpu = stack_smp_processor_id(); 1229 cpu = stack_smp_processor_id();
1227 t = &per_cpu(init_tss, cpu); 1230 t = &per_cpu(init_tss, cpu);
1228 oist = &per_cpu(orig_ist, cpu); 1231 oist = &per_cpu(orig_ist, cpu);
@@ -1314,6 +1317,8 @@ void __cpuinit cpu_init(void)
1314 struct tss_struct *t = &per_cpu(init_tss, cpu); 1317 struct tss_struct *t = &per_cpu(init_tss, cpu);
1315 struct thread_struct *thread = &curr->thread; 1318 struct thread_struct *thread = &curr->thread;
1316 1319
1320 show_ucode_info_early();
1321
1317 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { 1322 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
1318 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1323 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
1319 for (;;) 1324 for (;;)
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index a8f8fa9769d6..1e7e84a02eba 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void)
79 if (x86_hyper->init_platform) 79 if (x86_hyper->init_platform)
80 x86_hyper->init_platform(); 80 x86_hyper->init_platform();
81} 81}
82
83bool __init hypervisor_x2apic_available(void)
84{
85 return x86_hyper &&
86 x86_hyper->x2apic_available &&
87 x86_hyper->x2apic_available();
88}
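
The new hypervisor_x2apic_available() helper is a null-safe walk over an optional x2apic_available callback, which hypervisor drivers such as the vmware.c change further down can supply. A stand-alone model of the same pattern follows; the struct layout and names here are illustrative, not the kernel's.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model of struct hypervisor_x86 with an optional callback. */
struct hypervisor {
        const char *name;
        bool (*x2apic_available)(void);
};

static bool vmware_x2apic(void) { return true; }

static const struct hypervisor vmware = { "VMware", vmware_x2apic };
static const struct hypervisor none   = { "bare metal", NULL };

/* Same shape as hypervisor_x2apic_available(): every link may be absent. */
static bool hv_x2apic_available(const struct hypervisor *hv)
{
        return hv && hv->x2apic_available && hv->x2apic_available();
}

int main(void)
{
        printf("%s: %d\n", vmware.name, hv_x2apic_available(&vmware));
        printf("%s: %d\n", none.name, hv_x2apic_available(&none));
        return 0;
}
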
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcaabd0432c5..1905ce98bee0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -17,7 +17,6 @@
17 17
18#ifdef CONFIG_X86_64 18#ifdef CONFIG_X86_64
19#include <linux/topology.h> 19#include <linux/topology.h>
20#include <asm/numa_64.h>
21#endif 20#endif
22 21
23#include "cpu.h" 22#include "cpu.h"
@@ -168,7 +167,7 @@ int __cpuinit ppro_with_ram_bug(void)
168#ifdef CONFIG_X86_F00F_BUG 167#ifdef CONFIG_X86_F00F_BUG
169static void __cpuinit trap_init_f00f_bug(void) 168static void __cpuinit trap_init_f00f_bug(void)
170{ 169{
171 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); 170 __set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
172 171
173 /* 172 /*
174 * Update the IDT descriptor and reload the IDT so that 173 * Update the IDT descriptor and reload the IDT so that
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fe9edec6698a..7c6f7d548c0f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -298,8 +298,7 @@ struct _cache_attr {
298 unsigned int); 298 unsigned int);
299}; 299};
300 300
301#ifdef CONFIG_AMD_NB 301#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
302
303/* 302/*
304 * L3 cache descriptors 303 * L3 cache descriptors
305 */ 304 */
@@ -524,9 +523,9 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
524static struct _cache_attr subcaches = 523static struct _cache_attr subcaches =
525 __ATTR(subcaches, 0644, show_subcaches, store_subcaches); 524 __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
526 525
527#else /* CONFIG_AMD_NB */ 526#else
528#define amd_init_l3_cache(x, y) 527#define amd_init_l3_cache(x, y)
529#endif /* CONFIG_AMD_NB */ 528#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
530 529
531static int 530static int
532__cpuinit cpuid4_cache_lookup_regs(int index, 531__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -1227,7 +1226,7 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1227 .notifier_call = cacheinfo_cpu_callback, 1226 .notifier_call = cacheinfo_cpu_callback,
1228}; 1227};
1229 1228
1230static int __cpuinit cache_sysfs_init(void) 1229static int __init cache_sysfs_init(void)
1231{ 1230{
1232 int i; 1231 int i;
1233 1232
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 80dbda84f1c3..7bc126346ace 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -512,11 +512,8 @@ int mce_available(struct cpuinfo_x86 *c)
512 512
513static void mce_schedule_work(void) 513static void mce_schedule_work(void)
514{ 514{
515 if (!mce_ring_empty()) { 515 if (!mce_ring_empty())
516 struct work_struct *work = &__get_cpu_var(mce_work); 516 schedule_work(&__get_cpu_var(mce_work));
517 if (!work_pending(work))
518 schedule_work(work);
519 }
520} 517}
521 518
522DEFINE_PER_CPU(struct irq_work, mce_irq_work); 519DEFINE_PER_CPU(struct irq_work, mce_irq_work);
@@ -1085,7 +1082,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1085 /* 1082 /*
1086 * Set taint even when machine check was not enabled. 1083 * Set taint even when machine check was not enabled.
1087 */ 1084 */
1088 add_taint(TAINT_MACHINE_CHECK); 1085 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
1089 1086
1090 severity = mce_severity(&m, cfg->tolerant, NULL); 1087 severity = mce_severity(&m, cfg->tolerant, NULL);
1091 1088
@@ -1351,12 +1348,7 @@ int mce_notify_irq(void)
1351 /* wake processes polling /dev/mcelog */ 1348 /* wake processes polling /dev/mcelog */
1352 wake_up_interruptible(&mce_chrdev_wait); 1349 wake_up_interruptible(&mce_chrdev_wait);
1353 1350
1354 /* 1351 if (mce_helper[0])
1355 * There is no risk of missing notifications because
1356 * work_pending is always cleared before the function is
1357 * executed.
1358 */
1359 if (mce_helper[0] && !work_pending(&mce_trigger_work))
1360 schedule_work(&mce_trigger_work); 1352 schedule_work(&mce_trigger_work);
1361 1353
1362 if (__ratelimit(&ratelimit)) 1354 if (__ratelimit(&ratelimit))
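
Two of the mce.c hunks drop a work_pending() test before schedule_work(). That check was redundant: schedule_work() already refuses to queue a work item whose pending bit is still set, so calling it on a pending item is a no-op. A small user-space model of that idempotent behaviour, with a plain flag standing in for WORK_STRUCT_PENDING:

#include <stdbool.h>
#include <stdio.h>

struct work {
        bool pending;
        void (*fn)(void);
};

/* Model of schedule_work(): queueing an already-pending item does nothing. */
static bool schedule_work_model(struct work *w)
{
        if (w->pending)
                return false;           /* already queued, caller need not care */
        w->pending = true;              /* the real code uses test_and_set_bit() */
        return true;
}

static void mce_work_fn(void) { puts("processing MCE ring"); }

int main(void)
{
        struct work mce_work = { .pending = false, .fn = mce_work_fn };

        printf("first  call queued: %d\n", schedule_work_model(&mce_work));
        printf("second call queued: %d\n", schedule_work_model(&mce_work));
        return 0;
}
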
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 2d5454cd2c4f..1c044b1ccc59 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -33,7 +33,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
33 smp_processor_id()); 33 smp_processor_id());
34 } 34 }
35 35
36 add_taint(TAINT_MACHINE_CHECK); 36 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
37} 37}
38 38
39/* Set up machine check reporting for processors with Intel style MCE: */ 39/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 2d7998fb628c..e9a701aecaa1 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -15,7 +15,7 @@
15static void winchip_machine_check(struct pt_regs *regs, long error_code) 15static void winchip_machine_check(struct pt_regs *regs, long error_code)
16{ 16{
17 printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); 17 printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
18 add_taint(TAINT_MACHINE_CHECK); 18 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
19} 19}
20 20
21/* Set up machine check reporting on the Winchip C6 series */ 21/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 0a630dd4b620..a7d26d83fb70 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -14,10 +14,15 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/clocksource.h> 15#include <linux/clocksource.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/hardirq.h>
18#include <linux/interrupt.h>
17#include <asm/processor.h> 19#include <asm/processor.h>
18#include <asm/hypervisor.h> 20#include <asm/hypervisor.h>
19#include <asm/hyperv.h> 21#include <asm/hyperv.h>
20#include <asm/mshyperv.h> 22#include <asm/mshyperv.h>
23#include <asm/desc.h>
24#include <asm/idle.h>
25#include <asm/irq_regs.h>
21 26
22struct ms_hyperv_info ms_hyperv; 27struct ms_hyperv_info ms_hyperv;
23EXPORT_SYMBOL_GPL(ms_hyperv); 28EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -30,6 +35,13 @@ static bool __init ms_hyperv_platform(void)
30 if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) 35 if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
31 return false; 36 return false;
32 37
38 /*
39 * Xen emulates Hyper-V to support enlightened Windows.
40 * Check to see first if we are on a Xen Hypervisor.
41 */
42 if (xen_cpuid_base())
43 return false;
44
33 cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, 45 cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
34 &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); 46 &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
35 47
@@ -68,7 +80,14 @@ static void __init ms_hyperv_init_platform(void)
68 printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", 80 printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
69 ms_hyperv.features, ms_hyperv.hints); 81 ms_hyperv.features, ms_hyperv.hints);
70 82
71 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); 83 if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
84 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
85#if IS_ENABLED(CONFIG_HYPERV)
86 /*
87 * Setup the IDT for hypervisor callback.
88 */
89 alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
90#endif
72} 91}
73 92
74const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { 93const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
@@ -77,3 +96,36 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
77 .init_platform = ms_hyperv_init_platform, 96 .init_platform = ms_hyperv_init_platform,
78}; 97};
79EXPORT_SYMBOL(x86_hyper_ms_hyperv); 98EXPORT_SYMBOL(x86_hyper_ms_hyperv);
99
100#if IS_ENABLED(CONFIG_HYPERV)
101static int vmbus_irq = -1;
102static irq_handler_t vmbus_isr;
103
104void hv_register_vmbus_handler(int irq, irq_handler_t handler)
105{
106 vmbus_irq = irq;
107 vmbus_isr = handler;
108}
109
110void hyperv_vector_handler(struct pt_regs *regs)
111{
112 struct pt_regs *old_regs = set_irq_regs(regs);
113 struct irq_desc *desc;
114
115 irq_enter();
116 exit_idle();
117
118 desc = irq_to_desc(vmbus_irq);
119
120 if (desc)
121 generic_handle_irq_desc(vmbus_irq, desc);
122
123 irq_exit();
124 set_irq_regs(old_regs);
125}
126#else
127void hv_register_vmbus_handler(int irq, irq_handler_t handler)
128{
129}
130#endif
131EXPORT_SYMBOL_GPL(hv_register_vmbus_handler);
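
The mshyperv.c additions route the new HYPERVISOR_CALLBACK_VECTOR to a handler that the VMbus driver registers at run time through hv_register_vmbus_handler(). The registration/dispatch split can be modelled in a few lines of plain C; the names below are stand-ins, and the real handler goes through irq_to_desc()/generic_handle_irq_desc() rather than calling the ISR directly.

#include <stdio.h>

typedef void (*isr_t)(int irq);

static int vmbus_irq = -1;              /* mirrors the file-scope state in the hunk */
static isr_t vmbus_isr;

/* Called by the VMbus driver once it knows its IRQ and handler. */
static void register_vmbus_handler(int irq, isr_t handler)
{
        vmbus_irq = irq;
        vmbus_isr = handler;
}

/* Called from the hypervisor callback vector: dispatch only if registered. */
static void vector_handler(void)
{
        if (vmbus_irq >= 0 && vmbus_isr)
                vmbus_isr(vmbus_irq);
}

static void vmbus_handler(int irq) { printf("VMbus interrupt on irq %d\n", irq); }

int main(void)
{
        vector_handler();                       /* nothing registered yet: no-op */
        register_vmbus_handler(5, vmbus_handler);
        vector_handler();                       /* now reaches the driver */
        return 0;
}
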
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e9fe907cd249..fa72a39e5d46 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -542,7 +542,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
542 542
543 if (tmp != mask_lo) { 543 if (tmp != mask_lo) {
544 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); 544 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
545 add_taint(TAINT_FIRMWARE_WORKAROUND); 545 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
546 mask_lo = tmp; 546 mask_lo = tmp;
547 } 547 }
548 } 548 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4428fd178bce..bf0f01aea994 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event)
340 /* BTS is currently only allowed for user-mode. */ 340 /* BTS is currently only allowed for user-mode. */
341 if (!attr->exclude_kernel) 341 if (!attr->exclude_kernel)
342 return -EOPNOTSUPP; 342 return -EOPNOTSUPP;
343
344 if (!attr->exclude_guest)
345 return -EOPNOTSUPP;
346 } 343 }
347 344
348 hwc->config |= config; 345 hwc->config |= config;
@@ -385,9 +382,6 @@ int x86_pmu_hw_config(struct perf_event *event)
385 if (event->attr.precise_ip) { 382 if (event->attr.precise_ip) {
386 int precise = 0; 383 int precise = 0;
387 384
388 if (!event->attr.exclude_guest)
389 return -EOPNOTSUPP;
390
391 /* Support for constant skid */ 385 /* Support for constant skid */
392 if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { 386 if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
393 precise++; 387 precise++;
@@ -835,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
835 } else { 829 } else {
836 hwc->config_base = x86_pmu_config_addr(hwc->idx); 830 hwc->config_base = x86_pmu_config_addr(hwc->idx);
837 hwc->event_base = x86_pmu_event_addr(hwc->idx); 831 hwc->event_base = x86_pmu_event_addr(hwc->idx);
838 hwc->event_base_rdpmc = hwc->idx; 832 hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
839 } 833 }
840} 834}
841 835
@@ -1316,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
1316 .attrs = NULL, 1310 .attrs = NULL,
1317}; 1311};
1318 1312
1319struct perf_pmu_events_attr {
1320 struct device_attribute attr;
1321 u64 id;
1322};
1323
1324/* 1313/*
1325 * Remove all undefined events (x86_pmu.event_map(id) == 0) 1314 * Remove all undefined events (x86_pmu.event_map(id) == 0)
1326 * out of events_attr attributes. 1315 * out of events_attr attributes.
@@ -1354,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
1354#define EVENT_VAR(_id) event_attr_##_id 1343#define EVENT_VAR(_id) event_attr_##_id
1355#define EVENT_PTR(_id) &event_attr_##_id.attr.attr 1344#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
1356 1345
1357#define EVENT_ATTR(_name, _id) \ 1346#define EVENT_ATTR(_name, _id) \
1358static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ 1347 PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
1359 .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ 1348 events_sysfs_show)
1360 .id = PERF_COUNT_HW_##_id, \
1361};
1362 1349
1363EVENT_ATTR(cpu-cycles, CPU_CYCLES ); 1350EVENT_ATTR(cpu-cycles, CPU_CYCLES );
1364EVENT_ATTR(instructions, INSTRUCTIONS ); 1351EVENT_ATTR(instructions, INSTRUCTIONS );
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 115c1ea97746..7f5c75c2afdd 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,8 @@ struct x86_pmu {
325 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); 325 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
326 unsigned eventsel; 326 unsigned eventsel;
327 unsigned perfctr; 327 unsigned perfctr;
328 int (*addr_offset)(int index, bool eventsel);
329 int (*rdpmc_index)(int index);
328 u64 (*event_map)(int); 330 u64 (*event_map)(int);
329 int max_events; 331 int max_events;
330 int num_counters; 332 int num_counters;
@@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs
446 448
447u64 x86_perf_event_update(struct perf_event *event); 449u64 x86_perf_event_update(struct perf_event *event);
448 450
449static inline int x86_pmu_addr_offset(int index) 451static inline unsigned int x86_pmu_config_addr(int index)
450{ 452{
451 int offset; 453 return x86_pmu.eventsel + (x86_pmu.addr_offset ?
452 454 x86_pmu.addr_offset(index, true) : index);
453 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
454 alternative_io(ASM_NOP2,
455 "shll $1, %%eax",
456 X86_FEATURE_PERFCTR_CORE,
457 "=a" (offset),
458 "a" (index));
459
460 return offset;
461} 455}
462 456
463static inline unsigned int x86_pmu_config_addr(int index) 457static inline unsigned int x86_pmu_event_addr(int index)
464{ 458{
465 return x86_pmu.eventsel + x86_pmu_addr_offset(index); 459 return x86_pmu.perfctr + (x86_pmu.addr_offset ?
460 x86_pmu.addr_offset(index, false) : index);
466} 461}
467 462
468static inline unsigned int x86_pmu_event_addr(int index) 463static inline int x86_pmu_rdpmc_index(int index)
469{ 464{
470 return x86_pmu.perfctr + x86_pmu_addr_offset(index); 465 return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
471} 466}
472 467
473int x86_setup_perfctr(struct perf_event *event); 468int x86_setup_perfctr(struct perf_event *event);
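
The perf_event.h hunk drops the alternative_io()-based AMD offset hack in favour of optional addr_offset/rdpmc_index callbacks that fall back to a linear layout when a PMU does not provide them. A compact stand-alone sketch of that dispatch follows; the two MSR bases are the K7 values used as .eventsel/.perfctr in the AMD diff below, everything else is illustrative, and the real driver also switches to the 0xc0010200 base MSRs when the core extension is present.

#include <stdbool.h>
#include <stdio.h>

#define MSR_K7_EVNTSEL0 0xc0010000u
#define MSR_K7_PERFCTR0 0xc0010004u

struct pmu {
        unsigned int eventsel;                  /* base of the event-select MSRs */
        unsigned int perfctr;                   /* base of the counter MSRs */
        int (*addr_offset)(int index, bool eventsel);   /* optional callback */
};

/* CPUs with the core perfctr extension space their MSRs two apart. */
static int amd_core_offset(int index, bool eventsel)
{
        (void)eventsel;
        return index << 1;
}

static unsigned int config_addr(const struct pmu *p, int index)
{
        return p->eventsel + (p->addr_offset ? p->addr_offset(index, true) : index);
}

static unsigned int event_addr(const struct pmu *p, int index)
{
        return p->perfctr + (p->addr_offset ? p->addr_offset(index, false) : index);
}

int main(void)
{
        struct pmu legacy = { MSR_K7_EVNTSEL0, MSR_K7_PERFCTR0, NULL };
        struct pmu core   = { MSR_K7_EVNTSEL0, MSR_K7_PERFCTR0, amd_core_offset };

        printf("legacy ctr2 config MSR: %#x\n", config_addr(&legacy, 2));
        printf("core   ctr2 config MSR: %#x\n", config_addr(&core, 2));
        printf("core   ctr2 count  MSR: %#x\n", event_addr(&core, 2));
        return 0;
}
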
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c93bc4e813a0..dfdab42aed27 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event)
132 return amd_perfmon_event_map[hw_event]; 132 return amd_perfmon_event_map[hw_event];
133} 133}
134 134
135static int amd_pmu_hw_config(struct perf_event *event) 135static struct event_constraint *amd_nb_event_constraint;
136
137/*
138 * Previously calculated offsets
139 */
140static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
141static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
142static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
143
144/*
145 * Legacy CPUs:
146 * 4 counters starting at 0xc0010000 each offset by 1
147 *
148 * CPUs with core performance counter extensions:
149 * 6 counters starting at 0xc0010200 each offset by 2
150 *
151 * CPUs with north bridge performance counter extensions:
152 * 4 additional counters starting at 0xc0010240 each offset by 2
153 * (indexed right above either one of the above core counters)
154 */
155static inline int amd_pmu_addr_offset(int index, bool eventsel)
136{ 156{
137 int ret; 157 int offset, first, base;
138 158
139 /* pass precise event sampling to ibs: */ 159 if (!index)
140 if (event->attr.precise_ip && get_ibs_caps()) 160 return index;
141 return -ENOENT; 161
162 if (eventsel)
163 offset = event_offsets[index];
164 else
165 offset = count_offsets[index];
166
167 if (offset)
168 return offset;
169
170 if (amd_nb_event_constraint &&
171 test_bit(index, amd_nb_event_constraint->idxmsk)) {
172 /*
173 * calculate the offset of NB counters with respect to
174 * base eventsel or perfctr
175 */
176
177 first = find_first_bit(amd_nb_event_constraint->idxmsk,
178 X86_PMC_IDX_MAX);
179
180 if (eventsel)
181 base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
182 else
183 base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
184
185 offset = base + ((index - first) << 1);
186 } else if (!cpu_has_perfctr_core)
187 offset = index;
188 else
189 offset = index << 1;
190
191 if (eventsel)
192 event_offsets[index] = offset;
193 else
194 count_offsets[index] = offset;
195
196 return offset;
197}
198
199static inline int amd_pmu_rdpmc_index(int index)
200{
201 int ret, first;
202
203 if (!index)
204 return index;
205
206 ret = rdpmc_indexes[index];
142 207
143 ret = x86_pmu_hw_config(event);
144 if (ret) 208 if (ret)
145 return ret; 209 return ret;
146 210
147 if (has_branch_stack(event)) 211 if (amd_nb_event_constraint &&
148 return -EOPNOTSUPP; 212 test_bit(index, amd_nb_event_constraint->idxmsk)) {
213 /*
214 * according to the mnual, ECX value of the NB counters is
215 * the index of the NB counter (0, 1, 2 or 3) plus 6
216 */
217
218 first = find_first_bit(amd_nb_event_constraint->idxmsk,
219 X86_PMC_IDX_MAX);
220 ret = index - first + 6;
221 } else
222 ret = index;
223
224 rdpmc_indexes[index] = ret;
225
226 return ret;
227}
149 228
229static int amd_core_hw_config(struct perf_event *event)
230{
150 if (event->attr.exclude_host && event->attr.exclude_guest) 231 if (event->attr.exclude_host && event->attr.exclude_guest)
151 /* 232 /*
152 * When HO == GO == 1 the hardware treats that as GO == HO == 0 233 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event)
156 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | 237 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
157 ARCH_PERFMON_EVENTSEL_OS); 238 ARCH_PERFMON_EVENTSEL_OS);
158 else if (event->attr.exclude_host) 239 else if (event->attr.exclude_host)
159 event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; 240 event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
160 else if (event->attr.exclude_guest) 241 else if (event->attr.exclude_guest)
161 event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; 242 event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
243
244 return 0;
245}
246
247/*
248 * NB counters do not support the following event select bits:
249 * Host/Guest only
250 * Counter mask
251 * Invert counter mask
252 * Edge detect
253 * OS/User mode
254 */
255static int amd_nb_hw_config(struct perf_event *event)
256{
257 /* for NB, we only allow system wide counting mode */
258 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
259 return -EINVAL;
260
261 if (event->attr.exclude_user || event->attr.exclude_kernel ||
262 event->attr.exclude_host || event->attr.exclude_guest)
263 return -EINVAL;
162 264
163 if (event->attr.type != PERF_TYPE_RAW) 265 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
164 return 0; 266 ARCH_PERFMON_EVENTSEL_OS);
165 267
166 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; 268 if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
269 ARCH_PERFMON_EVENTSEL_INT))
270 return -EINVAL;
167 271
168 return 0; 272 return 0;
169} 273}
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
181 return (hwc->config & 0xe0) == 0xe0; 285 return (hwc->config & 0xe0) == 0xe0;
182} 286}
183 287
288static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
289{
290 return amd_nb_event_constraint && amd_is_nb_event(hwc);
291}
292
184static inline int amd_has_nb(struct cpu_hw_events *cpuc) 293static inline int amd_has_nb(struct cpu_hw_events *cpuc)
185{ 294{
186 struct amd_nb *nb = cpuc->amd_nb; 295 struct amd_nb *nb = cpuc->amd_nb;
@@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
188 return nb && nb->nb_id != -1; 297 return nb && nb->nb_id != -1;
189} 298}
190 299
191static void amd_put_event_constraints(struct cpu_hw_events *cpuc, 300static int amd_pmu_hw_config(struct perf_event *event)
192 struct perf_event *event) 301{
302 int ret;
303
304 /* pass precise event sampling to ibs: */
305 if (event->attr.precise_ip && get_ibs_caps())
306 return -ENOENT;
307
308 if (has_branch_stack(event))
309 return -EOPNOTSUPP;
310
311 ret = x86_pmu_hw_config(event);
312 if (ret)
313 return ret;
314
315 if (event->attr.type == PERF_TYPE_RAW)
316 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
317
318 if (amd_is_perfctr_nb_event(&event->hw))
319 return amd_nb_hw_config(event);
320
321 return amd_core_hw_config(event);
322}
323
324static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
325 struct perf_event *event)
193{ 326{
194 struct hw_perf_event *hwc = &event->hw;
195 struct amd_nb *nb = cpuc->amd_nb; 327 struct amd_nb *nb = cpuc->amd_nb;
196 int i; 328 int i;
197 329
198 /* 330 /*
199 * only care about NB events
200 */
201 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
202 return;
203
204 /*
205 * need to scan whole list because event may not have 331 * need to scan whole list because event may not have
206 * been assigned during scheduling 332 * been assigned during scheduling
207 * 333 *
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
215 } 341 }
216} 342}
217 343
344static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
345{
346 int core_id = cpu_data(smp_processor_id()).cpu_core_id;
347
348 /* deliver interrupts only to this core */
349 if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
350 hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
351 hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
352 hwc->config |= (u64)(core_id) <<
353 AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
354 }
355}
356
218 /* 357 /*
219 * AMD64 NorthBridge events need special treatment because 358 * AMD64 NorthBridge events need special treatment because
220 * counter access needs to be synchronized across all cores 359 * counter access needs to be synchronized across all cores
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
247 * 386 *
248 * Given that resources are allocated (cmpxchg), they must be 387 * Given that resources are allocated (cmpxchg), they must be
249 * eventually freed for others to use. This is accomplished by 388 * eventually freed for others to use. This is accomplished by
250 * calling amd_put_event_constraints(). 389 * calling __amd_put_nb_event_constraints()
251 * 390 *
252 * Non NB events are not impacted by this restriction. 391 * Non NB events are not impacted by this restriction.
253 */ 392 */
254static struct event_constraint * 393static struct event_constraint *
255amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 394__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
395 struct event_constraint *c)
256{ 396{
257 struct hw_perf_event *hwc = &event->hw; 397 struct hw_perf_event *hwc = &event->hw;
258 struct amd_nb *nb = cpuc->amd_nb; 398 struct amd_nb *nb = cpuc->amd_nb;
259 struct perf_event *old = NULL; 399 struct perf_event *old;
260 int max = x86_pmu.num_counters; 400 int idx, new = -1;
261 int i, j, k = -1;
262 401
263 /* 402 if (!c)
264 * if not NB event or no NB, then no constraints 403 c = &unconstrained;
265 */ 404
266 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) 405 if (cpuc->is_fake)
267 return &unconstrained; 406 return c;
268 407
269 /* 408 /*
270 * detect if already present, if so reuse 409 * detect if already present, if so reuse
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
276 * because of successive calls to x86_schedule_events() from 415 * because of successive calls to x86_schedule_events() from
277 * hw_perf_group_sched_in() without hw_perf_enable() 416 * hw_perf_group_sched_in() without hw_perf_enable()
278 */ 417 */
279 for (i = 0; i < max; i++) { 418 for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
280 /* 419 if (new == -1 || hwc->idx == idx)
281 * keep track of first free slot 420 /* assign free slot, prefer hwc->idx */
282 */ 421 old = cmpxchg(nb->owners + idx, NULL, event);
283 if (k == -1 && !nb->owners[i]) 422 else if (nb->owners[idx] == event)
284 k = i; 423 /* event already present */
424 old = event;
425 else
426 continue;
427
428 if (old && old != event)
429 continue;
430
431 /* reassign to this slot */
432 if (new != -1)
433 cmpxchg(nb->owners + new, event, NULL);
434 new = idx;
285 435
286 /* already present, reuse */ 436 /* already present, reuse */
287 if (nb->owners[i] == event) 437 if (old == event)
288 goto done;
289 }
290 /*
291 * not present, so grab a new slot
292 * starting either at:
293 */
294 if (hwc->idx != -1) {
295 /* previous assignment */
296 i = hwc->idx;
297 } else if (k != -1) {
298 /* start from free slot found */
299 i = k;
300 } else {
301 /*
302 * event not found, no slot found in
303 * first pass, try again from the
304 * beginning
305 */
306 i = 0;
307 }
308 j = i;
309 do {
310 old = cmpxchg(nb->owners+i, NULL, event);
311 if (!old)
312 break; 438 break;
313 if (++i == max) 439 }
314 i = 0; 440
315 } while (i != j); 441 if (new == -1)
316done: 442 return &emptyconstraint;
317 if (!old) 443
318 return &nb->event_constraints[i]; 444 if (amd_is_perfctr_nb_event(hwc))
319 445 amd_nb_interrupt_hw_config(hwc);
320 return &emptyconstraint; 446
447 return &nb->event_constraints[new];
321} 448}
322 449
323static struct amd_nb *amd_alloc_nb(int cpu) 450static struct amd_nb *amd_alloc_nb(int cpu)
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu)
364 struct amd_nb *nb; 491 struct amd_nb *nb;
365 int i, nb_id; 492 int i, nb_id;
366 493
367 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; 494 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
368 495
369 if (boot_cpu_data.x86_max_cores < 2) 496 if (boot_cpu_data.x86_max_cores < 2)
370 return; 497 return;
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu)
407 } 534 }
408} 535}
409 536
537static struct event_constraint *
538amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
539{
540 /*
541 * if not NB event or no NB, then no constraints
542 */
543 if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
544 return &unconstrained;
545
546 return __amd_get_nb_event_constraints(cpuc, event,
547 amd_nb_event_constraint);
548}
549
550static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
551 struct perf_event *event)
552{
553 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
554 __amd_put_nb_event_constraints(cpuc, event);
555}
556
410PMU_FORMAT_ATTR(event, "config:0-7,32-35"); 557PMU_FORMAT_ATTR(event, "config:0-7,32-35");
411PMU_FORMAT_ATTR(umask, "config:8-15" ); 558PMU_FORMAT_ATTR(umask, "config:8-15" );
412PMU_FORMAT_ATTR(edge, "config:18" ); 559PMU_FORMAT_ATTR(edge, "config:18" );
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 643static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 644static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
498 645
646static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
647static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
648
499static struct event_constraint * 649static struct event_constraint *
500amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) 650amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
501{ 651{
@@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
561 return &amd_f15_PMC20; 711 return &amd_f15_PMC20;
562 } 712 }
563 case AMD_EVENT_NB: 713 case AMD_EVENT_NB:
564 /* not yet implemented */ 714 return __amd_get_nb_event_constraints(cpuc, event,
565 return &emptyconstraint; 715 amd_nb_event_constraint);
566 default: 716 default:
567 return &emptyconstraint; 717 return &emptyconstraint;
568 } 718 }
@@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = {
587 .schedule_events = x86_schedule_events, 737 .schedule_events = x86_schedule_events,
588 .eventsel = MSR_K7_EVNTSEL0, 738 .eventsel = MSR_K7_EVNTSEL0,
589 .perfctr = MSR_K7_PERFCTR0, 739 .perfctr = MSR_K7_PERFCTR0,
740 .addr_offset = amd_pmu_addr_offset,
741 .rdpmc_index = amd_pmu_rdpmc_index,
590 .event_map = amd_pmu_event_map, 742 .event_map = amd_pmu_event_map,
591 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 743 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
592 .num_counters = AMD64_NUM_COUNTERS, 744 .num_counters = AMD64_NUM_COUNTERS,
@@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = {
608 760
609static int setup_event_constraints(void) 761static int setup_event_constraints(void)
610{ 762{
611 if (boot_cpu_data.x86 >= 0x15) 763 if (boot_cpu_data.x86 == 0x15)
612 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; 764 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
613 return 0; 765 return 0;
614} 766}
@@ -638,6 +790,23 @@ static int setup_perfctr_core(void)
638 return 0; 790 return 0;
639} 791}
640 792
793static int setup_perfctr_nb(void)
794{
795 if (!cpu_has_perfctr_nb)
796 return -ENODEV;
797
798 x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
799
800 if (cpu_has_perfctr_core)
801 amd_nb_event_constraint = &amd_NBPMC96;
802 else
803 amd_nb_event_constraint = &amd_NBPMC74;
804
805 printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
806
807 return 0;
808}
809
641__init int amd_pmu_init(void) 810__init int amd_pmu_init(void)
642{ 811{
643 /* Performance-monitoring supported from K7 and later: */ 812 /* Performance-monitoring supported from K7 and later: */
@@ -648,6 +817,7 @@ __init int amd_pmu_init(void)
648 817
649 setup_event_constraints(); 818 setup_event_constraints();
650 setup_perfctr_core(); 819 setup_perfctr_core();
820 setup_perfctr_nb();
651 821
652 /* Events are common for all AMDs */ 822 /* Events are common for all AMDs */
653 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 823 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
@@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void)
678 * SVM is disabled the Guest-only bits still gets set and the counter 848 * SVM is disabled the Guest-only bits still gets set and the counter
679 * will not count anything. 849 * will not count anything.
680 */ 850 */
681 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; 851 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
682 852
683 /* Reload all events */ 853 /* Reload all events */
684 x86_pmu_disable_all(); 854 x86_pmu_disable_all();
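
The comment block introducing amd_pmu_addr_offset() spells out three MSR layouts: legacy (4 counters from 0xc0010000, spaced by 1), core extension (6 counters from 0xc0010200, spaced by 2), and northbridge (4 counters from 0xc0010240, spaced by 2, indexed right above the core counters). That arithmetic can be checked with a stand-alone sketch; the helper below ignores the offset caching and constraint-mask handling of the real function.

#include <stdio.h>

#define LEGACY_BASE     0xc0010000u     /* 4 counters, offset by 1 */
#define CORE_BASE       0xc0010200u     /* 6 counters, offset by 2 */
#define NB_BASE         0xc0010240u     /* 4 NB counters, offset by 2 */

/*
 * Event-select MSR for counter 'index'. NB counters sit right above the
 * core counters, so 'first_nb' is the index of the first NB counter
 * (use -1 when there are none).
 */
static unsigned int amd_eventsel_msr(int index, int has_core_ext, int first_nb)
{
        if (first_nb >= 0 && index >= first_nb)
                return NB_BASE + ((index - first_nb) << 1);
        if (has_core_ext)
                return CORE_BASE + (index << 1);
        return LEGACY_BASE + index;
}

int main(void)
{
        printf("legacy ctr3: %#x\n", amd_eventsel_msr(3, 0, -1));
        printf("core   ctr5: %#x\n", amd_eventsel_msr(5, 1, -1));
        printf("NB     ctr7: %#x\n", amd_eventsel_msr(7, 1, 6));   /* second NB counter */
        return 0;
}
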
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 6336bcbd0618..5f0581e713c2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -528,7 +528,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
528 if (!test_bit(IBS_STARTED, pcpu->state)) { 528 if (!test_bit(IBS_STARTED, pcpu->state)) {
529 /* 529 /*
530 * Catch spurious interrupts after stopping IBS: After 530 * Catch spurious interrupts after stopping IBS: After
531 * disabling IBS there could be still incomming NMIs 531 * disabling IBS there could be still incoming NMIs
532 * with samples that even have the valid bit cleared. 532 * with samples that even have the valid bit cleared.
533 * Mark all this NMIs as handled. 533 * Mark all this NMIs as handled.
534 */ 534 */
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 93b9e1181f83..4914e94ad6e8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2019,7 +2019,10 @@ __init int intel_pmu_init(void)
2019 break; 2019 break;
2020 2020
2021 case 28: /* Atom */ 2021 case 28: /* Atom */
2022 case 54: /* Cedariew */ 2022 case 38: /* Lincroft */
2023 case 39: /* Penwell */
2024 case 53: /* Cloverview */
2025 case 54: /* Cedarview */
2023 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 2026 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2024 sizeof(hw_cache_event_ids)); 2027 sizeof(hw_cache_event_ids));
2025 2028
@@ -2084,6 +2087,7 @@ __init int intel_pmu_init(void)
2084 pr_cont("SandyBridge events, "); 2087 pr_cont("SandyBridge events, ");
2085 break; 2088 break;
2086 case 58: /* IvyBridge */ 2089 case 58: /* IvyBridge */
2090 case 62: /* IvyBridge EP */
2087 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 2091 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
2088 sizeof(hw_cache_event_ids)); 2092 sizeof(hw_cache_event_ids));
2089 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 2093 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 3cf3d97cce3a..b43200dbfe7e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2500,7 +2500,7 @@ static bool pcidrv_registered;
2500/* 2500/*
2501 * add a pci uncore device 2501 * add a pci uncore device
2502 */ 2502 */
2503static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) 2503static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
2504{ 2504{
2505 struct intel_uncore_pmu *pmu; 2505 struct intel_uncore_pmu *pmu;
2506 struct intel_uncore_box *box; 2506 struct intel_uncore_box *box;
@@ -2571,8 +2571,8 @@ static void uncore_pci_remove(struct pci_dev *pdev)
2571 kfree(box); 2571 kfree(box);
2572} 2572}
2573 2573
2574static int __devinit uncore_pci_probe(struct pci_dev *pdev, 2574static int uncore_pci_probe(struct pci_dev *pdev,
2575 const struct pci_device_id *id) 2575 const struct pci_device_id *id)
2576{ 2576{
2577 struct intel_uncore_type *type; 2577 struct intel_uncore_type *type;
2578 2578
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index f2af39f5dc3d..4820c232a0b9 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -19,7 +19,7 @@ static const u64 p6_perfmon_event_map[] =
19 19
20}; 20};
21 21
22static __initconst u64 p6_hw_cache_event_ids 22static u64 p6_hw_cache_event_ids
23 [PERF_COUNT_HW_CACHE_MAX] 23 [PERF_COUNT_HW_CACHE_MAX]
24 [PERF_COUNT_HW_CACHE_OP_MAX] 24 [PERF_COUNT_HW_CACHE_OP_MAX]
25 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 25 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 3286a92e662a..e280253f6f94 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -28,7 +28,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
28{ 28{
29 seq_printf(m, 29 seq_printf(m,
30 "fdiv_bug\t: %s\n" 30 "fdiv_bug\t: %s\n"
31 "hlt_bug\t\t: %s\n"
32 "f00f_bug\t: %s\n" 31 "f00f_bug\t: %s\n"
33 "coma_bug\t: %s\n" 32 "coma_bug\t: %s\n"
34 "fpu\t\t: %s\n" 33 "fpu\t\t: %s\n"
@@ -36,7 +35,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
36 "cpuid level\t: %d\n" 35 "cpuid level\t: %d\n"
37 "wp\t\t: %s\n", 36 "wp\t\t: %s\n",
38 c->fdiv_bug ? "yes" : "no", 37 c->fdiv_bug ? "yes" : "no",
39 c->hlt_works_ok ? "no" : "yes",
40 c->f00f_bug ? "yes" : "no", 38 c->f00f_bug ? "yes" : "no",
41 c->coma_bug ? "yes" : "no", 39 c->coma_bug ? "yes" : "no",
42 c->hard_math ? "yes" : "no", 40 c->hard_math ? "yes" : "no",
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d22d0c4edcfd..03a36321ec54 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -33,6 +33,9 @@
33 33
34#define VMWARE_PORT_CMD_GETVERSION 10 34#define VMWARE_PORT_CMD_GETVERSION 10
35#define VMWARE_PORT_CMD_GETHZ 45 35#define VMWARE_PORT_CMD_GETHZ 45
36#define VMWARE_PORT_CMD_GETVCPU_INFO 68
37#define VMWARE_PORT_CMD_LEGACY_X2APIC 3
38#define VMWARE_PORT_CMD_VCPU_RESERVED 31
36 39
37#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ 40#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
38 __asm__("inl (%%dx)" : \ 41 __asm__("inl (%%dx)" : \
@@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
125 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); 128 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
126} 129}
127 130
131/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
132static bool __init vmware_legacy_x2apic_available(void)
133{
134 uint32_t eax, ebx, ecx, edx;
135 VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
136 return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
137 (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
138}
139
128const __refconst struct hypervisor_x86 x86_hyper_vmware = { 140const __refconst struct hypervisor_x86 x86_hyper_vmware = {
129 .name = "VMware", 141 .name = "VMware",
130 .detect = vmware_platform, 142 .detect = vmware_platform,
131 .set_cpu_features = vmware_set_cpu_features, 143 .set_cpu_features = vmware_set_cpu_features,
132 .init_platform = vmware_platform_setup, 144 .init_platform = vmware_platform_setup,
145 .x2apic_available = vmware_legacy_x2apic_available,
133}; 146};
134EXPORT_SYMBOL(x86_hyper_vmware); 147EXPORT_SYMBOL(x86_hyper_vmware);
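
vmware_legacy_x2apic_available() treats the GETVCPU_INFO result as a bitfield: bit 31 (VCPU_RESERVED) must be clear for the answer to be valid, and bit 3 (LEGACY_X2APIC) then reports whether x2APIC works without interrupt remapping. The VMWARE_PORT hypercall cannot be reproduced here, but the decoding is easy to sketch with the bit numbers taken from the new #defines:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VCPU_RESERVED   31      /* VMWARE_PORT_CMD_VCPU_RESERVED */
#define LEGACY_X2APIC   3       /* VMWARE_PORT_CMD_LEGACY_X2APIC */

static bool legacy_x2apic_available(uint32_t eax)
{
        return (eax & (1u << VCPU_RESERVED)) == 0 &&
               (eax & (1u << LEGACY_X2APIC)) != 0;
}

int main(void)
{
        printf("%d\n", legacy_x2apic_available(1u << LEGACY_X2APIC)); /* 1: capable */
        printf("%d\n", legacy_x2apic_available(1u << VCPU_RESERVED)); /* 0: info invalid */
        return 0;
}
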
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ae42418bc50f..c8797d55b245 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -232,7 +232,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
232 232
233 bust_spinlocks(0); 233 bust_spinlocks(0);
234 die_owner = -1; 234 die_owner = -1;
235 add_taint(TAINT_DIE); 235 add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
236 die_nest_count--; 236 die_nest_count--;
237 if (!die_nest_count) 237 if (!die_nest_count)
238 /* Nest count reaches zero, release the lock. */ 238 /* Nest count reaches zero, release the lock. */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index df06ade26bef..d32abeabbda5 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -835,7 +835,7 @@ static int __init parse_memopt(char *p)
835} 835}
836early_param("mem", parse_memopt); 836early_param("mem", parse_memopt);
837 837
838static int __init parse_memmap_opt(char *p) 838static int __init parse_memmap_one(char *p)
839{ 839{
840 char *oldp; 840 char *oldp;
841 u64 start_at, mem_size; 841 u64 start_at, mem_size;
@@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p)
877 877
878 return *p == '\0' ? 0 : -EINVAL; 878 return *p == '\0' ? 0 : -EINVAL;
879} 879}
880static int __init parse_memmap_opt(char *str)
881{
882 while (str) {
883 char *k = strchr(str, ',');
884
885 if (k)
886 *k++ = 0;
887
888 parse_memmap_one(str);
889 str = k;
890 }
891
892 return 0;
893}
880early_param("memmap", parse_memmap_opt); 894early_param("memmap", parse_memmap_opt);
881 895
882void __init finish_e820_parsing(void) 896void __init finish_e820_parsing(void)
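
The e820.c change lets one memmap= option carry several comma-separated regions: parse_memmap_opt() now just splits on ',' and hands each token to the old parser, renamed parse_memmap_one(). The splitting loop is simple enough to try in user space; the per-token parser below is a stub, not the real region parser.

#include <stdio.h>
#include <string.h>

/* Stub for parse_memmap_one(): just echo the token it was handed. */
static int parse_one(char *tok)
{
        printf("region spec: '%s'\n", tok);
        return 0;
}

/* Same shape as the new parse_memmap_opt(): cut at each ',' and parse in turn. */
static int parse_memmap(char *str)
{
        while (str) {
                char *k = strchr(str, ',');

                if (k)
                        *k++ = 0;

                parse_one(str);
                str = k;
        }
        return 0;
}

int main(void)
{
        char cmdline[] = "4G!4G,512M@8G";       /* hypothetical memmap= value */

        parse_memmap(cmdline);
        return 0;
}
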
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index ff84d5469d77..8f3e2dec1df3 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -699,51 +699,6 @@ END(syscall_badsys)
699 */ 699 */
700 .popsection 700 .popsection
701 701
702/*
703 * System calls that need a pt_regs pointer.
704 */
705#define PTREGSCALL0(name) \
706ENTRY(ptregs_##name) ; \
707 leal 4(%esp),%eax; \
708 jmp sys_##name; \
709ENDPROC(ptregs_##name)
710
711#define PTREGSCALL1(name) \
712ENTRY(ptregs_##name) ; \
713 leal 4(%esp),%edx; \
714 movl (PT_EBX+4)(%esp),%eax; \
715 jmp sys_##name; \
716ENDPROC(ptregs_##name)
717
718#define PTREGSCALL2(name) \
719ENTRY(ptregs_##name) ; \
720 leal 4(%esp),%ecx; \
721 movl (PT_ECX+4)(%esp),%edx; \
722 movl (PT_EBX+4)(%esp),%eax; \
723 jmp sys_##name; \
724ENDPROC(ptregs_##name)
725
726#define PTREGSCALL3(name) \
727ENTRY(ptregs_##name) ; \
728 CFI_STARTPROC; \
729 leal 4(%esp),%eax; \
730 pushl_cfi %eax; \
731 movl PT_EDX(%eax),%ecx; \
732 movl PT_ECX(%eax),%edx; \
733 movl PT_EBX(%eax),%eax; \
734 call sys_##name; \
735 addl $4,%esp; \
736 CFI_ADJUST_CFA_OFFSET -4; \
737 ret; \
738 CFI_ENDPROC; \
739ENDPROC(ptregs_##name)
740
741PTREGSCALL1(iopl)
742PTREGSCALL0(sigreturn)
743PTREGSCALL0(rt_sigreturn)
744PTREGSCALL2(vm86)
745PTREGSCALL1(vm86old)
746
747.macro FIXUP_ESPFIX_STACK 702.macro FIXUP_ESPFIX_STACK
748/* 703/*
749 * Switch back for ESPFIX stack to the normal zerobased stack 704 * Switch back for ESPFIX stack to the normal zerobased stack
@@ -1065,7 +1020,6 @@ ENTRY(xen_failsafe_callback)
1065 lea 16(%esp),%esp 1020 lea 16(%esp),%esp
1066 CFI_ADJUST_CFA_OFFSET -16 1021 CFI_ADJUST_CFA_OFFSET -16
1067 jz 5f 1022 jz 5f
1068 addl $16,%esp
1069 jmp iret_exc 1023 jmp iret_exc
10705: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ 10245: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
1071 SAVE_ALL 1025 SAVE_ALL
@@ -1092,11 +1046,18 @@ ENTRY(xen_failsafe_callback)
1092 _ASM_EXTABLE(4b,9b) 1046 _ASM_EXTABLE(4b,9b)
1093ENDPROC(xen_failsafe_callback) 1047ENDPROC(xen_failsafe_callback)
1094 1048
1095BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, 1049BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1096 xen_evtchn_do_upcall) 1050 xen_evtchn_do_upcall)
1097 1051
1098#endif /* CONFIG_XEN */ 1052#endif /* CONFIG_XEN */
1099 1053
1054#if IS_ENABLED(CONFIG_HYPERV)
1055
1056BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1057 hyperv_vector_handler)
1058
1059#endif /* CONFIG_HYPERV */
1060
1100#ifdef CONFIG_FUNCTION_TRACER 1061#ifdef CONFIG_FUNCTION_TRACER
1101#ifdef CONFIG_DYNAMIC_FTRACE 1062#ifdef CONFIG_DYNAMIC_FTRACE
1102 1063
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 07a7a04529bc..c1d01e6ca790 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,23 +828,6 @@ int_restore_rest:
828 CFI_ENDPROC 828 CFI_ENDPROC
829END(system_call) 829END(system_call)
830 830
831/*
832 * Certain special system calls that need to save a complete full stack frame.
833 */
834 .macro PTREGSCALL label,func,arg
835ENTRY(\label)
836 PARTIAL_FRAME 1 8 /* offset 8: return address */
837 subq $REST_SKIP, %rsp
838 CFI_ADJUST_CFA_OFFSET REST_SKIP
839 call save_rest
840 DEFAULT_FRAME 0 8 /* offset 8: return address */
841 leaq 8(%rsp), \arg /* pt_regs pointer */
842 call \func
843 jmp ptregscall_common
844 CFI_ENDPROC
845END(\label)
846 .endm
847
848 .macro FORK_LIKE func 831 .macro FORK_LIKE func
849ENTRY(stub_\func) 832ENTRY(stub_\func)
850 CFI_STARTPROC 833 CFI_STARTPROC
@@ -861,10 +844,22 @@ ENTRY(stub_\func)
861END(stub_\func) 844END(stub_\func)
862 .endm 845 .endm
863 846
847 .macro FIXED_FRAME label,func
848ENTRY(\label)
849 CFI_STARTPROC
850 PARTIAL_FRAME 0 8 /* offset 8: return address */
851 FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
852 call \func
853 RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
854 ret
855 CFI_ENDPROC
856END(\label)
857 .endm
858
864 FORK_LIKE clone 859 FORK_LIKE clone
865 FORK_LIKE fork 860 FORK_LIKE fork
866 FORK_LIKE vfork 861 FORK_LIKE vfork
867 PTREGSCALL stub_iopl, sys_iopl, %rsi 862 FIXED_FRAME stub_iopl, sys_iopl
868 863
869ENTRY(ptregscall_common) 864ENTRY(ptregscall_common)
870 DEFAULT_FRAME 1 8 /* offset 8: return address */ 865 DEFAULT_FRAME 1 8 /* offset 8: return address */
@@ -886,7 +881,6 @@ ENTRY(stub_execve)
886 SAVE_REST 881 SAVE_REST
887 FIXUP_TOP_OF_STACK %r11 882 FIXUP_TOP_OF_STACK %r11
888 call sys_execve 883 call sys_execve
889 RESTORE_TOP_OF_STACK %r11
890 movq %rax,RAX(%rsp) 884 movq %rax,RAX(%rsp)
891 RESTORE_REST 885 RESTORE_REST
892 jmp int_ret_from_sys_call 886 jmp int_ret_from_sys_call
@@ -902,7 +896,6 @@ ENTRY(stub_rt_sigreturn)
902 addq $8, %rsp 896 addq $8, %rsp
903 PARTIAL_FRAME 0 897 PARTIAL_FRAME 0
904 SAVE_REST 898 SAVE_REST
905 movq %rsp,%rdi
906 FIXUP_TOP_OF_STACK %r11 899 FIXUP_TOP_OF_STACK %r11
907 call sys_rt_sigreturn 900 call sys_rt_sigreturn
908 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 901 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
@@ -917,7 +910,6 @@ ENTRY(stub_x32_rt_sigreturn)
917 addq $8, %rsp 910 addq $8, %rsp
918 PARTIAL_FRAME 0 911 PARTIAL_FRAME 0
919 SAVE_REST 912 SAVE_REST
920 movq %rsp,%rdi
921 FIXUP_TOP_OF_STACK %r11 913 FIXUP_TOP_OF_STACK %r11
922 call sys32_x32_rt_sigreturn 914 call sys32_x32_rt_sigreturn
923 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 915 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
@@ -1454,11 +1446,16 @@ ENTRY(xen_failsafe_callback)
1454 CFI_ENDPROC 1446 CFI_ENDPROC
1455END(xen_failsafe_callback) 1447END(xen_failsafe_callback)
1456 1448
1457apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ 1449apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
1458 xen_hvm_callback_vector xen_evtchn_do_upcall 1450 xen_hvm_callback_vector xen_evtchn_do_upcall
1459 1451
1460#endif /* CONFIG_XEN */ 1452#endif /* CONFIG_XEN */
1461 1453
1454#if IS_ENABLED(CONFIG_HYPERV)
1455apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
1456 hyperv_callback_vector hyperv_vector_handler
1457#endif /* CONFIG_HYPERV */
1458
1462/* 1459/*
1463 * Some functions should be protected against kprobes 1460 * Some functions should be protected against kprobes
1464 */ 1461 */
@@ -1781,6 +1778,7 @@ first_nmi:
1781 * Leave room for the "copied" frame 1778 * Leave room for the "copied" frame
1782 */ 1779 */
1783 subq $(5*8), %rsp 1780 subq $(5*8), %rsp
1781 CFI_ADJUST_CFA_OFFSET 5*8
1784 1782
1785 /* Copy the stack frame to the Saved frame */ 1783 /* Copy the stack frame to the Saved frame */
1786 .rept 5 1784 .rept 5
@@ -1863,10 +1861,8 @@ end_repeat_nmi:
1863nmi_swapgs: 1861nmi_swapgs:
1864 SWAPGS_UNSAFE_STACK 1862 SWAPGS_UNSAFE_STACK
1865nmi_restore: 1863nmi_restore:
1866 RESTORE_ALL 8 1864 /* Pop the extra iret frame at once */
1867 1865 RESTORE_ALL 6*8
1868 /* Pop the extra iret frame */
1869 addq $(5*8), %rsp
1870 1866
1871 /* Clear the NMI executing stack variable */ 1867 /* Clear the NMI executing stack variable */
1872 movq $0, 5*8(%rsp) 1868 movq $0, 5*8(%rsp)
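The folded frame pop at the end of the NMI path is plain arithmetic: the old sequence popped the registers with an extra 8-byte skip and then dropped the 5*8-byte copied iret frame with a separate addq; 8 + 5*8 = 6*8, so a single RESTORE_ALL 6*8 now does both at once.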
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d414029f1d8..42a392a9fd02 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
89 * kernel identity mapping to modify code. 89 * kernel identity mapping to modify code.
90 */ 90 */
91 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
92 ip = (unsigned long)__va(__pa(ip)); 92 ip = (unsigned long)__va(__pa_symbol(ip));
93 93
94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
95} 95}
@@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size)
279 * kernel identity mapping to modify code. 279 * kernel identity mapping to modify code.
280 */ 280 */
281 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 281 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
282 ip = (unsigned long)__va(__pa(ip)); 282 ip = (unsigned long)__va(__pa_symbol(ip));
283 283
284 return probe_kernel_write((void *)ip, val, size); 284 return probe_kernel_write((void *)ip, val, size);
285} 285}
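The __pa() -> __pa_symbol() switch is about which mapping the translation assumes: _text/_etext are kernel-image symbols, so their physical address follows directly from the kernel-text mapping, while __pa() has to cope with both that mapping and the direct map. A simplified sketch of the two translations (an approximation only; the real helpers add CONFIG_DEBUG_VIRTUAL checking):

	#include <asm/page.h>

	/* approximation of __pa_symbol(): valid only for kernel-image addresses */
	static unsigned long sketch_pa_symbol(unsigned long x)
	{
		return x - __START_KERNEL_map + phys_base;
	}

	/* approximation of __pa(): handles both the kernel image and the direct map */
	static unsigned long sketch_pa(unsigned long x)
	{
		unsigned long y = x - __START_KERNEL_map;

		/* y underflows for direct-map addresses, which selects the PAGE_OFFSET path */
		return y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
	}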
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c18f59d10101..138463a24877 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
18#include <asm/io_apic.h> 18#include <asm/io_apic.h>
19#include <asm/bios_ebda.h> 19#include <asm/bios_ebda.h>
20#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
21#include <asm/bootparam_utils.h>
21 22
22static void __init i386_default_early_setup(void) 23static void __init i386_default_early_setup(void)
23{ 24{
@@ -30,19 +31,7 @@ static void __init i386_default_early_setup(void)
30 31
31void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
32{ 33{
33 memblock_reserve(__pa_symbol(&_text), 34 sanitize_boot_params(&boot_params);
34 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
35
36#ifdef CONFIG_BLK_DEV_INITRD
37 /* Reserve INITRD */
38 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
39 /* Assume only end is not page aligned */
40 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
41 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
42 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
43 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
44 }
45#endif
46 35
47 /* Call the subarch specific early setup function */ 36 /* Call the subarch specific early setup function */
48 switch (boot_params.hdr.hardware_subarch) { 37 switch (boot_params.hdr.hardware_subarch) {
@@ -57,11 +46,5 @@ void __init i386_start_kernel(void)
57 break; 46 break;
58 } 47 }
59 48
60 /*
61 * At this point everything still needed from the boot loader
62 * or BIOS or kernel text should be early reserved or marked not
63 * RAM in e820. All other memory is free game.
64 */
65
66 start_kernel(); 49 start_kernel();
67} 50}
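With the early memblock reservations gone from here, the notable addition is sanitize_boot_params(), which defends against bootloaders that pass a stale struct boot_params with garbage in fields the kernel later trusts. Roughly (a hedged sketch, not the verbatim helper from <asm/bootparam_utils.h>, and the field list is illustrative rather than exhaustive):

	#include <asm/bootparam.h>

	static void sketch_sanitize_boot_params(struct boot_params *bp)
	{
		if (bp->sentinel) {
			/* a legacy bootloader copied stale data around the sentinel:
			 * everything outside setup_header is suspect, so clear it */
			bp->ext_ramdisk_image = 0;
			bp->ext_ramdisk_size  = 0;
			bp->ext_cmd_line_ptr  = 0;
			/* ... and the remaining non-setup_header fields ... */
		}
	}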
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 037df57a99ac..c5e403f6d869 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,12 +25,84 @@
25#include <asm/kdebug.h> 25#include <asm/kdebug.h>
26#include <asm/e820.h> 26#include <asm/e820.h>
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/bootparam_utils.h>
29#include <asm/microcode.h>
28 30
29static void __init zap_identity_mappings(void) 31/*
32 * Manage page tables very early on.
33 */
34extern pgd_t early_level4_pgt[PTRS_PER_PGD];
35extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
36static unsigned int __initdata next_early_pgt = 2;
37
38/* Wipe all early page tables except for the kernel symbol map */
39static void __init reset_early_page_tables(void)
30{ 40{
31 pgd_t *pgd = pgd_offset_k(0UL); 41 unsigned long i;
32 pgd_clear(pgd); 42
33 __flush_tlb_all(); 43 for (i = 0; i < PTRS_PER_PGD-1; i++)
44 early_level4_pgt[i].pgd = 0;
45
46 next_early_pgt = 0;
47
48 write_cr3(__pa(early_level4_pgt));
49}
50
51/* Create a new PMD entry */
52int __init early_make_pgtable(unsigned long address)
53{
54 unsigned long physaddr = address - __PAGE_OFFSET;
55 unsigned long i;
56 pgdval_t pgd, *pgd_p;
57 pudval_t pud, *pud_p;
58 pmdval_t pmd, *pmd_p;
59
60 /* Invalid address or early pgt is done ? */
61 if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
62 return -1;
63
64again:
65 pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
66 pgd = *pgd_p;
67
68 /*
69 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
70 * critical -- __PAGE_OFFSET would point us back into the dynamic
71 * range and we might end up looping forever...
72 */
73 if (pgd)
74 pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
75 else {
76 if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
77 reset_early_page_tables();
78 goto again;
79 }
80
81 pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
82 for (i = 0; i < PTRS_PER_PUD; i++)
83 pud_p[i] = 0;
84 *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
85 }
86 pud_p += pud_index(address);
87 pud = *pud_p;
88
89 if (pud)
90 pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
91 else {
92 if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
93 reset_early_page_tables();
94 goto again;
95 }
96
97 pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
98 for (i = 0; i < PTRS_PER_PMD; i++)
99 pmd_p[i] = 0;
100 *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
101 }
102 pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL);
103 pmd_p[pmd_index(address)] = pmd;
104
105 return 0;
34} 106}
35 107
36/* Don't add a printk in there. printk relies on the PDA which is not initialized 108/* Don't add a printk in there. printk relies on the PDA which is not initialized
@@ -41,13 +113,25 @@ static void __init clear_bss(void)
41 (unsigned long) __bss_stop - (unsigned long) __bss_start); 113 (unsigned long) __bss_stop - (unsigned long) __bss_start);
42} 114}
43 115
116static unsigned long get_cmd_line_ptr(void)
117{
118 unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
119
120 cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;
121
122 return cmd_line_ptr;
123}
124
44static void __init copy_bootdata(char *real_mode_data) 125static void __init copy_bootdata(char *real_mode_data)
45{ 126{
46 char * command_line; 127 char * command_line;
128 unsigned long cmd_line_ptr;
47 129
48 memcpy(&boot_params, real_mode_data, sizeof boot_params); 130 memcpy(&boot_params, real_mode_data, sizeof boot_params);
49 if (boot_params.hdr.cmd_line_ptr) { 131 sanitize_boot_params(&boot_params);
50 command_line = __va(boot_params.hdr.cmd_line_ptr); 132 cmd_line_ptr = get_cmd_line_ptr();
133 if (cmd_line_ptr) {
134 command_line = __va(cmd_line_ptr);
51 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); 135 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
52 } 136 }
53} 137}
@@ -70,54 +154,40 @@ void __init x86_64_start_kernel(char * real_mode_data)
70 (__START_KERNEL & PGDIR_MASK))); 154 (__START_KERNEL & PGDIR_MASK)));
71 BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); 155 BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
72 156
157 /* Kill off the identity-map trampoline */
158 reset_early_page_tables();
159
73 /* clear bss before set_intr_gate with early_idt_handler */ 160 /* clear bss before set_intr_gate with early_idt_handler */
74 clear_bss(); 161 clear_bss();
75 162
76 /* Make NULL pointers segfault */ 163 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
77 zap_identity_mappings();
78
79 max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
80
81 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
82#ifdef CONFIG_EARLY_PRINTK
83 set_intr_gate(i, &early_idt_handlers[i]); 164 set_intr_gate(i, &early_idt_handlers[i]);
84#else
85 set_intr_gate(i, early_idt_handler);
86#endif
87 }
88 load_idt((const struct desc_ptr *)&idt_descr); 165 load_idt((const struct desc_ptr *)&idt_descr);
89 166
167 copy_bootdata(__va(real_mode_data));
168
169 /*
170 * Load microcode early on BSP.
171 */
172 load_ucode_bsp();
173
90 if (console_loglevel == 10) 174 if (console_loglevel == 10)
91 early_printk("Kernel alive\n"); 175 early_printk("Kernel alive\n");
92 176
177 clear_page(init_level4_pgt);
178 /* set init_level4_pgt kernel high mapping*/
179 init_level4_pgt[511] = early_level4_pgt[511];
180
93 x86_64_start_reservations(real_mode_data); 181 x86_64_start_reservations(real_mode_data);
94} 182}
95 183
96void __init x86_64_start_reservations(char *real_mode_data) 184void __init x86_64_start_reservations(char *real_mode_data)
97{ 185{
 98 copy_bootdata(__va(real_mode_data)); 186 /* version is never zero once boot data has been copied */
99 187 if (!boot_params.hdr.version)
100 memblock_reserve(__pa_symbol(&_text), 188 copy_bootdata(__va(real_mode_data));
101 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
102
103#ifdef CONFIG_BLK_DEV_INITRD
104 /* Reserve INITRD */
105 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
106 /* Assume only end is not page aligned */
107 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
108 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
109 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
110 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
111 }
112#endif
113 189
114 reserve_ebda_region(); 190 reserve_ebda_region();
115 191
116 /*
117 * At this point everything still needed from the boot loader
118 * or BIOS or kernel text should be early reserved or marked not
119 * RAM in e820. All other memory is free game.
120 */
121
122 start_kernel(); 192 start_kernel();
123} 193}
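One recurring expression in early_make_pgtable() deserves a note: before the direct map exists, the only way to turn a page table's physical address back into something dereferenceable is through the kernel-image mapping, hence the repeated "+ __START_KERNEL_map - phys_base". As a sketch (the helper name is made up; the function above open-codes it):

	/* convert an early page-table physical address to a usable pointer via the
	 * kernel-text mapping -- valid only while running out of that mapping */
	static void *early_pgt_ptr(unsigned long phys)
	{
		return (void *)(phys + __START_KERNEL_map - phys_base);
	}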
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 8e7f6556028f..73afd11799ca 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -144,6 +144,11 @@ ENTRY(startup_32)
144 movl %eax, pa(olpc_ofw_pgd) 144 movl %eax, pa(olpc_ofw_pgd)
145#endif 145#endif
146 146
147#ifdef CONFIG_MICROCODE_EARLY
148 /* Early load ucode on BSP. */
149 call load_ucode_bsp
150#endif
151
147/* 152/*
148 * Initialize page tables. This creates a PDE and a set of page 153 * Initialize page tables. This creates a PDE and a set of page
149 * tables, which are located immediately beyond __brk_base. The variable 154 * tables, which are located immediately beyond __brk_base. The variable
@@ -299,38 +304,59 @@ ENTRY(startup_32_smp)
299 movl %eax,%ss 304 movl %eax,%ss
300 leal -__PAGE_OFFSET(%ecx),%esp 305 leal -__PAGE_OFFSET(%ecx),%esp
301 306
307#ifdef CONFIG_MICROCODE_EARLY
308 /* Early load ucode on AP. */
309 call load_ucode_ap
310#endif
311
312
302default_entry: 313default_entry:
314#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
315 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
316 X86_CR0_PG)
317 movl $(CR0_STATE & ~X86_CR0_PG),%eax
318 movl %eax,%cr0
319
303/* 320/*
304 * New page tables may be in 4Mbyte page mode and may 321 * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave
305 * be using the global pages. 322 * bits like NT set. This would confuse the debugger if this code is traced. So
323 * initialize them properly now before switching to protected mode. That means
324 * DF in particular (even though we have cleared it earlier after copying the
325 * command line) because GCC expects it.
326 */
327 pushl $0
328 popfl
329
330/*
331 * New page tables may be in 4Mbyte page mode and may be using the global pages.
306 * 332 *
307 * NOTE! If we are on a 486 we may have no cr4 at all! 333 * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists
308 * Specifically, cr4 exists if and only if CPUID exists 334 * if and only if CPUID exists and has flags other than the FPU flag set.
309 * and has flags other than the FPU flag set.
310 */ 335 */
336 movl $-1,pa(X86_CPUID) # preset CPUID level
311 movl $X86_EFLAGS_ID,%ecx 337 movl $X86_EFLAGS_ID,%ecx
312 pushl %ecx 338 pushl %ecx
313 popfl 339 popfl # set EFLAGS=ID
314 pushfl
315 popl %eax
316 pushl $0
317 popfl
318 pushfl 340 pushfl
319 popl %edx 341 popl %eax # get EFLAGS
320 xorl %edx,%eax 342 testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remained set?
321 testl %ecx,%eax 343 jz enable_paging # hw disallowed setting of ID bit
322 jz 6f # No ID flag = no CPUID = no CR4 344 # which means no CPUID and no CR4
345
346 xorl %eax,%eax
347 cpuid
348 movl %eax,pa(X86_CPUID) # save largest std CPUID function
323 349
324 movl $1,%eax 350 movl $1,%eax
325 cpuid 351 cpuid
326 andl $~1,%edx # Ignore CPUID.FPU 352 andl $~1,%edx # Ignore CPUID.FPU
327 jz 6f # No flags or only CPUID.FPU = no CR4 353 jz enable_paging # No flags or only CPUID.FPU = no CR4
328 354
329 movl pa(mmu_cr4_features),%eax 355 movl pa(mmu_cr4_features),%eax
330 movl %eax,%cr4 356 movl %eax,%cr4
331 357
332 testb $X86_CR4_PAE, %al # check if PAE is enabled 358 testb $X86_CR4_PAE, %al # check if PAE is enabled
333 jz 6f 359 jz enable_paging
334 360
335 /* Check if extended functions are implemented */ 361 /* Check if extended functions are implemented */
336 movl $0x80000000, %eax 362 movl $0x80000000, %eax
@@ -338,7 +364,7 @@ default_entry:
338 /* Value must be in the range 0x80000001 to 0x8000ffff */ 364 /* Value must be in the range 0x80000001 to 0x8000ffff */
339 subl $0x80000001, %eax 365 subl $0x80000001, %eax
340 cmpl $(0x8000ffff-0x80000001), %eax 366 cmpl $(0x8000ffff-0x80000001), %eax
341 ja 6f 367 ja enable_paging
342 368
343 /* Clear bogus XD_DISABLE bits */ 369 /* Clear bogus XD_DISABLE bits */
344 call verify_cpu 370 call verify_cpu
@@ -347,7 +373,7 @@ default_entry:
347 cpuid 373 cpuid
348 /* Execute Disable bit supported? */ 374 /* Execute Disable bit supported? */
349 btl $(X86_FEATURE_NX & 31), %edx 375 btl $(X86_FEATURE_NX & 31), %edx
350 jnc 6f 376 jnc enable_paging
351 377
352 /* Setup EFER (Extended Feature Enable Register) */ 378 /* Setup EFER (Extended Feature Enable Register) */
353 movl $MSR_EFER, %ecx 379 movl $MSR_EFER, %ecx
@@ -357,15 +383,14 @@ default_entry:
357 /* Make changes effective */ 383 /* Make changes effective */
358 wrmsr 384 wrmsr
359 385
3606: 386enable_paging:
361 387
362/* 388/*
363 * Enable paging 389 * Enable paging
364 */ 390 */
365 movl $pa(initial_page_table), %eax 391 movl $pa(initial_page_table), %eax
366 movl %eax,%cr3 /* set the page table pointer.. */ 392 movl %eax,%cr3 /* set the page table pointer.. */
367 movl %cr0,%eax 393 movl $CR0_STATE,%eax
368 orl $X86_CR0_PG,%eax
369 movl %eax,%cr0 /* ..and set paging (PG) bit */ 394 movl %eax,%cr0 /* ..and set paging (PG) bit */
370 ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ 395 ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
3711: 3961:
@@ -373,14 +398,6 @@ default_entry:
373 addl $__PAGE_OFFSET, %esp 398 addl $__PAGE_OFFSET, %esp
374 399
375/* 400/*
376 * Initialize eflags. Some BIOS's leave bits like NT set. This would
377 * confuse the debugger if this code is traced.
378 * XXX - best to initialize before switching to protected mode.
379 */
380 pushl $0
381 popfl
382
383/*
384 * start system 32-bit setup. We need to re-do some of the things done 401 * start system 32-bit setup. We need to re-do some of the things done
385 * in 16-bit mode for the "real" operations. 402 * in 16-bit mode for the "real" operations.
386 */ 403 */
@@ -389,31 +406,11 @@ default_entry:
389 jz 1f # Did we do this already? 406 jz 1f # Did we do this already?
390 call *%eax 407 call *%eax
3911: 4081:
392 409
393/* check if it is 486 or 386. */
394/* 410/*
395 * XXX - this does a lot of unnecessary setup. Alignment checks don't 411 * Check if it is 486
396 * apply at our cpl of 0 and the stack ought to be aligned already, and
397 * we don't need to preserve eflags.
398 */ 412 */
399 movl $-1,X86_CPUID # -1 for no CPUID initially 413 cmpl $-1,X86_CPUID
400 movb $3,X86 # at least 386
401 pushfl # push EFLAGS
402 popl %eax # get EFLAGS
403 movl %eax,%ecx # save original EFLAGS
404 xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
405 pushl %eax # copy to EFLAGS
406 popfl # set EFLAGS
407 pushfl # get new EFLAGS
408 popl %eax # put it in eax
409 xorl %ecx,%eax # change in flags
410 pushl %ecx # restore original EFLAGS
411 popfl
412 testl $0x40000,%eax # check if AC bit changed
413 je is386
414
415 movb $4,X86 # at least 486
416 testl $0x200000,%eax # check if ID bit changed
417 je is486 414 je is486
418 415
419 /* get vendor info */ 416 /* get vendor info */
@@ -439,11 +436,10 @@ default_entry:
439 movb %cl,X86_MASK 436 movb %cl,X86_MASK
440 movl %edx,X86_CAPABILITY 437 movl %edx,X86_CAPABILITY
441 438
442is486: movl $0x50022,%ecx # set AM, WP, NE and MP 439is486:
443 jmp 2f 440 movb $4,X86
444 441 movl $0x50022,%ecx # set AM, WP, NE and MP
445is386: movl $2,%ecx # set MP 442 movl %cr0,%eax
4462: movl %cr0,%eax
447 andl $0x80000011,%eax # Save PG,PE,ET 443 andl $0x80000011,%eax # Save PG,PE,ET
448 orl %ecx,%eax 444 orl %ecx,%eax
449 movl %eax,%cr0 445 movl %eax,%cr0
@@ -468,7 +464,6 @@ is386: movl $2,%ecx # set MP
468 xorl %eax,%eax # Clear LDT 464 xorl %eax,%eax # Clear LDT
469 lldt %ax 465 lldt %ax
470 466
471 cld # gcc2 wants the direction flag cleared at all times
472 pushl $0 # fake return address for unwinder 467 pushl $0 # fake return address for unwinder
473 jmp *(initial_code) 468 jmp *(initial_code)
474 469
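The rewritten CPUID probe is easier to follow than the old AC/ID-flipping dance: set EFLAGS to just the ID bit and see whether the bit survives. A hedged C rendering of the same check (it assumes interrupts are already off, as they are in the boot code, since writing EFLAGS this way also clears IF):

	#include <asm/processor-flags.h>

	static int has_cpuid(void)
	{
		unsigned long flags;

		asm volatile("pushl %1\n\t"
			     "popfl\n\t"		/* EFLAGS := ID */
			     "pushfl\n\t"
			     "popl %0"			/* read it back */
			     : "=r" (flags)
			     : "r" ((unsigned long)X86_EFLAGS_ID)
			     : "cc", "memory");

		return (flags & X86_EFLAGS_ID) != 0;	/* CPUs without CPUID cannot set ID */
	}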
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 980053c4b9cc..b7de3b25adb5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
47 .code64 47 .code64
48 .globl startup_64 48 .globl startup_64
49startup_64: 49startup_64:
50
51 /* 50 /*
52 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, 51 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
53 * and someone has loaded an identity mapped page table 52 * and someone has loaded an identity mapped page table
54 * for us. These identity mapped page tables map all of the 53 * for us. These identity mapped page tables map all of the
55 * kernel pages and possibly all of memory. 54 * kernel pages and possibly all of memory.
56 * 55 *
57 * %esi holds a physical pointer to real_mode_data. 56 * %rsi holds a physical pointer to real_mode_data.
58 * 57 *
59 * We come here either directly from a 64bit bootloader, or from 58 * We come here either directly from a 64bit bootloader, or from
60 * arch/x86_64/boot/compressed/head.S. 59 * arch/x86_64/boot/compressed/head.S.
@@ -66,7 +65,8 @@ startup_64:
66 * tables and then reload them. 65 * tables and then reload them.
67 */ 66 */
68 67
69 /* Compute the delta between the address I am compiled to run at and the 68 /*
69 * Compute the delta between the address I am compiled to run at and the
70 * address I am actually running at. 70 * address I am actually running at.
71 */ 71 */
72 leaq _text(%rip), %rbp 72 leaq _text(%rip), %rbp
@@ -78,45 +78,62 @@ startup_64:
78 testl %eax, %eax 78 testl %eax, %eax
79 jnz bad_address 79 jnz bad_address
80 80
81 /* Is the address too large? */ 81 /*
82 leaq _text(%rip), %rdx 82 * Is the address too large?
83 movq $PGDIR_SIZE, %rax
84 cmpq %rax, %rdx
85 jae bad_address
86
87 /* Fixup the physical addresses in the page table
88 */ 83 */
89 addq %rbp, init_level4_pgt + 0(%rip) 84 leaq _text(%rip), %rax
90 addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) 85 shrq $MAX_PHYSMEM_BITS, %rax
91 addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) 86 jnz bad_address
92 87
93 addq %rbp, level3_ident_pgt + 0(%rip) 88 /*
89 * Fixup the physical addresses in the page table
90 */
91 addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
94 92
95 addq %rbp, level3_kernel_pgt + (510*8)(%rip) 93 addq %rbp, level3_kernel_pgt + (510*8)(%rip)
96 addq %rbp, level3_kernel_pgt + (511*8)(%rip) 94 addq %rbp, level3_kernel_pgt + (511*8)(%rip)
97 95
98 addq %rbp, level2_fixmap_pgt + (506*8)(%rip) 96 addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
99 97
100 /* Add an Identity mapping if I am above 1G */ 98 /*
99 * Set up the identity mapping for the switchover. These
100 * entries should *NOT* have the global bit set! This also
101 * creates a bunch of nonsense entries but that is fine --
102 * it avoids problems around wraparound.
103 */
101 leaq _text(%rip), %rdi 104 leaq _text(%rip), %rdi
102 andq $PMD_PAGE_MASK, %rdi 105 leaq early_level4_pgt(%rip), %rbx
103 106
104 movq %rdi, %rax 107 movq %rdi, %rax
105 shrq $PUD_SHIFT, %rax 108 shrq $PGDIR_SHIFT, %rax
106 andq $(PTRS_PER_PUD - 1), %rax
107 jz ident_complete
108 109
109 leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx 110 leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx
110 leaq level3_ident_pgt(%rip), %rbx 111 movq %rdx, 0(%rbx,%rax,8)
111 movq %rdx, 0(%rbx, %rax, 8) 112 movq %rdx, 8(%rbx,%rax,8)
112 113
114 addq $4096, %rdx
113 movq %rdi, %rax 115 movq %rdi, %rax
114 shrq $PMD_SHIFT, %rax 116 shrq $PUD_SHIFT, %rax
115 andq $(PTRS_PER_PMD - 1), %rax 117 andl $(PTRS_PER_PUD-1), %eax
116 leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx 118 movq %rdx, (4096+0)(%rbx,%rax,8)
117 leaq level2_spare_pgt(%rip), %rbx 119 movq %rdx, (4096+8)(%rbx,%rax,8)
118 movq %rdx, 0(%rbx, %rax, 8) 120
119ident_complete: 121 addq $8192, %rbx
122 movq %rdi, %rax
123 shrq $PMD_SHIFT, %rdi
124 addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
125 leaq (_end - 1)(%rip), %rcx
126 shrq $PMD_SHIFT, %rcx
127 subq %rdi, %rcx
128 incl %ecx
129
1301:
131 andq $(PTRS_PER_PMD - 1), %rdi
132 movq %rax, (%rbx,%rdi,8)
133 incq %rdi
134 addq $PMD_SIZE, %rax
135 decl %ecx
136 jnz 1b
120 137
121 /* 138 /*
122 * Fixup the kernel text+data virtual addresses. Note that 139 * Fixup the kernel text+data virtual addresses. Note that
@@ -124,7 +141,6 @@ ident_complete:
124 * cleanup_highmap() fixes this up along with the mappings 141 * cleanup_highmap() fixes this up along with the mappings
125 * beyond _end. 142 * beyond _end.
126 */ 143 */
127
128 leaq level2_kernel_pgt(%rip), %rdi 144 leaq level2_kernel_pgt(%rip), %rdi
129 leaq 4096(%rdi), %r8 145 leaq 4096(%rdi), %r8
130 /* See if it is a valid page table entry */ 146 /* See if it is a valid page table entry */
@@ -139,17 +155,14 @@ ident_complete:
139 /* Fixup phys_base */ 155 /* Fixup phys_base */
140 addq %rbp, phys_base(%rip) 156 addq %rbp, phys_base(%rip)
141 157
142 /* Due to ENTRY(), sometimes the empty space gets filled with 158 movq $(early_level4_pgt - __START_KERNEL_map), %rax
143 * zeros. Better take a jmp than relying on empty space being 159 jmp 1f
144 * filled with 0x90 (nop)
145 */
146 jmp secondary_startup_64
147ENTRY(secondary_startup_64) 160ENTRY(secondary_startup_64)
148 /* 161 /*
149 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, 162 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
150 * and someone has loaded a mapped page table. 163 * and someone has loaded a mapped page table.
151 * 164 *
152 * %esi holds a physical pointer to real_mode_data. 165 * %rsi holds a physical pointer to real_mode_data.
153 * 166 *
154 * We come here either from startup_64 (using physical addresses) 167 * We come here either from startup_64 (using physical addresses)
155 * or from trampoline.S (using virtual addresses). 168 * or from trampoline.S (using virtual addresses).
@@ -159,12 +172,14 @@ ENTRY(secondary_startup_64)
159 * after the boot processor executes this code. 172 * after the boot processor executes this code.
160 */ 173 */
161 174
175 movq $(init_level4_pgt - __START_KERNEL_map), %rax
1761:
177
162 /* Enable PAE mode and PGE */ 178 /* Enable PAE mode and PGE */
163 movl $(X86_CR4_PAE | X86_CR4_PGE), %eax 179 movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
164 movq %rax, %cr4 180 movq %rcx, %cr4
165 181
166 /* Setup early boot stage 4 level pagetables. */ 182 /* Setup early boot stage 4 level pagetables. */
167 movq $(init_level4_pgt - __START_KERNEL_map), %rax
168 addq phys_base(%rip), %rax 183 addq phys_base(%rip), %rax
169 movq %rax, %cr3 184 movq %rax, %cr3
170 185
@@ -196,7 +211,7 @@ ENTRY(secondary_startup_64)
196 movq %rax, %cr0 211 movq %rax, %cr0
197 212
198 /* Setup a boot time stack */ 213 /* Setup a boot time stack */
199 movq stack_start(%rip),%rsp 214 movq stack_start(%rip), %rsp
200 215
201 /* zero EFLAGS after setting rsp */ 216 /* zero EFLAGS after setting rsp */
202 pushq $0 217 pushq $0
@@ -236,15 +251,33 @@ ENTRY(secondary_startup_64)
236 movl initial_gs+4(%rip),%edx 251 movl initial_gs+4(%rip),%edx
237 wrmsr 252 wrmsr
238 253
239 /* esi is pointer to real mode structure with interesting info. 254 /* rsi is pointer to real mode structure with interesting info.
240 pass it to C */ 255 pass it to C */
241 movl %esi, %edi 256 movq %rsi, %rdi
242 257
243 /* Finally jump to run C code and to be on real kernel address 258 /* Finally jump to run C code and to be on real kernel address
244 * Since we are running on identity-mapped space we have to jump 259 * Since we are running on identity-mapped space we have to jump
245 * to the full 64bit address, this is only possible as indirect 260 * to the full 64bit address, this is only possible as indirect
246 * jump. In addition we need to ensure %cs is set so we make this 261 * jump. In addition we need to ensure %cs is set so we make this
247 * a far return. 262 * a far return.
263 *
264 * Note: do not change to far jump indirect with 64bit offset.
265 *
266 * AMD does not support far jump indirect with 64bit offset.
267 * AMD64 Architecture Programmer's Manual, Volume 3: states only
268 * JMP FAR mem16:16 FF /5 Far jump indirect,
269 * with the target specified by a far pointer in memory.
270 * JMP FAR mem16:32 FF /5 Far jump indirect,
271 * with the target specified by a far pointer in memory.
272 *
273 * Intel64 does support 64bit offset.
274 * Software Developer Manual Vol 2: states:
275 * FF /5 JMP m16:16 Jump far, absolute indirect,
276 * address given in m16:16
277 * FF /5 JMP m16:32 Jump far, absolute indirect,
278 * address given in m16:32.
279 * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
280 * address given in m16:64.
248 */ 281 */
249 movq initial_code(%rip),%rax 282 movq initial_code(%rip),%rax
250 pushq $0 # fake return address to stop unwinder 283 pushq $0 # fake return address to stop unwinder
@@ -270,13 +303,13 @@ ENDPROC(start_cpu0)
270 303
271 /* SMP bootup changes these two */ 304 /* SMP bootup changes these two */
272 __REFDATA 305 __REFDATA
273 .align 8 306 .balign 8
274 ENTRY(initial_code) 307 GLOBAL(initial_code)
275 .quad x86_64_start_kernel 308 .quad x86_64_start_kernel
276 ENTRY(initial_gs) 309 GLOBAL(initial_gs)
277 .quad INIT_PER_CPU_VAR(irq_stack_union) 310 .quad INIT_PER_CPU_VAR(irq_stack_union)
278 311
279 ENTRY(stack_start) 312 GLOBAL(stack_start)
280 .quad init_thread_union+THREAD_SIZE-8 313 .quad init_thread_union+THREAD_SIZE-8
281 .word 0 314 .word 0
282 __FINITDATA 315 __FINITDATA
@@ -284,7 +317,7 @@ ENDPROC(start_cpu0)
284bad_address: 317bad_address:
285 jmp bad_address 318 jmp bad_address
286 319
287 .section ".init.text","ax" 320 __INIT
288 .globl early_idt_handlers 321 .globl early_idt_handlers
289early_idt_handlers: 322early_idt_handlers:
290 # 104(%rsp) %rflags 323 # 104(%rsp) %rflags
@@ -303,6 +336,7 @@ early_idt_handlers:
303 i = i + 1 336 i = i + 1
304 .endr 337 .endr
305 338
339/* This is global to keep gas from relaxing the jumps */
306ENTRY(early_idt_handler) 340ENTRY(early_idt_handler)
307 cld 341 cld
308 342
@@ -321,14 +355,22 @@ ENTRY(early_idt_handler)
321 pushq %r11 # 0(%rsp) 355 pushq %r11 # 0(%rsp)
322 356
323 cmpl $__KERNEL_CS,96(%rsp) 357 cmpl $__KERNEL_CS,96(%rsp)
324 jne 10f 358 jne 11f
359
360 cmpl $14,72(%rsp) # Page fault?
361 jnz 10f
362 GET_CR2_INTO(%rdi) # can clobber any volatile register if pv
363 call early_make_pgtable
364 andl %eax,%eax
365 jz 20f # All good
325 366
36710:
326 leaq 88(%rsp),%rdi # Pointer to %rip 368 leaq 88(%rsp),%rdi # Pointer to %rip
327 call early_fixup_exception 369 call early_fixup_exception
328 andl %eax,%eax 370 andl %eax,%eax
329 jnz 20f # Found an exception entry 371 jnz 20f # Found an exception entry
330 372
33110: 37311:
332#ifdef CONFIG_EARLY_PRINTK 374#ifdef CONFIG_EARLY_PRINTK
333 GET_CR2_INTO(%r9) # can clobber any volatile register if pv 375 GET_CR2_INTO(%r9) # can clobber any volatile register if pv
334 movl 80(%rsp),%r8d # error code 376 movl 80(%rsp),%r8d # error code
@@ -350,7 +392,7 @@ ENTRY(early_idt_handler)
3501: hlt 3921: hlt
351 jmp 1b 393 jmp 1b
352 394
35320: # Exception table entry found 39520: # Exception table entry found or page table generated
354 popq %r11 396 popq %r11
355 popq %r10 397 popq %r10
356 popq %r9 398 popq %r9
@@ -363,6 +405,9 @@ ENTRY(early_idt_handler)
363 addq $16,%rsp # drop vector number and error code 405 addq $16,%rsp # drop vector number and error code
364 decl early_recursion_flag(%rip) 406 decl early_recursion_flag(%rip)
365 INTERRUPT_RETURN 407 INTERRUPT_RETURN
408ENDPROC(early_idt_handler)
409
410 __INITDATA
366 411
367 .balign 4 412 .balign 4
368early_recursion_flag: 413early_recursion_flag:
@@ -374,11 +419,10 @@ early_idt_msg:
374early_idt_ripmsg: 419early_idt_ripmsg:
375 .asciz "RIP %s\n" 420 .asciz "RIP %s\n"
376#endif /* CONFIG_EARLY_PRINTK */ 421#endif /* CONFIG_EARLY_PRINTK */
377 .previous
378 422
379#define NEXT_PAGE(name) \ 423#define NEXT_PAGE(name) \
380 .balign PAGE_SIZE; \ 424 .balign PAGE_SIZE; \
381ENTRY(name) 425GLOBAL(name)
382 426
383/* Automate the creation of 1 to 1 mapping pmd entries */ 427/* Automate the creation of 1 to 1 mapping pmd entries */
384#define PMDS(START, PERM, COUNT) \ 428#define PMDS(START, PERM, COUNT) \
@@ -388,24 +432,37 @@ ENTRY(name)
388 i = i + 1 ; \ 432 i = i + 1 ; \
389 .endr 433 .endr
390 434
435 __INITDATA
436NEXT_PAGE(early_level4_pgt)
437 .fill 511,8,0
438 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
439
440NEXT_PAGE(early_dynamic_pgts)
441 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
442
391 .data 443 .data
392 /* 444
393 * This default setting generates an ident mapping at address 0x100000 445#ifndef CONFIG_XEN
394 * and a mapping for the kernel that precisely maps virtual address
395 * 0xffffffff80000000 to physical address 0x000000. (always using
396 * 2Mbyte large pages provided by PAE mode)
397 */
398NEXT_PAGE(init_level4_pgt) 446NEXT_PAGE(init_level4_pgt)
399 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE 447 .fill 512,8,0
400 .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 448#else
401 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE 449NEXT_PAGE(init_level4_pgt)
402 .org init_level4_pgt + L4_START_KERNEL*8, 0 450 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
451 .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
452 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
453 .org init_level4_pgt + L4_START_KERNEL*8, 0
403 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 454 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
404 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE 455 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
405 456
406NEXT_PAGE(level3_ident_pgt) 457NEXT_PAGE(level3_ident_pgt)
407 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE 458 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
408 .fill 511,8,0 459 .fill 511, 8, 0
460NEXT_PAGE(level2_ident_pgt)
461 /* Since I easily can, map the first 1G.
462 * Don't set NX because code runs from these pages.
463 */
464 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
465#endif
409 466
410NEXT_PAGE(level3_kernel_pgt) 467NEXT_PAGE(level3_kernel_pgt)
411 .fill L3_START_KERNEL,8,0 468 .fill L3_START_KERNEL,8,0
@@ -413,21 +470,6 @@ NEXT_PAGE(level3_kernel_pgt)
413 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE 470 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
414 .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE 471 .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
415 472
416NEXT_PAGE(level2_fixmap_pgt)
417 .fill 506,8,0
418 .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
419 /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
420 .fill 5,8,0
421
422NEXT_PAGE(level1_fixmap_pgt)
423 .fill 512,8,0
424
425NEXT_PAGE(level2_ident_pgt)
426 /* Since I easily can, map the first 1G.
427 * Don't set NX because code runs from these pages.
428 */
429 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
430
431NEXT_PAGE(level2_kernel_pgt) 473NEXT_PAGE(level2_kernel_pgt)
432 /* 474 /*
433 * 512 MB kernel mapping. We spend a full page on this pagetable 475 * 512 MB kernel mapping. We spend a full page on this pagetable
@@ -442,11 +484,16 @@ NEXT_PAGE(level2_kernel_pgt)
442 PMDS(0, __PAGE_KERNEL_LARGE_EXEC, 484 PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
443 KERNEL_IMAGE_SIZE/PMD_SIZE) 485 KERNEL_IMAGE_SIZE/PMD_SIZE)
444 486
445NEXT_PAGE(level2_spare_pgt) 487NEXT_PAGE(level2_fixmap_pgt)
446 .fill 512, 8, 0 488 .fill 506,8,0
489 .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
490 /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
491 .fill 5,8,0
492
493NEXT_PAGE(level1_fixmap_pgt)
494 .fill 512,8,0
447 495
448#undef PMDS 496#undef PMDS
449#undef NEXT_PAGE
450 497
451 .data 498 .data
452 .align 16 499 .align 16
@@ -472,6 +519,5 @@ ENTRY(nmi_idt_table)
472 .skip IDT_ENTRIES * 16 519 .skip IDT_ENTRIES * 16
473 520
474 __PAGE_ALIGNED_BSS 521 __PAGE_ALIGNED_BSS
475 .align PAGE_SIZE 522NEXT_PAGE(empty_zero_page)
476ENTRY(empty_zero_page)
477 .skip PAGE_SIZE 523 .skip PAGE_SIZE
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index e28670f9a589..da85a8e830a1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta,
478 478
479static int hpet_setup_msi_irq(unsigned int irq) 479static int hpet_setup_msi_irq(unsigned int irq)
480{ 480{
481 if (arch_setup_hpet_msi(irq, hpet_blockid)) { 481 if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
482 destroy_irq(irq); 482 destroy_irq(irq);
483 return -EINVAL; 483 return -EINVAL;
484 } 484 }
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 9c3bd4a2050e..0fa69127209a 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(csum_partial_copy_generic);
26EXPORT_SYMBOL(__get_user_1); 26EXPORT_SYMBOL(__get_user_1);
27EXPORT_SYMBOL(__get_user_2); 27EXPORT_SYMBOL(__get_user_2);
28EXPORT_SYMBOL(__get_user_4); 28EXPORT_SYMBOL(__get_user_4);
29EXPORT_SYMBOL(__get_user_8);
29 30
30EXPORT_SYMBOL(__put_user_1); 31EXPORT_SYMBOL(__put_user_1);
31EXPORT_SYMBOL(__put_user_2); 32EXPORT_SYMBOL(__put_user_2);
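__get_user_8 is what get_user() calls for an 8-byte destination on 32-bit, so exporting it lets modules do 64-bit user fetches directly. A minimal usage sketch (the names are illustrative):

	#include <linux/uaccess.h>

	static int fetch_u64(const u64 __user *uptr, u64 *out)
	{
		/* expands to a call to the now-exported __get_user_8 on i386 */
		return get_user(*out, uptr);	/* 0 on success, -EFAULT on fault */
	}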
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8c968974253d..4ddaf66ea35f 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -93,8 +93,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
93 * on system-call entry - see also fork() and the signal handling 93 * on system-call entry - see also fork() and the signal handling
94 * code. 94 * code.
95 */ 95 */
96long sys_iopl(unsigned int level, struct pt_regs *regs) 96SYSCALL_DEFINE1(iopl, unsigned int, level)
97{ 97{
98 struct pt_regs *regs = current_pt_regs();
98 unsigned int old = (regs->flags >> 12) & 3; 99 unsigned int old = (regs->flags >> 12) & 3;
99 struct thread_struct *t = &current->thread; 100 struct thread_struct *t = &current->thread;
100 101
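For reference on the "(regs->flags >> 12) & 3" above: the I/O privilege level lives in EFLAGS bits 13:12. A small sketch of the field handling (the macro names here are the sketch's own; the kernel spells the mask X86_EFLAGS_IOPL):

	#define SKETCH_IOPL_SHIFT	12
	#define SKETCH_IOPL_MASK	(3UL << SKETCH_IOPL_SHIFT)

	static unsigned int sketch_get_iopl(unsigned long flags)
	{
		return (flags & SKETCH_IOPL_MASK) >> SKETCH_IOPL_SHIFT;	/* 0..3 */
	}

	static unsigned long sketch_set_iopl(unsigned long flags, unsigned int level)
	{
		return (flags & ~SKETCH_IOPL_MASK) | ((unsigned long)level << SKETCH_IOPL_SHIFT);
	}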
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile
new file mode 100644
index 000000000000..0d33169cc1a2
--- /dev/null
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for kernel probes
3#
4
5obj-$(CONFIG_KPROBES) += core.o
6obj-$(CONFIG_OPTPROBES) += opt.o
7obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h
index 3230b68ef29a..2e9d4b5af036 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
99 return addr; 99 return addr;
100} 100}
101#endif 101#endif
102
103#ifdef CONFIG_KPROBES_ON_FTRACE
104extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
105 struct kprobe_ctlblk *kcb);
106#else
107static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
108 struct kprobe_ctlblk *kcb)
109{
110 return 0;
111}
112#endif
102#endif 113#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c
index 57916c0d3cf6..e124554598ee 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
58#include <asm/insn.h> 58#include <asm/insn.h>
59#include <asm/debugreg.h> 59#include <asm/debugreg.h>
60 60
61#include "kprobes-common.h" 61#include "common.h"
62 62
63void jprobe_return_end(void); 63void jprobe_return_end(void);
64 64
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
78 * Groups, and some special opcodes can not boost. 78 * Groups, and some special opcodes can not boost.
79 * This is non-const and volatile to keep gcc from statically 79 * This is non-const and volatile to keep gcc from statically
80 * optimizing it out, as variable_test_bit makes gcc think only 80 * optimizing it out, as variable_test_bit makes gcc think only
81 * *(unsigned long*) is used. 81 * *(unsigned long*) is used.
82 */ 82 */
83static volatile u32 twobyte_is_boostable[256 / 32] = { 83static volatile u32 twobyte_is_boostable[256 / 32] = {
84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
117 struct __arch_relative_insn { 117 struct __arch_relative_insn {
118 u8 op; 118 u8 op;
119 s32 raddr; 119 s32 raddr;
120 } __attribute__((packed)) *insn; 120 } __packed *insn;
121 121
122 insn = (struct __arch_relative_insn *)from; 122 insn = (struct __arch_relative_insn *)from;
123 insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); 123 insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
541 return 1; 541 return 1;
542} 542}
543 543
544#ifdef KPROBES_CAN_USE_FTRACE
545static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
546 struct kprobe_ctlblk *kcb)
547{
548 /*
549 * Emulate singlestep (and also recover regs->ip)
550 * as if there is a 5byte nop
551 */
552 regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
553 if (unlikely(p->post_handler)) {
554 kcb->kprobe_status = KPROBE_HIT_SSDONE;
555 p->post_handler(p, regs, 0);
556 }
557 __this_cpu_write(current_kprobe, NULL);
558}
559#endif
560
561/* 544/*
562 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 545 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
563 * remain disabled throughout this function. 546 * remain disabled throughout this function.
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
616 } else if (kprobe_running()) { 599 } else if (kprobe_running()) {
617 p = __this_cpu_read(current_kprobe); 600 p = __this_cpu_read(current_kprobe);
618 if (p->break_handler && p->break_handler(p, regs)) { 601 if (p->break_handler && p->break_handler(p, regs)) {
619#ifdef KPROBES_CAN_USE_FTRACE 602 if (!skip_singlestep(p, regs, kcb))
620 if (kprobe_ftrace(p)) { 603 setup_singlestep(p, regs, kcb, 0);
621 skip_singlestep(p, regs, kcb);
622 return 1;
623 }
624#endif
625 setup_singlestep(p, regs, kcb, 0);
626 return 1; 604 return 1;
627 } 605 }
628 } /* else: not a kprobe fault; let the kernel handle it */ 606 } /* else: not a kprobe fault; let the kernel handle it */
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1075 return 0; 1053 return 0;
1076} 1054}
1077 1055
1078#ifdef KPROBES_CAN_USE_FTRACE
1079/* Ftrace callback handler for kprobes */
1080void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
1081 struct ftrace_ops *ops, struct pt_regs *regs)
1082{
1083 struct kprobe *p;
1084 struct kprobe_ctlblk *kcb;
1085 unsigned long flags;
1086
1087 /* Disable irq for emulating a breakpoint and avoiding preempt */
1088 local_irq_save(flags);
1089
1090 p = get_kprobe((kprobe_opcode_t *)ip);
1091 if (unlikely(!p) || kprobe_disabled(p))
1092 goto end;
1093
1094 kcb = get_kprobe_ctlblk();
1095 if (kprobe_running()) {
1096 kprobes_inc_nmissed_count(p);
1097 } else {
1098 /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
1099 regs->ip = ip + sizeof(kprobe_opcode_t);
1100
1101 __this_cpu_write(current_kprobe, p);
1102 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1103 if (!p->pre_handler || !p->pre_handler(p, regs))
1104 skip_singlestep(p, regs, kcb);
1105 /*
1106 * If pre_handler returns !0, it sets regs->ip and
1107 * resets current kprobe.
1108 */
1109 }
1110end:
1111 local_irq_restore(flags);
1112}
1113
1114int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
1115{
1116 p->ainsn.insn = NULL;
1117 p->ainsn.boostable = -1;
1118 return 0;
1119}
1120#endif
1121
1122int __init arch_init_kprobes(void) 1056int __init arch_init_kprobes(void)
1123{ 1057{
1124 return arch_init_optprobes(); 1058 return arch_init_optprobes();
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
new file mode 100644
index 000000000000..23ef5c556f06
--- /dev/null
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
1/*
2 * Dynamic Ftrace based Kprobes Optimization
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) Hitachi Ltd., 2012
19 */
20#include <linux/kprobes.h>
21#include <linux/ptrace.h>
22#include <linux/hardirq.h>
23#include <linux/preempt.h>
24#include <linux/ftrace.h>
25
26#include "common.h"
27
28static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
29 struct kprobe_ctlblk *kcb)
30{
31 /*
32 * Emulate singlestep (and also recover regs->ip)
33 * as if there is a 5byte nop
34 */
35 regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
36 if (unlikely(p->post_handler)) {
37 kcb->kprobe_status = KPROBE_HIT_SSDONE;
38 p->post_handler(p, regs, 0);
39 }
40 __this_cpu_write(current_kprobe, NULL);
41 return 1;
42}
43
44int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
45 struct kprobe_ctlblk *kcb)
46{
47 if (kprobe_ftrace(p))
48 return __skip_singlestep(p, regs, kcb);
49 else
50 return 0;
51}
52
53/* Ftrace callback handler for kprobes */
54void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
55 struct ftrace_ops *ops, struct pt_regs *regs)
56{
57 struct kprobe *p;
58 struct kprobe_ctlblk *kcb;
59 unsigned long flags;
60
61 /* Disable irq for emulating a breakpoint and avoiding preempt */
62 local_irq_save(flags);
63
64 p = get_kprobe((kprobe_opcode_t *)ip);
65 if (unlikely(!p) || kprobe_disabled(p))
66 goto end;
67
68 kcb = get_kprobe_ctlblk();
69 if (kprobe_running()) {
70 kprobes_inc_nmissed_count(p);
71 } else {
72 /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
73 regs->ip = ip + sizeof(kprobe_opcode_t);
74
75 __this_cpu_write(current_kprobe, p);
76 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
77 if (!p->pre_handler || !p->pre_handler(p, regs))
78 __skip_singlestep(p, regs, kcb);
79 /*
80 * If pre_handler returns !0, it sets regs->ip and
81 * resets current kprobe.
82 */
83 }
84end:
85 local_irq_restore(flags);
86}
87
88int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
89{
90 p->ainsn.insn = NULL;
91 p->ainsn.boostable = -1;
92 return 0;
93}
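From a caller's point of view nothing changes: a kprobe is registered the same way, and when the probed address is an ftrace site (and KPROBES_ON_FTRACE is enabled) it is serviced by kprobe_ftrace_handler() above instead of an int3 breakpoint. A minimal, hedged registration sketch; the target symbol is only an example:

	#include <linux/kernel.h>
	#include <linux/kprobes.h>

	static int example_pre(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("kprobe hit %s, ip=%lx\n", p->symbol_name, regs->ip);
		return 0;
	}

	static struct kprobe example_kp = {
		.symbol_name	= "do_fork",	/* example target only */
		.pre_handler	= example_pre,
	};

	/* register_kprobe(&example_kp) from module init, unregister_kprobe() on exit */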
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c
index c5e410eed403..76dc6f095724 100644
--- a/arch/x86/kernel/kprobes-opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
37#include <asm/insn.h> 37#include <asm/insn.h>
38#include <asm/debugreg.h> 38#include <asm/debugreg.h>
39 39
40#include "kprobes-common.h" 40#include "common.h"
41 41
42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
43{ 43{
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 08b973f64032..b686a904d7c3 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -43,6 +43,7 @@
43#include <asm/apicdef.h> 43#include <asm/apicdef.h>
44#include <asm/hypervisor.h> 44#include <asm/hypervisor.h>
45#include <asm/kvm_guest.h> 45#include <asm/kvm_guest.h>
46#include <asm/context_tracking.h>
46 47
47static int kvmapf = 1; 48static int kvmapf = 1;
48 49
@@ -121,6 +122,8 @@ void kvm_async_pf_task_wait(u32 token)
121 struct kvm_task_sleep_node n, *e; 122 struct kvm_task_sleep_node n, *e;
122 DEFINE_WAIT(wait); 123 DEFINE_WAIT(wait);
123 124
125 rcu_irq_enter();
126
124 spin_lock(&b->lock); 127 spin_lock(&b->lock);
125 e = _find_apf_task(b, token); 128 e = _find_apf_task(b, token);
126 if (e) { 129 if (e) {
@@ -128,6 +131,8 @@ void kvm_async_pf_task_wait(u32 token)
128 hlist_del(&e->link); 131 hlist_del(&e->link);
129 kfree(e); 132 kfree(e);
130 spin_unlock(&b->lock); 133 spin_unlock(&b->lock);
134
135 rcu_irq_exit();
131 return; 136 return;
132 } 137 }
133 138
@@ -152,13 +157,16 @@ void kvm_async_pf_task_wait(u32 token)
152 /* 157 /*
153 * We cannot reschedule. So halt. 158 * We cannot reschedule. So halt.
154 */ 159 */
160 rcu_irq_exit();
155 native_safe_halt(); 161 native_safe_halt();
162 rcu_irq_enter();
156 local_irq_disable(); 163 local_irq_disable();
157 } 164 }
158 } 165 }
159 if (!n.halted) 166 if (!n.halted)
160 finish_wait(&n.wq, &wait); 167 finish_wait(&n.wq, &wait);
161 168
169 rcu_irq_exit();
162 return; 170 return;
163} 171}
164EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); 172EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
@@ -252,10 +260,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
252 break; 260 break;
253 case KVM_PV_REASON_PAGE_NOT_PRESENT: 261 case KVM_PV_REASON_PAGE_NOT_PRESENT:
254 /* page is swapped out by the host. */ 262 /* page is swapped out by the host. */
255 rcu_irq_enter(); 263 exception_enter(regs);
256 exit_idle(); 264 exit_idle();
257 kvm_async_pf_task_wait((u32)read_cr2()); 265 kvm_async_pf_task_wait((u32)read_cr2());
258 rcu_irq_exit(); 266 exception_exit(regs);
259 break; 267 break;
260 case KVM_PV_REASON_PAGE_READY: 268 case KVM_PV_REASON_PAGE_READY:
261 rcu_irq_enter(); 269 rcu_irq_enter();
@@ -289,9 +297,9 @@ static void kvm_register_steal_time(void)
289 297
290 memset(st, 0, sizeof(*st)); 298 memset(st, 0, sizeof(*st));
291 299
292 wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); 300 wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
293 printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", 301 pr_info("kvm-stealtime: cpu %d, msr %llx\n",
294 cpu, __pa(st)); 302 cpu, (unsigned long long) slow_virt_to_phys(st));
295} 303}
296 304
297static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; 305static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@@ -316,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void)
316 return; 324 return;
317 325
318 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { 326 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
319 u64 pa = __pa(&__get_cpu_var(apf_reason)); 327 u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason));
320 328
321#ifdef CONFIG_PREEMPT 329#ifdef CONFIG_PREEMPT
322 pa |= KVM_ASYNC_PF_SEND_ALWAYS; 330 pa |= KVM_ASYNC_PF_SEND_ALWAYS;
@@ -332,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void)
332 /* Size alignment is implied but just to make it explicit. */ 340 /* Size alignment is implied but just to make it explicit. */
333 BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); 341 BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
334 __get_cpu_var(kvm_apic_eoi) = 0; 342 __get_cpu_var(kvm_apic_eoi) = 0;
335 pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; 343 pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi))
344 | KVM_MSR_ENABLED;
336 wrmsrl(MSR_KVM_PV_EOI_EN, pa); 345 wrmsrl(MSR_KVM_PV_EOI_EN, pa);
337 } 346 }
338 347
@@ -497,6 +506,7 @@ static bool __init kvm_detect(void)
497const struct hypervisor_x86 x86_hyper_kvm __refconst = { 506const struct hypervisor_x86 x86_hyper_kvm __refconst = {
498 .name = "KVM", 507 .name = "KVM",
499 .detect = kvm_detect, 508 .detect = kvm_detect,
509 .x2apic_available = kvm_para_available,
500}; 510};
501EXPORT_SYMBOL_GPL(x86_hyper_kvm); 511EXPORT_SYMBOL_GPL(x86_hyper_kvm);
502 512
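The __pa() -> slow_virt_to_phys() changes are all for the same reason: these per-cpu variables are not guaranteed to sit where a fixed linear offset yields their physical address, so the address has to come from an actual page-table walk. A rough sketch of what that walk boils down to (large pages are ignored here for brevity; the real helper accounts for them):

	#include <linux/types.h>
	#include <asm/pgtable.h>

	static phys_addr_t sketch_virt_to_phys(void *addr)
	{
		unsigned int level;
		pte_t *pte = lookup_address((unsigned long)addr, &level);

		return (pte_pfn(*pte) << PAGE_SHIFT) |
		       ((unsigned long)addr & ~PAGE_MASK);
	}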
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 220a360010f8..0732f0089a3d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -162,8 +162,8 @@ int kvm_register_clock(char *txt)
162 int low, high, ret; 162 int low, high, ret;
163 struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; 163 struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
164 164
165 low = (int)__pa(src) | 1; 165 low = (int)slow_virt_to_phys(src) | 1;
166 high = ((u64)__pa(src) >> 32); 166 high = ((u64)slow_virt_to_phys(src) >> 32);
167 ret = native_write_msr_safe(msr_kvm_system_time, low, high); 167 ret = native_write_msr_safe(msr_kvm_system_time, low, high);
168 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 168 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
169 cpu, high, low, txt); 169 cpu, high, low, txt);
@@ -218,6 +218,9 @@ static void kvm_shutdown(void)
218void __init kvmclock_init(void) 218void __init kvmclock_init(void)
219{ 219{
220 unsigned long mem; 220 unsigned long mem;
221 int size;
222
223 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
221 224
222 if (!kvm_para_available()) 225 if (!kvm_para_available())
223 return; 226 return;
@@ -231,16 +234,14 @@ void __init kvmclock_init(void)
231 printk(KERN_INFO "kvm-clock: Using msrs %x and %x", 234 printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
232 msr_kvm_system_time, msr_kvm_wall_clock); 235 msr_kvm_system_time, msr_kvm_wall_clock);
233 236
234 mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS, 237 mem = memblock_alloc(size, PAGE_SIZE);
235 PAGE_SIZE);
236 if (!mem) 238 if (!mem)
237 return; 239 return;
238 hv_clock = __va(mem); 240 hv_clock = __va(mem);
239 241
240 if (kvm_register_clock("boot clock")) { 242 if (kvm_register_clock("boot clock")) {
241 hv_clock = NULL; 243 hv_clock = NULL;
242 memblock_free(mem, 244 memblock_free(mem, size);
243 sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
244 return; 245 return;
245 } 246 }
246 pv_time_ops.sched_clock = kvm_clock_read; 247 pv_time_ops.sched_clock = kvm_clock_read;
@@ -275,7 +276,7 @@ int __init kvm_setup_vsyscall_timeinfo(void)
275 struct pvclock_vcpu_time_info *vcpu_time; 276 struct pvclock_vcpu_time_info *vcpu_time;
276 unsigned int size; 277 unsigned int size;
277 278
278 size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS; 279 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
279 280
280 preempt_disable(); 281 preempt_disable();
281 cpu = smp_processor_id(); 282 cpu = smp_processor_id();
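The size is now computed once, rounded with PAGE_ALIGN(), and reused for both memblock_alloc() and the error-path memblock_free(), which keeps the two calls symmetric and matches the page-granular way this pvclock area is used. PAGE_ALIGN() is plain round-up-to-page; for reference:

	/* equivalent of the kernel's PAGE_ALIGN(), shown with 4 KiB pages:
	 *   sketch_page_align(1)    == 4096
	 *   sketch_page_align(4096) == 4096
	 *   sketch_page_align(4097) == 8192
	 */
	static unsigned long sketch_page_align(unsigned long len)
	{
		return (len + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
	}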
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b3ea9db39db6..4eabc160696f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -16,125 +16,12 @@
16#include <linux/io.h> 16#include <linux/io.h>
17#include <linux/suspend.h> 17#include <linux/suspend.h>
18 18
19#include <asm/init.h>
19#include <asm/pgtable.h> 20#include <asm/pgtable.h>
20#include <asm/tlbflush.h> 21#include <asm/tlbflush.h>
21#include <asm/mmu_context.h> 22#include <asm/mmu_context.h>
22#include <asm/debugreg.h> 23#include <asm/debugreg.h>
23 24
24static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
25 unsigned long addr)
26{
27 pud_t *pud;
28 pmd_t *pmd;
29 struct page *page;
30 int result = -ENOMEM;
31
32 addr &= PMD_MASK;
33 pgd += pgd_index(addr);
34 if (!pgd_present(*pgd)) {
35 page = kimage_alloc_control_pages(image, 0);
36 if (!page)
37 goto out;
38 pud = (pud_t *)page_address(page);
39 clear_page(pud);
40 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
41 }
42 pud = pud_offset(pgd, addr);
43 if (!pud_present(*pud)) {
44 page = kimage_alloc_control_pages(image, 0);
45 if (!page)
46 goto out;
47 pmd = (pmd_t *)page_address(page);
48 clear_page(pmd);
49 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
50 }
51 pmd = pmd_offset(pud, addr);
52 if (!pmd_present(*pmd))
53 set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
54 result = 0;
55out:
56 return result;
57}
58
59static void init_level2_page(pmd_t *level2p, unsigned long addr)
60{
61 unsigned long end_addr;
62
63 addr &= PAGE_MASK;
64 end_addr = addr + PUD_SIZE;
65 while (addr < end_addr) {
66 set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
67 addr += PMD_SIZE;
68 }
69}
70
71static int init_level3_page(struct kimage *image, pud_t *level3p,
72 unsigned long addr, unsigned long last_addr)
73{
74 unsigned long end_addr;
75 int result;
76
77 result = 0;
78 addr &= PAGE_MASK;
79 end_addr = addr + PGDIR_SIZE;
80 while ((addr < last_addr) && (addr < end_addr)) {
81 struct page *page;
82 pmd_t *level2p;
83
84 page = kimage_alloc_control_pages(image, 0);
85 if (!page) {
86 result = -ENOMEM;
87 goto out;
88 }
89 level2p = (pmd_t *)page_address(page);
90 init_level2_page(level2p, addr);
91 set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
92 addr += PUD_SIZE;
93 }
94 /* clear the unused entries */
95 while (addr < end_addr) {
96 pud_clear(level3p++);
97 addr += PUD_SIZE;
98 }
99out:
100 return result;
101}
102
103
104static int init_level4_page(struct kimage *image, pgd_t *level4p,
105 unsigned long addr, unsigned long last_addr)
106{
107 unsigned long end_addr;
108 int result;
109
110 result = 0;
111 addr &= PAGE_MASK;
112 end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
113 while ((addr < last_addr) && (addr < end_addr)) {
114 struct page *page;
115 pud_t *level3p;
116
117 page = kimage_alloc_control_pages(image, 0);
118 if (!page) {
119 result = -ENOMEM;
120 goto out;
121 }
122 level3p = (pud_t *)page_address(page);
123 result = init_level3_page(image, level3p, addr, last_addr);
124 if (result)
125 goto out;
126 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
127 addr += PGDIR_SIZE;
128 }
129 /* clear the unused entries */
130 while (addr < end_addr) {
131 pgd_clear(level4p++);
132 addr += PGDIR_SIZE;
133 }
134out:
135 return result;
136}
137
138static void free_transition_pgtable(struct kimage *image) 25static void free_transition_pgtable(struct kimage *image)
139{ 26{
140 free_page((unsigned long)image->arch.pud); 27 free_page((unsigned long)image->arch.pud);
@@ -184,22 +71,62 @@ err:
184 return result; 71 return result;
185} 72}
186 73
74static void *alloc_pgt_page(void *data)
75{
76 struct kimage *image = (struct kimage *)data;
77 struct page *page;
78 void *p = NULL;
79
80 page = kimage_alloc_control_pages(image, 0);
81 if (page) {
82 p = page_address(page);
83 clear_page(p);
84 }
85
86 return p;
87}
187 88
188static int init_pgtable(struct kimage *image, unsigned long start_pgtable) 89static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
189{ 90{
91 struct x86_mapping_info info = {
92 .alloc_pgt_page = alloc_pgt_page,
93 .context = image,
94 .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
95 };
96 unsigned long mstart, mend;
190 pgd_t *level4p; 97 pgd_t *level4p;
191 int result; 98 int result;
99 int i;
100
192 level4p = (pgd_t *)__va(start_pgtable); 101 level4p = (pgd_t *)__va(start_pgtable);
193 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); 102 clear_page(level4p);
194 if (result) 103 for (i = 0; i < nr_pfn_mapped; i++) {
195 return result; 104 mstart = pfn_mapped[i].start << PAGE_SHIFT;
105 mend = pfn_mapped[i].end << PAGE_SHIFT;
106
107 result = kernel_ident_mapping_init(&info,
108 level4p, mstart, mend);
109 if (result)
110 return result;
111 }
112
196 /* 113 /*
197 * image->start may be outside 0 ~ max_pfn, for example when 114 * segments' mem ranges could be outside 0 ~ max_pfn,
198 * jump back to original kernel from kexeced kernel 115 * for example when jumping back to the original kernel from the kexec'ed kernel,
116 * or when the first kernel is booted with a user mem map and the second kernel
117 * is loaded out of that range.
199 */ 118 */
200 result = init_one_level2_page(image, level4p, image->start); 119 for (i = 0; i < image->nr_segments; i++) {
201 if (result) 120 mstart = image->segment[i].mem;
202 return result; 121 mend = mstart + image->segment[i].memsz;
122
123 result = kernel_ident_mapping_init(&info,
124 level4p, mstart, mend);
125
126 if (result)
127 return result;
128 }
129
203 return init_transition_pgtable(image, level4p); 130 return init_transition_pgtable(image, level4p);
204} 131}
205 132
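The hunk above drops the hand-rolled level2/3/4 builders in favour of kernel_ident_mapping_init(), which fills the identity mapping from a caller-supplied page allocator and PMD flags. A simplified sketch of that callback-driven shape (struct ident_info and ident_map_pmd_range are illustrative names, not the kernel API):

	/* Simplified sketch: 2 MiB identity-mapped leaf entries are written into
	 * a PMD table for [start, end); the allocator callback would be invoked
	 * whenever an upper-level table is missing. */
	struct ident_info {
		void *(*alloc_pgt_page)(void *context);	/* returns a zeroed page */
		void *context;
		unsigned long pmd_flag;			/* e.g. large-page + exec flags */
	};

	static void ident_map_pmd_range(struct ident_info *info, unsigned long *pmd,
					unsigned long start, unsigned long end)
	{
		unsigned long addr;

		for (addr = start & ~((1UL << 21) - 1); addr < end; addr += (1UL << 21))
			pmd[(addr >> 21) & 511] = addr | info->pmd_flag;
	}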
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 3a04b224d0c0..22db92bbdf1a 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -364,10 +364,7 @@ static struct attribute_group mc_attr_group = {
364 364
365static void microcode_fini_cpu(int cpu) 365static void microcode_fini_cpu(int cpu)
366{ 366{
367 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
368
369 microcode_ops->microcode_fini_cpu(cpu); 367 microcode_ops->microcode_fini_cpu(cpu);
370 uci->valid = 0;
371} 368}
372 369
373static enum ucode_state microcode_resume_cpu(int cpu) 370static enum ucode_state microcode_resume_cpu(int cpu)
@@ -383,6 +380,10 @@ static enum ucode_state microcode_resume_cpu(int cpu)
383static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) 380static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
384{ 381{
385 enum ucode_state ustate; 382 enum ucode_state ustate;
383 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
384
385 if (uci && uci->valid)
386 return UCODE_OK;
386 387
387 if (collect_cpu_info(cpu)) 388 if (collect_cpu_info(cpu))
388 return UCODE_ERROR; 389 return UCODE_ERROR;
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
new file mode 100644
index 000000000000..577db8417d15
--- /dev/null
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -0,0 +1,76 @@
1/*
2 * X86 CPU microcode early update for Linux
3 *
4 * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
 5 * H Peter Anvin <hpa@zytor.com>
6 *
 7 * This driver allows early microcode updates on Intel processors
8 * belonging to IA-32 family - PentiumPro, Pentium II,
9 * Pentium III, Xeon, Pentium 4, etc.
10 *
11 * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
12 * Software Developer's Manual.
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 */
19#include <linux/module.h>
20#include <asm/microcode_intel.h>
21#include <asm/processor.h>
22
23#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
24#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
25#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
26#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
27#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
28#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
29#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
30
31#define CPUID_IS(a, b, c, ebx, ecx, edx) \
32 (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
33
34/*
 35 * In the early microcode loading phase on the BSP, boot_cpu_data is not set up yet.
36 * x86_vendor() gets vendor id for BSP.
37 *
 38 * In the 32-bit AP case, accessing boot_cpu_data needs a linear address. To simplify
 39 * the code, we still use x86_vendor() to get the vendor id for the AP.
40 *
41 * x86_vendor() gets vendor information directly through cpuid.
42 */
43static int __cpuinit x86_vendor(void)
44{
45 u32 eax = 0x00000000;
46 u32 ebx, ecx = 0, edx;
47
48 if (!have_cpuid_p())
49 return X86_VENDOR_UNKNOWN;
50
51 native_cpuid(&eax, &ebx, &ecx, &edx);
52
53 if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
54 return X86_VENDOR_INTEL;
55
56 if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
57 return X86_VENDOR_AMD;
58
59 return X86_VENDOR_UNKNOWN;
60}
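CPUID leaf 0 returns the 12-byte vendor string in EBX, EDX, ECX (in that order), which is why CPUID_IS() compares its (a, b, c) constants against ebx, edx, ecx above. A small user-space illustration of that register packing (the register values here are hard-coded, not read via cpuid):

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t ebx, edx, ecx;
		char id[13];

		/* What an Intel CPU reports for CPUID leaf 0. */
		memcpy(&ebx, "Genu", 4);
		memcpy(&edx, "ineI", 4);
		memcpy(&ecx, "ntel", 4);

		memcpy(id + 0, &ebx, 4);
		memcpy(id + 4, &edx, 4);
		memcpy(id + 8, &ecx, 4);
		id[12] = '\0';
		printf("%s\n", id);		/* GenuineIntel */
		return 0;
	}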
61
62void __init load_ucode_bsp(void)
63{
64 int vendor = x86_vendor();
65
66 if (vendor == X86_VENDOR_INTEL)
67 load_ucode_intel_bsp();
68}
69
70void __cpuinit load_ucode_ap(void)
71{
72 int vendor = x86_vendor();
73
74 if (vendor == X86_VENDOR_INTEL)
75 load_ucode_intel_ap();
76}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 3544aed39338..5fb2cebf556b 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -79,7 +79,7 @@
79#include <linux/module.h> 79#include <linux/module.h>
80#include <linux/vmalloc.h> 80#include <linux/vmalloc.h>
81 81
82#include <asm/microcode.h> 82#include <asm/microcode_intel.h>
83#include <asm/processor.h> 83#include <asm/processor.h>
84#include <asm/msr.h> 84#include <asm/msr.h>
85 85
@@ -87,59 +87,6 @@ MODULE_DESCRIPTION("Microcode Update Driver");
87MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); 87MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
88MODULE_LICENSE("GPL"); 88MODULE_LICENSE("GPL");
89 89
90struct microcode_header_intel {
91 unsigned int hdrver;
92 unsigned int rev;
93 unsigned int date;
94 unsigned int sig;
95 unsigned int cksum;
96 unsigned int ldrver;
97 unsigned int pf;
98 unsigned int datasize;
99 unsigned int totalsize;
100 unsigned int reserved[3];
101};
102
103struct microcode_intel {
104 struct microcode_header_intel hdr;
105 unsigned int bits[0];
106};
107
108/* microcode format is extended from prescott processors */
109struct extended_signature {
110 unsigned int sig;
111 unsigned int pf;
112 unsigned int cksum;
113};
114
115struct extended_sigtable {
116 unsigned int count;
117 unsigned int cksum;
118 unsigned int reserved[3];
119 struct extended_signature sigs[0];
120};
121
122#define DEFAULT_UCODE_DATASIZE (2000)
123#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
124#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
125#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
126#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
127#define DWSIZE (sizeof(u32))
128
129#define get_totalsize(mc) \
130 (((struct microcode_intel *)mc)->hdr.totalsize ? \
131 ((struct microcode_intel *)mc)->hdr.totalsize : \
132 DEFAULT_UCODE_TOTALSIZE)
133
134#define get_datasize(mc) \
135 (((struct microcode_intel *)mc)->hdr.datasize ? \
136 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
137
138#define sigmatch(s1, s2, p1, p2) \
139 (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
140
141#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
142
143static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) 90static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
144{ 91{
145 struct cpuinfo_x86 *c = &cpu_data(cpu_num); 92 struct cpuinfo_x86 *c = &cpu_data(cpu_num);
@@ -162,128 +109,25 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
162 return 0; 109 return 0;
163} 110}
164 111
165static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
166{
167 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
168}
169
170static inline int
171update_match_revision(struct microcode_header_intel *mc_header, int rev)
172{
173 return (mc_header->rev <= rev) ? 0 : 1;
174}
175
176static int microcode_sanity_check(void *mc)
177{
178 unsigned long total_size, data_size, ext_table_size;
179 struct microcode_header_intel *mc_header = mc;
180 struct extended_sigtable *ext_header = NULL;
181 int sum, orig_sum, ext_sigcount = 0, i;
182 struct extended_signature *ext_sig;
183
184 total_size = get_totalsize(mc_header);
185 data_size = get_datasize(mc_header);
186
187 if (data_size + MC_HEADER_SIZE > total_size) {
188 pr_err("error! Bad data size in microcode data file\n");
189 return -EINVAL;
190 }
191
192 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
193 pr_err("error! Unknown microcode update format\n");
194 return -EINVAL;
195 }
196 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
197 if (ext_table_size) {
198 if ((ext_table_size < EXT_HEADER_SIZE)
199 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
200 pr_err("error! Small exttable size in microcode data file\n");
201 return -EINVAL;
202 }
203 ext_header = mc + MC_HEADER_SIZE + data_size;
204 if (ext_table_size != exttable_size(ext_header)) {
205 pr_err("error! Bad exttable size in microcode data file\n");
206 return -EFAULT;
207 }
208 ext_sigcount = ext_header->count;
209 }
210
211 /* check extended table checksum */
212 if (ext_table_size) {
213 int ext_table_sum = 0;
214 int *ext_tablep = (int *)ext_header;
215
216 i = ext_table_size / DWSIZE;
217 while (i--)
218 ext_table_sum += ext_tablep[i];
219 if (ext_table_sum) {
220 pr_warning("aborting, bad extended signature table checksum\n");
221 return -EINVAL;
222 }
223 }
224
225 /* calculate the checksum */
226 orig_sum = 0;
227 i = (MC_HEADER_SIZE + data_size) / DWSIZE;
228 while (i--)
229 orig_sum += ((int *)mc)[i];
230 if (orig_sum) {
231 pr_err("aborting, bad checksum\n");
232 return -EINVAL;
233 }
234 if (!ext_table_size)
235 return 0;
236 /* check extended signature checksum */
237 for (i = 0; i < ext_sigcount; i++) {
238 ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
239 EXT_SIGNATURE_SIZE * i;
240 sum = orig_sum
241 - (mc_header->sig + mc_header->pf + mc_header->cksum)
242 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
243 if (sum) {
244 pr_err("aborting, bad checksum\n");
245 return -EINVAL;
246 }
247 }
248 return 0;
249}
250
251/* 112/*
252 * return 0 - no update found 113 * return 0 - no update found
253 * return 1 - found update 114 * return 1 - found update
254 */ 115 */
255static int 116static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
256get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
257{ 117{
258 struct microcode_header_intel *mc_header = mc; 118 struct cpu_signature cpu_sig;
259 struct extended_sigtable *ext_header; 119 unsigned int csig, cpf, crev;
260 unsigned long total_size = get_totalsize(mc_header);
261 int ext_sigcount, i;
262 struct extended_signature *ext_sig;
263
264 if (!update_match_revision(mc_header, rev))
265 return 0;
266
267 if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
268 return 1;
269 120
270 /* Look for ext. headers: */ 121 collect_cpu_info(cpu, &cpu_sig);
271 if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
272 return 0;
273 122
274 ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; 123 csig = cpu_sig.sig;
275 ext_sigcount = ext_header->count; 124 cpf = cpu_sig.pf;
276 ext_sig = (void *)ext_header + EXT_HEADER_SIZE; 125 crev = cpu_sig.rev;
277 126
278 for (i = 0; i < ext_sigcount; i++) { 127 return get_matching_microcode(csig, cpf, mc_intel, crev);
279 if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
280 return 1;
281 ext_sig++;
282 }
283 return 0;
284} 128}
285 129
286static int apply_microcode(int cpu) 130int apply_microcode(int cpu)
287{ 131{
288 struct microcode_intel *mc_intel; 132 struct microcode_intel *mc_intel;
289 struct ucode_cpu_info *uci; 133 struct ucode_cpu_info *uci;
@@ -300,6 +144,14 @@ static int apply_microcode(int cpu)
300 if (mc_intel == NULL) 144 if (mc_intel == NULL)
301 return 0; 145 return 0;
302 146
147 /*
 148	 * Microcode on this CPU may have been updated earlier. Only apply the
149 * microcode patch in mc_intel when it is newer than the one on this
150 * CPU.
151 */
152 if (get_matching_mc(mc_intel, cpu) == 0)
153 return 0;
154
303 /* write microcode via MSR 0x79 */ 155 /* write microcode via MSR 0x79 */
304 wrmsr(MSR_IA32_UCODE_WRITE, 156 wrmsr(MSR_IA32_UCODE_WRITE,
305 (unsigned long) mc_intel->bits, 157 (unsigned long) mc_intel->bits,
@@ -338,6 +190,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
338 unsigned int leftover = size; 190 unsigned int leftover = size;
339 enum ucode_state state = UCODE_OK; 191 enum ucode_state state = UCODE_OK;
340 unsigned int curr_mc_size = 0; 192 unsigned int curr_mc_size = 0;
193 unsigned int csig, cpf;
341 194
342 while (leftover) { 195 while (leftover) {
343 struct microcode_header_intel mc_header; 196 struct microcode_header_intel mc_header;
@@ -362,11 +215,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
362 } 215 }
363 216
364 if (get_ucode_data(mc, ucode_ptr, mc_size) || 217 if (get_ucode_data(mc, ucode_ptr, mc_size) ||
365 microcode_sanity_check(mc) < 0) { 218 microcode_sanity_check(mc, 1) < 0) {
366 break; 219 break;
367 } 220 }
368 221
369 if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { 222 csig = uci->cpu_sig.sig;
223 cpf = uci->cpu_sig.pf;
224 if (get_matching_microcode(csig, cpf, mc, new_rev)) {
370 vfree(new_mc); 225 vfree(new_mc);
371 new_rev = mc_header.rev; 226 new_rev = mc_header.rev;
372 new_mc = mc; 227 new_mc = mc;
@@ -393,6 +248,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
393 vfree(uci->mc); 248 vfree(uci->mc);
394 uci->mc = (struct microcode_intel *)new_mc; 249 uci->mc = (struct microcode_intel *)new_mc;
395 250
251 /*
 252	/*
 253	 * If early microcode loading is supported, save this mc into permanent
 254	 * memory so that it will be loaded early when a CPU is hot-added or resumes.
255 */
256 save_mc_for_early(new_mc);
257
396 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", 258 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
397 cpu, new_rev, uci->cpu_sig.rev); 259 cpu, new_rev, uci->cpu_sig.rev);
398out: 260out:
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
new file mode 100644
index 000000000000..7890bc838952
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -0,0 +1,796 @@
1/*
2 * Intel CPU microcode early update for Linux
3 *
4 * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
 5 * H Peter Anvin <hpa@zytor.com>
6 *
 7 * This allows early microcode updates on Intel processors
8 * belonging to IA-32 family - PentiumPro, Pentium II,
9 * Pentium III, Xeon, Pentium 4, etc.
10 *
11 * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
12 * Software Developer's Manual.
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 */
19#include <linux/module.h>
20#include <linux/mm.h>
21#include <linux/slab.h>
22#include <linux/earlycpio.h>
23#include <linux/initrd.h>
24#include <linux/cpu.h>
25#include <asm/msr.h>
26#include <asm/microcode_intel.h>
27#include <asm/processor.h>
28#include <asm/tlbflush.h>
29#include <asm/setup.h>
30
31unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
32struct mc_saved_data {
33 unsigned int mc_saved_count;
34 struct microcode_intel **mc_saved;
35} mc_saved_data;
36
37static enum ucode_state __cpuinit
38generic_load_microcode_early(struct microcode_intel **mc_saved_p,
39 unsigned int mc_saved_count,
40 struct ucode_cpu_info *uci)
41{
42 struct microcode_intel *ucode_ptr, *new_mc = NULL;
43 int new_rev = uci->cpu_sig.rev;
44 enum ucode_state state = UCODE_OK;
45 unsigned int mc_size;
46 struct microcode_header_intel *mc_header;
47 unsigned int csig = uci->cpu_sig.sig;
48 unsigned int cpf = uci->cpu_sig.pf;
49 int i;
50
51 for (i = 0; i < mc_saved_count; i++) {
52 ucode_ptr = mc_saved_p[i];
53
54 mc_header = (struct microcode_header_intel *)ucode_ptr;
55 mc_size = get_totalsize(mc_header);
56 if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
57 new_rev = mc_header->rev;
58 new_mc = ucode_ptr;
59 }
60 }
61
62 if (!new_mc) {
63 state = UCODE_NFOUND;
64 goto out;
65 }
66
67 uci->mc = (struct microcode_intel *)new_mc;
68out:
69 return state;
70}
71
72static void __cpuinit
73microcode_pointer(struct microcode_intel **mc_saved,
74 unsigned long *mc_saved_in_initrd,
75 unsigned long initrd_start, int mc_saved_count)
76{
77 int i;
78
79 for (i = 0; i < mc_saved_count; i++)
80 mc_saved[i] = (struct microcode_intel *)
81 (mc_saved_in_initrd[i] + initrd_start);
82}
83
84#ifdef CONFIG_X86_32
85static void __cpuinit
86microcode_phys(struct microcode_intel **mc_saved_tmp,
87 struct mc_saved_data *mc_saved_data)
88{
89 int i;
90 struct microcode_intel ***mc_saved;
91
92 mc_saved = (struct microcode_intel ***)
93 __pa_symbol(&mc_saved_data->mc_saved);
94 for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
95 struct microcode_intel *p;
96
97 p = *(struct microcode_intel **)
98 __pa(mc_saved_data->mc_saved + i);
99 mc_saved_tmp[i] = (struct microcode_intel *)__pa(p);
100 }
101}
102#endif
103
104static enum ucode_state __cpuinit
105load_microcode(struct mc_saved_data *mc_saved_data,
106 unsigned long *mc_saved_in_initrd,
107 unsigned long initrd_start,
108 struct ucode_cpu_info *uci)
109{
110 struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
111 unsigned int count = mc_saved_data->mc_saved_count;
112
113 if (!mc_saved_data->mc_saved) {
114 microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
115 initrd_start, count);
116
117 return generic_load_microcode_early(mc_saved_tmp, count, uci);
118 } else {
119#ifdef CONFIG_X86_32
120 microcode_phys(mc_saved_tmp, mc_saved_data);
121 return generic_load_microcode_early(mc_saved_tmp, count, uci);
122#else
123 return generic_load_microcode_early(mc_saved_data->mc_saved,
124 count, uci);
125#endif
126 }
127}
128
129static u8 get_x86_family(unsigned long sig)
130{
131 u8 x86;
132
133 x86 = (sig >> 8) & 0xf;
134
135 if (x86 == 0xf)
136 x86 += (sig >> 20) & 0xff;
137
138 return x86;
139}
140
141static u8 get_x86_model(unsigned long sig)
142{
143 u8 x86, x86_model;
144
145 x86 = get_x86_family(sig);
146 x86_model = (sig >> 4) & 0xf;
147
148 if (x86 == 0x6 || x86 == 0xf)
149 x86_model += ((sig >> 16) & 0xf) << 4;
150
151 return x86_model;
152}
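The two helpers above decode the CPUID signature: the family lives in bits 8-11 (plus the extended family in bits 20-27 when the base family is 0xf) and the model in bits 4-7 (plus the extended model in bits 16-19 for families 6 and 0xf). A worked example using an arbitrary illustrative signature value:

	#include <stdio.h>

	/* Same bit layout as the helpers above; 0x306a9 is used purely as an
	 * illustrative signature value. */
	static unsigned int family(unsigned long sig)
	{
		unsigned int f = (sig >> 8) & 0xf;

		if (f == 0xf)
			f += (sig >> 20) & 0xff;
		return f;
	}

	static unsigned int model(unsigned long sig)
	{
		unsigned int f = family(sig), m = (sig >> 4) & 0xf;

		if (f == 0x6 || f == 0xf)
			m += ((sig >> 16) & 0xf) << 4;
		return m;
	}

	int main(void)
	{
		printf("family 0x%x, model 0x%x\n", family(0x306a9), model(0x306a9));
		/* prints: family 0x6, model 0x3a */
		return 0;
	}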
153
154/*
 155 * Given a CPU signature and a microcode patch, this function checks whether
 156 * the microcode patch matches the CPU's family and model.
157 */
158static enum ucode_state
159matching_model_microcode(struct microcode_header_intel *mc_header,
160 unsigned long sig)
161{
162 u8 x86, x86_model;
163 u8 x86_ucode, x86_model_ucode;
164 struct extended_sigtable *ext_header;
165 unsigned long total_size = get_totalsize(mc_header);
166 unsigned long data_size = get_datasize(mc_header);
167 int ext_sigcount, i;
168 struct extended_signature *ext_sig;
169
170 x86 = get_x86_family(sig);
171 x86_model = get_x86_model(sig);
172
173 x86_ucode = get_x86_family(mc_header->sig);
174 x86_model_ucode = get_x86_model(mc_header->sig);
175
176 if (x86 == x86_ucode && x86_model == x86_model_ucode)
177 return UCODE_OK;
178
179 /* Look for ext. headers: */
180 if (total_size <= data_size + MC_HEADER_SIZE)
181 return UCODE_NFOUND;
182
183 ext_header = (struct extended_sigtable *)
184 mc_header + data_size + MC_HEADER_SIZE;
185 ext_sigcount = ext_header->count;
186 ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
187
188 for (i = 0; i < ext_sigcount; i++) {
189 x86_ucode = get_x86_family(ext_sig->sig);
190 x86_model_ucode = get_x86_model(ext_sig->sig);
191
192 if (x86 == x86_ucode && x86_model == x86_model_ucode)
193 return UCODE_OK;
194
195 ext_sig++;
196 }
197
198 return UCODE_NFOUND;
199}
200
201static int
202save_microcode(struct mc_saved_data *mc_saved_data,
203 struct microcode_intel **mc_saved_src,
204 unsigned int mc_saved_count)
205{
206 int i, j;
207 struct microcode_intel **mc_saved_p;
208 int ret;
209
210 if (!mc_saved_count)
211 return -EINVAL;
212
213 /*
214 * Copy new microcode data.
215 */
216 mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
217 GFP_KERNEL);
218 if (!mc_saved_p)
219 return -ENOMEM;
220
221 for (i = 0; i < mc_saved_count; i++) {
222 struct microcode_intel *mc = mc_saved_src[i];
223 struct microcode_header_intel *mc_header = &mc->hdr;
224 unsigned long mc_size = get_totalsize(mc_header);
225 mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
226 if (!mc_saved_p[i]) {
227 ret = -ENOMEM;
228 goto err;
229 }
230 if (!mc_saved_src[i]) {
231 ret = -EINVAL;
232 goto err;
233 }
234 memcpy(mc_saved_p[i], mc, mc_size);
235 }
236
237 /*
238 * Point to newly saved microcode.
239 */
240 mc_saved_data->mc_saved = mc_saved_p;
241 mc_saved_data->mc_saved_count = mc_saved_count;
242
243 return 0;
244
245err:
246 for (j = 0; j <= i; j++)
247 kfree(mc_saved_p[j]);
248 kfree(mc_saved_p);
249
250 return ret;
251}
252
253/*
254 * A microcode patch in ucode_ptr is saved into mc_saved
 255 * - if it has a matching signature and a newer revision than an existing
 256 *   patch in mc_saved,
257 * - or if it is a newly discovered microcode patch.
258 *
 259 * The microcode patch must have a model matching the CPU's.
260 */
261static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
262 unsigned int *mc_saved_count_p)
263{
264 int i;
265 int found = 0;
266 unsigned int mc_saved_count = *mc_saved_count_p;
267 struct microcode_header_intel *mc_header;
268
269 mc_header = (struct microcode_header_intel *)ucode_ptr;
270 for (i = 0; i < mc_saved_count; i++) {
271 unsigned int sig, pf;
272 unsigned int new_rev;
273 struct microcode_header_intel *mc_saved_header =
274 (struct microcode_header_intel *)mc_saved[i];
275 sig = mc_saved_header->sig;
276 pf = mc_saved_header->pf;
277 new_rev = mc_header->rev;
278
279 if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
280 found = 1;
281 if (update_match_revision(mc_header, new_rev)) {
282 /*
283 * Found an older ucode saved before.
284 * Replace the older one with this newer
285 * one.
286 */
287 mc_saved[i] =
288 (struct microcode_intel *)ucode_ptr;
289 break;
290 }
291 }
292 }
293 if (i >= mc_saved_count && !found)
294 /*
 295		 * This ucode was discovered in the ucode file for the first time.
296 * Save it to memory.
297 */
298 mc_saved[mc_saved_count++] =
299 (struct microcode_intel *)ucode_ptr;
300
301 *mc_saved_count_p = mc_saved_count;
302}
303
304/*
 305 * Get microcode matching the BSP's model. Only CPUs with the same model as
 306 * the BSP can be present in the platform.
307 */
308static enum ucode_state __init
309get_matching_model_microcode(int cpu, unsigned long start,
310 void *data, size_t size,
311 struct mc_saved_data *mc_saved_data,
312 unsigned long *mc_saved_in_initrd,
313 struct ucode_cpu_info *uci)
314{
315 u8 *ucode_ptr = data;
316 unsigned int leftover = size;
317 enum ucode_state state = UCODE_OK;
318 unsigned int mc_size;
319 struct microcode_header_intel *mc_header;
320 struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
321 unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
322 int i;
323
324 while (leftover) {
325 mc_header = (struct microcode_header_intel *)ucode_ptr;
326
327 mc_size = get_totalsize(mc_header);
328 if (!mc_size || mc_size > leftover ||
329 microcode_sanity_check(ucode_ptr, 0) < 0)
330 break;
331
332 leftover -= mc_size;
333
334 /*
 335		 * Since APs with the same family and model as the BSP may boot in
336 * the platform, we need to find and save microcode patches
337 * with the same family and model as the BSP.
338 */
339 if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
340 UCODE_OK) {
341 ucode_ptr += mc_size;
342 continue;
343 }
344
345 _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
346
347 ucode_ptr += mc_size;
348 }
349
350 if (leftover) {
351 state = UCODE_ERROR;
352 goto out;
353 }
354
355 if (mc_saved_count == 0) {
356 state = UCODE_NFOUND;
357 goto out;
358 }
359
360 for (i = 0; i < mc_saved_count; i++)
361 mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
362
363 mc_saved_data->mc_saved_count = mc_saved_count;
364out:
365 return state;
366}
367
368#define native_rdmsr(msr, val1, val2) \
369do { \
370 u64 __val = native_read_msr((msr)); \
371 (void)((val1) = (u32)__val); \
372 (void)((val2) = (u32)(__val >> 32)); \
373} while (0)
374
375#define native_wrmsr(msr, low, high) \
376 native_write_msr(msr, low, high);
377
378static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci)
379{
380 unsigned int val[2];
381 u8 x86, x86_model;
382 struct cpu_signature csig;
383 unsigned int eax, ebx, ecx, edx;
384
385 csig.sig = 0;
386 csig.pf = 0;
387 csig.rev = 0;
388
389 memset(uci, 0, sizeof(*uci));
390
391 eax = 0x00000001;
392 ecx = 0;
393 native_cpuid(&eax, &ebx, &ecx, &edx);
394 csig.sig = eax;
395
396 x86 = get_x86_family(csig.sig);
397 x86_model = get_x86_model(csig.sig);
398
399 if ((x86_model >= 5) || (x86 > 6)) {
400 /* get processor flags from MSR 0x17 */
401 native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
402 csig.pf = 1 << ((val[1] >> 18) & 7);
403 }
404 native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
405
406 /* As documented in the SDM: Do a CPUID 1 here */
407 sync_core();
408
409 /* get the current revision from MSR 0x8B */
410 native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
411
412 csig.rev = val[1];
413
414 uci->cpu_sig = csig;
415 uci->valid = 1;
416
417 return 0;
418}
419
420#ifdef DEBUG
421static void __ref show_saved_mc(void)
422{
423 int i, j;
424 unsigned int sig, pf, rev, total_size, data_size, date;
425 struct ucode_cpu_info uci;
426
427 if (mc_saved_data.mc_saved_count == 0) {
428 pr_debug("no micorcode data saved.\n");
429 return;
430 }
431 pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
432
433 collect_cpu_info_early(&uci);
434
435 sig = uci.cpu_sig.sig;
436 pf = uci.cpu_sig.pf;
437 rev = uci.cpu_sig.rev;
438 pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
439 smp_processor_id(), sig, pf, rev);
440
441 for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
442 struct microcode_header_intel *mc_saved_header;
443 struct extended_sigtable *ext_header;
444 int ext_sigcount;
445 struct extended_signature *ext_sig;
446
447 mc_saved_header = (struct microcode_header_intel *)
448 mc_saved_data.mc_saved[i];
449 sig = mc_saved_header->sig;
450 pf = mc_saved_header->pf;
451 rev = mc_saved_header->rev;
452 total_size = get_totalsize(mc_saved_header);
453 data_size = get_datasize(mc_saved_header);
454 date = mc_saved_header->date;
455
456 pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
457 i, sig, pf, rev, total_size,
458 date & 0xffff,
459 date >> 24,
460 (date >> 16) & 0xff);
461
462 /* Look for ext. headers: */
463 if (total_size <= data_size + MC_HEADER_SIZE)
464 continue;
465
466 ext_header = (struct extended_sigtable *)
467 mc_saved_header + data_size + MC_HEADER_SIZE;
468 ext_sigcount = ext_header->count;
469 ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
470
471 for (j = 0; j < ext_sigcount; j++) {
472 sig = ext_sig->sig;
473 pf = ext_sig->pf;
474
475 pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
476 j, sig, pf);
477
478 ext_sig++;
479 }
480
481 }
482}
483#else
484static inline void show_saved_mc(void)
485{
486}
487#endif
488
489#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
490/*
 491 * Save this mc into mc_saved_data so that it will be loaded early when a CPU
 492 * is hot-added or resumes.
 493 *
 494 * Please make sure this mc is a valid microcode patch before calling
 495 * this function.
496 */
497int save_mc_for_early(u8 *mc)
498{
499 struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
500 unsigned int mc_saved_count_init;
501 unsigned int mc_saved_count;
502 struct microcode_intel **mc_saved;
503 int ret = 0;
504 int i;
505
506 /*
507 * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
508 * hotplug.
509 */
510 cpu_hotplug_driver_lock();
511
512 mc_saved_count_init = mc_saved_data.mc_saved_count;
513 mc_saved_count = mc_saved_data.mc_saved_count;
514 mc_saved = mc_saved_data.mc_saved;
515
516 if (mc_saved && mc_saved_count)
517 memcpy(mc_saved_tmp, mc_saved,
 518		       mc_saved_count * sizeof(struct microcode_intel *));
519 /*
 520	 * Save the microcode patch mc in the mc_saved_tmp structure if it's a newer
521 * version.
522 */
523
524 _save_mc(mc_saved_tmp, mc, &mc_saved_count);
525
526 /*
 527	 * Save mc_saved_tmp in the global mc_saved_data.
528 */
529 ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
530 if (ret) {
531 pr_err("Can not save microcode patch.\n");
532 goto out;
533 }
534
535 show_saved_mc();
536
537 /*
 538	 * Free the old saved microcode data.
539 */
540 if (mc_saved) {
541 for (i = 0; i < mc_saved_count_init; i++)
542 kfree(mc_saved[i]);
543 kfree(mc_saved);
544 }
545
546out:
547 cpu_hotplug_driver_unlock();
548
549 return ret;
550}
551EXPORT_SYMBOL_GPL(save_mc_for_early);
552#endif
553
554static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
555static __init enum ucode_state
556scan_microcode(unsigned long start, unsigned long end,
557 struct mc_saved_data *mc_saved_data,
558 unsigned long *mc_saved_in_initrd,
559 struct ucode_cpu_info *uci)
560{
561 unsigned int size = end - start + 1;
562 struct cpio_data cd;
563 long offset = 0;
564#ifdef CONFIG_X86_32
565 char *p = (char *)__pa_symbol(ucode_name);
566#else
567 char *p = ucode_name;
568#endif
569
570 cd.data = NULL;
571 cd.size = 0;
572
573 cd = find_cpio_data(p, (void *)start, size, &offset);
574 if (!cd.data)
575 return UCODE_ERROR;
576
577
578 return get_matching_model_microcode(0, start, cd.data, cd.size,
579 mc_saved_data, mc_saved_in_initrd,
580 uci);
581}
582
583/*
584 * Print ucode update info.
585 */
586static void __cpuinit
587print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
588{
589 int cpu = smp_processor_id();
590
591 pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
592 cpu,
593 uci->cpu_sig.rev,
594 date & 0xffff,
595 date >> 24,
596 (date >> 16) & 0xff);
597}
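The header date is a packed BCD value: month in the top byte, day in bits 16-23 and the four-digit year in the low 16 bits, which is exactly how the %04x-%02x-%02x format above unpacks it. A tiny check with a hypothetical date word:

	#include <stdio.h>

	int main(void)
	{
		/* hypothetical header date: 0x06152012 decodes to 2012-06-15 */
		unsigned int date = 0x06152012;

		printf("%04x-%02x-%02x\n",
		       date & 0xffff, date >> 24, (date >> 16) & 0xff);
		return 0;
	}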
598
599#ifdef CONFIG_X86_32
600
601static int delay_ucode_info;
602static int current_mc_date;
603
604/*
 605 * Print the early-updated ucode info after printk works. This is a delayed info dump.
606 */
607void __cpuinit show_ucode_info_early(void)
608{
609 struct ucode_cpu_info uci;
610
611 if (delay_ucode_info) {
612 collect_cpu_info_early(&uci);
613 print_ucode_info(&uci, current_mc_date);
614 delay_ucode_info = 0;
615 }
616}
617
618/*
 619 * At this point, we cannot call printk() yet. Keep the microcode patch number in
620 * mc_saved_data.mc_saved and delay printing microcode info in
621 * show_ucode_info_early() until printk() works.
622 */
623static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
624{
625 struct microcode_intel *mc_intel;
626 int *delay_ucode_info_p;
627 int *current_mc_date_p;
628
629 mc_intel = uci->mc;
630 if (mc_intel == NULL)
631 return;
632
633 delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info);
634 current_mc_date_p = (int *)__pa_symbol(&current_mc_date);
635
636 *delay_ucode_info_p = 1;
637 *current_mc_date_p = mc_intel->hdr.date;
638}
639#else
640
641/*
 642 * Flush the global TLB. We only do this on x86_64, where paging has already
 643 * been enabled and PGE should be enabled as well.
644 */
645static inline void __cpuinit flush_tlb_early(void)
646{
647 __native_flush_tlb_global_irq_disabled();
648}
649
650static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
651{
652 struct microcode_intel *mc_intel;
653
654 mc_intel = uci->mc;
655 if (mc_intel == NULL)
656 return;
657
658 print_ucode_info(uci, mc_intel->hdr.date);
659}
660#endif
661
662static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
663 struct ucode_cpu_info *uci)
664{
665 struct microcode_intel *mc_intel;
666 unsigned int val[2];
667
668 mc_intel = uci->mc;
669 if (mc_intel == NULL)
670 return 0;
671
672 /* write microcode via MSR 0x79 */
673 native_wrmsr(MSR_IA32_UCODE_WRITE,
674 (unsigned long) mc_intel->bits,
675 (unsigned long) mc_intel->bits >> 16 >> 16);
676 native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
677
678 /* As documented in the SDM: Do a CPUID 1 here */
679 sync_core();
680
681 /* get the current revision from MSR 0x8B */
682 native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
683 if (val[1] != mc_intel->hdr.rev)
684 return -1;
685
686#ifdef CONFIG_X86_64
 687	/* Flush the global TLB. This is a precaution. */
688 flush_tlb_early();
689#endif
690 uci->cpu_sig.rev = val[1];
691
692 print_ucode(uci);
693
694 return 0;
695}
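The MSR write above passes the buffer address as two 32-bit words; the ">> 16 >> 16" form is used instead of ">> 32" so the expression stays well defined when unsigned long is only 32 bits wide. A short illustration (the address value is made up):

	#include <stdio.h>

	int main(void)
	{
		unsigned long addr = 0xdeadbeefUL;	/* hypothetical buffer address */

		/* On a 32-bit build, "addr >> 32" would be undefined behaviour;
		 * ">> 16 >> 16" yields 0 there and the real high half on 64-bit. */
		printf("low=%#lx high=%#lx\n", addr, addr >> 16 >> 16);
		return 0;
	}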
696
697/*
698 * This function converts microcode patch offsets previously stored in
699 * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
700 */
701int __init save_microcode_in_initrd(void)
702{
703 unsigned int count = mc_saved_data.mc_saved_count;
704 struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
705 int ret = 0;
706
707 if (count == 0)
708 return ret;
709
710 microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
711 ret = save_microcode(&mc_saved_data, mc_saved, count);
712 if (ret)
713 pr_err("Can not save microcod patches from initrd");
714
715 show_saved_mc();
716
717 return ret;
718}
719
720static void __init
721_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
722 unsigned long *mc_saved_in_initrd,
723 unsigned long initrd_start_early,
724 unsigned long initrd_end_early,
725 struct ucode_cpu_info *uci)
726{
727 collect_cpu_info_early(uci);
728 scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
729 mc_saved_in_initrd, uci);
730 load_microcode(mc_saved_data, mc_saved_in_initrd,
731 initrd_start_early, uci);
732 apply_microcode_early(mc_saved_data, uci);
733}
734
735void __init
736load_ucode_intel_bsp(void)
737{
738 u64 ramdisk_image, ramdisk_size;
739 unsigned long initrd_start_early, initrd_end_early;
740 struct ucode_cpu_info uci;
741#ifdef CONFIG_X86_32
742 struct boot_params *boot_params_p;
743
744 boot_params_p = (struct boot_params *)__pa_symbol(&boot_params);
745 ramdisk_image = boot_params_p->hdr.ramdisk_image;
746 ramdisk_size = boot_params_p->hdr.ramdisk_size;
747 initrd_start_early = ramdisk_image;
748 initrd_end_early = initrd_start_early + ramdisk_size;
749
750 _load_ucode_intel_bsp(
751 (struct mc_saved_data *)__pa_symbol(&mc_saved_data),
752 (unsigned long *)__pa_symbol(&mc_saved_in_initrd),
753 initrd_start_early, initrd_end_early, &uci);
754#else
755 ramdisk_image = boot_params.hdr.ramdisk_image;
756 ramdisk_size = boot_params.hdr.ramdisk_size;
757 initrd_start_early = ramdisk_image + PAGE_OFFSET;
758 initrd_end_early = initrd_start_early + ramdisk_size;
759
760 _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
761 initrd_start_early, initrd_end_early, &uci);
762#endif
763}
764
765void __cpuinit load_ucode_intel_ap(void)
766{
767 struct mc_saved_data *mc_saved_data_p;
768 struct ucode_cpu_info uci;
769 unsigned long *mc_saved_in_initrd_p;
770 unsigned long initrd_start_addr;
771#ifdef CONFIG_X86_32
772 unsigned long *initrd_start_p;
773
774 mc_saved_in_initrd_p =
775 (unsigned long *)__pa_symbol(mc_saved_in_initrd);
776 mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data);
777 initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start);
778 initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p);
779#else
780 mc_saved_data_p = &mc_saved_data;
781 mc_saved_in_initrd_p = mc_saved_in_initrd;
782 initrd_start_addr = initrd_start;
783#endif
784
785 /*
786 * If there is no valid ucode previously saved in memory, no need to
787 * update ucode on this AP.
788 */
789 if (mc_saved_data_p->mc_saved_count == 0)
790 return;
791
792 collect_cpu_info_early(&uci);
793 load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
794 initrd_start_addr, &uci);
795 apply_microcode_early(mc_saved_data_p, &uci);
796}
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c
new file mode 100644
index 000000000000..ce69320d0179
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_lib.c
@@ -0,0 +1,174 @@
1/*
2 * Intel CPU Microcode Update Driver for Linux
3 *
4 * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
 5 * H Peter Anvin <hpa@zytor.com>
6 *
 7 * This driver allows microcode updates on Intel processors
8 * belonging to IA-32 family - PentiumPro, Pentium II,
9 * Pentium III, Xeon, Pentium 4, etc.
10 *
 11 * Reference: Section 8.11 of Volume 3a, IA-32 Intel Architecture
12 * Software Developer's Manual
13 * Order Number 253668 or free download from:
14 *
15 * http://developer.intel.com/Assets/PDF/manual/253668.pdf
16 *
17 * For more information, go to http://www.urbanmyth.org/microcode
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25#include <linux/firmware.h>
26#include <linux/uaccess.h>
27#include <linux/kernel.h>
28#include <linux/module.h>
29
30#include <asm/microcode_intel.h>
31#include <asm/processor.h>
32#include <asm/msr.h>
33
34static inline int
35update_match_cpu(unsigned int csig, unsigned int cpf,
36 unsigned int sig, unsigned int pf)
37{
38 return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
39}
40
41int
42update_match_revision(struct microcode_header_intel *mc_header, int rev)
43{
44 return (mc_header->rev <= rev) ? 0 : 1;
45}
46
47int microcode_sanity_check(void *mc, int print_err)
48{
49 unsigned long total_size, data_size, ext_table_size;
50 struct microcode_header_intel *mc_header = mc;
51 struct extended_sigtable *ext_header = NULL;
52 int sum, orig_sum, ext_sigcount = 0, i;
53 struct extended_signature *ext_sig;
54
55 total_size = get_totalsize(mc_header);
56 data_size = get_datasize(mc_header);
57
58 if (data_size + MC_HEADER_SIZE > total_size) {
59 if (print_err)
60 pr_err("error! Bad data size in microcode data file\n");
61 return -EINVAL;
62 }
63
64 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
65 if (print_err)
66 pr_err("error! Unknown microcode update format\n");
67 return -EINVAL;
68 }
69 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
70 if (ext_table_size) {
71 if ((ext_table_size < EXT_HEADER_SIZE)
72 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
73 if (print_err)
74 pr_err("error! Small exttable size in microcode data file\n");
75 return -EINVAL;
76 }
77 ext_header = mc + MC_HEADER_SIZE + data_size;
78 if (ext_table_size != exttable_size(ext_header)) {
79 if (print_err)
80 pr_err("error! Bad exttable size in microcode data file\n");
81 return -EFAULT;
82 }
83 ext_sigcount = ext_header->count;
84 }
85
86 /* check extended table checksum */
87 if (ext_table_size) {
88 int ext_table_sum = 0;
89 int *ext_tablep = (int *)ext_header;
90
91 i = ext_table_size / DWSIZE;
92 while (i--)
93 ext_table_sum += ext_tablep[i];
94 if (ext_table_sum) {
95 if (print_err)
96 pr_warn("aborting, bad extended signature table checksum\n");
97 return -EINVAL;
98 }
99 }
100
101 /* calculate the checksum */
102 orig_sum = 0;
103 i = (MC_HEADER_SIZE + data_size) / DWSIZE;
104 while (i--)
105 orig_sum += ((int *)mc)[i];
106 if (orig_sum) {
107 if (print_err)
108 pr_err("aborting, bad checksum\n");
109 return -EINVAL;
110 }
111 if (!ext_table_size)
112 return 0;
113 /* check extended signature checksum */
114 for (i = 0; i < ext_sigcount; i++) {
115 ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
116 EXT_SIGNATURE_SIZE * i;
117 sum = orig_sum
118 - (mc_header->sig + mc_header->pf + mc_header->cksum)
119 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
120 if (sum) {
121 if (print_err)
122 pr_err("aborting, bad checksum\n");
123 return -EINVAL;
124 }
125 }
126 return 0;
127}
128EXPORT_SYMBOL_GPL(microcode_sanity_check);
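The checksum convention verified above is that every 32-bit word of the header-plus-data region sums to zero modulo 2^32; the cksum field in the header is the fix-up word that makes this hold. A minimal sketch of building and verifying such a sum (the four-word image is purely illustrative):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t img[4] = { 0x1, 0x2, 0x3, 0 };
		uint32_t sum = 0;
		int i;

		for (i = 0; i < 3; i++)
			sum += img[i];
		img[3] = (uint32_t)(0u - sum);	/* fix-up (checksum) word */

		for (sum = 0, i = 0; i < 4; i++)
			sum += img[i];
		printf("sum = %u\n", sum);	/* 0 for a valid image */
		return 0;
	}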
129
130/*
131 * return 0 - no update found
132 * return 1 - found update
133 */
134int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
135{
136 struct microcode_header_intel *mc_header = mc;
137 struct extended_sigtable *ext_header;
138 unsigned long total_size = get_totalsize(mc_header);
139 int ext_sigcount, i;
140 struct extended_signature *ext_sig;
141
142 if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf))
143 return 1;
144
145 /* Look for ext. headers: */
146 if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
147 return 0;
148
149 ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
150 ext_sigcount = ext_header->count;
151 ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
152
153 for (i = 0; i < ext_sigcount; i++) {
154 if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf))
155 return 1;
156 ext_sig++;
157 }
158 return 0;
159}
160
161/*
162 * return 0 - no update found
163 * return 1 - found update
164 */
165int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
166{
167 struct microcode_header_intel *mc_header = mc;
168
169 if (!update_match_revision(mc_header, rev))
170 return 0;
171
172 return get_matching_sig(csig, cpf, mc, rev);
173}
174EXPORT_SYMBOL_GPL(get_matching_microcode);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index a7c5661f8496..4929502c1372 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -174,6 +174,9 @@ static int msr_open(struct inode *inode, struct file *file)
174 unsigned int cpu; 174 unsigned int cpu;
175 struct cpuinfo_x86 *c; 175 struct cpuinfo_x86 *c;
176 176
177 if (!capable(CAP_SYS_RAWIO))
178 return -EPERM;
179
177 cpu = iminor(file->f_path.dentry->d_inode); 180 cpu = iminor(file->f_path.dentry->d_inode);
178 if (cpu >= nr_cpu_ids || !cpu_online(cpu)) 181 if (cpu >= nr_cpu_ids || !cpu_online(cpu))
179 return -ENXIO; /* No such CPU */ 182 return -ENXIO; /* No such CPU */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index de2b7ad70273..872079a67e4d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -56,7 +56,7 @@ struct device x86_dma_fallback_dev = {
56EXPORT_SYMBOL(x86_dma_fallback_dev); 56EXPORT_SYMBOL(x86_dma_fallback_dev);
57 57
58/* Number of entries preallocated for DMA-API debugging */ 58/* Number of entries preallocated for DMA-API debugging */
59#define PREALLOC_DMA_DEBUG_ENTRIES 32768 59#define PREALLOC_DMA_DEBUG_ENTRIES 65536
60 60
61int dma_set_mask(struct device *dev, u64 mask) 61int dma_set_mask(struct device *dev, u64 mask)
62{ 62{
@@ -265,7 +265,7 @@ rootfs_initcall(pci_iommu_init);
265#ifdef CONFIG_PCI 265#ifdef CONFIG_PCI
266/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ 266/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
267 267
268static __devinit void via_no_dac(struct pci_dev *dev) 268static void via_no_dac(struct pci_dev *dev)
269{ 269{
270 if (forbid_dac == 0) { 270 if (forbid_dac == 0) {
271 dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); 271 dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 2ed787f15bf0..14ae10031ff0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
268unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; 268unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
269EXPORT_SYMBOL(boot_option_idle_override); 269EXPORT_SYMBOL(boot_option_idle_override);
270 270
271/* 271static void (*x86_idle)(void);
272 * Powermanagement idle function, if any..
273 */
274void (*pm_idle)(void);
275#ifdef CONFIG_APM_MODULE
276EXPORT_SYMBOL(pm_idle);
277#endif
278 272
279#ifndef CONFIG_SMP 273#ifndef CONFIG_SMP
280static inline void play_dead(void) 274static inline void play_dead(void)
@@ -351,7 +345,7 @@ void cpu_idle(void)
351 rcu_idle_enter(); 345 rcu_idle_enter();
352 346
353 if (cpuidle_idle_call()) 347 if (cpuidle_idle_call())
354 pm_idle(); 348 x86_idle();
355 349
356 rcu_idle_exit(); 350 rcu_idle_exit();
357 start_critical_timings(); 351 start_critical_timings();
@@ -375,7 +369,6 @@ void cpu_idle(void)
375 */ 369 */
376void default_idle(void) 370void default_idle(void)
377{ 371{
378 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
379 trace_cpu_idle_rcuidle(1, smp_processor_id()); 372 trace_cpu_idle_rcuidle(1, smp_processor_id());
380 current_thread_info()->status &= ~TS_POLLING; 373 current_thread_info()->status &= ~TS_POLLING;
381 /* 374 /*
@@ -389,21 +382,22 @@ void default_idle(void)
389 else 382 else
390 local_irq_enable(); 383 local_irq_enable();
391 current_thread_info()->status |= TS_POLLING; 384 current_thread_info()->status |= TS_POLLING;
392 trace_power_end_rcuidle(smp_processor_id());
393 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); 385 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
394} 386}
395#ifdef CONFIG_APM_MODULE 387#ifdef CONFIG_APM_MODULE
396EXPORT_SYMBOL(default_idle); 388EXPORT_SYMBOL(default_idle);
397#endif 389#endif
398 390
399bool set_pm_idle_to_default(void) 391#ifdef CONFIG_XEN
392bool xen_set_default_idle(void)
400{ 393{
401 bool ret = !!pm_idle; 394 bool ret = !!x86_idle;
402 395
403 pm_idle = default_idle; 396 x86_idle = default_idle;
404 397
405 return ret; 398 return ret;
406} 399}
400#endif
407void stop_this_cpu(void *dummy) 401void stop_this_cpu(void *dummy)
408{ 402{
409 local_irq_disable(); 403 local_irq_disable();
@@ -413,31 +407,8 @@ void stop_this_cpu(void *dummy)
413 set_cpu_online(smp_processor_id(), false); 407 set_cpu_online(smp_processor_id(), false);
414 disable_local_APIC(); 408 disable_local_APIC();
415 409
416 for (;;) { 410 for (;;)
417 if (hlt_works(smp_processor_id())) 411 halt();
418 halt();
419 }
420}
421
422/* Default MONITOR/MWAIT with no hints, used for default C1 state */
423static void mwait_idle(void)
424{
425 if (!need_resched()) {
426 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
427 trace_cpu_idle_rcuidle(1, smp_processor_id());
428 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
429 clflush((void *)&current_thread_info()->flags);
430
431 __monitor((void *)&current_thread_info()->flags, 0, 0);
432 smp_mb();
433 if (!need_resched())
434 __sti_mwait(0, 0);
435 else
436 local_irq_enable();
437 trace_power_end_rcuidle(smp_processor_id());
438 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
439 } else
440 local_irq_enable();
441} 412}
442 413
443/* 414/*
@@ -447,62 +418,13 @@ static void mwait_idle(void)
447 */ 418 */
448static void poll_idle(void) 419static void poll_idle(void)
449{ 420{
450 trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
451 trace_cpu_idle_rcuidle(0, smp_processor_id()); 421 trace_cpu_idle_rcuidle(0, smp_processor_id());
452 local_irq_enable(); 422 local_irq_enable();
453 while (!need_resched()) 423 while (!need_resched())
454 cpu_relax(); 424 cpu_relax();
455 trace_power_end_rcuidle(smp_processor_id());
456 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); 425 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
457} 426}
458 427
459/*
460 * mwait selection logic:
461 *
462 * It depends on the CPU. For AMD CPUs that support MWAIT this is
463 * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
464 * then depend on a clock divisor and current Pstate of the core. If
465 * all cores of a processor are in halt state (C1) the processor can
466 * enter the C1E (C1 enhanced) state. If mwait is used this will never
467 * happen.
468 *
469 * idle=mwait overrides this decision and forces the usage of mwait.
470 */
471
472#define MWAIT_INFO 0x05
473#define MWAIT_ECX_EXTENDED_INFO 0x01
474#define MWAIT_EDX_C1 0xf0
475
476int mwait_usable(const struct cpuinfo_x86 *c)
477{
478 u32 eax, ebx, ecx, edx;
479
480 /* Use mwait if idle=mwait boot option is given */
481 if (boot_option_idle_override == IDLE_FORCE_MWAIT)
482 return 1;
483
484 /*
485 * Any idle= boot option other than idle=mwait means that we must not
486 * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
487 */
488 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
489 return 0;
490
491 if (c->cpuid_level < MWAIT_INFO)
492 return 0;
493
494 cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
495 /* Check, whether EDX has extended info about MWAIT */
496 if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
497 return 1;
498
499 /*
500 * edx enumeratios MONITOR/MWAIT extensions. Check, whether
501 * C1 supports MWAIT
502 */
503 return (edx & MWAIT_EDX_C1);
504}
505
506bool amd_e400_c1e_detected; 428bool amd_e400_c1e_detected;
507EXPORT_SYMBOL(amd_e400_c1e_detected); 429EXPORT_SYMBOL(amd_e400_c1e_detected);
508 430
@@ -567,31 +489,24 @@ static void amd_e400_idle(void)
567void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 489void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
568{ 490{
569#ifdef CONFIG_SMP 491#ifdef CONFIG_SMP
570 if (pm_idle == poll_idle && smp_num_siblings > 1) { 492 if (x86_idle == poll_idle && smp_num_siblings > 1)
571 pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); 493 pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
572 }
573#endif 494#endif
574 if (pm_idle) 495 if (x86_idle)
575 return; 496 return;
576 497
577 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { 498 if (cpu_has_amd_erratum(amd_erratum_400)) {
578 /*
579 * One CPU supports mwait => All CPUs supports mwait
580 */
581 pr_info("using mwait in idle threads\n");
582 pm_idle = mwait_idle;
583 } else if (cpu_has_amd_erratum(amd_erratum_400)) {
584 /* E400: APIC timer interrupt does not wake up CPU from C1e */ 499 /* E400: APIC timer interrupt does not wake up CPU from C1e */
585 pr_info("using AMD E400 aware idle routine\n"); 500 pr_info("using AMD E400 aware idle routine\n");
586 pm_idle = amd_e400_idle; 501 x86_idle = amd_e400_idle;
587 } else 502 } else
588 pm_idle = default_idle; 503 x86_idle = default_idle;
589} 504}
590 505
591void __init init_amd_e400_c1e_mask(void) 506void __init init_amd_e400_c1e_mask(void)
592{ 507{
593 /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ 508 /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
594 if (pm_idle == amd_e400_idle) 509 if (x86_idle == amd_e400_idle)
595 zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); 510 zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
596} 511}
597 512
@@ -602,11 +517,8 @@ static int __init idle_setup(char *str)
602 517
603 if (!strcmp(str, "poll")) { 518 if (!strcmp(str, "poll")) {
604 pr_info("using polling idle threads\n"); 519 pr_info("using polling idle threads\n");
605 pm_idle = poll_idle; 520 x86_idle = poll_idle;
606 boot_option_idle_override = IDLE_POLL; 521 boot_option_idle_override = IDLE_POLL;
607 } else if (!strcmp(str, "mwait")) {
608 boot_option_idle_override = IDLE_FORCE_MWAIT;
609 WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
610 } else if (!strcmp(str, "halt")) { 522 } else if (!strcmp(str, "halt")) {
611 /* 523 /*
612 * When the boot option of idle=halt is added, halt is 524 * When the boot option of idle=halt is added, halt is
@@ -615,7 +527,7 @@ static int __init idle_setup(char *str)
615 * To continue to load the CPU idle driver, don't touch 527 * To continue to load the CPU idle driver, don't touch
616 * the boot_option_idle_override. 528 * the boot_option_idle_override.
617 */ 529 */
618 pm_idle = default_idle; 530 x86_idle = default_idle;
619 boot_option_idle_override = IDLE_HALT; 531 boot_option_idle_override = IDLE_HALT;
620 } else if (!strcmp(str, "nomwait")) { 532 } else if (!strcmp(str, "nomwait")) {
621 /* 533 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6e68a6194965..0f49677da51e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,7 +117,7 @@ void release_thread(struct task_struct *dead_task)
117{ 117{
118 if (dead_task->mm) { 118 if (dead_task->mm) {
119 if (dead_task->mm->context.size) { 119 if (dead_task->mm->context.size) {
120 pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n", 120 pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
121 dead_task->comm, 121 dead_task->comm,
122 dead_task->mm->context.ldt, 122 dead_task->mm->context.ldt,
123 dead_task->mm->context.size); 123 dead_task->mm->context.size);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b629bbe0d9bd..29a8120e6fe8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,7 +22,7 @@
22#include <linux/perf_event.h> 22#include <linux/perf_event.h>
23#include <linux/hw_breakpoint.h> 23#include <linux/hw_breakpoint.h>
24#include <linux/rcupdate.h> 24#include <linux/rcupdate.h>
25#include <linux/module.h> 25#include <linux/export.h>
26#include <linux/context_tracking.h> 26#include <linux/context_tracking.h>
27 27
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1b27de563561..26ee48a33dc4 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -8,7 +8,7 @@
8 8
9#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) 9#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
10 10
11static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) 11static void quirk_intel_irqbalance(struct pci_dev *dev)
12{ 12{
13 u8 config; 13 u8 config;
14 u16 word; 14 u16 word;
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
512 512
513#if defined(CONFIG_PCI) && defined(CONFIG_NUMA) 513#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
514/* Set correct numa_node information for AMD NB functions */ 514/* Set correct numa_node information for AMD NB functions */
515static void __devinit quirk_amd_nb_node(struct pci_dev *dev) 515static void quirk_amd_nb_node(struct pci_dev *dev)
516{ 516{
517 struct pci_dev *nb_ht; 517 struct pci_dev *nb_ht;
518 unsigned int devfn; 518 unsigned int devfn;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4e8ba39eaf0f..76fa1e9a2b39 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -584,7 +584,7 @@ static void native_machine_emergency_restart(void)
584 break; 584 break;
585 585
586 case BOOT_EFI: 586 case BOOT_EFI:
587 if (efi_enabled) 587 if (efi_enabled(EFI_RUNTIME_SERVICES))
588 efi.reset_system(reboot_mode ? 588 efi.reset_system(reboot_mode ?
589 EFI_RESET_WARM : 589 EFI_RESET_WARM :
590 EFI_RESET_COLD, 590 EFI_RESET_COLD,
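
The test changes here from a bare efi_enabled flag to efi_enabled(EFI_RUNTIME_SERVICES): EFI support is now tracked per facility in a bitmask (the setup.c hunks further down set EFI_BOOT and EFI_64BIT with set_bit()). A rough sketch of how such a predicate sits on top of the bitmask; the names match this diff, but the exact definition in efi.c may differ slightly.

	/* Sketch: per-facility EFI state instead of one global flag. */
	unsigned long x86_efi_facility;

	int efi_enabled(int facility)
	{
		return test_bit(facility, &x86_efi_facility) != 0;
	}

	/* set during setup_arch(), as in the hunks below:
	 *   set_bit(EFI_BOOT, &x86_efi_facility);     loaded by an EFI boot loader
	 *   set_bit(EFI_64BIT, &x86_efi_facility);    firmware is 64-bit
	 */
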
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 801602b5d745..2e8f3d3b5641 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void)
149 if (century) { 149 if (century) {
150 century = bcd2bin(century); 150 century = bcd2bin(century);
151 year += century * 100; 151 year += century * 100;
152 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
153 } else 152 } else
154 year += CMOS_YEARS_OFFS; 153 year += CMOS_YEARS_OFFS;
155 154
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 23ddd558fbd5..9c857f05cef0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -108,17 +108,16 @@
108#include <asm/topology.h> 108#include <asm/topology.h>
109#include <asm/apicdef.h> 109#include <asm/apicdef.h>
110#include <asm/amd_nb.h> 110#include <asm/amd_nb.h>
111#ifdef CONFIG_X86_64
112#include <asm/numa_64.h>
113#endif
114#include <asm/mce.h> 111#include <asm/mce.h>
115#include <asm/alternative.h> 112#include <asm/alternative.h>
116#include <asm/prom.h> 113#include <asm/prom.h>
117 114
118/* 115/*
119 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 116 * max_low_pfn_mapped: highest direct mapped pfn under 4GB
120 * The direct mapping extends to max_pfn_mapped, so that we can directly access 117 * max_pfn_mapped: highest direct mapped pfn over 4GB
121 * apertures, ACPI and other tables without having to play with fixmaps. 118 *
119 * The direct mapping only covers E820_RAM regions, so the ranges and gaps are
120 * represented by pfn_mapped
122 */ 121 */
123unsigned long max_low_pfn_mapped; 122unsigned long max_low_pfn_mapped;
124unsigned long max_pfn_mapped; 123unsigned long max_pfn_mapped;
@@ -276,18 +275,7 @@ void * __init extend_brk(size_t size, size_t align)
276 return ret; 275 return ret;
277} 276}
278 277
279#ifdef CONFIG_X86_64 278#ifdef CONFIG_X86_32
280static void __init init_gbpages(void)
281{
282 if (direct_gbpages && cpu_has_gbpages)
283 printk(KERN_INFO "Using GB pages for direct mapping\n");
284 else
285 direct_gbpages = 0;
286}
287#else
288static inline void init_gbpages(void)
289{
290}
291static void __init cleanup_highmap(void) 279static void __init cleanup_highmap(void)
292{ 280{
293} 281}
@@ -296,8 +284,8 @@ static void __init cleanup_highmap(void)
296static void __init reserve_brk(void) 284static void __init reserve_brk(void)
297{ 285{
298 if (_brk_end > _brk_start) 286 if (_brk_end > _brk_start)
299 memblock_reserve(__pa(_brk_start), 287 memblock_reserve(__pa_symbol(_brk_start),
300 __pa(_brk_end) - __pa(_brk_start)); 288 _brk_end - _brk_start);
301 289
302 /* Mark brk area as locked down and no longer taking any 290 /* Mark brk area as locked down and no longer taking any
303 new allocations */ 291 new allocations */
@@ -306,27 +294,43 @@ static void __init reserve_brk(void)
306 294
307#ifdef CONFIG_BLK_DEV_INITRD 295#ifdef CONFIG_BLK_DEV_INITRD
308 296
297static u64 __init get_ramdisk_image(void)
298{
299 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
300
301 ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
302
303 return ramdisk_image;
304}
305static u64 __init get_ramdisk_size(void)
306{
307 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
308
309 ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
310
311 return ramdisk_size;
312}
313
309#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) 314#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
310static void __init relocate_initrd(void) 315static void __init relocate_initrd(void)
311{ 316{
312 /* Assume only end is not page aligned */ 317 /* Assume only end is not page aligned */
313 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 318 u64 ramdisk_image = get_ramdisk_image();
314 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 319 u64 ramdisk_size = get_ramdisk_size();
315 u64 area_size = PAGE_ALIGN(ramdisk_size); 320 u64 area_size = PAGE_ALIGN(ramdisk_size);
316 u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
317 u64 ramdisk_here; 321 u64 ramdisk_here;
318 unsigned long slop, clen, mapaddr; 322 unsigned long slop, clen, mapaddr;
319 char *p, *q; 323 char *p, *q;
320 324
321 /* We need to move the initrd down into lowmem */ 325 /* We need to move the initrd down into directly mapped mem */
322 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, 326 ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
323 PAGE_SIZE); 327 area_size, PAGE_SIZE);
324 328
325 if (!ramdisk_here) 329 if (!ramdisk_here)
326 panic("Cannot find place for new RAMDISK of size %lld\n", 330 panic("Cannot find place for new RAMDISK of size %lld\n",
327 ramdisk_size); 331 ramdisk_size);
328 332
329 /* Note: this includes all the lowmem currently occupied by 333 /* Note: this includes all the mem currently occupied by
330 the initrd, we rely on that fact to keep the data intact. */ 334 the initrd, we rely on that fact to keep the data intact. */
331 memblock_reserve(ramdisk_here, area_size); 335 memblock_reserve(ramdisk_here, area_size);
332 initrd_start = ramdisk_here + PAGE_OFFSET; 336 initrd_start = ramdisk_here + PAGE_OFFSET;
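
The new get_ramdisk_image()/get_ramdisk_size() helpers let the initrd live above 4 GiB: the legacy 32-bit setup-header fields are widened with the ext_ramdisk_image/ext_ramdisk_size high words from boot_params. The pattern, pulled out of the hunk for clarity (field names as above; a sketch, not a complete function):

	/* Sketch: assemble a 64-bit initrd address from the split fields. */
	u64 ramdisk_image = boot_params.hdr.ramdisk_image;            /* low 32 bits  */
	ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;    /* high 32 bits */
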
@@ -336,17 +340,7 @@ static void __init relocate_initrd(void)
336 340
337 q = (char *)initrd_start; 341 q = (char *)initrd_start;
338 342
339 /* Copy any lowmem portion of the initrd */ 343 /* Copy the initrd */
340 if (ramdisk_image < end_of_lowmem) {
341 clen = end_of_lowmem - ramdisk_image;
342 p = (char *)__va(ramdisk_image);
343 memcpy(q, p, clen);
344 q += clen;
345 ramdisk_image += clen;
346 ramdisk_size -= clen;
347 }
348
349 /* Copy the highmem portion of the initrd */
350 while (ramdisk_size) { 344 while (ramdisk_size) {
351 slop = ramdisk_image & ~PAGE_MASK; 345 slop = ramdisk_image & ~PAGE_MASK;
352 clen = ramdisk_size; 346 clen = ramdisk_size;
@@ -360,22 +354,35 @@ static void __init relocate_initrd(void)
360 ramdisk_image += clen; 354 ramdisk_image += clen;
361 ramdisk_size -= clen; 355 ramdisk_size -= clen;
362 } 356 }
363 /* high pages is not converted by early_res_to_bootmem */ 357
364 ramdisk_image = boot_params.hdr.ramdisk_image; 358 ramdisk_image = get_ramdisk_image();
365 ramdisk_size = boot_params.hdr.ramdisk_size; 359 ramdisk_size = get_ramdisk_size();
366 printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" 360 printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
367 " [mem %#010llx-%#010llx]\n", 361 " [mem %#010llx-%#010llx]\n",
368 ramdisk_image, ramdisk_image + ramdisk_size - 1, 362 ramdisk_image, ramdisk_image + ramdisk_size - 1,
369 ramdisk_here, ramdisk_here + ramdisk_size - 1); 363 ramdisk_here, ramdisk_here + ramdisk_size - 1);
370} 364}
371 365
366static void __init early_reserve_initrd(void)
367{
368 /* Assume only end is not page aligned */
369 u64 ramdisk_image = get_ramdisk_image();
370 u64 ramdisk_size = get_ramdisk_size();
371 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
372
373 if (!boot_params.hdr.type_of_loader ||
374 !ramdisk_image || !ramdisk_size)
375 return; /* No initrd provided by bootloader */
376
377 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
378}
372static void __init reserve_initrd(void) 379static void __init reserve_initrd(void)
373{ 380{
374 /* Assume only end is not page aligned */ 381 /* Assume only end is not page aligned */
375 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 382 u64 ramdisk_image = get_ramdisk_image();
376 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 383 u64 ramdisk_size = get_ramdisk_size();
377 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 384 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
378 u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; 385 u64 mapped_size;
379 386
380 if (!boot_params.hdr.type_of_loader || 387 if (!boot_params.hdr.type_of_loader ||
381 !ramdisk_image || !ramdisk_size) 388 !ramdisk_image || !ramdisk_size)
@@ -383,22 +390,18 @@ static void __init reserve_initrd(void)
383 390
384 initrd_start = 0; 391 initrd_start = 0;
385 392
386 if (ramdisk_size >= (end_of_lowmem>>1)) { 393 mapped_size = memblock_mem_size(max_pfn_mapped);
394 if (ramdisk_size >= (mapped_size>>1))
387 panic("initrd too large to handle, " 395 panic("initrd too large to handle, "
388 "disabling initrd (%lld needed, %lld available)\n", 396 "disabling initrd (%lld needed, %lld available)\n",
389 ramdisk_size, end_of_lowmem>>1); 397 ramdisk_size, mapped_size>>1);
390 }
391 398
392 printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, 399 printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
393 ramdisk_end - 1); 400 ramdisk_end - 1);
394 401
395 402 if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image),
396 if (ramdisk_end <= end_of_lowmem) { 403 PFN_DOWN(ramdisk_end))) {
397 /* All in lowmem, easy case */ 404 /* All are mapped, easy case */
398 /*
399 * don't need to reserve again, already reserved early
400 * in i386_start_kernel
401 */
402 initrd_start = ramdisk_image + PAGE_OFFSET; 405 initrd_start = ramdisk_image + PAGE_OFFSET;
403 initrd_end = initrd_start + ramdisk_size; 406 initrd_end = initrd_start + ramdisk_size;
404 return; 407 return;
@@ -409,6 +412,9 @@ static void __init reserve_initrd(void)
409 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); 412 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
410} 413}
411#else 414#else
415static void __init early_reserve_initrd(void)
416{
417}
412static void __init reserve_initrd(void) 418static void __init reserve_initrd(void)
413{ 419{
414} 420}
@@ -419,8 +425,6 @@ static void __init parse_setup_data(void)
419 struct setup_data *data; 425 struct setup_data *data;
420 u64 pa_data; 426 u64 pa_data;
421 427
422 if (boot_params.hdr.version < 0x0209)
423 return;
424 pa_data = boot_params.hdr.setup_data; 428 pa_data = boot_params.hdr.setup_data;
425 while (pa_data) { 429 while (pa_data) {
426 u32 data_len, map_len; 430 u32 data_len, map_len;
@@ -456,8 +460,6 @@ static void __init e820_reserve_setup_data(void)
456 u64 pa_data; 460 u64 pa_data;
457 int found = 0; 461 int found = 0;
458 462
459 if (boot_params.hdr.version < 0x0209)
460 return;
461 pa_data = boot_params.hdr.setup_data; 463 pa_data = boot_params.hdr.setup_data;
462 while (pa_data) { 464 while (pa_data) {
463 data = early_memremap(pa_data, sizeof(*data)); 465 data = early_memremap(pa_data, sizeof(*data));
@@ -481,8 +483,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
481 struct setup_data *data; 483 struct setup_data *data;
482 u64 pa_data; 484 u64 pa_data;
483 485
484 if (boot_params.hdr.version < 0x0209)
485 return;
486 pa_data = boot_params.hdr.setup_data; 486 pa_data = boot_params.hdr.setup_data;
487 while (pa_data) { 487 while (pa_data) {
488 data = early_memremap(pa_data, sizeof(*data)); 488 data = early_memremap(pa_data, sizeof(*data));
@@ -501,17 +501,51 @@ static void __init memblock_x86_reserve_range_setup_data(void)
501/* 501/*
502 * Keep the crash kernel below this limit. On 32 bits earlier kernels 502 * Keep the crash kernel below this limit. On 32 bits earlier kernels
503 * would limit the kernel to the low 512 MiB due to mapping restrictions. 503 * would limit the kernel to the low 512 MiB due to mapping restrictions.
504 * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
505 * limit once kexec-tools are fixed.
506 */ 504 */
507#ifdef CONFIG_X86_32 505#ifdef CONFIG_X86_32
508# define CRASH_KERNEL_ADDR_MAX (512 << 20) 506# define CRASH_KERNEL_ADDR_MAX (512 << 20)
509#else 507#else
510# define CRASH_KERNEL_ADDR_MAX (896 << 20) 508# define CRASH_KERNEL_ADDR_MAX MAXMEM
511#endif 509#endif
512 510
511static void __init reserve_crashkernel_low(void)
512{
513#ifdef CONFIG_X86_64
514 const unsigned long long alignment = 16<<20; /* 16M */
515 unsigned long long low_base = 0, low_size = 0;
516 unsigned long total_low_mem;
517 unsigned long long base;
518 int ret;
519
520 total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
521 ret = parse_crashkernel_low(boot_command_line, total_low_mem,
522 &low_size, &base);
523 if (ret != 0 || low_size <= 0)
524 return;
525
526 low_base = memblock_find_in_range(low_size, (1ULL<<32),
527 low_size, alignment);
528
529 if (!low_base) {
530 pr_info("crashkernel low reservation failed - No suitable area found.\n");
531
532 return;
533 }
534
535 memblock_reserve(low_base, low_size);
536 pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
537 (unsigned long)(low_size >> 20),
538 (unsigned long)(low_base >> 20),
539 (unsigned long)(total_low_mem >> 20));
540 crashk_low_res.start = low_base;
541 crashk_low_res.end = low_base + low_size - 1;
542 insert_resource(&iomem_resource, &crashk_low_res);
543#endif
544}
545
513static void __init reserve_crashkernel(void) 546static void __init reserve_crashkernel(void)
514{ 547{
548 const unsigned long long alignment = 16<<20; /* 16M */
515 unsigned long long total_mem; 549 unsigned long long total_mem;
516 unsigned long long crash_size, crash_base; 550 unsigned long long crash_size, crash_base;
517 int ret; 551 int ret;
@@ -525,8 +559,6 @@ static void __init reserve_crashkernel(void)
525 559
526 /* 0 means: find the address automatically */ 560 /* 0 means: find the address automatically */
527 if (crash_base <= 0) { 561 if (crash_base <= 0) {
528 const unsigned long long alignment = 16<<20; /* 16M */
529
530 /* 562 /*
531 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX 563 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
532 */ 564 */
@@ -537,6 +569,7 @@ static void __init reserve_crashkernel(void)
537 pr_info("crashkernel reservation failed - No suitable area found.\n"); 569 pr_info("crashkernel reservation failed - No suitable area found.\n");
538 return; 570 return;
539 } 571 }
572
540 } else { 573 } else {
541 unsigned long long start; 574 unsigned long long start;
542 575
@@ -558,6 +591,9 @@ static void __init reserve_crashkernel(void)
558 crashk_res.start = crash_base; 591 crashk_res.start = crash_base;
559 crashk_res.end = crash_base + crash_size - 1; 592 crashk_res.end = crash_base + crash_size - 1;
560 insert_resource(&iomem_resource, &crashk_res); 593 insert_resource(&iomem_resource, &crashk_res);
594
595 if (crash_base >= (1ULL<<32))
596 reserve_crashkernel_low();
561} 597}
562#else 598#else
563static void __init reserve_crashkernel(void) 599static void __init reserve_crashkernel(void)
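
reserve_crashkernel() now falls back to reserve_crashkernel_low() whenever the automatically chosen crash region ends up above 4 GiB, so the kdump kernel still has some memory it can use for DMA and bounce buffers below the 32-bit boundary. The size of that low chunk comes from parse_crashkernel_low(); on the command line this is typically requested as crashkernel=<size>,low, though the exact syntax is defined by the generic parser, not by this hunk. A condensed sketch of the added control flow, using names from the hunks above (not the full reserve_crashkernel()):

	/* Sketch of the added decision only. */
	crash_base = memblock_find_in_range(alignment, CRASH_KERNEL_ADDR_MAX,
					    crash_size, alignment);
	if (!crash_base)
		return;

	memblock_reserve(crash_base, crash_size);

	if (crash_base >= (1ULL << 32))
		reserve_crashkernel_low();  /* keep a <4G region for the kdump kernel */
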
@@ -608,7 +644,82 @@ static __init void reserve_ibft_region(void)
608 memblock_reserve(addr, size); 644 memblock_reserve(addr, size);
609} 645}
610 646
611static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; 647static bool __init snb_gfx_workaround_needed(void)
648{
649#ifdef CONFIG_PCI
650 int i;
651 u16 vendor, devid;
652 static const __initconst u16 snb_ids[] = {
653 0x0102,
654 0x0112,
655 0x0122,
656 0x0106,
657 0x0116,
658 0x0126,
659 0x010a,
660 };
661
662 /* Assume no if something weird is going on with PCI */
663 if (!early_pci_allowed())
664 return false;
665
666 vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID);
667 if (vendor != 0x8086)
668 return false;
669
670 devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID);
671 for (i = 0; i < ARRAY_SIZE(snb_ids); i++)
672 if (devid == snb_ids[i])
673 return true;
674#endif
675
676 return false;
677}
678
679/*
680 * Sandy Bridge graphics has trouble with certain ranges, exclude
681 * them from allocation.
682 */
683static void __init trim_snb_memory(void)
684{
685 static const __initconst unsigned long bad_pages[] = {
686 0x20050000,
687 0x20110000,
688 0x20130000,
689 0x20138000,
690 0x40004000,
691 };
692 int i;
693
694 if (!snb_gfx_workaround_needed())
695 return;
696
697 printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");
698
699 /*
700 * Reserve all memory below the 1 MB mark that has not
701 * already been reserved.
702 */
703 memblock_reserve(0, 1<<20);
704
705 for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
706 if (memblock_reserve(bad_pages[i], PAGE_SIZE))
707 printk(KERN_WARNING "failed to reserve 0x%08lx\n",
708 bad_pages[i]);
709 }
710}
711
712/*
713 * Here we put platform-specific memory range workarounds, i.e.
714 * memory known to be corrupt or otherwise in need to be reserved on
715 * specific platforms.
716 *
717 * If this gets used more widely it could use a real dispatch mechanism.
718 */
719static void __init trim_platform_memory_ranges(void)
720{
721 trim_snb_memory();
722}
612 723
613static void __init trim_bios_range(void) 724static void __init trim_bios_range(void)
614{ 725{
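
snb_gfx_workaround_needed() probes the integrated graphics device with the early PCI accessors, which work before the PCI subsystem is initialized; if a Sandy Bridge IGD is found, trim_snb_memory() pulls the known-problematic pages (and everything under 1 MB) out of the allocator. The early-probe pattern in isolation, where device 0:2.0 and the 0x8086 vendor ID come from the hunk and is_snb_gfx() is a placeholder for the device-ID table walk:

	/* Sketch: early PCI config read, usable long before the PCI core is up. */
	if (early_pci_allowed()) {
		u16 vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID);
		u16 devid  = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID);

		if (vendor == 0x8086 && is_snb_gfx(devid))   /* placeholder helper */
			trim_snb_memory();
	}
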
@@ -621,8 +732,7 @@ static void __init trim_bios_range(void)
621 * since some BIOSes are known to corrupt low memory. See the 732 * since some BIOSes are known to corrupt low memory. See the
622 * Kconfig help text for X86_RESERVE_LOW. 733 * Kconfig help text for X86_RESERVE_LOW.
623 */ 734 */
624 e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), 735 e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
625 E820_RAM, E820_RESERVED);
626 736
627 /* 737 /*
628 * special case: Some BIOSen report the PC BIOS 738 * special case: Some BIOSen report the PC BIOS
@@ -630,9 +740,33 @@ static void __init trim_bios_range(void)
630 * take them out. 740 * take them out.
631 */ 741 */
632 e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); 742 e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
743
633 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 744 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
634} 745}
635 746
747/* called before trim_bios_range() to spare extra sanitize */
748static void __init e820_add_kernel_range(void)
749{
750 u64 start = __pa_symbol(_text);
751 u64 size = __pa_symbol(_end) - start;
752
753 /*
754 * Complain if .text .data and .bss are not marked as E820_RAM and
755 * attempt to fix it by adding the range. We may have a confused BIOS,
756 * or the user may have used memmap=exactmap or memmap=xxM$yyM to
757 * exclude kernel range. If we really are running on top non-RAM,
758 * we will crash later anyways.
759 */
760 if (e820_all_mapped(start, start + size, E820_RAM))
761 return;
762
763 pr_warn(".text .data .bss are not marked as E820_RAM!\n");
764 e820_remove_range(start, size, E820_RAM, 0);
765 e820_add_region(start, size, E820_RAM);
766}
767
768static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
769
636static int __init parse_reservelow(char *p) 770static int __init parse_reservelow(char *p)
637{ 771{
638 unsigned long long size; 772 unsigned long long size;
@@ -655,6 +789,11 @@ static int __init parse_reservelow(char *p)
655 789
656early_param("reservelow", parse_reservelow); 790early_param("reservelow", parse_reservelow);
657 791
792static void __init trim_low_memory_range(void)
793{
794 memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
795}
796
658/* 797/*
659 * Determine if we were loaded by an EFI loader. If so, then we have also been 798 * Determine if we were loaded by an EFI loader. If so, then we have also been
660 * passed the efi memmap, systab, etc., so we should use these data structures 799 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -670,6 +809,17 @@ early_param("reservelow", parse_reservelow);
670 809
671void __init setup_arch(char **cmdline_p) 810void __init setup_arch(char **cmdline_p)
672{ 811{
812 memblock_reserve(__pa_symbol(_text),
813 (unsigned long)__bss_stop - (unsigned long)_text);
814
815 early_reserve_initrd();
816
817 /*
818 * At this point everything still needed from the boot loader
819 * or BIOS or kernel text should be early reserved or marked not
820 * RAM in e820. All other memory is free game.
821 */
822
673#ifdef CONFIG_X86_32 823#ifdef CONFIG_X86_32
674 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 824 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
675 visws_early_detect(); 825 visws_early_detect();
@@ -729,15 +879,15 @@ void __init setup_arch(char **cmdline_p)
729#ifdef CONFIG_EFI 879#ifdef CONFIG_EFI
730 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 880 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
731 "EL32", 4)) { 881 "EL32", 4)) {
732 efi_enabled = 1; 882 set_bit(EFI_BOOT, &x86_efi_facility);
733 efi_64bit = false;
734 } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 883 } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
735 "EL64", 4)) { 884 "EL64", 4)) {
736 efi_enabled = 1; 885 set_bit(EFI_BOOT, &x86_efi_facility);
737 efi_64bit = true; 886 set_bit(EFI_64BIT, &x86_efi_facility);
738 } 887 }
739 if (efi_enabled && efi_memblock_x86_reserve_range()) 888
740 efi_enabled = 0; 889 if (efi_enabled(EFI_BOOT))
890 efi_memblock_x86_reserve_range();
741#endif 891#endif
742 892
743 x86_init.oem.arch_setup(); 893 x86_init.oem.arch_setup();
@@ -757,12 +907,12 @@ void __init setup_arch(char **cmdline_p)
757 init_mm.end_data = (unsigned long) _edata; 907 init_mm.end_data = (unsigned long) _edata;
758 init_mm.brk = _brk_end; 908 init_mm.brk = _brk_end;
759 909
760 code_resource.start = virt_to_phys(_text); 910 code_resource.start = __pa_symbol(_text);
761 code_resource.end = virt_to_phys(_etext)-1; 911 code_resource.end = __pa_symbol(_etext)-1;
762 data_resource.start = virt_to_phys(_etext); 912 data_resource.start = __pa_symbol(_etext);
763 data_resource.end = virt_to_phys(_edata)-1; 913 data_resource.end = __pa_symbol(_edata)-1;
764 bss_resource.start = virt_to_phys(&__bss_start); 914 bss_resource.start = __pa_symbol(__bss_start);
765 bss_resource.end = virt_to_phys(&__bss_stop)-1; 915 bss_resource.end = __pa_symbol(__bss_stop)-1;
766 916
767#ifdef CONFIG_CMDLINE_BOOL 917#ifdef CONFIG_CMDLINE_BOOL
768#ifdef CONFIG_CMDLINE_OVERRIDE 918#ifdef CONFIG_CMDLINE_OVERRIDE
@@ -810,7 +960,7 @@ void __init setup_arch(char **cmdline_p)
810 960
811 finish_e820_parsing(); 961 finish_e820_parsing();
812 962
813 if (efi_enabled) 963 if (efi_enabled(EFI_BOOT))
814 efi_init(); 964 efi_init();
815 965
816 dmi_scan_machine(); 966 dmi_scan_machine();
@@ -828,6 +978,7 @@ void __init setup_arch(char **cmdline_p)
828 insert_resource(&iomem_resource, &data_resource); 978 insert_resource(&iomem_resource, &data_resource);
829 insert_resource(&iomem_resource, &bss_resource); 979 insert_resource(&iomem_resource, &bss_resource);
830 980
981 e820_add_kernel_range();
831 trim_bios_range(); 982 trim_bios_range();
832#ifdef CONFIG_X86_32 983#ifdef CONFIG_X86_32
833 if (ppro_with_ram_bug()) { 984 if (ppro_with_ram_bug()) {
@@ -877,6 +1028,8 @@ void __init setup_arch(char **cmdline_p)
877 1028
878 reserve_ibft_region(); 1029 reserve_ibft_region();
879 1030
1031 early_alloc_pgt_buf();
1032
880 /* 1033 /*
881 * Need to conclude brk, before memblock_x86_fill() 1034 * Need to conclude brk, before memblock_x86_fill()
882 * it could use memblock_find_in_range, could overlap with 1035 * it could use memblock_find_in_range, could overlap with
@@ -886,14 +1039,14 @@ void __init setup_arch(char **cmdline_p)
886 1039
887 cleanup_highmap(); 1040 cleanup_highmap();
888 1041
889 memblock.current_limit = get_max_mapped(); 1042 memblock.current_limit = ISA_END_ADDRESS;
890 memblock_x86_fill(); 1043 memblock_x86_fill();
891 1044
892 /* 1045 /*
893 * The EFI specification says that boot service code won't be called 1046 * The EFI specification says that boot service code won't be called
894 * after ExitBootServices(). This is, in fact, a lie. 1047 * after ExitBootServices(). This is, in fact, a lie.
895 */ 1048 */
896 if (efi_enabled) 1049 if (efi_enabled(EFI_MEMMAP))
897 efi_reserve_boot_services(); 1050 efi_reserve_boot_services();
898 1051
899 /* preallocate 4k for mptable mpc */ 1052 /* preallocate 4k for mptable mpc */
@@ -903,39 +1056,31 @@ void __init setup_arch(char **cmdline_p)
903 setup_bios_corruption_check(); 1056 setup_bios_corruption_check();
904#endif 1057#endif
905 1058
1059 /*
1060 * In the memory hotplug case, the kernel needs info from SRAT to
1061 * determine which memory is hotpluggable before allocating memory
1062 * using memblock.
1063 */
1064 acpi_boot_table_init();
1065 early_acpi_boot_init();
1066 early_parse_srat();
1067
1068#ifdef CONFIG_X86_32
906 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", 1069 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
907 (max_pfn_mapped<<PAGE_SHIFT) - 1); 1070 (max_pfn_mapped<<PAGE_SHIFT) - 1);
1071#endif
908 1072
909 setup_real_mode(); 1073 reserve_real_mode();
910
911 init_gbpages();
912
913 /* max_pfn_mapped is updated here */
914 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
915 max_pfn_mapped = max_low_pfn_mapped;
916 1074
917#ifdef CONFIG_X86_64 1075 trim_platform_memory_ranges();
918 if (max_pfn > max_low_pfn) { 1076 trim_low_memory_range();
919 int i;
920 unsigned long start, end;
921 unsigned long start_pfn, end_pfn;
922 1077
923 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, 1078 init_mem_mapping();
924 NULL) {
925 1079
926 end = PFN_PHYS(end_pfn); 1080 early_trap_pf_init();
927 if (end <= (1UL<<32))
928 continue;
929 1081
930 start = PFN_PHYS(start_pfn); 1082 setup_real_mode();
931 max_pfn_mapped = init_memory_mapping(
932 max((1UL<<32), start), end);
933 }
934 1083
935 /* can we preseve max_low_pfn ?*/
936 max_low_pfn = max_pfn;
937 }
938#endif
939 memblock.current_limit = get_max_mapped(); 1084 memblock.current_limit = get_max_mapped();
940 dma_contiguous_reserve(0); 1085 dma_contiguous_reserve(0);
941 1086
@@ -965,10 +1110,6 @@ void __init setup_arch(char **cmdline_p)
965 /* 1110 /*
966 * Parse the ACPI tables for possible boot-time SMP configuration. 1111 * Parse the ACPI tables for possible boot-time SMP configuration.
967 */ 1112 */
968 acpi_boot_table_init();
969
970 early_acpi_boot_init();
971
972 initmem_init(); 1113 initmem_init();
973 memblock_find_dma_reserve(); 1114 memblock_find_dma_reserve();
974 1115
@@ -1034,7 +1175,7 @@ void __init setup_arch(char **cmdline_p)
1034 1175
1035#ifdef CONFIG_VT 1176#ifdef CONFIG_VT
1036#if defined(CONFIG_VGA_CONSOLE) 1177#if defined(CONFIG_VGA_CONSOLE)
1037 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) 1178 if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1038 conswitchp = &vga_con; 1179 conswitchp = &vga_con;
1039#elif defined(CONFIG_DUMMY_CONSOLE) 1180#elif defined(CONFIG_DUMMY_CONSOLE)
1040 conswitchp = &dummy_con; 1181 conswitchp = &dummy_con;
@@ -1051,14 +1192,14 @@ void __init setup_arch(char **cmdline_p)
1051 register_refined_jiffies(CLOCK_TICK_RATE); 1192 register_refined_jiffies(CLOCK_TICK_RATE);
1052 1193
1053#ifdef CONFIG_EFI 1194#ifdef CONFIG_EFI
1054 /* Once setup is done above, disable efi_enabled on mismatched 1195 /* Once setup is done above, unmap the EFI memory map on
1055 * firmware/kernel archtectures since there is no support for 1196 * mismatched firmware/kernel archtectures since there is no
1056 * runtime services. 1197 * support for runtime services.
1057 */ 1198 */
1058 if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) { 1199 if (efi_enabled(EFI_BOOT) &&
1200 IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
1059 pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); 1201 pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
1060 efi_unmap_memmap(); 1202 efi_unmap_memmap();
1061 efi_enabled = 0;
1062 } 1203 }
1063#endif 1204#endif
1064} 1205}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d6bf1f34a6e9..69562992e457 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -278,7 +278,7 @@ static const struct {
278}; 278};
279 279
280static int 280static int
281__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, 281__setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
282 struct pt_regs *regs) 282 struct pt_regs *regs)
283{ 283{
284 struct sigframe __user *frame; 284 struct sigframe __user *frame;
@@ -286,7 +286,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
286 int err = 0; 286 int err = 0;
287 void __user *fpstate = NULL; 287 void __user *fpstate = NULL;
288 288
289 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 289 frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
290 290
291 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 291 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
292 return -EFAULT; 292 return -EFAULT;
@@ -307,8 +307,8 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
307 restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); 307 restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
308 else 308 else
309 restorer = &frame->retcode; 309 restorer = &frame->retcode;
310 if (ka->sa.sa_flags & SA_RESTORER) 310 if (ksig->ka.sa.sa_flags & SA_RESTORER)
311 restorer = ka->sa.sa_restorer; 311 restorer = ksig->ka.sa.sa_restorer;
312 312
313 /* Set up to return from userspace. */ 313 /* Set up to return from userspace. */
314 err |= __put_user(restorer, &frame->pretcode); 314 err |= __put_user(restorer, &frame->pretcode);
@@ -327,7 +327,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
327 327
328 /* Set up registers for signal handler */ 328 /* Set up registers for signal handler */
329 regs->sp = (unsigned long)frame; 329 regs->sp = (unsigned long)frame;
330 regs->ip = (unsigned long)ka->sa.sa_handler; 330 regs->ip = (unsigned long)ksig->ka.sa.sa_handler;
331 regs->ax = (unsigned long)sig; 331 regs->ax = (unsigned long)sig;
332 regs->dx = 0; 332 regs->dx = 0;
333 regs->cx = 0; 333 regs->cx = 0;
@@ -340,7 +340,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
340 return 0; 340 return 0;
341} 341}
342 342
343static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 343static int __setup_rt_frame(int sig, struct ksignal *ksig,
344 sigset_t *set, struct pt_regs *regs) 344 sigset_t *set, struct pt_regs *regs)
345{ 345{
346 struct rt_sigframe __user *frame; 346 struct rt_sigframe __user *frame;
@@ -348,7 +348,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
348 int err = 0; 348 int err = 0;
349 void __user *fpstate = NULL; 349 void __user *fpstate = NULL;
350 350
351 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 351 frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
352 352
353 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 353 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
354 return -EFAULT; 354 return -EFAULT;
@@ -368,8 +368,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
368 368
369 /* Set up to return from userspace. */ 369 /* Set up to return from userspace. */
370 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); 370 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
371 if (ka->sa.sa_flags & SA_RESTORER) 371 if (ksig->ka.sa.sa_flags & SA_RESTORER)
372 restorer = ka->sa.sa_restorer; 372 restorer = ksig->ka.sa.sa_restorer;
373 put_user_ex(restorer, &frame->pretcode); 373 put_user_ex(restorer, &frame->pretcode);
374 374
375 /* 375 /*
@@ -382,7 +382,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
382 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); 382 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
383 } put_user_catch(err); 383 } put_user_catch(err);
384 384
385 err |= copy_siginfo_to_user(&frame->info, info); 385 err |= copy_siginfo_to_user(&frame->info, &ksig->info);
386 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, 386 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
387 regs, set->sig[0]); 387 regs, set->sig[0]);
388 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 388 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -392,7 +392,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
392 392
393 /* Set up registers for signal handler */ 393 /* Set up registers for signal handler */
394 regs->sp = (unsigned long)frame; 394 regs->sp = (unsigned long)frame;
395 regs->ip = (unsigned long)ka->sa.sa_handler; 395 regs->ip = (unsigned long)ksig->ka.sa.sa_handler;
396 regs->ax = (unsigned long)sig; 396 regs->ax = (unsigned long)sig;
397 regs->dx = (unsigned long)&frame->info; 397 regs->dx = (unsigned long)&frame->info;
398 regs->cx = (unsigned long)&frame->uc; 398 regs->cx = (unsigned long)&frame->uc;
@@ -405,20 +405,20 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
405 return 0; 405 return 0;
406} 406}
407#else /* !CONFIG_X86_32 */ 407#else /* !CONFIG_X86_32 */
408static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 408static int __setup_rt_frame(int sig, struct ksignal *ksig,
409 sigset_t *set, struct pt_regs *regs) 409 sigset_t *set, struct pt_regs *regs)
410{ 410{
411 struct rt_sigframe __user *frame; 411 struct rt_sigframe __user *frame;
412 void __user *fp = NULL; 412 void __user *fp = NULL;
413 int err = 0; 413 int err = 0;
414 414
415 frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); 415 frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
416 416
417 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 417 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
418 return -EFAULT; 418 return -EFAULT;
419 419
420 if (ka->sa.sa_flags & SA_SIGINFO) { 420 if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
421 if (copy_siginfo_to_user(&frame->info, info)) 421 if (copy_siginfo_to_user(&frame->info, &ksig->info))
422 return -EFAULT; 422 return -EFAULT;
423 } 423 }
424 424
@@ -434,8 +434,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
434 /* Set up to return from userspace. If provided, use a stub 434 /* Set up to return from userspace. If provided, use a stub
435 already in userspace. */ 435 already in userspace. */
436 /* x86-64 should always use SA_RESTORER. */ 436 /* x86-64 should always use SA_RESTORER. */
437 if (ka->sa.sa_flags & SA_RESTORER) { 437 if (ksig->ka.sa.sa_flags & SA_RESTORER) {
438 put_user_ex(ka->sa.sa_restorer, &frame->pretcode); 438 put_user_ex(ksig->ka.sa.sa_restorer, &frame->pretcode);
439 } else { 439 } else {
440 /* could use a vstub here */ 440 /* could use a vstub here */
441 err |= -EFAULT; 441 err |= -EFAULT;
@@ -457,7 +457,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
457 next argument after the signal number on the stack. */ 457 next argument after the signal number on the stack. */
458 regs->si = (unsigned long)&frame->info; 458 regs->si = (unsigned long)&frame->info;
459 regs->dx = (unsigned long)&frame->uc; 459 regs->dx = (unsigned long)&frame->uc;
460 regs->ip = (unsigned long) ka->sa.sa_handler; 460 regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
461 461
462 regs->sp = (unsigned long)frame; 462 regs->sp = (unsigned long)frame;
463 463
@@ -469,8 +469,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
469} 469}
470#endif /* CONFIG_X86_32 */ 470#endif /* CONFIG_X86_32 */
471 471
472static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, 472static int x32_setup_rt_frame(struct ksignal *ksig,
473 siginfo_t *info, compat_sigset_t *set, 473 compat_sigset_t *set,
474 struct pt_regs *regs) 474 struct pt_regs *regs)
475{ 475{
476#ifdef CONFIG_X86_X32_ABI 476#ifdef CONFIG_X86_X32_ABI
@@ -479,13 +479,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
479 int err = 0; 479 int err = 0;
480 void __user *fpstate = NULL; 480 void __user *fpstate = NULL;
481 481
482 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 482 frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
483 483
484 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 484 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
485 return -EFAULT; 485 return -EFAULT;
486 486
487 if (ka->sa.sa_flags & SA_SIGINFO) { 487 if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
488 if (copy_siginfo_to_user32(&frame->info, info)) 488 if (copy_siginfo_to_user32(&frame->info, &ksig->info))
489 return -EFAULT; 489 return -EFAULT;
490 } 490 }
491 491
@@ -499,8 +499,8 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
499 err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); 499 err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
500 put_user_ex(0, &frame->uc.uc__pad0); 500 put_user_ex(0, &frame->uc.uc__pad0);
501 501
502 if (ka->sa.sa_flags & SA_RESTORER) { 502 if (ksig->ka.sa.sa_flags & SA_RESTORER) {
503 restorer = ka->sa.sa_restorer; 503 restorer = ksig->ka.sa.sa_restorer;
504 } else { 504 } else {
505 /* could use a vstub here */ 505 /* could use a vstub here */
506 restorer = NULL; 506 restorer = NULL;
@@ -518,10 +518,10 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
518 518
519 /* Set up registers for signal handler */ 519 /* Set up registers for signal handler */
520 regs->sp = (unsigned long) frame; 520 regs->sp = (unsigned long) frame;
521 regs->ip = (unsigned long) ka->sa.sa_handler; 521 regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
522 522
523 /* We use the x32 calling convention here... */ 523 /* We use the x32 calling convention here... */
524 regs->di = sig; 524 regs->di = ksig->sig;
525 regs->si = (unsigned long) &frame->info; 525 regs->si = (unsigned long) &frame->info;
526 regs->dx = (unsigned long) &frame->uc; 526 regs->dx = (unsigned long) &frame->uc;
527 527
@@ -535,70 +535,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
535 return 0; 535 return 0;
536} 536}
537 537
538#ifdef CONFIG_X86_32
539/*
540 * Atomically swap in the new signal mask, and wait for a signal.
541 */
542asmlinkage int
543sys_sigsuspend(int history0, int history1, old_sigset_t mask)
544{
545 sigset_t blocked;
546 siginitset(&blocked, mask);
547 return sigsuspend(&blocked);
548}
549
550asmlinkage int
551sys_sigaction(int sig, const struct old_sigaction __user *act,
552 struct old_sigaction __user *oact)
553{
554 struct k_sigaction new_ka, old_ka;
555 int ret = 0;
556
557 if (act) {
558 old_sigset_t mask;
559
560 if (!access_ok(VERIFY_READ, act, sizeof(*act)))
561 return -EFAULT;
562
563 get_user_try {
564 get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
565 get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
566 get_user_ex(mask, &act->sa_mask);
567 get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
568 } get_user_catch(ret);
569
570 if (ret)
571 return -EFAULT;
572 siginitset(&new_ka.sa.sa_mask, mask);
573 }
574
575 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
576
577 if (!ret && oact) {
578 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
579 return -EFAULT;
580
581 put_user_try {
582 put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
583 put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
584 put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
585 put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
586 } put_user_catch(ret);
587
588 if (ret)
589 return -EFAULT;
590 }
591
592 return ret;
593}
594#endif /* CONFIG_X86_32 */
595
596/* 538/*
597 * Do a signal return; undo the signal stack. 539 * Do a signal return; undo the signal stack.
598 */ 540 */
599#ifdef CONFIG_X86_32 541#ifdef CONFIG_X86_32
600unsigned long sys_sigreturn(struct pt_regs *regs) 542unsigned long sys_sigreturn(void)
601{ 543{
544 struct pt_regs *regs = current_pt_regs();
602 struct sigframe __user *frame; 545 struct sigframe __user *frame;
603 unsigned long ax; 546 unsigned long ax;
604 sigset_t set; 547 sigset_t set;
@@ -625,8 +568,9 @@ badframe:
625} 568}
626#endif /* CONFIG_X86_32 */ 569#endif /* CONFIG_X86_32 */
627 570
628long sys_rt_sigreturn(struct pt_regs *regs) 571long sys_rt_sigreturn(void)
629{ 572{
573 struct pt_regs *regs = current_pt_regs();
630 struct rt_sigframe __user *frame; 574 struct rt_sigframe __user *frame;
631 unsigned long ax; 575 unsigned long ax;
632 sigset_t set; 576 sigset_t set;
@@ -667,30 +611,29 @@ static int signr_convert(int sig)
667} 611}
668 612
669static int 613static int
670setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 614setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
671 struct pt_regs *regs)
672{ 615{
673 int usig = signr_convert(sig); 616 int usig = signr_convert(ksig->sig);
674 sigset_t *set = sigmask_to_save(); 617 sigset_t *set = sigmask_to_save();
675 compat_sigset_t *cset = (compat_sigset_t *) set; 618 compat_sigset_t *cset = (compat_sigset_t *) set;
676 619
677 /* Set up the stack frame */ 620 /* Set up the stack frame */
678 if (is_ia32_frame()) { 621 if (is_ia32_frame()) {
679 if (ka->sa.sa_flags & SA_SIGINFO) 622 if (ksig->ka.sa.sa_flags & SA_SIGINFO)
680 return ia32_setup_rt_frame(usig, ka, info, cset, regs); 623 return ia32_setup_rt_frame(usig, ksig, cset, regs);
681 else 624 else
682 return ia32_setup_frame(usig, ka, cset, regs); 625 return ia32_setup_frame(usig, ksig, cset, regs);
683 } else if (is_x32_frame()) { 626 } else if (is_x32_frame()) {
684 return x32_setup_rt_frame(usig, ka, info, cset, regs); 627 return x32_setup_rt_frame(ksig, cset, regs);
685 } else { 628 } else {
686 return __setup_rt_frame(sig, ka, info, set, regs); 629 return __setup_rt_frame(ksig->sig, ksig, set, regs);
687 } 630 }
688} 631}
689 632
690static void 633static void
691handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, 634handle_signal(struct ksignal *ksig, struct pt_regs *regs)
692 struct pt_regs *regs)
693{ 635{
636 bool failed;
694 /* Are we from a system call? */ 637 /* Are we from a system call? */
695 if (syscall_get_nr(current, regs) >= 0) { 638 if (syscall_get_nr(current, regs) >= 0) {
696 /* If so, check system call restarting.. */ 639 /* If so, check system call restarting.. */
@@ -701,7 +644,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
701 break; 644 break;
702 645
703 case -ERESTARTSYS: 646 case -ERESTARTSYS:
704 if (!(ka->sa.sa_flags & SA_RESTART)) { 647 if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
705 regs->ax = -EINTR; 648 regs->ax = -EINTR;
706 break; 649 break;
707 } 650 }
@@ -721,26 +664,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
721 likely(test_and_clear_thread_flag(TIF_FORCED_TF))) 664 likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
722 regs->flags &= ~X86_EFLAGS_TF; 665 regs->flags &= ~X86_EFLAGS_TF;
723 666
724 if (setup_rt_frame(sig, ka, info, regs) < 0) { 667 failed = (setup_rt_frame(ksig, regs) < 0);
725 force_sigsegv(sig, current); 668 if (!failed) {
726 return; 669 /*
670 * Clear the direction flag as per the ABI for function entry.
671 */
672 regs->flags &= ~X86_EFLAGS_DF;
673 /*
674 * Clear TF when entering the signal handler, but
675 * notify any tracer that was single-stepping it.
676 * The tracer may want to single-step inside the
677 * handler too.
678 */
679 regs->flags &= ~X86_EFLAGS_TF;
727 } 680 }
728 681 signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
729 /*
730 * Clear the direction flag as per the ABI for function entry.
731 */
732 regs->flags &= ~X86_EFLAGS_DF;
733
734 /*
735 * Clear TF when entering the signal handler, but
736 * notify any tracer that was single-stepping it.
737 * The tracer may want to single-step inside the
738 * handler too.
739 */
740 regs->flags &= ~X86_EFLAGS_TF;
741
742 signal_delivered(sig, info, ka, regs,
743 test_thread_flag(TIF_SINGLESTEP));
744} 682}
745 683
746#ifdef CONFIG_X86_32 684#ifdef CONFIG_X86_32
@@ -757,14 +695,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
757 */ 695 */
758static void do_signal(struct pt_regs *regs) 696static void do_signal(struct pt_regs *regs)
759{ 697{
760 struct k_sigaction ka; 698 struct ksignal ksig;
761 siginfo_t info;
762 int signr;
763 699
764 signr = get_signal_to_deliver(&info, &ka, regs, NULL); 700 if (get_signal(&ksig)) {
765 if (signr > 0) {
766 /* Whee! Actually deliver the signal. */ 701 /* Whee! Actually deliver the signal. */
767 handle_signal(signr, &info, &ka, regs); 702 handle_signal(&ksig, regs);
768 return; 703 return;
769 } 704 }
770 705
@@ -843,8 +778,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
843} 778}
844 779
845#ifdef CONFIG_X86_X32_ABI 780#ifdef CONFIG_X86_X32_ABI
846asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) 781asmlinkage long sys32_x32_rt_sigreturn(void)
847{ 782{
783 struct pt_regs *regs = current_pt_regs();
848 struct rt_sigframe_x32 __user *frame; 784 struct rt_sigframe_x32 __user *frame;
849 sigset_t set; 785 sigset_t set;
850 unsigned long ax; 786 unsigned long ax;
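
All of the signal.c changes above feed one refactoring: the (signr, k_sigaction, siginfo_t) triple is carried in a single struct ksignal, filled in by the generic get_signal() and completed by signal_setup_done(), which also takes over the force_sigsegv()-on-failure case. A condensed sketch of the resulting delivery path; the struct layout shown follows the generic definition, but treat the whole thing as illustrative:

	/* Sketch of the ksignal-based flow used by the new do_signal()/handle_signal(). */
	struct ksignal {
		struct k_sigaction ka;
		siginfo_t info;
		int sig;
	};

	static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
	{
		bool failed = (setup_rt_frame(ksig, regs) < 0);

		/* on success clear DF/TF as before; on failure this forces SIGSEGV */
		signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
	}

	static void do_signal(struct pt_regs *regs)
	{
		struct ksignal ksig;

		if (get_signal(&ksig)) {                /* dequeue signal + fetch sigaction   */
			handle_signal(&ksig, regs);     /* set up the user-space signal frame */
			return;
		}
		/* no handler: syscall restart and saved-sigmask restore happen here */
	}
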
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ed0fe385289d..a6ceaedc396a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void)
1369 void *mwait_ptr; 1369 void *mwait_ptr;
1370 struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); 1370 struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
1371 1371
1372 if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) 1372 if (!this_cpu_has(X86_FEATURE_MWAIT))
1373 return; 1373 return;
1374 if (!this_cpu_has(X86_FEATURE_CLFLSH)) 1374 if (!this_cpu_has(X86_FEATURE_CLFLSH))
1375 return; 1375 return;
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index cd3b2438a980..9b4d51d0c0d0 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on)
165 * Ensure irq/preemption can't change debugctl in between. 165 * Ensure irq/preemption can't change debugctl in between.
166 * Note also that both TIF_BLOCKSTEP and debugctl should 166 * Note also that both TIF_BLOCKSTEP and debugctl should
167 * be changed atomically wrt preemption. 167 * be changed atomically wrt preemption.
168 * FIXME: this means that set/clear TIF_BLOCKSTEP is simply 168 *
169 * wrong if task != current, SIGKILL can wakeup the stopped 169 * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
170 * tracee and set/clear can play with the running task, this 170 * task is current or it can't be running, otherwise we can race
171 * can confuse the next __switch_to_xtra(). 171 * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
172 * PTRACE_KILL is not safe.
172 */ 173 */
173 local_irq_disable(); 174 local_irq_disable();
174 debugctl = get_debugctlmsr(); 175 debugctl = get_debugctlmsr();
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 97ef74b88e0f..dbded5aedb81 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
157 if (flags & MAP_FIXED) 157 if (flags & MAP_FIXED)
158 return addr; 158 return addr;
159 159
160 /* for MAP_32BIT mappings we force the legact mmap base */ 160 /* for MAP_32BIT mappings we force the legacy mmap base */
161 if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) 161 if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
162 goto bottomup; 162 goto bottomup;
163 163
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ecffca11f4e9..68bda7a84159 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -688,10 +688,19 @@ void __init early_trap_init(void)
688 set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); 688 set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
689 /* int3 can be called from all */ 689 /* int3 can be called from all */
690 set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); 690 set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
691#ifdef CONFIG_X86_32
691 set_intr_gate(X86_TRAP_PF, &page_fault); 692 set_intr_gate(X86_TRAP_PF, &page_fault);
693#endif
692 load_idt(&idt_descr); 694 load_idt(&idt_descr);
693} 695}
694 696
697void __init early_trap_pf_init(void)
698{
699#ifdef CONFIG_X86_64
700 set_intr_gate(X86_TRAP_PF, &page_fault);
701#endif
702}
703
695void __init trap_init(void) 704void __init trap_init(void)
696{ 705{
697 int i; 706 int i;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 06ccb5073a3f..4b9ea101fe3b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
623 ns_now = __cycles_2_ns(tsc_now); 623 ns_now = __cycles_2_ns(tsc_now);
624 624
625 if (cpu_khz) { 625 if (cpu_khz) {
626 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; 626 *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
627 cpu_khz / 2) / cpu_khz;
627 *offset = ns_now - mult_frac(tsc_now, *scale, 628 *offset = ns_now - mult_frac(tsc_now, *scale,
628 (1UL << CYC2NS_SCALE_FACTOR)); 629 (1UL << CYC2NS_SCALE_FACTOR));
629 } 630 }
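
The cyc2ns scale now rounds to the nearest value instead of truncating: adding cpu_khz/2 before dividing is the standard round-to-nearest idiom for integer division. A quick worked example, taking CYC2NS_SCALE_FACTOR as 10 and NSEC_PER_MSEC as 1000000 (both as defined in this tree); the clock speed is made up:

	/* (num + den/2) / den rounds to nearest instead of truncating.             */
	/* cpu_khz = 2928000 (a hypothetical 2.928 GHz part):                       */
	/*   truncating:  (1000000 << 10) / 2928000              = 349              */
	/*   rounded:    ((1000000 << 10) + 2928000/2) / 2928000 = 350              */
	/* The remainder (2128000) exceeds half the divisor, so rounding raises the */
	/* scale by one; plain truncation would bias the TSC->ns conversion low.    */
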
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index c71025b67462..0ba4cfb4f412 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
680 if (auprobe->insn[i] == 0x66) 680 if (auprobe->insn[i] == 0x66)
681 continue; 681 continue;
682 682
683 if (auprobe->insn[i] == 0x90) 683 if (auprobe->insn[i] == 0x90) {
684 regs->ip += i + 1;
684 return true; 685 return true;
686 }
685 687
686 break; 688 break;
687 } 689 }
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 1dfe69cc78a8..1cf5766dde16 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -202,7 +202,7 @@ out:
202static int do_vm86_irq_handling(int subfunction, int irqnumber); 202static int do_vm86_irq_handling(int subfunction, int irqnumber);
203static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); 203static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
204 204
205int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) 205int sys_vm86old(struct vm86_struct __user *v86)
206{ 206{
207 struct kernel_vm86_struct info; /* declare this _on top_, 207 struct kernel_vm86_struct info; /* declare this _on top_,
208 * this avoids wasting of stack space. 208 * this avoids wasting of stack space.
@@ -222,7 +222,7 @@ int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs)
222 if (tmp) 222 if (tmp)
223 goto out; 223 goto out;
224 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); 224 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
225 info.regs32 = regs; 225 info.regs32 = current_pt_regs();
226 tsk->thread.vm86_info = v86; 226 tsk->thread.vm86_info = v86;
227 do_sys_vm86(&info, tsk); 227 do_sys_vm86(&info, tsk);
228 ret = 0; /* we never return here */ 228 ret = 0; /* we never return here */
@@ -231,7 +231,7 @@ out:
231} 231}
232 232
233 233
234int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) 234int sys_vm86(unsigned long cmd, unsigned long arg)
235{ 235{
236 struct kernel_vm86_struct info; /* declare this _on top_, 236 struct kernel_vm86_struct info; /* declare this _on top_,
237 * this avoids wasting of stack space. 237 * this avoids wasting of stack space.
@@ -272,7 +272,7 @@ int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs)
272 ret = -EFAULT; 272 ret = -EFAULT;
273 if (tmp) 273 if (tmp)
274 goto out; 274 goto out;
275 info.regs32 = regs; 275 info.regs32 = current_pt_regs();
276 info.vm86plus.is_vm86pus = 1; 276 info.vm86plus.is_vm86pus = 1;
277 tsk->thread.vm86_info = (struct vm86_struct __user *)v86; 277 tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
278 do_sys_vm86(&info, tsk); 278 do_sys_vm86(&info, tsk);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1330dd102950..b014d9414d08 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy);
59EXPORT_SYMBOL(__memcpy); 59EXPORT_SYMBOL(__memcpy);
60EXPORT_SYMBOL(memmove); 60EXPORT_SYMBOL(memmove);
61 61
62#ifndef CONFIG_DEBUG_VIRTUAL
63EXPORT_SYMBOL(phys_base);
64#endif
62EXPORT_SYMBOL(empty_zero_page); 65EXPORT_SYMBOL(empty_zero_page);
63#ifndef CONFIG_PARAVIRT 66#ifndef CONFIG_PARAVIRT
64EXPORT_SYMBOL(native_load_gs_index); 67EXPORT_SYMBOL(native_load_gs_index);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7a3d075a814a..45a14dbbddaf 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -19,6 +19,7 @@
19#include <asm/time.h> 19#include <asm/time.h>
20#include <asm/irq.h> 20#include <asm/irq.h>
21#include <asm/io_apic.h> 21#include <asm/io_apic.h>
22#include <asm/hpet.h>
22#include <asm/pat.h> 23#include <asm/pat.h>
23#include <asm/tsc.h> 24#include <asm/tsc.h>
24#include <asm/iommu.h> 25#include <asm/iommu.h>
@@ -62,10 +63,6 @@ struct x86_init_ops x86_init __initdata = {
62 .banner = default_banner, 63 .banner = default_banner,
63 }, 64 },
64 65
65 .mapping = {
66 .pagetable_reserve = native_pagetable_reserve,
67 },
68
69 .paging = { 66 .paging = {
70 .pagetable_init = native_pagetable_init, 67 .pagetable_init = native_pagetable_init,
71 }, 68 },
@@ -111,15 +108,22 @@ struct x86_platform_ops x86_platform = {
111 108
112EXPORT_SYMBOL_GPL(x86_platform); 109EXPORT_SYMBOL_GPL(x86_platform);
113struct x86_msi_ops x86_msi = { 110struct x86_msi_ops x86_msi = {
114 .setup_msi_irqs = native_setup_msi_irqs, 111 .setup_msi_irqs = native_setup_msi_irqs,
115 .teardown_msi_irq = native_teardown_msi_irq, 112 .compose_msi_msg = native_compose_msi_msg,
116 .teardown_msi_irqs = default_teardown_msi_irqs, 113 .teardown_msi_irq = native_teardown_msi_irq,
117 .restore_msi_irqs = default_restore_msi_irqs, 114 .teardown_msi_irqs = default_teardown_msi_irqs,
115 .restore_msi_irqs = default_restore_msi_irqs,
116 .setup_hpet_msi = default_setup_hpet_msi,
118}; 117};
119 118
120struct x86_io_apic_ops x86_io_apic_ops = { 119struct x86_io_apic_ops x86_io_apic_ops = {
121 .init = native_io_apic_init_mappings, 120 .init = native_io_apic_init_mappings,
122 .read = native_io_apic_read, 121 .read = native_io_apic_read,
123 .write = native_io_apic_write, 122 .write = native_io_apic_write,
124 .modify = native_io_apic_modify, 123 .modify = native_io_apic_modify,
124 .disable = native_disable_io_apic,
125 .print_entries = native_io_apic_print_entries,
126 .set_affinity = native_ioapic_set_affinity,
127 .setup_entry = native_setup_ioapic_entry,
128 .eoi_ioapic_pin = native_eoi_ioapic_pin,
125}; 129};
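
The x86_msi and x86_io_apic_ops tables grow new hooks (compose_msi_msg, setup_hpet_msi, and the IO-APIC setup/EOI/affinity callbacks), all defaulting to the native_* implementations. These live in ops tables so that interrupt remapping, or a hypervisor layer, can patch individual pointers at boot instead of sprinkling conditionals through the IRQ code. A hedged sketch of that override pattern; my_remap_setup_msi_irqs and my_remap_modify_x86_ops are placeholders, not the real irq-remapping functions:

	/* Sketch: overriding one platform hook when IRQ remapping is in use. */
	static int my_remap_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
	{
		/* allocate remappable MSI entries instead of the native format */
		return 0;
	}

	static void __init my_remap_modify_x86_ops(void)
	{
		x86_msi.setup_msi_irqs = my_remap_setup_msi_irqs;
	}
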
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a27e76371108..a335cc6cde72 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -24,6 +24,7 @@
24#include "kvm_cache_regs.h" 24#include "kvm_cache_regs.h"
25#include <linux/module.h> 25#include <linux/module.h>
26#include <asm/kvm_emulate.h> 26#include <asm/kvm_emulate.h>
27#include <linux/stringify.h>
27 28
28#include "x86.h" 29#include "x86.h"
29#include "tss.h" 30#include "tss.h"
@@ -43,7 +44,7 @@
43#define OpCL 9ull /* CL register (for shifts) */ 44#define OpCL 9ull /* CL register (for shifts) */
44#define OpImmByte 10ull /* 8-bit sign extended immediate */ 45#define OpImmByte 10ull /* 8-bit sign extended immediate */
45#define OpOne 11ull /* Implied 1 */ 46#define OpOne 11ull /* Implied 1 */
46#define OpImm 12ull /* Sign extended immediate */ 47#define OpImm 12ull /* Sign extended up to 32-bit immediate */
47#define OpMem16 13ull /* Memory operand (16-bit). */ 48#define OpMem16 13ull /* Memory operand (16-bit). */
48#define OpMem32 14ull /* Memory operand (32-bit). */ 49#define OpMem32 14ull /* Memory operand (32-bit). */
49#define OpImmU 15ull /* Immediate operand, zero extended */ 50#define OpImmU 15ull /* Immediate operand, zero extended */
@@ -58,6 +59,7 @@
58#define OpFS 24ull /* FS */ 59#define OpFS 24ull /* FS */
59#define OpGS 25ull /* GS */ 60#define OpGS 25ull /* GS */
60#define OpMem8 26ull /* 8-bit zero extended memory operand */ 61#define OpMem8 26ull /* 8-bit zero extended memory operand */
62#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
61 63
62#define OpBits 5 /* Width of operand field */ 64#define OpBits 5 /* Width of operand field */
63#define OpMask ((1ull << OpBits) - 1) 65#define OpMask ((1ull << OpBits) - 1)
@@ -101,6 +103,7 @@
101#define SrcMemFAddr (OpMemFAddr << SrcShift) 103#define SrcMemFAddr (OpMemFAddr << SrcShift)
102#define SrcAcc (OpAcc << SrcShift) 104#define SrcAcc (OpAcc << SrcShift)
103#define SrcImmU16 (OpImmU16 << SrcShift) 105#define SrcImmU16 (OpImmU16 << SrcShift)
106#define SrcImm64 (OpImm64 << SrcShift)
104#define SrcDX (OpDX << SrcShift) 107#define SrcDX (OpDX << SrcShift)
105#define SrcMem8 (OpMem8 << SrcShift) 108#define SrcMem8 (OpMem8 << SrcShift)
106#define SrcMask (OpMask << SrcShift) 109#define SrcMask (OpMask << SrcShift)
@@ -113,6 +116,7 @@
113#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ 116#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
114#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ 117#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
115#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ 118#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
119#define Escape (5<<15) /* Escape to coprocessor instruction */
116#define Sse (1<<18) /* SSE Vector instruction */ 120#define Sse (1<<18) /* SSE Vector instruction */
117/* Generic ModRM decode. */ 121/* Generic ModRM decode. */
118#define ModRM (1<<19) 122#define ModRM (1<<19)
@@ -146,6 +150,8 @@
146#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ 150#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
147#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ 151#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
148#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ 152#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
153#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
154#define NoWrite ((u64)1 << 45) /* No writeback */
149 155
150#define X2(x...) x, x 156#define X2(x...) x, x
151#define X3(x...) X2(x), x 157#define X3(x...) X2(x), x
@@ -156,6 +162,27 @@
156#define X8(x...) X4(x), X4(x) 162#define X8(x...) X4(x), X4(x)
157#define X16(x...) X8(x), X8(x) 163#define X16(x...) X8(x), X8(x)
158 164
165#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
166#define FASTOP_SIZE 8
167
168/*
169 * fastop functions have a special calling convention:
170 *
171 * dst: [rdx]:rax (in/out)
172 * src: rbx (in/out)
173 * src2: rcx (in)
174 * flags: rflags (in/out)
175 *
176 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
177 * different operand sizes can be reached by calculation, rather than a jump
178 * table (which would be bigger than the code).
179 *
180 * fastop functions are declared as taking a never-defined fastop parameter,
181 * so they can't be called from C directly.
182 */
183
184struct fastop;
185
159struct opcode { 186struct opcode {
160 u64 flags : 56; 187 u64 flags : 56;
161 u64 intercept : 8; 188 u64 intercept : 8;
@@ -164,6 +191,8 @@ struct opcode {
164 const struct opcode *group; 191 const struct opcode *group;
165 const struct group_dual *gdual; 192 const struct group_dual *gdual;
166 const struct gprefix *gprefix; 193 const struct gprefix *gprefix;
194 const struct escape *esc;
195 void (*fastop)(struct fastop *fake);
167 } u; 196 } u;
168 int (*check_perm)(struct x86_emulate_ctxt *ctxt); 197 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
169}; 198};
@@ -180,6 +209,11 @@ struct gprefix {
180 struct opcode pfx_f3; 209 struct opcode pfx_f3;
181}; 210};
182 211
212struct escape {
213 struct opcode op[8];
214 struct opcode high[64];
215};
216
183/* EFLAGS bit definitions. */ 217/* EFLAGS bit definitions. */
184#define EFLG_ID (1<<21) 218#define EFLG_ID (1<<21)
185#define EFLG_VIP (1<<20) 219#define EFLG_VIP (1<<20)
@@ -407,6 +441,97 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
407 } \ 441 } \
408 } while (0) 442 } while (0)
409 443
444static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
445
446#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
447#define FOP_RET "ret \n\t"
448
449#define FOP_START(op) \
450 extern void em_##op(struct fastop *fake); \
451 asm(".pushsection .text, \"ax\" \n\t" \
452 ".global em_" #op " \n\t" \
453 FOP_ALIGN \
454 "em_" #op ": \n\t"
455
456#define FOP_END \
457 ".popsection")
458
459#define FOPNOP() FOP_ALIGN FOP_RET
460
461#define FOP1E(op, dst) \
462 FOP_ALIGN #op " %" #dst " \n\t" FOP_RET
463
464#define FASTOP1(op) \
465 FOP_START(op) \
466 FOP1E(op##b, al) \
467 FOP1E(op##w, ax) \
468 FOP1E(op##l, eax) \
469 ON64(FOP1E(op##q, rax)) \
470 FOP_END
471
472#define FOP2E(op, dst, src) \
473 FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET
474
475#define FASTOP2(op) \
476 FOP_START(op) \
477 FOP2E(op##b, al, bl) \
478 FOP2E(op##w, ax, bx) \
479 FOP2E(op##l, eax, ebx) \
480 ON64(FOP2E(op##q, rax, rbx)) \
481 FOP_END
482
483/* 2 operand, word only */
484#define FASTOP2W(op) \
485 FOP_START(op) \
486 FOPNOP() \
487 FOP2E(op##w, ax, bx) \
488 FOP2E(op##l, eax, ebx) \
489 ON64(FOP2E(op##q, rax, rbx)) \
490 FOP_END
491
492/* 2 operand, src is CL */
493#define FASTOP2CL(op) \
494 FOP_START(op) \
495 FOP2E(op##b, al, cl) \
496 FOP2E(op##w, ax, cl) \
497 FOP2E(op##l, eax, cl) \
498 ON64(FOP2E(op##q, rax, cl)) \
499 FOP_END
500
501#define FOP3E(op, dst, src, src2) \
502 FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
503
504/* 3-operand, word-only, src2=cl */
505#define FASTOP3WCL(op) \
506 FOP_START(op) \
507 FOPNOP() \
508 FOP3E(op##w, ax, bx, cl) \
509 FOP3E(op##l, eax, ebx, cl) \
510 ON64(FOP3E(op##q, rax, rbx, cl)) \
511 FOP_END
512
513/* Special case for SETcc - 1 instruction per cc */
514#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
515
516FOP_START(setcc)
517FOP_SETCC(seto)
518FOP_SETCC(setno)
519FOP_SETCC(setc)
520FOP_SETCC(setnc)
521FOP_SETCC(setz)
522FOP_SETCC(setnz)
523FOP_SETCC(setbe)
524FOP_SETCC(setnbe)
525FOP_SETCC(sets)
526FOP_SETCC(setns)
527FOP_SETCC(setp)
528FOP_SETCC(setnp)
529FOP_SETCC(setl)
530FOP_SETCC(setnl)
531FOP_SETCC(setle)
532FOP_SETCC(setnle)
533FOP_END;
534
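Because every FOP1E/FOP2E/FOP3E stub above is padded to FASTOP_SIZE (8) bytes, the emulator can reach the byte/word/long/quad variant of an operation by address arithmetic instead of a jump table: the entry used is em_op + __ffs(operand_bytes) * FASTOP_SIZE, and the call then goes through inline asm that loads the dst/src registers and swaps RFLAGS in and out. A stand-alone sketch of just the offset calculation (plain C, no inline asm, illustrative only):

    #include <assert.h>
    #include <stdio.h>

    #define FASTOP_SIZE 8

    /* log2 of the operand size in bytes: 1->0, 2->1, 4->2, 8->3 */
    static unsigned int ffs_log2(unsigned int bytes)
    {
        unsigned int n = 0;
        while (!(bytes & 1)) {
            bytes >>= 1;
            n++;
        }
        return n;
    }

    int main(void)
    {
        unsigned int sizes[] = { 1, 2, 4, 8 };
        for (unsigned int i = 0; i < 4; i++) {
            unsigned int off = ffs_log2(sizes[i]) * FASTOP_SIZE;
            printf("%u-byte operand -> stub offset %u\n", sizes[i], off);
        }
        /* byte, word, long and quad stubs land at offsets 0, 8, 16 and 24 */
        assert(ffs_log2(8) * FASTOP_SIZE == 24);
        return 0;
    }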
410#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ 535#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \
411 do { \ 536 do { \
412 unsigned long _tmp; \ 537 unsigned long _tmp; \
@@ -663,7 +788,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
663 ulong la; 788 ulong la;
664 u32 lim; 789 u32 lim;
665 u16 sel; 790 u16 sel;
666 unsigned cpl, rpl; 791 unsigned cpl;
667 792
668 la = seg_base(ctxt, addr.seg) + addr.ea; 793 la = seg_base(ctxt, addr.seg) + addr.ea;
669 switch (ctxt->mode) { 794 switch (ctxt->mode) {
@@ -697,11 +822,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
697 goto bad; 822 goto bad;
698 } 823 }
699 cpl = ctxt->ops->cpl(ctxt); 824 cpl = ctxt->ops->cpl(ctxt);
700 if (ctxt->mode == X86EMUL_MODE_REAL)
701 rpl = 0;
702 else
703 rpl = sel & 3;
704 cpl = max(cpl, rpl);
705 if (!(desc.type & 8)) { 825 if (!(desc.type & 8)) {
706 /* data segment */ 826 /* data segment */
707 if (cpl > desc.dpl) 827 if (cpl > desc.dpl)
@@ -852,39 +972,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
852 return rc; 972 return rc;
853} 973}
854 974
855static int test_cc(unsigned int condition, unsigned int flags) 975FASTOP2(add);
856{ 976FASTOP2(or);
857 int rc = 0; 977FASTOP2(adc);
858 978FASTOP2(sbb);
859 switch ((condition & 15) >> 1) { 979FASTOP2(and);
860 case 0: /* o */ 980FASTOP2(sub);
861 rc |= (flags & EFLG_OF); 981FASTOP2(xor);
862 break; 982FASTOP2(cmp);
863 case 1: /* b/c/nae */ 983FASTOP2(test);
864 rc |= (flags & EFLG_CF); 984
865 break; 985FASTOP3WCL(shld);
866 case 2: /* z/e */ 986FASTOP3WCL(shrd);
867 rc |= (flags & EFLG_ZF); 987
868 break; 988FASTOP2W(imul);
869 case 3: /* be/na */ 989
870 rc |= (flags & (EFLG_CF|EFLG_ZF)); 990FASTOP1(not);
871 break; 991FASTOP1(neg);
872 case 4: /* s */ 992FASTOP1(inc);
873 rc |= (flags & EFLG_SF); 993FASTOP1(dec);
874 break; 994
875 case 5: /* p/pe */ 995FASTOP2CL(rol);
876 rc |= (flags & EFLG_PF); 996FASTOP2CL(ror);
877 break; 997FASTOP2CL(rcl);
878 case 7: /* le/ng */ 998FASTOP2CL(rcr);
879 rc |= (flags & EFLG_ZF); 999FASTOP2CL(shl);
880 /* fall through */ 1000FASTOP2CL(shr);
881 case 6: /* l/nge */ 1001FASTOP2CL(sar);
882 rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); 1002
883 break; 1003FASTOP2W(bsf);
884 } 1004FASTOP2W(bsr);
885 1005FASTOP2W(bt);
886 /* Odd condition identifiers (lsb == 1) have inverted sense. */ 1006FASTOP2W(bts);
887 return (!!rc ^ (condition & 1)); 1007FASTOP2W(btr);
1008FASTOP2W(btc);
1009
1010static u8 test_cc(unsigned int condition, unsigned long flags)
1011{
1012 u8 rc;
1013 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1014
1015 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1016 asm("push %[flags]; popf; call *%[fastop]"
1017 : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
1018 return rc;
888} 1019}
889 1020
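The new test_cc() indexes into em_setcc at a stride of 4 bytes per condition, so the hardware SETcc instruction itself evaluates the predicate against the guest flags. For reference, a plain-C version of the same condition table (what each condition nibble means in terms of OF/CF/ZF/SF/PF); this mirrors the switch being deleted and is not the code the emulator runs:

    #include <stdbool.h>
    #include <stdio.h>

    #define FL_CF (1u << 0)
    #define FL_PF (1u << 2)
    #define FL_ZF (1u << 6)
    #define FL_SF (1u << 7)
    #define FL_OF (1u << 11)

    /* Reference evaluation of the x86 condition codes 0x0-0xf. */
    static bool cc_true(unsigned int cond, unsigned int flags)
    {
        bool r;

        switch ((cond & 15) >> 1) {
        case 0: r = flags & FL_OF; break;                        /* o  */
        case 1: r = flags & FL_CF; break;                        /* b  */
        case 2: r = flags & FL_ZF; break;                        /* z  */
        case 3: r = flags & (FL_CF | FL_ZF); break;              /* be */
        case 4: r = flags & FL_SF; break;                        /* s  */
        case 5: r = flags & FL_PF; break;                        /* p  */
        case 6: r = !(flags & FL_SF) != !(flags & FL_OF); break; /* l  */
        default: /* 7: le = ZF || (SF != OF) */
            r = (flags & FL_ZF) ||
                (!(flags & FL_SF) != !(flags & FL_OF));
            break;
        }
        /* odd condition numbers are the negated forms (no, ae, ne, ...) */
        return r ^ (cond & 1);
    }

    int main(void)
    {
        printf("jz  with ZF set: %d\n", cc_true(0x4, FL_ZF)); /* 1 */
        printf("jnz with ZF set: %d\n", cc_true(0x5, FL_ZF)); /* 0 */
        printf("jl  with SF!=OF: %d\n", cc_true(0xc, FL_SF)); /* 1 */
        return 0;
    }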
890static void fetch_register_operand(struct operand *op) 1021static void fetch_register_operand(struct operand *op)
@@ -994,6 +1125,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
994 ctxt->ops->put_fpu(ctxt); 1125 ctxt->ops->put_fpu(ctxt);
995} 1126}
996 1127
1128static int em_fninit(struct x86_emulate_ctxt *ctxt)
1129{
1130 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1131 return emulate_nm(ctxt);
1132
1133 ctxt->ops->get_fpu(ctxt);
1134 asm volatile("fninit");
1135 ctxt->ops->put_fpu(ctxt);
1136 return X86EMUL_CONTINUE;
1137}
1138
1139static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1140{
1141 u16 fcw;
1142
1143 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1144 return emulate_nm(ctxt);
1145
1146 ctxt->ops->get_fpu(ctxt);
1147 asm volatile("fnstcw %0": "+m"(fcw));
1148 ctxt->ops->put_fpu(ctxt);
1149
1150 /* force 2 byte destination */
1151 ctxt->dst.bytes = 2;
1152 ctxt->dst.val = fcw;
1153
1154 return X86EMUL_CONTINUE;
1155}
1156
1157static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1158{
1159 u16 fsw;
1160
1161 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1162 return emulate_nm(ctxt);
1163
1164 ctxt->ops->get_fpu(ctxt);
1165 asm volatile("fnstsw %0": "+m"(fsw));
1166 ctxt->ops->put_fpu(ctxt);
1167
1168 /* force 2 byte destination */
1169 ctxt->dst.bytes = 2;
1170 ctxt->dst.val = fsw;
1171
1172 return X86EMUL_CONTINUE;
1173}
1174
997static void decode_register_operand(struct x86_emulate_ctxt *ctxt, 1175static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
998 struct operand *op) 1176 struct operand *op)
999{ 1177{
@@ -1534,6 +1712,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
1534{ 1712{
1535 int rc; 1713 int rc;
1536 1714
1715 if (ctxt->d & NoWrite)
1716 return X86EMUL_CONTINUE;
1717
1537 switch (ctxt->dst.type) { 1718 switch (ctxt->dst.type) {
1538 case OP_REG: 1719 case OP_REG:
1539 write_register_operand(&ctxt->dst); 1720 write_register_operand(&ctxt->dst);
@@ -1918,47 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
1918 return X86EMUL_CONTINUE; 2099 return X86EMUL_CONTINUE;
1919} 2100}
1920 2101
1921static int em_grp2(struct x86_emulate_ctxt *ctxt)
1922{
1923 switch (ctxt->modrm_reg) {
1924 case 0: /* rol */
1925 emulate_2op_SrcB(ctxt, "rol");
1926 break;
1927 case 1: /* ror */
1928 emulate_2op_SrcB(ctxt, "ror");
1929 break;
1930 case 2: /* rcl */
1931 emulate_2op_SrcB(ctxt, "rcl");
1932 break;
1933 case 3: /* rcr */
1934 emulate_2op_SrcB(ctxt, "rcr");
1935 break;
1936 case 4: /* sal/shl */
1937 case 6: /* sal/shl */
1938 emulate_2op_SrcB(ctxt, "sal");
1939 break;
1940 case 5: /* shr */
1941 emulate_2op_SrcB(ctxt, "shr");
1942 break;
1943 case 7: /* sar */
1944 emulate_2op_SrcB(ctxt, "sar");
1945 break;
1946 }
1947 return X86EMUL_CONTINUE;
1948}
1949
1950static int em_not(struct x86_emulate_ctxt *ctxt)
1951{
1952 ctxt->dst.val = ~ctxt->dst.val;
1953 return X86EMUL_CONTINUE;
1954}
1955
1956static int em_neg(struct x86_emulate_ctxt *ctxt)
1957{
1958 emulate_1op(ctxt, "neg");
1959 return X86EMUL_CONTINUE;
1960}
1961
1962static int em_mul_ex(struct x86_emulate_ctxt *ctxt) 2102static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
1963{ 2103{
1964 u8 ex = 0; 2104 u8 ex = 0;
@@ -2000,12 +2140,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
2000 int rc = X86EMUL_CONTINUE; 2140 int rc = X86EMUL_CONTINUE;
2001 2141
2002 switch (ctxt->modrm_reg) { 2142 switch (ctxt->modrm_reg) {
2003 case 0: /* inc */
2004 emulate_1op(ctxt, "inc");
2005 break;
2006 case 1: /* dec */
2007 emulate_1op(ctxt, "dec");
2008 break;
2009 case 2: /* call near abs */ { 2143 case 2: /* call near abs */ {
2010 long int old_eip; 2144 long int old_eip;
2011 old_eip = ctxt->_eip; 2145 old_eip = ctxt->_eip;
@@ -2075,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2075 /* Save real source value, then compare EAX against destination. */ 2209 /* Save real source value, then compare EAX against destination. */
2076 ctxt->src.orig_val = ctxt->src.val; 2210 ctxt->src.orig_val = ctxt->src.val;
2077 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); 2211 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
2078 emulate_2op_SrcV(ctxt, "cmp"); 2212 fastop(ctxt, em_cmp);
2079 2213
2080 if (ctxt->eflags & EFLG_ZF) { 2214 if (ctxt->eflags & EFLG_ZF) {
2081 /* Success: write back to memory. */ 2215 /* Success: write back to memory. */
@@ -2843,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
2843 ctxt->src.type = OP_IMM; 2977 ctxt->src.type = OP_IMM;
2844 ctxt->src.val = 0; 2978 ctxt->src.val = 0;
2845 ctxt->src.bytes = 1; 2979 ctxt->src.bytes = 1;
2846 emulate_2op_SrcV(ctxt, "or"); 2980 fastop(ctxt, em_or);
2847 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); 2981 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
2848 if (cf) 2982 if (cf)
2849 ctxt->eflags |= X86_EFLAGS_CF; 2983 ctxt->eflags |= X86_EFLAGS_CF;
@@ -2852,6 +2986,24 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
2852 return X86EMUL_CONTINUE; 2986 return X86EMUL_CONTINUE;
2853} 2987}
2854 2988
2989static int em_aad(struct x86_emulate_ctxt *ctxt)
2990{
2991 u8 al = ctxt->dst.val & 0xff;
2992 u8 ah = (ctxt->dst.val >> 8) & 0xff;
2993
2994 al = (al + (ah * ctxt->src.val)) & 0xff;
2995
2996 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
2997
2998 /* Set PF, ZF, SF */
2999 ctxt->src.type = OP_IMM;
3000 ctxt->src.val = 0;
3001 ctxt->src.bytes = 1;
3002 fastop(ctxt, em_or);
3003
3004 return X86EMUL_CONTINUE;
3005}
3006
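em_aad() implements the AAD semantics AL = (AL + AH * imm8) & 0xff with AH cleared, then reuses the OR fastop with a zero source purely to set PF/ZF/SF from the result. A quick arithmetic check of the AL/AH update, using the usual default base of 10 (illustrative only, not the emulator's code path):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* AAD: fold the unpacked BCD digits in AH:AL into a binary value in AL. */
    static uint16_t aad(uint16_t ax, uint8_t base)
    {
        uint8_t al = ax & 0xff;
        uint8_t ah = (ax >> 8) & 0xff;

        al = (uint8_t)(al + ah * base);
        return al;            /* AH becomes 0 */
    }

    int main(void)
    {
        /* AH=3, AL=7 with base 10 -> 37 */
        assert(aad(0x0307, 10) == 37);
        printf("aad(0x0307, 10) = %u\n", aad(0x0307, 10));
        return 0;
    }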
2855static int em_call(struct x86_emulate_ctxt *ctxt) 3007static int em_call(struct x86_emulate_ctxt *ctxt)
2856{ 3008{
2857 long rel = ctxt->src.val; 3009 long rel = ctxt->src.val;
@@ -2900,64 +3052,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2900 return X86EMUL_CONTINUE; 3052 return X86EMUL_CONTINUE;
2901} 3053}
2902 3054
2903static int em_add(struct x86_emulate_ctxt *ctxt)
2904{
2905 emulate_2op_SrcV(ctxt, "add");
2906 return X86EMUL_CONTINUE;
2907}
2908
2909static int em_or(struct x86_emulate_ctxt *ctxt)
2910{
2911 emulate_2op_SrcV(ctxt, "or");
2912 return X86EMUL_CONTINUE;
2913}
2914
2915static int em_adc(struct x86_emulate_ctxt *ctxt)
2916{
2917 emulate_2op_SrcV(ctxt, "adc");
2918 return X86EMUL_CONTINUE;
2919}
2920
2921static int em_sbb(struct x86_emulate_ctxt *ctxt)
2922{
2923 emulate_2op_SrcV(ctxt, "sbb");
2924 return X86EMUL_CONTINUE;
2925}
2926
2927static int em_and(struct x86_emulate_ctxt *ctxt)
2928{
2929 emulate_2op_SrcV(ctxt, "and");
2930 return X86EMUL_CONTINUE;
2931}
2932
2933static int em_sub(struct x86_emulate_ctxt *ctxt)
2934{
2935 emulate_2op_SrcV(ctxt, "sub");
2936 return X86EMUL_CONTINUE;
2937}
2938
2939static int em_xor(struct x86_emulate_ctxt *ctxt)
2940{
2941 emulate_2op_SrcV(ctxt, "xor");
2942 return X86EMUL_CONTINUE;
2943}
2944
2945static int em_cmp(struct x86_emulate_ctxt *ctxt)
2946{
2947 emulate_2op_SrcV(ctxt, "cmp");
2948 /* Disable writeback. */
2949 ctxt->dst.type = OP_NONE;
2950 return X86EMUL_CONTINUE;
2951}
2952
2953static int em_test(struct x86_emulate_ctxt *ctxt)
2954{
2955 emulate_2op_SrcV(ctxt, "test");
2956 /* Disable writeback. */
2957 ctxt->dst.type = OP_NONE;
2958 return X86EMUL_CONTINUE;
2959}
2960
2961static int em_xchg(struct x86_emulate_ctxt *ctxt) 3055static int em_xchg(struct x86_emulate_ctxt *ctxt)
2962{ 3056{
2963 /* Write back the register source. */ 3057 /* Write back the register source. */
@@ -2970,16 +3064,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt)
2970 return X86EMUL_CONTINUE; 3064 return X86EMUL_CONTINUE;
2971} 3065}
2972 3066
2973static int em_imul(struct x86_emulate_ctxt *ctxt)
2974{
2975 emulate_2op_SrcV_nobyte(ctxt, "imul");
2976 return X86EMUL_CONTINUE;
2977}
2978
2979static int em_imul_3op(struct x86_emulate_ctxt *ctxt) 3067static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
2980{ 3068{
2981 ctxt->dst.val = ctxt->src2.val; 3069 ctxt->dst.val = ctxt->src2.val;
2982 return em_imul(ctxt); 3070 return fastop(ctxt, em_imul);
2983} 3071}
2984 3072
2985static int em_cwd(struct x86_emulate_ctxt *ctxt) 3073static int em_cwd(struct x86_emulate_ctxt *ctxt)
@@ -3300,47 +3388,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
3300 return X86EMUL_CONTINUE; 3388 return X86EMUL_CONTINUE;
3301} 3389}
3302 3390
3303static int em_bt(struct x86_emulate_ctxt *ctxt)
3304{
3305 /* Disable writeback. */
3306 ctxt->dst.type = OP_NONE;
3307 /* only subword offset */
3308 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
3309
3310 emulate_2op_SrcV_nobyte(ctxt, "bt");
3311 return X86EMUL_CONTINUE;
3312}
3313
3314static int em_bts(struct x86_emulate_ctxt *ctxt)
3315{
3316 emulate_2op_SrcV_nobyte(ctxt, "bts");
3317 return X86EMUL_CONTINUE;
3318}
3319
3320static int em_btr(struct x86_emulate_ctxt *ctxt)
3321{
3322 emulate_2op_SrcV_nobyte(ctxt, "btr");
3323 return X86EMUL_CONTINUE;
3324}
3325
3326static int em_btc(struct x86_emulate_ctxt *ctxt)
3327{
3328 emulate_2op_SrcV_nobyte(ctxt, "btc");
3329 return X86EMUL_CONTINUE;
3330}
3331
3332static int em_bsf(struct x86_emulate_ctxt *ctxt)
3333{
3334 emulate_2op_SrcV_nobyte(ctxt, "bsf");
3335 return X86EMUL_CONTINUE;
3336}
3337
3338static int em_bsr(struct x86_emulate_ctxt *ctxt)
3339{
3340 emulate_2op_SrcV_nobyte(ctxt, "bsr");
3341 return X86EMUL_CONTINUE;
3342}
3343
3344static int em_cpuid(struct x86_emulate_ctxt *ctxt) 3391static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3345{ 3392{
3346 u32 eax, ebx, ecx, edx; 3393 u32 eax, ebx, ecx, edx;
@@ -3572,7 +3619,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3572#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } 3619#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3573#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } 3620#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3574#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } 3621#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3622#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3575#define I(_f, _e) { .flags = (_f), .u.execute = (_e) } 3623#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3624#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
3576#define II(_f, _e, _i) \ 3625#define II(_f, _e, _i) \
3577 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } 3626 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
3578#define IIP(_f, _e, _i, _p) \ 3627#define IIP(_f, _e, _i, _p) \
@@ -3583,12 +3632,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3583#define D2bv(_f) D((_f) | ByteOp), D(_f) 3632#define D2bv(_f) D((_f) | ByteOp), D(_f)
3584#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) 3633#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
3585#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) 3634#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
3635#define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
3586#define I2bvIP(_f, _e, _i, _p) \ 3636#define I2bvIP(_f, _e, _i, _p) \
3587 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) 3637 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
3588 3638
3589#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ 3639#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
3590 I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ 3640 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
3591 I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 3641 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
3592 3642
3593static const struct opcode group7_rm1[] = { 3643static const struct opcode group7_rm1[] = {
3594 DI(SrcNone | Priv, monitor), 3644 DI(SrcNone | Priv, monitor),
@@ -3614,25 +3664,36 @@ static const struct opcode group7_rm7[] = {
3614}; 3664};
3615 3665
3616static const struct opcode group1[] = { 3666static const struct opcode group1[] = {
3617 I(Lock, em_add), 3667 F(Lock, em_add),
3618 I(Lock | PageTable, em_or), 3668 F(Lock | PageTable, em_or),
3619 I(Lock, em_adc), 3669 F(Lock, em_adc),
3620 I(Lock, em_sbb), 3670 F(Lock, em_sbb),
3621 I(Lock | PageTable, em_and), 3671 F(Lock | PageTable, em_and),
3622 I(Lock, em_sub), 3672 F(Lock, em_sub),
3623 I(Lock, em_xor), 3673 F(Lock, em_xor),
3624 I(0, em_cmp), 3674 F(NoWrite, em_cmp),
3625}; 3675};
3626 3676
3627static const struct opcode group1A[] = { 3677static const struct opcode group1A[] = {
3628 I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, 3678 I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
3629}; 3679};
3630 3680
3681static const struct opcode group2[] = {
3682 F(DstMem | ModRM, em_rol),
3683 F(DstMem | ModRM, em_ror),
3684 F(DstMem | ModRM, em_rcl),
3685 F(DstMem | ModRM, em_rcr),
3686 F(DstMem | ModRM, em_shl),
3687 F(DstMem | ModRM, em_shr),
3688 F(DstMem | ModRM, em_shl),
3689 F(DstMem | ModRM, em_sar),
3690};
3691
3631static const struct opcode group3[] = { 3692static const struct opcode group3[] = {
3632 I(DstMem | SrcImm, em_test), 3693 F(DstMem | SrcImm | NoWrite, em_test),
3633 I(DstMem | SrcImm, em_test), 3694 F(DstMem | SrcImm | NoWrite, em_test),
3634 I(DstMem | SrcNone | Lock, em_not), 3695 F(DstMem | SrcNone | Lock, em_not),
3635 I(DstMem | SrcNone | Lock, em_neg), 3696 F(DstMem | SrcNone | Lock, em_neg),
3636 I(SrcMem, em_mul_ex), 3697 I(SrcMem, em_mul_ex),
3637 I(SrcMem, em_imul_ex), 3698 I(SrcMem, em_imul_ex),
3638 I(SrcMem, em_div_ex), 3699 I(SrcMem, em_div_ex),
@@ -3640,14 +3701,14 @@ static const struct opcode group3[] = {
3640}; 3701};
3641 3702
3642static const struct opcode group4[] = { 3703static const struct opcode group4[] = {
3643 I(ByteOp | DstMem | SrcNone | Lock, em_grp45), 3704 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
3644 I(ByteOp | DstMem | SrcNone | Lock, em_grp45), 3705 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
3645 N, N, N, N, N, N, 3706 N, N, N, N, N, N,
3646}; 3707};
3647 3708
3648static const struct opcode group5[] = { 3709static const struct opcode group5[] = {
3649 I(DstMem | SrcNone | Lock, em_grp45), 3710 F(DstMem | SrcNone | Lock, em_inc),
3650 I(DstMem | SrcNone | Lock, em_grp45), 3711 F(DstMem | SrcNone | Lock, em_dec),
3651 I(SrcMem | Stack, em_grp45), 3712 I(SrcMem | Stack, em_grp45),
3652 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), 3713 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
3653 I(SrcMem | Stack, em_grp45), 3714 I(SrcMem | Stack, em_grp45),
@@ -3682,10 +3743,10 @@ static const struct group_dual group7 = { {
3682 3743
3683static const struct opcode group8[] = { 3744static const struct opcode group8[] = {
3684 N, N, N, N, 3745 N, N, N, N,
3685 I(DstMem | SrcImmByte, em_bt), 3746 F(DstMem | SrcImmByte | NoWrite, em_bt),
3686 I(DstMem | SrcImmByte | Lock | PageTable, em_bts), 3747 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
3687 I(DstMem | SrcImmByte | Lock, em_btr), 3748 F(DstMem | SrcImmByte | Lock, em_btr),
3688 I(DstMem | SrcImmByte | Lock | PageTable, em_btc), 3749 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
3689}; 3750};
3690 3751
3691static const struct group_dual group9 = { { 3752static const struct group_dual group9 = { {
@@ -3707,33 +3768,96 @@ static const struct gprefix pfx_vmovntpx = {
3707 I(0, em_mov), N, N, N, 3768 I(0, em_mov), N, N, N,
3708}; 3769};
3709 3770
3771static const struct escape escape_d9 = { {
3772 N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
3773}, {
3774 /* 0xC0 - 0xC7 */
3775 N, N, N, N, N, N, N, N,
3776 /* 0xC8 - 0xCF */
3777 N, N, N, N, N, N, N, N,
 3778 /* 0xD0 - 0xD7 */
3779 N, N, N, N, N, N, N, N,
3780 /* 0xD8 - 0xDF */
3781 N, N, N, N, N, N, N, N,
3782 /* 0xE0 - 0xE7 */
3783 N, N, N, N, N, N, N, N,
3784 /* 0xE8 - 0xEF */
3785 N, N, N, N, N, N, N, N,
3786 /* 0xF0 - 0xF7 */
3787 N, N, N, N, N, N, N, N,
3788 /* 0xF8 - 0xFF */
3789 N, N, N, N, N, N, N, N,
3790} };
3791
3792static const struct escape escape_db = { {
3793 N, N, N, N, N, N, N, N,
3794}, {
3795 /* 0xC0 - 0xC7 */
3796 N, N, N, N, N, N, N, N,
3797 /* 0xC8 - 0xCF */
3798 N, N, N, N, N, N, N, N,
 3799 /* 0xD0 - 0xD7 */
3800 N, N, N, N, N, N, N, N,
3801 /* 0xD8 - 0xDF */
3802 N, N, N, N, N, N, N, N,
3803 /* 0xE0 - 0xE7 */
3804 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
3805 /* 0xE8 - 0xEF */
3806 N, N, N, N, N, N, N, N,
3807 /* 0xF0 - 0xF7 */
3808 N, N, N, N, N, N, N, N,
3809 /* 0xF8 - 0xFF */
3810 N, N, N, N, N, N, N, N,
3811} };
3812
3813static const struct escape escape_dd = { {
3814 N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
3815}, {
3816 /* 0xC0 - 0xC7 */
3817 N, N, N, N, N, N, N, N,
3818 /* 0xC8 - 0xCF */
3819 N, N, N, N, N, N, N, N,
 3820 /* 0xD0 - 0xD7 */
3821 N, N, N, N, N, N, N, N,
3822 /* 0xD8 - 0xDF */
3823 N, N, N, N, N, N, N, N,
3824 /* 0xE0 - 0xE7 */
3825 N, N, N, N, N, N, N, N,
3826 /* 0xE8 - 0xEF */
3827 N, N, N, N, N, N, N, N,
3828 /* 0xF0 - 0xF7 */
3829 N, N, N, N, N, N, N, N,
3830 /* 0xF8 - 0xFF */
3831 N, N, N, N, N, N, N, N,
3832} };
3833
3710static const struct opcode opcode_table[256] = { 3834static const struct opcode opcode_table[256] = {
3711 /* 0x00 - 0x07 */ 3835 /* 0x00 - 0x07 */
3712 I6ALU(Lock, em_add), 3836 F6ALU(Lock, em_add),
3713 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), 3837 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
3714 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), 3838 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
3715 /* 0x08 - 0x0F */ 3839 /* 0x08 - 0x0F */
3716 I6ALU(Lock | PageTable, em_or), 3840 F6ALU(Lock | PageTable, em_or),
3717 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), 3841 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
3718 N, 3842 N,
3719 /* 0x10 - 0x17 */ 3843 /* 0x10 - 0x17 */
3720 I6ALU(Lock, em_adc), 3844 F6ALU(Lock, em_adc),
3721 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), 3845 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
3722 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), 3846 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
3723 /* 0x18 - 0x1F */ 3847 /* 0x18 - 0x1F */
3724 I6ALU(Lock, em_sbb), 3848 F6ALU(Lock, em_sbb),
3725 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), 3849 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
3726 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), 3850 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
3727 /* 0x20 - 0x27 */ 3851 /* 0x20 - 0x27 */
3728 I6ALU(Lock | PageTable, em_and), N, N, 3852 F6ALU(Lock | PageTable, em_and), N, N,
3729 /* 0x28 - 0x2F */ 3853 /* 0x28 - 0x2F */
3730 I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), 3854 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
3731 /* 0x30 - 0x37 */ 3855 /* 0x30 - 0x37 */
3732 I6ALU(Lock, em_xor), N, N, 3856 F6ALU(Lock, em_xor), N, N,
3733 /* 0x38 - 0x3F */ 3857 /* 0x38 - 0x3F */
3734 I6ALU(0, em_cmp), N, N, 3858 F6ALU(NoWrite, em_cmp), N, N,
3735 /* 0x40 - 0x4F */ 3859 /* 0x40 - 0x4F */
3736 X16(D(DstReg)), 3860 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
3737 /* 0x50 - 0x57 */ 3861 /* 0x50 - 0x57 */
3738 X8(I(SrcReg | Stack, em_push)), 3862 X8(I(SrcReg | Stack, em_push)),
3739 /* 0x58 - 0x5F */ 3863 /* 0x58 - 0x5F */
@@ -3757,7 +3881,7 @@ static const struct opcode opcode_table[256] = {
3757 G(DstMem | SrcImm, group1), 3881 G(DstMem | SrcImm, group1),
3758 G(ByteOp | DstMem | SrcImm | No64, group1), 3882 G(ByteOp | DstMem | SrcImm | No64, group1),
3759 G(DstMem | SrcImmByte, group1), 3883 G(DstMem | SrcImmByte, group1),
3760 I2bv(DstMem | SrcReg | ModRM, em_test), 3884 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
3761 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), 3885 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
3762 /* 0x88 - 0x8F */ 3886 /* 0x88 - 0x8F */
3763 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), 3887 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
@@ -3777,18 +3901,18 @@ static const struct opcode opcode_table[256] = {
3777 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3901 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
3778 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), 3902 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
3779 I2bv(SrcSI | DstDI | Mov | String, em_mov), 3903 I2bv(SrcSI | DstDI | Mov | String, em_mov),
3780 I2bv(SrcSI | DstDI | String, em_cmp), 3904 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
3781 /* 0xA8 - 0xAF */ 3905 /* 0xA8 - 0xAF */
3782 I2bv(DstAcc | SrcImm, em_test), 3906 F2bv(DstAcc | SrcImm | NoWrite, em_test),
3783 I2bv(SrcAcc | DstDI | Mov | String, em_mov), 3907 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
3784 I2bv(SrcSI | DstAcc | Mov | String, em_mov), 3908 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
3785 I2bv(SrcAcc | DstDI | String, em_cmp), 3909 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
3786 /* 0xB0 - 0xB7 */ 3910 /* 0xB0 - 0xB7 */
3787 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), 3911 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
3788 /* 0xB8 - 0xBF */ 3912 /* 0xB8 - 0xBF */
3789 X8(I(DstReg | SrcImm | Mov, em_mov)), 3913 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
3790 /* 0xC0 - 0xC7 */ 3914 /* 0xC0 - 0xC7 */
3791 D2bv(DstMem | SrcImmByte | ModRM), 3915 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
3792 I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), 3916 I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
3793 I(ImplicitOps | Stack, em_ret), 3917 I(ImplicitOps | Stack, em_ret),
3794 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), 3918 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
@@ -3800,10 +3924,11 @@ static const struct opcode opcode_table[256] = {
3800 D(ImplicitOps), DI(SrcImmByte, intn), 3924 D(ImplicitOps), DI(SrcImmByte, intn),
3801 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), 3925 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
3802 /* 0xD0 - 0xD7 */ 3926 /* 0xD0 - 0xD7 */
3803 D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), 3927 G(Src2One | ByteOp, group2), G(Src2One, group2),
3804 N, N, N, N, 3928 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
3929 N, I(DstAcc | SrcImmByte | No64, em_aad), N, N,
3805 /* 0xD8 - 0xDF */ 3930 /* 0xD8 - 0xDF */
3806 N, N, N, N, N, N, N, N, 3931 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
3807 /* 0xE0 - 0xE7 */ 3932 /* 0xE0 - 0xE7 */
3808 X3(I(SrcImmByte, em_loop)), 3933 X3(I(SrcImmByte, em_loop)),
3809 I(SrcImmByte, em_jcxz), 3934 I(SrcImmByte, em_jcxz),
@@ -3870,28 +3995,29 @@ static const struct opcode twobyte_table[256] = {
3870 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), 3995 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
3871 /* 0xA0 - 0xA7 */ 3996 /* 0xA0 - 0xA7 */
3872 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), 3997 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
3873 II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), 3998 II(ImplicitOps, em_cpuid, cpuid),
3874 D(DstMem | SrcReg | Src2ImmByte | ModRM), 3999 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
3875 D(DstMem | SrcReg | Src2CL | ModRM), N, N, 4000 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4001 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
3876 /* 0xA8 - 0xAF */ 4002 /* 0xA8 - 0xAF */
3877 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), 4003 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
3878 DI(ImplicitOps, rsm), 4004 DI(ImplicitOps, rsm),
3879 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), 4005 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
3880 D(DstMem | SrcReg | Src2ImmByte | ModRM), 4006 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
3881 D(DstMem | SrcReg | Src2CL | ModRM), 4007 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
3882 D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), 4008 D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
3883 /* 0xB0 - 0xB7 */ 4009 /* 0xB0 - 0xB7 */
3884 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), 4010 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
3885 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), 4011 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
3886 I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), 4012 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
3887 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), 4013 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
3888 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), 4014 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
3889 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4015 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3890 /* 0xB8 - 0xBF */ 4016 /* 0xB8 - 0xBF */
3891 N, N, 4017 N, N,
3892 G(BitOp, group8), 4018 G(BitOp, group8),
3893 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), 4019 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
3894 I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), 4020 F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
3895 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4021 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3896 /* 0xC0 - 0xC7 */ 4022 /* 0xC0 - 0xC7 */
3897 D2bv(DstMem | SrcReg | ModRM | Lock), 4023 D2bv(DstMem | SrcReg | ModRM | Lock),
@@ -3950,6 +4076,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
3950 case 4: 4076 case 4:
3951 op->val = insn_fetch(s32, ctxt); 4077 op->val = insn_fetch(s32, ctxt);
3952 break; 4078 break;
4079 case 8:
4080 op->val = insn_fetch(s64, ctxt);
4081 break;
3953 } 4082 }
3954 if (!sign_extension) { 4083 if (!sign_extension) {
3955 switch (op->bytes) { 4084 switch (op->bytes) {
@@ -4028,6 +4157,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4028 case OpImm: 4157 case OpImm:
4029 rc = decode_imm(ctxt, op, imm_size(ctxt), true); 4158 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4030 break; 4159 break;
4160 case OpImm64:
4161 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4162 break;
4031 case OpMem8: 4163 case OpMem8:
4032 ctxt->memop.bytes = 1; 4164 ctxt->memop.bytes = 1;
4033 goto mem_common; 4165 goto mem_common;
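OpImm64 lets the B8-BF moves fetch an immediate as wide as the effective operand size (2, 4 or 8 bytes) and sign-extend it, where plain OpImm tops out at 32 bits; the new case 8 in decode_imm supplies the 64-bit fetch. A small sketch of the sign-extension step performed for each width (standalone, names illustrative):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Sign-extend the low `bytes` bytes of `raw` to 64 bits. */
    static int64_t sext_imm(uint64_t raw, unsigned int bytes)
    {
        switch (bytes) {
        case 1: return (int8_t)raw;
        case 2: return (int16_t)raw;
        case 4: return (int32_t)raw;
        default: return (int64_t)raw;   /* 8: already full width */
        }
    }

    int main(void)
    {
        assert(sext_imm(0x80, 1) == -128);
        assert(sext_imm(0xffffffff, 4) == -1);
        assert(sext_imm(0xffffffffull, 8) == 0xffffffffll);
        printf("0xffffffff as imm32 -> %lld\n",
               (long long)sext_imm(0xffffffff, 4));
        return 0;
    }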
@@ -4222,6 +4354,12 @@ done_prefixes:
4222 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; 4354 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
4223 } 4355 }
4224 break; 4356 break;
4357 case Escape:
4358 if (ctxt->modrm > 0xbf)
4359 opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
4360 else
4361 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
4362 break;
4225 default: 4363 default:
4226 return EMULATION_FAILED; 4364 return EMULATION_FAILED;
4227 } 4365 }
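For the x87 escape opcodes (D8-DF) the decoder now consults a struct escape: when ModRM encodes a memory form (modrm <= 0xbf) only the /reg field selects one of the eight op[] entries, otherwise the whole byte 0xc0-0xff indexes the 64-entry high[] table. A tiny sketch of that index selection (standalone, illustrative):

    #include <stdio.h>

    struct esc_index {
        int use_high;   /* 1: index high[], 0: index op[] */
        int idx;
    };

    static struct esc_index escape_index(unsigned char modrm)
    {
        struct esc_index r;

        if (modrm > 0xbf) {             /* register form: full byte decodes */
            r.use_high = 1;
            r.idx = modrm - 0xc0;
        } else {                        /* memory form: only /reg matters */
            r.use_high = 0;
            r.idx = (modrm >> 3) & 7;
        }
        return r;
    }

    int main(void)
    {
        unsigned char samples[] = { 0x3d, 0xbf, 0xc0, 0xe3, 0xff };
        for (int i = 0; i < 5; i++) {
            struct esc_index r = escape_index(samples[i]);
            printf("modrm 0x%02x -> %s[%d]\n", samples[i],
                   r.use_high ? "high" : "op", r.idx);
        }
        return 0;
    }

With the escape_d9 table above, modrm 0x3d selects op[7] (FNSTCW), and with escape_db, modrm 0xe3 selects high[0x23] (FNINIT).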
@@ -4354,6 +4492,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
4354 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); 4492 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
4355} 4493}
4356 4494
4495static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
4496{
4497 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
4498 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
4499 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
4500 : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
4501 : "c"(ctxt->src2.val), [fastop]"S"(fop));
4502 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
4503 return X86EMUL_CONTINUE;
4504}
4357 4505
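fastop() primes RFLAGS from the guest's arithmetic flags (plus IF so the host stays interruptible), runs the stub, and then folds only the arithmetic bits back into ctxt->eflags: new_eflags = (old & ~EFLAGS_MASK) | (hw_flags & EFLAGS_MASK). A trivial check of that merge; the mask value used here is an assumption for illustration (CF, PF, AF, ZF, SF, OF):

    #include <assert.h>
    #include <stdio.h>

    /* Arithmetic flags only: CF, PF, AF, ZF, SF, OF (assumed mask value). */
    #define EFLAGS_MASK 0x8d5u

    static unsigned long merge_flags(unsigned long guest, unsigned long hw)
    {
        return (guest & ~EFLAGS_MASK) | (hw & EFLAGS_MASK);
    }

    int main(void)
    {
        unsigned long guest = 0x202;          /* IF set, ZF clear        */
        unsigned long hw    = 0x246;          /* stub left ZF and PF set */
        unsigned long out   = merge_flags(guest, hw);

        assert(out & 0x40);                   /* ZF taken from the stub  */
        assert(out & 0x200);                  /* IF preserved from guest */
        printf("merged eflags = 0x%lx\n", out);
        return 0;
    }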
4358int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) 4506int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4359{ 4507{
@@ -4483,6 +4631,13 @@ special_insn:
4483 } 4631 }
4484 4632
4485 if (ctxt->execute) { 4633 if (ctxt->execute) {
4634 if (ctxt->d & Fastop) {
4635 void (*fop)(struct fastop *) = (void *)ctxt->execute;
4636 rc = fastop(ctxt, fop);
4637 if (rc != X86EMUL_CONTINUE)
4638 goto done;
4639 goto writeback;
4640 }
4486 rc = ctxt->execute(ctxt); 4641 rc = ctxt->execute(ctxt);
4487 if (rc != X86EMUL_CONTINUE) 4642 if (rc != X86EMUL_CONTINUE)
4488 goto done; 4643 goto done;
@@ -4493,12 +4648,6 @@ special_insn:
4493 goto twobyte_insn; 4648 goto twobyte_insn;
4494 4649
4495 switch (ctxt->b) { 4650 switch (ctxt->b) {
4496 case 0x40 ... 0x47: /* inc r16/r32 */
4497 emulate_1op(ctxt, "inc");
4498 break;
4499 case 0x48 ... 0x4f: /* dec r16/r32 */
4500 emulate_1op(ctxt, "dec");
4501 break;
4502 case 0x63: /* movsxd */ 4651 case 0x63: /* movsxd */
4503 if (ctxt->mode != X86EMUL_MODE_PROT64) 4652 if (ctxt->mode != X86EMUL_MODE_PROT64)
4504 goto cannot_emulate; 4653 goto cannot_emulate;
@@ -4523,9 +4672,6 @@ special_insn:
4523 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; 4672 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
4524 } 4673 }
4525 break; 4674 break;
4526 case 0xc0 ... 0xc1:
4527 rc = em_grp2(ctxt);
4528 break;
4529 case 0xcc: /* int3 */ 4675 case 0xcc: /* int3 */
4530 rc = emulate_int(ctxt, 3); 4676 rc = emulate_int(ctxt, 3);
4531 break; 4677 break;
@@ -4536,13 +4682,6 @@ special_insn:
4536 if (ctxt->eflags & EFLG_OF) 4682 if (ctxt->eflags & EFLG_OF)
4537 rc = emulate_int(ctxt, 4); 4683 rc = emulate_int(ctxt, 4);
4538 break; 4684 break;
4539 case 0xd0 ... 0xd1: /* Grp2 */
4540 rc = em_grp2(ctxt);
4541 break;
4542 case 0xd2 ... 0xd3: /* Grp2 */
4543 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
4544 rc = em_grp2(ctxt);
4545 break;
4546 case 0xe9: /* jmp rel */ 4685 case 0xe9: /* jmp rel */
4547 case 0xeb: /* jmp rel short */ 4686 case 0xeb: /* jmp rel short */
4548 jmp_rel(ctxt, ctxt->src.val); 4687 jmp_rel(ctxt, ctxt->src.val);
@@ -4661,14 +4800,6 @@ twobyte_insn:
4661 case 0x90 ... 0x9f: /* setcc r/m8 */ 4800 case 0x90 ... 0x9f: /* setcc r/m8 */
4662 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); 4801 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
4663 break; 4802 break;
4664 case 0xa4: /* shld imm8, r, r/m */
4665 case 0xa5: /* shld cl, r, r/m */
4666 emulate_2op_cl(ctxt, "shld");
4667 break;
4668 case 0xac: /* shrd imm8, r, r/m */
4669 case 0xad: /* shrd cl, r, r/m */
4670 emulate_2op_cl(ctxt, "shrd");
4671 break;
4672 case 0xae: /* clflush */ 4803 case 0xae: /* clflush */
4673 break; 4804 break;
4674 case 0xb6 ... 0xb7: /* movzx */ 4805 case 0xb6 ... 0xb7: /* movzx */
@@ -4682,7 +4813,7 @@ twobyte_insn:
4682 (s16) ctxt->src.val; 4813 (s16) ctxt->src.val;
4683 break; 4814 break;
4684 case 0xc0 ... 0xc1: /* xadd */ 4815 case 0xc0 ... 0xc1: /* xadd */
4685 emulate_2op_SrcV(ctxt, "add"); 4816 fastop(ctxt, em_add);
4686 /* Write back the register source. */ 4817 /* Write back the register source. */
4687 ctxt->src.val = ctxt->dst.orig_val; 4818 ctxt->src.val = ctxt->dst.orig_val;
4688 write_register_operand(&ctxt->src); 4819 write_register_operand(&ctxt->src);
index 11300d2fa714..c1d30b2fc9bb 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm)
122 */ 122 */
123 remaining = hrtimer_get_remaining(&ps->timer); 123 remaining = hrtimer_get_remaining(&ps->timer);
124 elapsed = ps->period - ktime_to_ns(remaining); 124 elapsed = ps->period - ktime_to_ns(remaining);
125 elapsed = mod_64(elapsed, ps->period);
126 125
127 return elapsed; 126 return elapsed;
128} 127}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 848206df0967..cc31f7c06d3d 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm)
241 int irq, irq2, intno; 241 int irq, irq2, intno;
242 struct kvm_pic *s = pic_irqchip(kvm); 242 struct kvm_pic *s = pic_irqchip(kvm);
243 243
244 s->output = 0;
245
244 pic_lock(s); 246 pic_lock(s);
245 irq = pic_get_irq(&s->pics[0]); 247 irq = pic_get_irq(&s->pics[0]);
246 if (irq >= 0) { 248 if (irq >= 0) {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 7e06ba1618bd..484bc874688b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,49 +38,81 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
38EXPORT_SYMBOL(kvm_cpu_has_pending_timer); 38EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
39 39
40/* 40/*
 41 * check if there is a pending interrupt from a
 42 * non-APIC source, without intack.
43 */
44static int kvm_cpu_has_extint(struct kvm_vcpu *v)
45{
46 if (kvm_apic_accept_pic_intr(v))
47 return pic_irqchip(v->kvm)->output; /* PIC */
48 else
49 return 0;
50}
51
52/*
 53 * check if there is an injectable interrupt:
 54 * when virtual interrupt delivery is enabled,
 55 * interrupts from the apic are handled by hardware,
 56 * so we don't need to check them here.
57 */
58int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
59{
60 if (!irqchip_in_kernel(v->kvm))
61 return v->arch.interrupt.pending;
62
63 if (kvm_cpu_has_extint(v))
64 return 1;
65
66 if (kvm_apic_vid_enabled(v->kvm))
67 return 0;
68
69 return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
70}
71
72/*
41 * check if there is pending interrupt without 73 * check if there is pending interrupt without
42 * intack. 74 * intack.
43 */ 75 */
44int kvm_cpu_has_interrupt(struct kvm_vcpu *v) 76int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
45{ 77{
46 struct kvm_pic *s;
47
48 if (!irqchip_in_kernel(v->kvm)) 78 if (!irqchip_in_kernel(v->kvm))
49 return v->arch.interrupt.pending; 79 return v->arch.interrupt.pending;
50 80
51 if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ 81 if (kvm_cpu_has_extint(v))
52 if (kvm_apic_accept_pic_intr(v)) { 82 return 1;
53 s = pic_irqchip(v->kvm); /* PIC */ 83
54 return s->output; 84 return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
55 } else
56 return 0;
57 }
58 return 1;
59} 85}
60EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); 86EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
61 87
62/* 88/*
 89 * Read pending interrupt (from non-APIC source)
90 * vector and intack.
91 */
92static int kvm_cpu_get_extint(struct kvm_vcpu *v)
93{
94 if (kvm_cpu_has_extint(v))
95 return kvm_pic_read_irq(v->kvm); /* PIC */
96 return -1;
97}
98
99/*
63 * Read pending interrupt vector and intack. 100 * Read pending interrupt vector and intack.
64 */ 101 */
65int kvm_cpu_get_interrupt(struct kvm_vcpu *v) 102int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
66{ 103{
67 struct kvm_pic *s;
68 int vector; 104 int vector;
69 105
70 if (!irqchip_in_kernel(v->kvm)) 106 if (!irqchip_in_kernel(v->kvm))
71 return v->arch.interrupt.nr; 107 return v->arch.interrupt.nr;
72 108
73 vector = kvm_get_apic_interrupt(v); /* APIC */ 109 vector = kvm_cpu_get_extint(v);
74 if (vector == -1) { 110
75 if (kvm_apic_accept_pic_intr(v)) { 111 if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
76 s = pic_irqchip(v->kvm); 112 return vector; /* PIC */
77 s->output = 0; /* PIC */ 113
78 vector = kvm_pic_read_irq(v->kvm); 114 return kvm_get_apic_interrupt(v); /* APIC */
79 }
80 }
81 return vector;
82} 115}
83EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
84 116
85void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 117void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
86{ 118{
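The rework above splits the non-APIC (ExtINT/PIC) check out of kvm_cpu_has_interrupt()/kvm_cpu_get_interrupt() so that, with virtualized interrupt delivery, only the PIC path is polled in software while APIC interrupts are left to the hardware. A compact sketch of the resulting decision order; the booleans stand in for the real vcpu state and are illustrative only:

    #include <stdbool.h>
    #include <stdio.h>

    struct vcpu_state {
        bool in_kernel_irqchip;   /* irqchip_in_kernel()              */
        bool user_intr_pending;   /* arch.interrupt.pending           */
        bool extint_pending;      /* PIC output, if PIC intr accepted */
        bool vid_enabled;         /* virtual interrupt delivery on    */
        bool lapic_has_intr;      /* kvm_apic_has_interrupt() != -1   */
    };

    /* Mirrors the shape of kvm_cpu_has_injectable_intr(): ExtINT first,
     * then the LAPIC unless the hardware delivers APIC interrupts itself. */
    static bool has_injectable_intr(const struct vcpu_state *v)
    {
        if (!v->in_kernel_irqchip)
            return v->user_intr_pending;
        if (v->extint_pending)
            return true;
        if (v->vid_enabled)
            return false;
        return v->lapic_has_intr;
    }

    int main(void)
    {
        struct vcpu_state v = {
            .in_kernel_irqchip = true,
            .vid_enabled = true,
            .lapic_has_intr = true,   /* handled by hardware, not here */
        };
        printf("injectable: %d\n", has_injectable_intr(&v));  /* 0 */
        v.extint_pending = true;
        printf("injectable: %d\n", has_injectable_intr(&v));  /* 1 */
        return 0;
    }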
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9392f527f107..02b51dd4e4ad 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic)
140 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ 140 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
141 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) 141 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
142 142
143static inline int apic_x2apic_mode(struct kvm_lapic *apic)
144{
145 return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
146}
147
148static inline int kvm_apic_id(struct kvm_lapic *apic) 143static inline int kvm_apic_id(struct kvm_lapic *apic)
149{ 144{
150 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 145 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
151} 146}
152 147
153static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) 148void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
149 struct kvm_lapic_irq *irq,
150 u64 *eoi_exit_bitmap)
154{ 151{
155 u16 cid; 152 struct kvm_lapic **dst;
156 ldr >>= 32 - map->ldr_bits; 153 struct kvm_apic_map *map;
157 cid = (ldr >> map->cid_shift) & map->cid_mask; 154 unsigned long bitmap = 1;
155 int i;
158 156
159 BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); 157 rcu_read_lock();
158 map = rcu_dereference(vcpu->kvm->arch.apic_map);
160 159
161 return cid; 160 if (unlikely(!map)) {
162} 161 __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
162 goto out;
163 }
163 164
164static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) 165 if (irq->dest_mode == 0) { /* physical mode */
165{ 166 if (irq->delivery_mode == APIC_DM_LOWEST ||
166 ldr >>= (32 - map->ldr_bits); 167 irq->dest_id == 0xff) {
167 return ldr & map->lid_mask; 168 __set_bit(irq->vector,
169 (unsigned long *)eoi_exit_bitmap);
170 goto out;
171 }
172 dst = &map->phys_map[irq->dest_id & 0xff];
173 } else {
174 u32 mda = irq->dest_id << (32 - map->ldr_bits);
175
176 dst = map->logical_map[apic_cluster_id(map, mda)];
177
178 bitmap = apic_logical_id(map, mda);
179 }
180
181 for_each_set_bit(i, &bitmap, 16) {
182 if (!dst[i])
183 continue;
184 if (dst[i]->vcpu == vcpu) {
185 __set_bit(irq->vector,
186 (unsigned long *)eoi_exit_bitmap);
187 break;
188 }
189 }
190
191out:
192 rcu_read_unlock();
168} 193}
169 194
170static void recalculate_apic_map(struct kvm *kvm) 195static void recalculate_apic_map(struct kvm *kvm)
@@ -230,6 +255,8 @@ out:
230 255
231 if (old) 256 if (old)
232 kfree_rcu(old, rcu); 257 kfree_rcu(old, rcu);
258
259 kvm_ioapic_make_eoibitmap_request(kvm);
233} 260}
234 261
235static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) 262static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
345{ 372{
346 int result; 373 int result;
347 374
375 /*
 376 * Note that irr_pending is just a hint. It will always be
 377 * true when virtual interrupt delivery is enabled.
378 */
348 if (!apic->irr_pending) 379 if (!apic->irr_pending)
349 return -1; 380 return -1;
350 381
@@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
461static inline int apic_find_highest_isr(struct kvm_lapic *apic) 492static inline int apic_find_highest_isr(struct kvm_lapic *apic)
462{ 493{
463 int result; 494 int result;
495
496 /* Note that isr_count is always 1 with vid enabled */
464 if (!apic->isr_count) 497 if (!apic->isr_count)
465 return -1; 498 return -1;
466 if (likely(apic->highest_isr_cache != -1)) 499 if (likely(apic->highest_isr_cache != -1))
@@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
740 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 773 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
741} 774}
742 775
776static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
777{
778 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
779 kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
780 int trigger_mode;
781 if (apic_test_vector(vector, apic->regs + APIC_TMR))
782 trigger_mode = IOAPIC_LEVEL_TRIG;
783 else
784 trigger_mode = IOAPIC_EDGE_TRIG;
785 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
786 }
787}
788
743static int apic_set_eoi(struct kvm_lapic *apic) 789static int apic_set_eoi(struct kvm_lapic *apic)
744{ 790{
745 int vector = apic_find_highest_isr(apic); 791 int vector = apic_find_highest_isr(apic);
@@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)
756 apic_clear_isr(vector, apic); 802 apic_clear_isr(vector, apic);
757 apic_update_ppr(apic); 803 apic_update_ppr(apic);
758 804
759 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && 805 kvm_ioapic_send_eoi(apic, vector);
760 kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
761 int trigger_mode;
762 if (apic_test_vector(vector, apic->regs + APIC_TMR))
763 trigger_mode = IOAPIC_LEVEL_TRIG;
764 else
765 trigger_mode = IOAPIC_EDGE_TRIG;
766 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
767 }
768 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 806 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
769 return vector; 807 return vector;
770} 808}
771 809
810/*
811 * this interface assumes a trap-like exit, which has already finished
 812 * the desired side effects, including the vISR and vPPR updates.
813 */
814void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
815{
816 struct kvm_lapic *apic = vcpu->arch.apic;
817
818 trace_kvm_eoi(apic, vector);
819
820 kvm_ioapic_send_eoi(apic, vector);
821 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
822}
823EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
824
772static void apic_send_ipi(struct kvm_lapic *apic) 825static void apic_send_ipi(struct kvm_lapic *apic)
773{ 826{
774 u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); 827 u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
@@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1212} 1265}
1213EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 1266EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
1214 1267
1268/* emulate APIC access in a trap manner */
1269void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
1270{
1271 u32 val = 0;
1272
1273 /* hw has done the conditional check and inst decode */
1274 offset &= 0xff0;
1275
1276 apic_reg_read(vcpu->arch.apic, offset, 4, &val);
1277
1278 /* TODO: optimize to just emulate side effect w/o one more write */
1279 apic_reg_write(vcpu->arch.apic, offset, val);
1280}
1281EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
1282
1215void kvm_free_lapic(struct kvm_vcpu *vcpu) 1283void kvm_free_lapic(struct kvm_vcpu *vcpu)
1216{ 1284{
1217 struct kvm_lapic *apic = vcpu->arch.apic; 1285 struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
1288 1356
1289void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 1357void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1290{ 1358{
1359 u64 old_value = vcpu->arch.apic_base;
1291 struct kvm_lapic *apic = vcpu->arch.apic; 1360 struct kvm_lapic *apic = vcpu->arch.apic;
1292 1361
1293 if (!apic) { 1362 if (!apic) {
@@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1309 value &= ~MSR_IA32_APICBASE_BSP; 1378 value &= ~MSR_IA32_APICBASE_BSP;
1310 1379
1311 vcpu->arch.apic_base = value; 1380 vcpu->arch.apic_base = value;
1312 if (apic_x2apic_mode(apic)) { 1381 if ((old_value ^ value) & X2APIC_ENABLE) {
1313 u32 id = kvm_apic_id(apic); 1382 if (value & X2APIC_ENABLE) {
1314 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); 1383 u32 id = kvm_apic_id(apic);
1315 kvm_apic_set_ldr(apic, ldr); 1384 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
1385 kvm_apic_set_ldr(apic, ldr);
1386 kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
1387 } else
1388 kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
1316 } 1389 }
1390
1317 apic->base_address = apic->vcpu->arch.apic_base & 1391 apic->base_address = apic->vcpu->arch.apic_base &
1318 MSR_IA32_APICBASE_BASE; 1392 MSR_IA32_APICBASE_BASE;
1319 1393
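kvm_lapic_set_base() now reprograms the LDR and flips the virtual-x2APIC mode only when the X2APIC_ENABLE bit actually changes, detected with (old ^ new) & X2APIC_ENABLE. The derived logical ID packs the APIC ID as cluster = id >> 4 in the high half and a member bit 1 << (id & 0xf) in the low half. A small check of both calculations with illustrative values:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define X2APIC_ENABLE (1u << 10)

    /* x2APIC logical destination: cluster in bits 31:16, member bit in 15:0 */
    static uint32_t x2apic_ldr(uint32_t id)
    {
        return ((id >> 4) << 16) | (1u << (id & 0xf));
    }

    static int x2apic_toggled(uint32_t old_base, uint32_t new_base)
    {
        return (old_base ^ new_base) & X2APIC_ENABLE;
    }

    int main(void)
    {
        assert(x2apic_ldr(0) == 0x00000001);
        assert(x2apic_ldr(0x13) == 0x00010008);          /* cluster 1, member 3 */
        printf("ldr(0x13) = 0x%08x\n", x2apic_ldr(0x13));

        assert(x2apic_toggled(0xfee00900, 0xfee00d00));  /* enable bit flips */
        assert(!x2apic_toggled(0xfee00d00, 0xfee00d00)); /* no change        */
        return 0;
    }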
@@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
1359 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 1433 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
1360 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 1434 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
1361 } 1435 }
1362 apic->irr_pending = false; 1436 apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
1363 apic->isr_count = 0; 1437 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
1364 apic->highest_isr_cache = -1; 1438 apic->highest_isr_cache = -1;
1365 update_divide_count(apic); 1439 update_divide_count(apic);
1366 atomic_set(&apic->lapic_timer.pending, 0); 1440 atomic_set(&apic->lapic_timer.pending, 0);
@@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1575 update_divide_count(apic); 1649 update_divide_count(apic);
1576 start_apic_timer(apic); 1650 start_apic_timer(apic);
1577 apic->irr_pending = true; 1651 apic->irr_pending = true;
1578 apic->isr_count = count_vectors(apic->regs + APIC_ISR); 1652 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
1653 1 : count_vectors(apic->regs + APIC_ISR);
1579 apic->highest_isr_cache = -1; 1654 apic->highest_isr_cache = -1;
1655 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
1580 kvm_make_request(KVM_REQ_EVENT, vcpu); 1656 kvm_make_request(KVM_REQ_EVENT, vcpu);
1581} 1657}
1582 1658
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e5ebf9f3571f..1676d34ddb4e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
64u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); 64u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
65void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); 65void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
66 66
67void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
68void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
69
67void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); 70void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
68void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); 71void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
69void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); 72void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
@@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
124 return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); 127 return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
125} 128}
126 129
130static inline int apic_x2apic_mode(struct kvm_lapic *apic)
131{
132 return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
133}
134
135static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
136{
137 return kvm_x86_ops->vm_has_apicv(kvm);
138}
139
140static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
141{
142 u16 cid;
143 ldr >>= 32 - map->ldr_bits;
144 cid = (ldr >> map->cid_shift) & map->cid_mask;
145
146 BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
147
148 return cid;
149}
150
151static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
152{
153 ldr >>= (32 - map->ldr_bits);
154 return ldr & map->lid_mask;
155}
156
157void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
158 struct kvm_lapic_irq *irq,
159 u64 *eoi_bitmap);
160
127#endif 161#endif
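apic_cluster_id() and apic_logical_id() move into lapic.h because the EOI-exit-bitmap code now needs them: the destination's LDR is shifted down by (32 - ldr_bits), the part above cid_shift selects the cluster row in the map, and the masked low part is the per-cluster member bitmap. A sketch with xAPIC-cluster-style parameters (ldr_bits = 8, cid_shift = 4, masks 0xf, all assumed here for illustration):

    #include <stdint.h>
    #include <stdio.h>

    struct apic_map_params {
        unsigned int ldr_bits;
        unsigned int cid_shift;
        uint32_t cid_mask;
        uint32_t lid_mask;
    };

    static unsigned int cluster_id(const struct apic_map_params *m, uint32_t ldr)
    {
        ldr >>= 32 - m->ldr_bits;
        return (ldr >> m->cid_shift) & m->cid_mask;
    }

    static unsigned int logical_id(const struct apic_map_params *m, uint32_t ldr)
    {
        ldr >>= 32 - m->ldr_bits;
        return ldr & m->lid_mask;
    }

    int main(void)
    {
        /* xAPIC cluster model: 8-bit LDR, 4-bit cluster, 4-bit member set */
        struct apic_map_params m = { 8, 4, 0xf, 0xf };
        uint32_t ldr = 0x23000000;        /* cluster 2, members {0,1} */

        printf("cluster = %u, member bitmap = 0x%x\n",
               cluster_id(&m, ldr), logical_id(&m, ldr));
        return 0;
    }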
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01d7c2ad05f5..4ed3edbe06bd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
448 448
449static bool spte_is_locklessly_modifiable(u64 spte) 449static bool spte_is_locklessly_modifiable(u64 spte)
450{ 450{
451 return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); 451 return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
452 (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
452} 453}
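The rewritten spte_is_locklessly_modifiable() is behaviour-preserving: !(~spte & mask) and (spte & mask) == mask both ask whether every bit of the mask is set in the SPTE; the new form just states it more directly. A quick exhaustive check of that equivalence over the two relevant bits (the bit positions used here are illustrative, not the kernel's):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* two host/mmu-writable style bits, positions chosen for illustration */
        const uint64_t mask = (1ull << 57) | (1ull << 58);

        for (unsigned int i = 0; i < 4; i++) {
            uint64_t spte = ((uint64_t)(i & 1) << 57) |
                            ((uint64_t)(i >> 1) << 58);
            int old_form = !(~spte & mask);
            int new_form = (spte & mask) == mask;
            assert(old_form == new_form);
        }
        printf("both forms agree for every combination of the two bits\n");
        return 0;
    }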
453 454
454static bool spte_has_volatile_bits(u64 spte) 455static bool spte_has_volatile_bits(u64 spte)
@@ -831,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
831 if (host_level == PT_PAGE_TABLE_LEVEL) 832 if (host_level == PT_PAGE_TABLE_LEVEL)
832 return host_level; 833 return host_level;
833 834
834 max_level = kvm_x86_ops->get_lpage_level() < host_level ? 835 max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
835 kvm_x86_ops->get_lpage_level() : host_level;
836 836
837 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) 837 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
838 if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) 838 if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
@@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
1142} 1142}
1143 1143
1144static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, 1144static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
1145 int level, bool pt_protect) 1145 bool pt_protect)
1146{ 1146{
1147 u64 *sptep; 1147 u64 *sptep;
1148 struct rmap_iterator iter; 1148 struct rmap_iterator iter;
@@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
1180 while (mask) { 1180 while (mask) {
1181 rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), 1181 rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
1182 PT_PAGE_TABLE_LEVEL, slot); 1182 PT_PAGE_TABLE_LEVEL, slot);
1183 __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); 1183 __rmap_write_protect(kvm, rmapp, false);
1184 1184
1185 /* clear the first set bit */ 1185 /* clear the first set bit */
1186 mask &= mask - 1; 1186 mask &= mask - 1;
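
The loop above handles one dirty page per set bit: __ffs(mask) yields the lowest set bit (the gfn offset within the word) and mask &= mask - 1 clears it. A small userspace sketch of the same iteration, using __builtin_ctzl as a stand-in for the kernel's __ffs():

#include <stdio.h>

int main(void)
{
	unsigned long base_gfn = 0x1000;       /* illustrative slot base   */
	unsigned long gfn_offset = 64;         /* illustrative word offset */
	unsigned long mask = 0x29;             /* dirty bits 0, 3 and 5    */

	while (mask) {
		/* lowest set bit == next dirty page in this word */
		unsigned long gfn = base_gfn + gfn_offset + __builtin_ctzl(mask);

		printf("write-protect gfn 0x%lx\n", gfn);
		mask &= mask - 1;              /* clear that bit and continue */
	}
	return 0;
}
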
@@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
1199 for (i = PT_PAGE_TABLE_LEVEL; 1199 for (i = PT_PAGE_TABLE_LEVEL;
1200 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 1200 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
1201 rmapp = __gfn_to_rmap(gfn, i, slot); 1201 rmapp = __gfn_to_rmap(gfn, i, slot);
1202 write_protected |= __rmap_write_protect(kvm, rmapp, i, true); 1202 write_protected |= __rmap_write_protect(kvm, rmapp, true);
1203 } 1203 }
1204 1204
1205 return write_protected; 1205 return write_protected;
@@ -1460,28 +1460,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
1460 percpu_counter_add(&kvm_total_used_mmu_pages, nr); 1460 percpu_counter_add(&kvm_total_used_mmu_pages, nr);
1461} 1461}
1462 1462
1463/* 1463static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
1464 * Remove the sp from shadow page cache, after call it,
1465 * we can not find this sp from the cache, and the shadow
1466 * page table is still valid.
1467 * It should be under the protection of mmu lock.
1468 */
1469static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp)
1470{ 1464{
1471 ASSERT(is_empty_shadow_page(sp->spt)); 1465 ASSERT(is_empty_shadow_page(sp->spt));
1472 hlist_del(&sp->hash_link); 1466 hlist_del(&sp->hash_link);
1473 if (!sp->role.direct)
1474 free_page((unsigned long)sp->gfns);
1475}
1476
1477/*
1478 * Free the shadow page table and the sp, we can do it
1479 * out of the protection of mmu lock.
1480 */
1481static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
1482{
1483 list_del(&sp->link); 1467 list_del(&sp->link);
1484 free_page((unsigned long)sp->spt); 1468 free_page((unsigned long)sp->spt);
1469 if (!sp->role.direct)
1470 free_page((unsigned long)sp->gfns);
1485 kmem_cache_free(mmu_page_header_cache, sp); 1471 kmem_cache_free(mmu_page_header_cache, sp);
1486} 1472}
1487 1473
@@ -1522,7 +1508,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
1522 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); 1508 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
1523 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 1509 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
1524 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 1510 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
1525 bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
1526 sp->parent_ptes = 0; 1511 sp->parent_ptes = 0;
1527 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1512 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
1528 kvm_mod_used_mmu_pages(vcpu->kvm, +1); 1513 kvm_mod_used_mmu_pages(vcpu->kvm, +1);
@@ -1973,9 +1958,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
1973{ 1958{
1974 u64 spte; 1959 u64 spte;
1975 1960
1976 spte = __pa(sp->spt) 1961 spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
1977 | PT_PRESENT_MASK | PT_ACCESSED_MASK 1962 shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
1978 | PT_WRITABLE_MASK | PT_USER_MASK; 1963
1979 mmu_spte_set(sptep, spte); 1964 mmu_spte_set(sptep, spte);
1980} 1965}
1981 1966
@@ -2126,7 +2111,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
2126 do { 2111 do {
2127 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); 2112 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
2128 WARN_ON(!sp->role.invalid || sp->root_count); 2113 WARN_ON(!sp->role.invalid || sp->root_count);
2129 kvm_mmu_isolate_page(sp);
2130 kvm_mmu_free_page(sp); 2114 kvm_mmu_free_page(sp);
2131 } while (!list_empty(invalid_list)); 2115 } while (!list_empty(invalid_list));
2132} 2116}
@@ -2144,6 +2128,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
2144 * change the value 2128 * change the value
2145 */ 2129 */
2146 2130
2131 spin_lock(&kvm->mmu_lock);
2132
2147 if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { 2133 if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
2148 while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && 2134 while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
2149 !list_empty(&kvm->arch.active_mmu_pages)) { 2135 !list_empty(&kvm->arch.active_mmu_pages)) {
@@ -2158,6 +2144,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
2158 } 2144 }
2159 2145
2160 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; 2146 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
2147
2148 spin_unlock(&kvm->mmu_lock);
2161} 2149}
2162 2150
2163int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) 2151int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
@@ -2183,14 +2171,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2183} 2171}
2184EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); 2172EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
2185 2173
2186static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
2187{
2188 int slot = memslot_id(kvm, gfn);
2189 struct kvm_mmu_page *sp = page_header(__pa(pte));
2190
2191 __set_bit(slot, sp->slot_bitmap);
2192}
2193
2194/* 2174/*
2195 * The function is based on mtrr_type_lookup() in 2175 * The function is based on mtrr_type_lookup() in
2196 * arch/x86/kernel/cpu/mtrr/generic.c 2176 * arch/x86/kernel/cpu/mtrr/generic.c
@@ -2332,9 +2312,8 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
2332 if (s->role.level != PT_PAGE_TABLE_LEVEL) 2312 if (s->role.level != PT_PAGE_TABLE_LEVEL)
2333 return 1; 2313 return 1;
2334 2314
2335 if (!need_unsync && !s->unsync) { 2315 if (!s->unsync)
2336 need_unsync = true; 2316 need_unsync = true;
2337 }
2338 } 2317 }
2339 if (need_unsync) 2318 if (need_unsync)
2340 kvm_unsync_pages(vcpu, gfn); 2319 kvm_unsync_pages(vcpu, gfn);
@@ -2342,8 +2321,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
2342} 2321}
2343 2322
2344static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, 2323static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2345 unsigned pte_access, int user_fault, 2324 unsigned pte_access, int level,
2346 int write_fault, int level,
2347 gfn_t gfn, pfn_t pfn, bool speculative, 2325 gfn_t gfn, pfn_t pfn, bool speculative,
2348 bool can_unsync, bool host_writable) 2326 bool can_unsync, bool host_writable)
2349{ 2327{
@@ -2378,20 +2356,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2378 2356
2379 spte |= (u64)pfn << PAGE_SHIFT; 2357 spte |= (u64)pfn << PAGE_SHIFT;
2380 2358
2381 if ((pte_access & ACC_WRITE_MASK) 2359 if (pte_access & ACC_WRITE_MASK) {
2382 || (!vcpu->arch.mmu.direct_map && write_fault
2383 && !is_write_protection(vcpu) && !user_fault)) {
2384 2360
2385 /* 2361 /*
2386 * There are two cases: 2362 * Other vcpu creates new sp in the window between
2387 * - the one is other vcpu creates new sp in the window 2363 * mapping_level() and acquiring mmu-lock. We can
2388 * between mapping_level() and acquiring mmu-lock. 2364 * allow guest to retry the access, the mapping can
2389 * - the another case is the new sp is created by itself 2365 * be fixed if guest refault.
2390 * (page-fault path) when guest uses the target gfn as
2391 * its page table.
2392 * Both of these cases can be fixed by allowing guest to
2393 * retry the access, it will refault, then we can establish
2394 * the mapping by using small page.
2395 */ 2366 */
2396 if (level > PT_PAGE_TABLE_LEVEL && 2367 if (level > PT_PAGE_TABLE_LEVEL &&
2397 has_wrprotected_page(vcpu->kvm, gfn, level)) 2368 has_wrprotected_page(vcpu->kvm, gfn, level))
@@ -2399,19 +2370,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2399 2370
2400 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; 2371 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
2401 2372
2402 if (!vcpu->arch.mmu.direct_map
2403 && !(pte_access & ACC_WRITE_MASK)) {
2404 spte &= ~PT_USER_MASK;
2405 /*
2406 * If we converted a user page to a kernel page,
2407 * so that the kernel can write to it when cr0.wp=0,
2408 * then we should prevent the kernel from executing it
2409 * if SMEP is enabled.
2410 */
2411 if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
2412 spte |= PT64_NX_MASK;
2413 }
2414
2415 /* 2373 /*
2416 * Optimization: for pte sync, if spte was writable the hash 2374 * Optimization: for pte sync, if spte was writable the hash
2417 * lookup is unnecessary (and expensive). Write protection 2375 * lookup is unnecessary (and expensive). Write protection
@@ -2441,19 +2399,15 @@ done:
2441} 2399}
2442 2400
2443static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, 2401static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2444 unsigned pt_access, unsigned pte_access, 2402 unsigned pte_access, int write_fault, int *emulate,
2445 int user_fault, int write_fault, 2403 int level, gfn_t gfn, pfn_t pfn, bool speculative,
2446 int *emulate, int level, gfn_t gfn,
2447 pfn_t pfn, bool speculative,
2448 bool host_writable) 2404 bool host_writable)
2449{ 2405{
2450 int was_rmapped = 0; 2406 int was_rmapped = 0;
2451 int rmap_count; 2407 int rmap_count;
2452 2408
2453 pgprintk("%s: spte %llx access %x write_fault %d" 2409 pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
2454 " user_fault %d gfn %llx\n", 2410 *sptep, write_fault, gfn);
2455 __func__, *sptep, pt_access,
2456 write_fault, user_fault, gfn);
2457 2411
2458 if (is_rmap_spte(*sptep)) { 2412 if (is_rmap_spte(*sptep)) {
2459 /* 2413 /*
@@ -2477,9 +2431,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2477 was_rmapped = 1; 2431 was_rmapped = 1;
2478 } 2432 }
2479 2433
2480 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, 2434 if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
2481 level, gfn, pfn, speculative, true, 2435 true, host_writable)) {
2482 host_writable)) {
2483 if (write_fault) 2436 if (write_fault)
2484 *emulate = 1; 2437 *emulate = 1;
2485 kvm_mmu_flush_tlb(vcpu); 2438 kvm_mmu_flush_tlb(vcpu);
@@ -2497,7 +2450,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2497 ++vcpu->kvm->stat.lpages; 2450 ++vcpu->kvm->stat.lpages;
2498 2451
2499 if (is_shadow_present_pte(*sptep)) { 2452 if (is_shadow_present_pte(*sptep)) {
2500 page_header_update_slot(vcpu->kvm, sptep, gfn);
2501 if (!was_rmapped) { 2453 if (!was_rmapped) {
2502 rmap_count = rmap_add(vcpu, sptep, gfn); 2454 rmap_count = rmap_add(vcpu, sptep, gfn);
2503 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 2455 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
@@ -2571,10 +2523,9 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
2571 return -1; 2523 return -1;
2572 2524
2573 for (i = 0; i < ret; i++, gfn++, start++) 2525 for (i = 0; i < ret; i++, gfn++, start++)
2574 mmu_set_spte(vcpu, start, ACC_ALL, 2526 mmu_set_spte(vcpu, start, access, 0, NULL,
2575 access, 0, 0, NULL, 2527 sp->role.level, gfn, page_to_pfn(pages[i]),
2576 sp->role.level, gfn, 2528 true, true);
2577 page_to_pfn(pages[i]), true, true);
2578 2529
2579 return 0; 2530 return 0;
2580} 2531}
@@ -2633,11 +2584,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2633 2584
2634 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2585 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
2635 if (iterator.level == level) { 2586 if (iterator.level == level) {
2636 unsigned pte_access = ACC_ALL; 2587 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
2637 2588 write, &emulate, level, gfn, pfn,
2638 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, 2589 prefault, map_writable);
2639 0, write, &emulate,
2640 level, gfn, pfn, prefault, map_writable);
2641 direct_pte_prefetch(vcpu, iterator.sptep); 2590 direct_pte_prefetch(vcpu, iterator.sptep);
2642 ++vcpu->stat.pf_fixed; 2591 ++vcpu->stat.pf_fixed;
2643 break; 2592 break;
@@ -2652,11 +2601,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2652 iterator.level - 1, 2601 iterator.level - 1,
2653 1, ACC_ALL, iterator.sptep); 2602 1, ACC_ALL, iterator.sptep);
2654 2603
2655 mmu_spte_set(iterator.sptep, 2604 link_shadow_page(iterator.sptep, sp);
2656 __pa(sp->spt)
2657 | PT_PRESENT_MASK | PT_WRITABLE_MASK
2658 | shadow_user_mask | shadow_x_mask
2659 | shadow_accessed_mask);
2660 } 2605 }
2661 } 2606 }
2662 return emulate; 2607 return emulate;
@@ -3719,6 +3664,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
3719 else 3664 else
3720 r = paging32_init_context(vcpu, context); 3665 r = paging32_init_context(vcpu, context);
3721 3666
3667 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
3722 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); 3668 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
3723 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); 3669 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
3724 vcpu->arch.mmu.base_role.smep_andnot_wp 3670 vcpu->arch.mmu.base_role.smep_andnot_wp
@@ -3885,7 +3831,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
3885 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ 3831 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
3886 *gpa &= ~(gpa_t)7; 3832 *gpa &= ~(gpa_t)7;
3887 *bytes = 8; 3833 *bytes = 8;
3888 r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); 3834 r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
3889 if (r) 3835 if (r)
3890 gentry = 0; 3836 gentry = 0;
3891 new = (const u8 *)&gentry; 3837 new = (const u8 *)&gentry;
@@ -4039,7 +3985,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4039 !((sp->role.word ^ vcpu->arch.mmu.base_role.word) 3985 !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
4040 & mask.word) && rmap_can_add(vcpu)) 3986 & mask.word) && rmap_can_add(vcpu))
4041 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); 3987 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
4042 if (!remote_flush && need_remote_flush(entry, *spte)) 3988 if (need_remote_flush(entry, *spte))
4043 remote_flush = true; 3989 remote_flush = true;
4044 ++spte; 3990 ++spte;
4045 } 3991 }
@@ -4198,26 +4144,36 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
4198 4144
4199void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) 4145void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
4200{ 4146{
4201 struct kvm_mmu_page *sp; 4147 struct kvm_memory_slot *memslot;
4202 bool flush = false; 4148 gfn_t last_gfn;
4149 int i;
4203 4150
4204 list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { 4151 memslot = id_to_memslot(kvm->memslots, slot);
4205 int i; 4152 last_gfn = memslot->base_gfn + memslot->npages - 1;
4206 u64 *pt;
4207 4153
4208 if (!test_bit(slot, sp->slot_bitmap)) 4154 spin_lock(&kvm->mmu_lock);
4209 continue;
4210 4155
4211 pt = sp->spt; 4156 for (i = PT_PAGE_TABLE_LEVEL;
4212 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { 4157 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
4213 if (!is_shadow_present_pte(pt[i]) || 4158 unsigned long *rmapp;
4214 !is_last_spte(pt[i], sp->role.level)) 4159 unsigned long last_index, index;
4215 continue;
4216 4160
4217 spte_write_protect(kvm, &pt[i], &flush, false); 4161 rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
4162 last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
4163
4164 for (index = 0; index <= last_index; ++index, ++rmapp) {
4165 if (*rmapp)
4166 __rmap_write_protect(kvm, rmapp, false);
4167
4168 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
4169 kvm_flush_remote_tlbs(kvm);
4170 cond_resched_lock(&kvm->mmu_lock);
4171 }
4218 } 4172 }
4219 } 4173 }
4174
4220 kvm_flush_remote_tlbs(kvm); 4175 kvm_flush_remote_tlbs(kvm);
4176 spin_unlock(&kvm->mmu_lock);
4221} 4177}
4222 4178
4223void kvm_mmu_zap_all(struct kvm *kvm) 4179void kvm_mmu_zap_all(struct kvm *kvm)
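
The rewritten kvm_mmu_slot_remove_write_access() stops scanning every shadow page's slot_bitmap and instead walks the memslot's per-level rmap arrays, from index 0 up to the index of the slot's last gfn at each page size. A rough sketch of that index arithmetic, assuming the usual x86 factor of 512 gfns per level; the gfn_to_index() below is a simplified stand-in for the kernel helper, not its definition:

#include <stdio.h>

#define PT_PAGE_TABLE_LEVEL 1
#define KVM_NR_PAGE_SIZES   3                  /* 4K, 2M, 1G */

/* Each level covers 512x more guest frames than the previous one. */
static unsigned long level_shift(int level)
{
	return (level - PT_PAGE_TABLE_LEVEL) * 9;
}

/* Simplified stand-in for gfn_to_index(): rmap slot of @gfn at @level. */
static unsigned long gfn_to_index(unsigned long gfn, unsigned long base_gfn,
				  int level)
{
	return (gfn >> level_shift(level)) - (base_gfn >> level_shift(level));
}

int main(void)
{
	unsigned long base_gfn = 0x100000;     /* illustrative memslot */
	unsigned long npages   = 0x4000;       /* 16K pages == 64 MiB  */
	unsigned long last_gfn = base_gfn + npages - 1;

	for (int i = PT_PAGE_TABLE_LEVEL;
	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
		unsigned long last_index = gfn_to_index(last_gfn, base_gfn, i);

		/* One rmap array per level; indices 0..last_index inclusive. */
		printf("level %d: %lu rmap entries\n", i, last_index + 1);
	}
	return 0;
}
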
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index cd6e98333ba3..b8f6172f4174 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
195 TP_ARGS(sp) 195 TP_ARGS(sp)
196); 196);
197 197
198DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages,
199 TP_PROTO(struct kvm_mmu_page *sp),
200
201 TP_ARGS(sp)
202);
203
204TRACE_EVENT( 198TRACE_EVENT(
205 mark_mmio_spte, 199 mark_mmio_spte,
206 TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), 200 TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access),
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 891eb6d93b8b..105dd5bd550e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
151 pt_element_t pte; 151 pt_element_t pte;
152 pt_element_t __user *uninitialized_var(ptep_user); 152 pt_element_t __user *uninitialized_var(ptep_user);
153 gfn_t table_gfn; 153 gfn_t table_gfn;
154 unsigned index, pt_access, pte_access, accessed_dirty, shift; 154 unsigned index, pt_access, pte_access, accessed_dirty;
155 gpa_t pte_gpa; 155 gpa_t pte_gpa;
156 int offset; 156 int offset;
157 const int write_fault = access & PFERR_WRITE_MASK; 157 const int write_fault = access & PFERR_WRITE_MASK;
@@ -249,16 +249,12 @@ retry_walk:
249 249
250 if (!write_fault) 250 if (!write_fault)
251 protect_clean_gpte(&pte_access, pte); 251 protect_clean_gpte(&pte_access, pte);
252 252 else
253 /* 253 /*
254 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one 254 * On a write fault, fold the dirty bit into accessed_dirty by
255 * place right. 255 * shifting it one place right.
256 * 256 */
257 * On a read fault, do nothing. 257 accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);
258 */
259 shift = write_fault >> ilog2(PFERR_WRITE_MASK);
260 shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
261 accessed_dirty &= pte >> shift;
262 258
263 if (unlikely(!accessed_dirty)) { 259 if (unlikely(!accessed_dirty)) {
264 ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); 260 ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
@@ -330,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
330 * we call mmu_set_spte() with host_writable = true because 326 * we call mmu_set_spte() with host_writable = true because
331 * pte_prefetch_gfn_to_pfn always gets a writable pfn. 327 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
332 */ 328 */
333 mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, 329 mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL,
334 NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); 330 gfn, pfn, true, true);
335 331
336 return true; 332 return true;
337} 333}
@@ -405,7 +401,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
405 */ 401 */
406static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 402static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
407 struct guest_walker *gw, 403 struct guest_walker *gw,
408 int user_fault, int write_fault, int hlevel, 404 int write_fault, int hlevel,
409 pfn_t pfn, bool map_writable, bool prefault) 405 pfn_t pfn, bool map_writable, bool prefault)
410{ 406{
411 struct kvm_mmu_page *sp = NULL; 407 struct kvm_mmu_page *sp = NULL;
@@ -413,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
413 unsigned direct_access, access = gw->pt_access; 409 unsigned direct_access, access = gw->pt_access;
414 int top_level, emulate = 0; 410 int top_level, emulate = 0;
415 411
416 if (!is_present_gpte(gw->ptes[gw->level - 1]))
417 return 0;
418
419 direct_access = gw->pte_access; 412 direct_access = gw->pte_access;
420 413
421 top_level = vcpu->arch.mmu.root_level; 414 top_level = vcpu->arch.mmu.root_level;
@@ -477,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
477 } 470 }
478 471
479 clear_sp_write_flooding_count(it.sptep); 472 clear_sp_write_flooding_count(it.sptep);
480 mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, 473 mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate,
481 user_fault, write_fault, &emulate, it.level, 474 it.level, gw->gfn, pfn, prefault, map_writable);
482 gw->gfn, pfn, prefault, map_writable);
483 FNAME(pte_prefetch)(vcpu, gw, it.sptep); 475 FNAME(pte_prefetch)(vcpu, gw, it.sptep);
484 476
485 return emulate; 477 return emulate;
@@ -491,6 +483,46 @@ out_gpte_changed:
491 return 0; 483 return 0;
492} 484}
493 485
486 /*
487 * To see whether the mapped gfn can write its page table in the current
488 * mapping.
489 *
 490 * It is a helper for FNAME(page_fault). When the guest uses a large page
 491 * to map a writable gfn that is currently used as a page table, kvm must
 492 * fall back to a small page mapping: the new shadow page created when kvm
 493 * shadows that page table would stop kvm from using the large page anyway.
 494 * Doing this early avoids unnecessary #PFs and emulation.
495 *
496 * @write_fault_to_shadow_pgtable will return true if the fault gfn is
497 * currently used as its page table.
498 *
 499 * Note: the PDPT page table is not checked for a PAE 32-bit guest. That is
 500 * fine because the PDPT is always shadowed, which means a large page can
 501 * never be used to map the gfn that holds the PDPT.
502 */
503static bool
504FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
505 struct guest_walker *walker, int user_fault,
506 bool *write_fault_to_shadow_pgtable)
507{
508 int level;
509 gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
510 bool self_changed = false;
511
512 if (!(walker->pte_access & ACC_WRITE_MASK ||
513 (!is_write_protection(vcpu) && !user_fault)))
514 return false;
515
516 for (level = walker->level; level <= walker->max_level; level++) {
517 gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
518
519 self_changed |= !(gfn & mask);
520 *write_fault_to_shadow_pgtable |= !gfn;
521 }
522
523 return self_changed;
524}
525
494/* 526/*
495 * Page fault handler. There are several causes for a page fault: 527 * Page fault handler. There are several causes for a page fault:
496 * - there is no shadow pte for the guest pte 528 * - there is no shadow pte for the guest pte
@@ -516,7 +548,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
516 int level = PT_PAGE_TABLE_LEVEL; 548 int level = PT_PAGE_TABLE_LEVEL;
517 int force_pt_level; 549 int force_pt_level;
518 unsigned long mmu_seq; 550 unsigned long mmu_seq;
519 bool map_writable; 551 bool map_writable, is_self_change_mapping;
520 552
521 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); 553 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
522 554
@@ -544,8 +576,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
544 return 0; 576 return 0;
545 } 577 }
546 578
579 vcpu->arch.write_fault_to_shadow_pgtable = false;
580
581 is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
582 &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
583
547 if (walker.level >= PT_DIRECTORY_LEVEL) 584 if (walker.level >= PT_DIRECTORY_LEVEL)
548 force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); 585 force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
586 || is_self_change_mapping;
549 else 587 else
550 force_pt_level = 1; 588 force_pt_level = 1;
551 if (!force_pt_level) { 589 if (!force_pt_level) {
@@ -564,6 +602,26 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
564 walker.gfn, pfn, walker.pte_access, &r)) 602 walker.gfn, pfn, walker.pte_access, &r))
565 return r; 603 return r;
566 604
605 /*
 606 * Do not change pte_access if the pfn is an mmio page, otherwise
 607 * we will cache the incorrect access into the mmio spte.
608 */
609 if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) &&
610 !is_write_protection(vcpu) && !user_fault &&
611 !is_noslot_pfn(pfn)) {
612 walker.pte_access |= ACC_WRITE_MASK;
613 walker.pte_access &= ~ACC_USER_MASK;
614
615 /*
 616 * If we converted a user page to a kernel page so
 617 * that the kernel can write to it when cr0.wp=0, then
 618 * we should prevent the kernel from executing it if
 619 * SMEP is enabled.
620 */
621 if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
622 walker.pte_access &= ~ACC_EXEC_MASK;
623 }
624
567 spin_lock(&vcpu->kvm->mmu_lock); 625 spin_lock(&vcpu->kvm->mmu_lock);
568 if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) 626 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
569 goto out_unlock; 627 goto out_unlock;
@@ -572,7 +630,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
572 kvm_mmu_free_some_pages(vcpu); 630 kvm_mmu_free_some_pages(vcpu);
573 if (!force_pt_level) 631 if (!force_pt_level)
574 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); 632 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
575 r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 633 r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
576 level, pfn, map_writable, prefault); 634 level, pfn, map_writable, prefault);
577 ++vcpu->stat.pf_fixed; 635 ++vcpu->stat.pf_fixed;
578 kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); 636 kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
@@ -747,7 +805,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
747 805
748 host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; 806 host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
749 807
750 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 808 set_spte(vcpu, &sp->spt[i], pte_access,
751 PT_PAGE_TABLE_LEVEL, gfn, 809 PT_PAGE_TABLE_LEVEL, gfn,
752 spte_to_pfn(sp->spt[i]), true, false, 810 spte_to_pfn(sp->spt[i]), true, false,
753 host_writable); 811 host_writable);
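
FNAME(is_self_change_mapping), added above, XORs the faulting gfn with the gfn of each guest page table visited during the walk: a zero result under the huge-page mask means the page table lives inside the huge page about to be mapped, and an exactly-zero result means the fault hits the page table itself. A userspace sketch of that test, assuming a 2M huge page (512 4K frames) and ignoring the initial writability filter:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGES_PER_2M_HPAGE 512ULL      /* 2M / 4K, assumed for the example */

struct walk {
	uint64_t gfn;                  /* gfn the guest faulted on          */
	uint64_t table_gfn[4];         /* gfn of the guest page table at    */
	int level, max_level;          /*   each level of the walk          */
};

static bool is_self_change_mapping(const struct walk *w, bool *fault_on_table)
{
	uint64_t mask = ~(PAGES_PER_2M_HPAGE - 1);
	bool self_changed = false;

	for (int level = w->level; level <= w->max_level; level++) {
		uint64_t x = w->gfn ^ w->table_gfn[level - 1];

		self_changed    |= !(x & mask); /* same huge-page frame     */
		*fault_on_table |= !x;          /* exactly the table's gfn  */
	}
	return self_changed;
}

int main(void)
{
	/* Fault gfn and the level-1 table gfn sit in the same 2M region. */
	struct walk w = { .gfn = 0x1234, .table_gfn = { 0x1200, 0x40000 },
			  .level = 1, .max_level = 2 };
	bool fault_on_table = false;

	printf("self-changed: %d, fault hits table page: %d\n",
	       is_self_change_mapping(&w, &fault_on_table), fault_on_table);
	return 0;
}
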
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d29d3cd1c156..e1b1ce21bc00 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3571 set_cr_intercept(svm, INTERCEPT_CR8_WRITE); 3571 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3572} 3572}
3573 3573
3574static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
3575{
3576 return;
3577}
3578
3579static int svm_vm_has_apicv(struct kvm *kvm)
3580{
3581 return 0;
3582}
3583
3584static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
3585{
3586 return;
3587}
3588
3589static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
3590{
3591 return;
3592}
3593
3574static int svm_nmi_allowed(struct kvm_vcpu *vcpu) 3594static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3575{ 3595{
3576 struct vcpu_svm *svm = to_svm(vcpu); 3596 struct vcpu_svm *svm = to_svm(vcpu);
@@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = {
4290 .enable_nmi_window = enable_nmi_window, 4310 .enable_nmi_window = enable_nmi_window,
4291 .enable_irq_window = enable_irq_window, 4311 .enable_irq_window = enable_irq_window,
4292 .update_cr8_intercept = update_cr8_intercept, 4312 .update_cr8_intercept = update_cr8_intercept,
4313 .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
4314 .vm_has_apicv = svm_vm_has_apicv,
4315 .load_eoi_exitmap = svm_load_eoi_exitmap,
4316 .hwapic_isr_update = svm_hwapic_isr_update,
4293 4317
4294 .set_tss_addr = svm_set_tss_addr, 4318 .set_tss_addr = svm_set_tss_addr,
4295 .get_tdp_level = get_npt_level, 4319 .get_tdp_level = get_npt_level,
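
SVM has no APIC register virtualization, so the new kvm_x86_ops hooks are wired to stubs that simply report the feature as absent; common x86 code can then call through the ops table without NULL checks. A generic sketch of that pattern (the struct and function names below are invented for illustration):

#include <stdio.h>

struct accel_ops {
	int  (*has_feature)(void);         /* capability query          */
	void (*apply)(int value);          /* used only when present    */
};

/* Backend without the feature: stubs keep the call sites unconditional. */
static int  stub_has_feature(void)      { return 0; }
static void stub_apply(int value)       { (void)value; /* nothing to do */ }

static const struct accel_ops no_accel_ops = {
	.has_feature = stub_has_feature,
	.apply       = stub_apply,
};

int main(void)
{
	const struct accel_ops *ops = &no_accel_ops;

	/* Caller never has to test ops->apply for NULL. */
	if (ops->has_feature())
		ops->apply(42);
	printf("feature present: %d\n", ops->has_feature());
	return 0;
}
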
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9120ae1901e4..6667042714cc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,6 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
84static bool __read_mostly fasteoi = 1; 84static bool __read_mostly fasteoi = 1;
85module_param(fasteoi, bool, S_IRUGO); 85module_param(fasteoi, bool, S_IRUGO);
86 86
87static bool __read_mostly enable_apicv_reg_vid;
88
87/* 89/*
88 * If nested=1, nested virtualization is supported, i.e., guests may use 90 * If nested=1, nested virtualization is supported, i.e., guests may use
89 * VMX and be a hypervisor for its own guests. If nested=0, guests may not 91 * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -92,12 +94,8 @@ module_param(fasteoi, bool, S_IRUGO);
92static bool __read_mostly nested = 0; 94static bool __read_mostly nested = 0;
93module_param(nested, bool, S_IRUGO); 95module_param(nested, bool, S_IRUGO);
94 96
95#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ 97#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
96 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) 98#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
97#define KVM_GUEST_CR0_MASK \
98 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
99#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
100 (X86_CR0_WP | X86_CR0_NE)
101#define KVM_VM_CR0_ALWAYS_ON \ 99#define KVM_VM_CR0_ALWAYS_ON \
102 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 100 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
103#define KVM_CR4_GUEST_OWNED_BITS \ 101#define KVM_CR4_GUEST_OWNED_BITS \
@@ -624,6 +622,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
624 struct kvm_segment *var, int seg); 622 struct kvm_segment *var, int seg);
625static void vmx_get_segment(struct kvm_vcpu *vcpu, 623static void vmx_get_segment(struct kvm_vcpu *vcpu,
626 struct kvm_segment *var, int seg); 624 struct kvm_segment *var, int seg);
625static bool guest_state_valid(struct kvm_vcpu *vcpu);
626static u32 vmx_segment_access_rights(struct kvm_segment *var);
627 627
628static DEFINE_PER_CPU(struct vmcs *, vmxarea); 628static DEFINE_PER_CPU(struct vmcs *, vmxarea);
629static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 629static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -638,6 +638,8 @@ static unsigned long *vmx_io_bitmap_a;
638static unsigned long *vmx_io_bitmap_b; 638static unsigned long *vmx_io_bitmap_b;
639static unsigned long *vmx_msr_bitmap_legacy; 639static unsigned long *vmx_msr_bitmap_legacy;
640static unsigned long *vmx_msr_bitmap_longmode; 640static unsigned long *vmx_msr_bitmap_longmode;
641static unsigned long *vmx_msr_bitmap_legacy_x2apic;
642static unsigned long *vmx_msr_bitmap_longmode_x2apic;
641 643
642static bool cpu_has_load_ia32_efer; 644static bool cpu_has_load_ia32_efer;
643static bool cpu_has_load_perf_global_ctrl; 645static bool cpu_has_load_perf_global_ctrl;
@@ -762,6 +764,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
762 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 764 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
763} 765}
764 766
767static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
768{
769 return vmcs_config.cpu_based_2nd_exec_ctrl &
770 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
771}
772
773static inline bool cpu_has_vmx_apic_register_virt(void)
774{
775 return vmcs_config.cpu_based_2nd_exec_ctrl &
776 SECONDARY_EXEC_APIC_REGISTER_VIRT;
777}
778
779static inline bool cpu_has_vmx_virtual_intr_delivery(void)
780{
781 return vmcs_config.cpu_based_2nd_exec_ctrl &
782 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
783}
784
765static inline bool cpu_has_vmx_flexpriority(void) 785static inline bool cpu_has_vmx_flexpriority(void)
766{ 786{
767 return cpu_has_vmx_tpr_shadow() && 787 return cpu_has_vmx_tpr_shadow() &&
@@ -1694,7 +1714,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
1694static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 1714static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1695{ 1715{
1696 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); 1716 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
1697 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
1698 to_vmx(vcpu)->rflags = rflags; 1717 to_vmx(vcpu)->rflags = rflags;
1699 if (to_vmx(vcpu)->rmode.vm86_active) { 1718 if (to_vmx(vcpu)->rmode.vm86_active) {
1700 to_vmx(vcpu)->rmode.save_rflags = rflags; 1719 to_vmx(vcpu)->rmode.save_rflags = rflags;
@@ -1820,6 +1839,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1820 vmx->guest_msrs[from] = tmp; 1839 vmx->guest_msrs[from] = tmp;
1821} 1840}
1822 1841
1842static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
1843{
1844 unsigned long *msr_bitmap;
1845
1846 if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
1847 if (is_long_mode(vcpu))
1848 msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
1849 else
1850 msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
1851 } else {
1852 if (is_long_mode(vcpu))
1853 msr_bitmap = vmx_msr_bitmap_longmode;
1854 else
1855 msr_bitmap = vmx_msr_bitmap_legacy;
1856 }
1857
1858 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
1859}
1860
1823/* 1861/*
1824 * Set up the vmcs to automatically save and restore system 1862 * Set up the vmcs to automatically save and restore system
1825 * msrs. Don't touch the 64-bit msrs if the guest is in legacy 1863 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
@@ -1828,7 +1866,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1828static void setup_msrs(struct vcpu_vmx *vmx) 1866static void setup_msrs(struct vcpu_vmx *vmx)
1829{ 1867{
1830 int save_nmsrs, index; 1868 int save_nmsrs, index;
1831 unsigned long *msr_bitmap;
1832 1869
1833 save_nmsrs = 0; 1870 save_nmsrs = 0;
1834#ifdef CONFIG_X86_64 1871#ifdef CONFIG_X86_64
@@ -1860,14 +1897,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
1860 1897
1861 vmx->save_nmsrs = save_nmsrs; 1898 vmx->save_nmsrs = save_nmsrs;
1862 1899
1863 if (cpu_has_vmx_msr_bitmap()) { 1900 if (cpu_has_vmx_msr_bitmap())
1864 if (is_long_mode(&vmx->vcpu)) 1901 vmx_set_msr_bitmap(&vmx->vcpu);
1865 msr_bitmap = vmx_msr_bitmap_longmode;
1866 else
1867 msr_bitmap = vmx_msr_bitmap_legacy;
1868
1869 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
1870 }
1871} 1902}
1872 1903
1873/* 1904/*
@@ -2533,13 +2564,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2533 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 2564 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
2534 min2 = 0; 2565 min2 = 0;
2535 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2566 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2567 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2536 SECONDARY_EXEC_WBINVD_EXITING | 2568 SECONDARY_EXEC_WBINVD_EXITING |
2537 SECONDARY_EXEC_ENABLE_VPID | 2569 SECONDARY_EXEC_ENABLE_VPID |
2538 SECONDARY_EXEC_ENABLE_EPT | 2570 SECONDARY_EXEC_ENABLE_EPT |
2539 SECONDARY_EXEC_UNRESTRICTED_GUEST | 2571 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2540 SECONDARY_EXEC_PAUSE_LOOP_EXITING | 2572 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
2541 SECONDARY_EXEC_RDTSCP | 2573 SECONDARY_EXEC_RDTSCP |
2542 SECONDARY_EXEC_ENABLE_INVPCID; 2574 SECONDARY_EXEC_ENABLE_INVPCID |
2575 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2576 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
2543 if (adjust_vmx_controls(min2, opt2, 2577 if (adjust_vmx_controls(min2, opt2,
2544 MSR_IA32_VMX_PROCBASED_CTLS2, 2578 MSR_IA32_VMX_PROCBASED_CTLS2,
2545 &_cpu_based_2nd_exec_control) < 0) 2579 &_cpu_based_2nd_exec_control) < 0)
@@ -2550,6 +2584,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2550 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 2584 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
2551 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; 2585 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
2552#endif 2586#endif
2587
2588 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2589 _cpu_based_2nd_exec_control &= ~(
2590 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2591 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2592 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
2593
2553 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { 2594 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
2554 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT 2595 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
2555 enabled */ 2596 enabled */
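
APIC_REGISTER_VIRT and VIRTUAL_INTR_DELIVERY are requested only in opt2, so setup still succeeds on CPUs that lack them, and the hunk above additionally drops them when the TPR shadow is unavailable. The underlying adjustment follows the usual VMX capability-MSR convention (low 32 bits: must-be-one settings; high 32 bits: may-be-one settings). A userspace sketch of that adjustment with a fabricated capability value; adjust_controls() below is a simplified analogue, not the kernel function:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the control adjustment: @min are required bits, @opt are
 * nice-to-have bits, @cap is a fabricated capability MSR value.
 * Returns -1 if a required bit is unsupported, else 0.
 */
static int adjust_controls(uint32_t min, uint32_t opt, uint64_t cap,
			   uint32_t *result)
{
	uint32_t must_be_one = (uint32_t)cap;          /* low 32 bits  */
	uint32_t may_be_one  = (uint32_t)(cap >> 32);  /* high 32 bits */
	uint32_t ctl = min | opt;

	ctl &= may_be_one;       /* drop bits the CPU cannot set      */
	ctl |= must_be_one;      /* force bits the CPU insists on     */

	if (min & ~ctl)          /* a required bit did not survive    */
		return -1;
	*result = ctl;
	return 0;
}

int main(void)
{
	uint32_t min = 0x0, opt = 0x0000000c;          /* two optional bits */
	uint64_t cap = 0x0000000400000001ULL;          /* only bit 2 allowed,
							  bit 0 forced on   */
	uint32_t ctl;

	if (!adjust_controls(min, opt, cap, &ctl))
		printf("final controls: 0x%08x\n", ctl); /* prints 0x00000005 */
	return 0;
}
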
@@ -2747,6 +2788,15 @@ static __init int hardware_setup(void)
2747 if (!cpu_has_vmx_ple()) 2788 if (!cpu_has_vmx_ple())
2748 ple_gap = 0; 2789 ple_gap = 0;
2749 2790
2791 if (!cpu_has_vmx_apic_register_virt() ||
2792 !cpu_has_vmx_virtual_intr_delivery())
2793 enable_apicv_reg_vid = 0;
2794
2795 if (enable_apicv_reg_vid)
2796 kvm_x86_ops->update_cr8_intercept = NULL;
2797 else
2798 kvm_x86_ops->hwapic_irr_update = NULL;
2799
2750 if (nested) 2800 if (nested)
2751 nested_vmx_setup_ctls_msrs(); 2801 nested_vmx_setup_ctls_msrs();
2752 2802
@@ -2758,18 +2808,28 @@ static __exit void hardware_unsetup(void)
2758 free_kvm_area(); 2808 free_kvm_area();
2759} 2809}
2760 2810
2761static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) 2811static bool emulation_required(struct kvm_vcpu *vcpu)
2762{ 2812{
2763 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2813 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2764 struct kvm_segment tmp = *save; 2814}
2765 2815
2766 if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { 2816static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
2767 tmp.base = vmcs_readl(sf->base); 2817 struct kvm_segment *save)
2768 tmp.selector = vmcs_read16(sf->selector); 2818{
2769 tmp.dpl = tmp.selector & SELECTOR_RPL_MASK; 2819 if (!emulate_invalid_guest_state) {
2770 tmp.s = 1; 2820 /*
2821 * CS and SS RPL should be equal during guest entry according
2822 * to VMX spec, but in reality it is not always so. Since vcpu
2823 * is in the middle of the transition from real mode to
2824 * protected mode it is safe to assume that RPL 0 is a good
2825 * default value.
2826 */
2827 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
2828 save->selector &= ~SELECTOR_RPL_MASK;
2829 save->dpl = save->selector & SELECTOR_RPL_MASK;
2830 save->s = 1;
2771 } 2831 }
2772 vmx_set_segment(vcpu, &tmp, seg); 2832 vmx_set_segment(vcpu, save, seg);
2773} 2833}
2774 2834
2775static void enter_pmode(struct kvm_vcpu *vcpu) 2835static void enter_pmode(struct kvm_vcpu *vcpu)
@@ -2777,7 +2837,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
2777 unsigned long flags; 2837 unsigned long flags;
2778 struct vcpu_vmx *vmx = to_vmx(vcpu); 2838 struct vcpu_vmx *vmx = to_vmx(vcpu);
2779 2839
2780 vmx->emulation_required = 1; 2840 /*
2841 * Update the real mode segment cache. It may not be up to date if a
2842 * segment register was written while the vcpu was in guest mode.
2843 */
2844 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2845 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2846 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2847 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2848 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2849 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2850
2781 vmx->rmode.vm86_active = 0; 2851 vmx->rmode.vm86_active = 0;
2782 2852
2783 vmx_segment_cache_clear(vmx); 2853 vmx_segment_cache_clear(vmx);
@@ -2794,22 +2864,16 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
2794 2864
2795 update_exception_bitmap(vcpu); 2865 update_exception_bitmap(vcpu);
2796 2866
2797 if (emulate_invalid_guest_state) 2867 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2798 return; 2868 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2799 2869 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2800 fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); 2870 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2801 fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); 2871 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2802 fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); 2872 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2803 fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2804
2805 vmx_segment_cache_clear(vmx);
2806 2873
2807 vmcs_write16(GUEST_SS_SELECTOR, 0); 2874 /* CPL is always 0 when CPU enters protected mode */
2808 vmcs_write32(GUEST_SS_AR_BYTES, 0x93); 2875 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
2809 2876 vmx->cpl = 0;
2810 vmcs_write16(GUEST_CS_SELECTOR,
2811 vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
2812 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
2813} 2877}
2814 2878
2815static gva_t rmode_tss_base(struct kvm *kvm) 2879static gva_t rmode_tss_base(struct kvm *kvm)
@@ -2831,36 +2895,51 @@ static gva_t rmode_tss_base(struct kvm *kvm)
2831static void fix_rmode_seg(int seg, struct kvm_segment *save) 2895static void fix_rmode_seg(int seg, struct kvm_segment *save)
2832{ 2896{
2833 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2897 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2834 2898 struct kvm_segment var = *save;
2835 vmcs_write16(sf->selector, save->base >> 4); 2899
2836 vmcs_write32(sf->base, save->base & 0xffff0); 2900 var.dpl = 0x3;
2837 vmcs_write32(sf->limit, 0xffff); 2901 if (seg == VCPU_SREG_CS)
2838 vmcs_write32(sf->ar_bytes, 0xf3); 2902 var.type = 0x3;
2839 if (save->base & 0xf) 2903
2840 printk_once(KERN_WARNING "kvm: segment base is not paragraph" 2904 if (!emulate_invalid_guest_state) {
2841 " aligned when entering protected mode (seg=%d)", 2905 var.selector = var.base >> 4;
2842 seg); 2906 var.base = var.base & 0xffff0;
2907 var.limit = 0xffff;
2908 var.g = 0;
2909 var.db = 0;
2910 var.present = 1;
2911 var.s = 1;
2912 var.l = 0;
2913 var.unusable = 0;
2914 var.type = 0x3;
2915 var.avl = 0;
2916 if (save->base & 0xf)
2917 printk_once(KERN_WARNING "kvm: segment base is not "
2918 "paragraph aligned when entering "
2919 "protected mode (seg=%d)", seg);
2920 }
2921
2922 vmcs_write16(sf->selector, var.selector);
2923 vmcs_write32(sf->base, var.base);
2924 vmcs_write32(sf->limit, var.limit);
2925 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
2843} 2926}
2844 2927
2845static void enter_rmode(struct kvm_vcpu *vcpu) 2928static void enter_rmode(struct kvm_vcpu *vcpu)
2846{ 2929{
2847 unsigned long flags; 2930 unsigned long flags;
2848 struct vcpu_vmx *vmx = to_vmx(vcpu); 2931 struct vcpu_vmx *vmx = to_vmx(vcpu);
2849 struct kvm_segment var;
2850
2851 if (enable_unrestricted_guest)
2852 return;
2853 2932
2854 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); 2933 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2855 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); 2934 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2856 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); 2935 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2857 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); 2936 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2858 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); 2937 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2938 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2939 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2859 2940
2860 vmx->emulation_required = 1;
2861 vmx->rmode.vm86_active = 1; 2941 vmx->rmode.vm86_active = 1;
2862 2942
2863
2864 /* 2943 /*
2865 * Very old userspace does not call KVM_SET_TSS_ADDR before entering 2944 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
2866 * vcpu. Call it here with phys address pointing 16M below 4G. 2945 * vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2888,28 +2967,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
2888 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); 2967 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
2889 update_exception_bitmap(vcpu); 2968 update_exception_bitmap(vcpu);
2890 2969
2891 if (emulate_invalid_guest_state) 2970 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2892 goto continue_rmode; 2971 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2893 2972 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2894 vmx_get_segment(vcpu, &var, VCPU_SREG_SS); 2973 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2895 vmx_set_segment(vcpu, &var, VCPU_SREG_SS); 2974 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2896 2975 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2897 vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
2898 vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
2899
2900 vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
2901 vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
2902
2903 vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
2904 vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
2905 2976
2906 vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
2907 vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
2908
2909 vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
2910 vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
2911
2912continue_rmode:
2913 kvm_mmu_reset_context(vcpu); 2977 kvm_mmu_reset_context(vcpu);
2914} 2978}
2915 2979
@@ -3068,17 +3132,18 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3068 struct vcpu_vmx *vmx = to_vmx(vcpu); 3132 struct vcpu_vmx *vmx = to_vmx(vcpu);
3069 unsigned long hw_cr0; 3133 unsigned long hw_cr0;
3070 3134
3135 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
3071 if (enable_unrestricted_guest) 3136 if (enable_unrestricted_guest)
3072 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) 3137 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
3073 | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; 3138 else {
3074 else 3139 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
3075 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
3076 3140
3077 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 3141 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
3078 enter_pmode(vcpu); 3142 enter_pmode(vcpu);
3079 3143
3080 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) 3144 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
3081 enter_rmode(vcpu); 3145 enter_rmode(vcpu);
3146 }
3082 3147
3083#ifdef CONFIG_X86_64 3148#ifdef CONFIG_X86_64
3084 if (vcpu->arch.efer & EFER_LME) { 3149 if (vcpu->arch.efer & EFER_LME) {
@@ -3098,7 +3163,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3098 vmcs_writel(CR0_READ_SHADOW, cr0); 3163 vmcs_writel(CR0_READ_SHADOW, cr0);
3099 vmcs_writel(GUEST_CR0, hw_cr0); 3164 vmcs_writel(GUEST_CR0, hw_cr0);
3100 vcpu->arch.cr0 = cr0; 3165 vcpu->arch.cr0 = cr0;
3101 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 3166
3167 /* depends on vcpu->arch.cr0 to be set to a new value */
3168 vmx->emulation_required = emulation_required(vcpu);
3102} 3169}
3103 3170
3104static u64 construct_eptp(unsigned long root_hpa) 3171static u64 construct_eptp(unsigned long root_hpa)
@@ -3155,6 +3222,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3155 if (!is_paging(vcpu)) { 3222 if (!is_paging(vcpu)) {
3156 hw_cr4 &= ~X86_CR4_PAE; 3223 hw_cr4 &= ~X86_CR4_PAE;
3157 hw_cr4 |= X86_CR4_PSE; 3224 hw_cr4 |= X86_CR4_PSE;
3225 /*
3226 * SMEP is disabled if CPU is in non-paging mode in
3227 * hardware. However KVM always uses paging mode to
3228 * emulate guest non-paging mode with TDP.
3229 * To emulate this behavior, SMEP needs to be manually
3230 * disabled when guest switches to non-paging mode.
3231 */
3232 hw_cr4 &= ~X86_CR4_SMEP;
3158 } else if (!(cr4 & X86_CR4_PAE)) { 3233 } else if (!(cr4 & X86_CR4_PAE)) {
3159 hw_cr4 &= ~X86_CR4_PAE; 3234 hw_cr4 &= ~X86_CR4_PAE;
3160 } 3235 }
@@ -3171,10 +3246,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
3171 struct vcpu_vmx *vmx = to_vmx(vcpu); 3246 struct vcpu_vmx *vmx = to_vmx(vcpu);
3172 u32 ar; 3247 u32 ar;
3173 3248
3174 if (vmx->rmode.vm86_active 3249 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3175 && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
3176 || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
3177 || seg == VCPU_SREG_GS)) {
3178 *var = vmx->rmode.segs[seg]; 3250 *var = vmx->rmode.segs[seg];
3179 if (seg == VCPU_SREG_TR 3251 if (seg == VCPU_SREG_TR
3180 || var->selector == vmx_read_guest_seg_selector(vmx, seg)) 3252 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
@@ -3187,8 +3259,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
3187 var->limit = vmx_read_guest_seg_limit(vmx, seg); 3259 var->limit = vmx_read_guest_seg_limit(vmx, seg);
3188 var->selector = vmx_read_guest_seg_selector(vmx, seg); 3260 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3189 ar = vmx_read_guest_seg_ar(vmx, seg); 3261 ar = vmx_read_guest_seg_ar(vmx, seg);
3190 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
3191 ar = 0;
3192 var->type = ar & 15; 3262 var->type = ar & 15;
3193 var->s = (ar >> 4) & 1; 3263 var->s = (ar >> 4) & 1;
3194 var->dpl = (ar >> 5) & 3; 3264 var->dpl = (ar >> 5) & 3;
@@ -3211,8 +3281,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3211 return vmx_read_guest_seg_base(to_vmx(vcpu), seg); 3281 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3212} 3282}
3213 3283
3214static int __vmx_get_cpl(struct kvm_vcpu *vcpu) 3284static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3215{ 3285{
3286 struct vcpu_vmx *vmx = to_vmx(vcpu);
3287
3216 if (!is_protmode(vcpu)) 3288 if (!is_protmode(vcpu))
3217 return 0; 3289 return 0;
3218 3290
@@ -3220,24 +3292,9 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
3220 && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ 3292 && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
3221 return 3; 3293 return 3;
3222 3294
3223 return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
3224}
3225
3226static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3227{
3228 struct vcpu_vmx *vmx = to_vmx(vcpu);
3229
3230 /*
3231 * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations
3232 * fail; use the cache instead.
3233 */
3234 if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) {
3235 return vmx->cpl;
3236 }
3237
3238 if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { 3295 if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
3239 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 3296 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3240 vmx->cpl = __vmx_get_cpl(vcpu); 3297 vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
3241 } 3298 }
3242 3299
3243 return vmx->cpl; 3300 return vmx->cpl;
@@ -3269,28 +3326,23 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3269{ 3326{
3270 struct vcpu_vmx *vmx = to_vmx(vcpu); 3327 struct vcpu_vmx *vmx = to_vmx(vcpu);
3271 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 3328 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3272 u32 ar;
3273 3329
3274 vmx_segment_cache_clear(vmx); 3330 vmx_segment_cache_clear(vmx);
3331 if (seg == VCPU_SREG_CS)
3332 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3275 3333
3276 if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { 3334 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3277 vmcs_write16(sf->selector, var->selector); 3335 vmx->rmode.segs[seg] = *var;
3278 vmx->rmode.segs[VCPU_SREG_TR] = *var; 3336 if (seg == VCPU_SREG_TR)
3279 return; 3337 vmcs_write16(sf->selector, var->selector);
3338 else if (var->s)
3339 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3340 goto out;
3280 } 3341 }
3342
3281 vmcs_writel(sf->base, var->base); 3343 vmcs_writel(sf->base, var->base);
3282 vmcs_write32(sf->limit, var->limit); 3344 vmcs_write32(sf->limit, var->limit);
3283 vmcs_write16(sf->selector, var->selector); 3345 vmcs_write16(sf->selector, var->selector);
3284 if (vmx->rmode.vm86_active && var->s) {
3285 vmx->rmode.segs[seg] = *var;
3286 /*
3287 * Hack real-mode segments into vm86 compatibility.
3288 */
3289 if (var->base == 0xffff0000 && var->selector == 0xf000)
3290 vmcs_writel(sf->base, 0xf0000);
3291 ar = 0xf3;
3292 } else
3293 ar = vmx_segment_access_rights(var);
3294 3346
3295 /* 3347 /*
3296 * Fix the "Accessed" bit in AR field of segment registers for older 3348 * Fix the "Accessed" bit in AR field of segment registers for older
@@ -3304,42 +3356,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3304 * kvm hack. 3356 * kvm hack.
3305 */ 3357 */
3306 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) 3358 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
3307 ar |= 0x1; /* Accessed */ 3359 var->type |= 0x1; /* Accessed */
3308 3360
3309 vmcs_write32(sf->ar_bytes, ar); 3361 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
3310 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3311 3362
3312 /* 3363out:
3313 * Fix segments for real mode guest in hosts that don't have 3364 vmx->emulation_required |= emulation_required(vcpu);
3314 * "unrestricted_mode" or it was disabled.
3315 * This is done to allow migration of the guests from hosts with
3316 * unrestricted guest like Westmere to older host that don't have
3317 * unrestricted guest like Nehelem.
3318 */
3319 if (vmx->rmode.vm86_active) {
3320 switch (seg) {
3321 case VCPU_SREG_CS:
3322 vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
3323 vmcs_write32(GUEST_CS_LIMIT, 0xffff);
3324 if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
3325 vmcs_writel(GUEST_CS_BASE, 0xf0000);
3326 vmcs_write16(GUEST_CS_SELECTOR,
3327 vmcs_readl(GUEST_CS_BASE) >> 4);
3328 break;
3329 case VCPU_SREG_ES:
3330 case VCPU_SREG_DS:
3331 case VCPU_SREG_GS:
3332 case VCPU_SREG_FS:
3333 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3334 break;
3335 case VCPU_SREG_SS:
3336 vmcs_write16(GUEST_SS_SELECTOR,
3337 vmcs_readl(GUEST_SS_BASE) >> 4);
3338 vmcs_write32(GUEST_SS_LIMIT, 0xffff);
3339 vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
3340 break;
3341 }
3342 }
3343} 3365}
3344 3366
3345static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 3367static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -3380,13 +3402,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
3380 u32 ar; 3402 u32 ar;
3381 3403
3382 vmx_get_segment(vcpu, &var, seg); 3404 vmx_get_segment(vcpu, &var, seg);
3405 var.dpl = 0x3;
3406 if (seg == VCPU_SREG_CS)
3407 var.type = 0x3;
3383 ar = vmx_segment_access_rights(&var); 3408 ar = vmx_segment_access_rights(&var);
3384 3409
3385 if (var.base != (var.selector << 4)) 3410 if (var.base != (var.selector << 4))
3386 return false; 3411 return false;
3387 if (var.limit < 0xffff) 3412 if (var.limit != 0xffff)
3388 return false; 3413 return false;
3389 if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) 3414 if (ar != 0xf3)
3390 return false; 3415 return false;
3391 3416
3392 return true; 3417 return true;
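
rmode_segment_valid() now accepts exactly the state fix_rmode_seg() writes: base == selector << 4, a 64K limit, and access rights equal to 0xf3 once DPL (and the type, for CS) are normalized. A compact userspace restatement of those checks; the access-rights byte here is a simplified 8-bit packing (present, DPL, S, type), not the full VMX AR field:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	uint16_t selector;
	uint32_t base;
	uint32_t limit;
	uint8_t  ar;          /* packed access rights byte, simplified */
};

static bool rmode_segment_valid(struct seg s, bool is_cs)
{
	/* Normalize the bits the real-mode transition is allowed to rewrite. */
	s.ar |= 0x60;                         /* force DPL = 3          */
	if (is_cs)
		s.ar = (s.ar & ~0x0f) | 0x03; /* force type = 3 for CS  */

	if (s.base != (uint32_t)s.selector << 4)
		return false;                 /* not paragraph-derived  */
	if (s.limit != 0xffff)
		return false;                 /* not a 64K segment      */
	return s.ar == 0xf3;                  /* present, DPL3, S=1, rw */
}

int main(void)
{
	struct seg ok  = { 0x9a00, 0x9a000, 0xffff, 0xf3 };
	struct seg bad = { 0x9a00, 0x9a010, 0xffff, 0xf3 };  /* base off */

	printf("%d %d\n", rmode_segment_valid(ok, false),
	       rmode_segment_valid(bad, false));
	return 0;
}
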
@@ -3521,6 +3546,9 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
3521 */ 3546 */
3522static bool guest_state_valid(struct kvm_vcpu *vcpu) 3547static bool guest_state_valid(struct kvm_vcpu *vcpu)
3523{ 3548{
3549 if (enable_unrestricted_guest)
3550 return true;
3551
3524 /* real mode guest state checks */ 3552 /* real mode guest state checks */
3525 if (!is_protmode(vcpu)) { 3553 if (!is_protmode(vcpu)) {
3526 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 3554 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
@@ -3644,12 +3672,9 @@ static void seg_setup(int seg)
3644 vmcs_write16(sf->selector, 0); 3672 vmcs_write16(sf->selector, 0);
3645 vmcs_writel(sf->base, 0); 3673 vmcs_writel(sf->base, 0);
3646 vmcs_write32(sf->limit, 0xffff); 3674 vmcs_write32(sf->limit, 0xffff);
3647 if (enable_unrestricted_guest) { 3675 ar = 0x93;
3648 ar = 0x93; 3676 if (seg == VCPU_SREG_CS)
3649 if (seg == VCPU_SREG_CS) 3677 ar |= 0x08; /* code segment */
3650 ar |= 0x08; /* code segment */
3651 } else
3652 ar = 0xf3;
3653 3678
3654 vmcs_write32(sf->ar_bytes, ar); 3679 vmcs_write32(sf->ar_bytes, ar);
3655} 3680}
@@ -3667,7 +3692,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
3667 kvm_userspace_mem.flags = 0; 3692 kvm_userspace_mem.flags = 0;
3668 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; 3693 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
3669 kvm_userspace_mem.memory_size = PAGE_SIZE; 3694 kvm_userspace_mem.memory_size = PAGE_SIZE;
3670 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); 3695 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
3671 if (r) 3696 if (r)
3672 goto out; 3697 goto out;
3673 3698
@@ -3697,7 +3722,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
3697 kvm_userspace_mem.guest_phys_addr = 3722 kvm_userspace_mem.guest_phys_addr =
3698 kvm->arch.ept_identity_map_addr; 3723 kvm->arch.ept_identity_map_addr;
3699 kvm_userspace_mem.memory_size = PAGE_SIZE; 3724 kvm_userspace_mem.memory_size = PAGE_SIZE;
3700 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); 3725 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
3701 if (r) 3726 if (r)
3702 goto out; 3727 goto out;
3703 3728
@@ -3739,7 +3764,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
3739 spin_unlock(&vmx_vpid_lock); 3764 spin_unlock(&vmx_vpid_lock);
3740} 3765}
3741 3766
3742static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) 3767#define MSR_TYPE_R 1
3768#define MSR_TYPE_W 2
3769static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3770 u32 msr, int type)
3743{ 3771{
3744 int f = sizeof(unsigned long); 3772 int f = sizeof(unsigned long);
3745 3773
@@ -3752,20 +3780,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
3752 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 3780 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3753 */ 3781 */
3754 if (msr <= 0x1fff) { 3782 if (msr <= 0x1fff) {
3755 __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ 3783 if (type & MSR_TYPE_R)
3756 __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ 3784 /* read-low */
3785 __clear_bit(msr, msr_bitmap + 0x000 / f);
3786
3787 if (type & MSR_TYPE_W)
3788 /* write-low */
3789 __clear_bit(msr, msr_bitmap + 0x800 / f);
3790
3757 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { 3791 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3758 msr &= 0x1fff; 3792 msr &= 0x1fff;
3759 __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ 3793 if (type & MSR_TYPE_R)
3760 __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ 3794 /* read-high */
3795 __clear_bit(msr, msr_bitmap + 0x400 / f);
3796
3797 if (type & MSR_TYPE_W)
3798 /* write-high */
3799 __clear_bit(msr, msr_bitmap + 0xc00 / f);
3800
3801 }
3802}
3803
3804static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
3805 u32 msr, int type)
3806{
3807 int f = sizeof(unsigned long);
3808
3809 if (!cpu_has_vmx_msr_bitmap())
3810 return;
3811
3812 /*
3813 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
3814 * have the write-low and read-high bitmap offsets the wrong way round.
3815 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3816 */
3817 if (msr <= 0x1fff) {
3818 if (type & MSR_TYPE_R)
3819 /* read-low */
3820 __set_bit(msr, msr_bitmap + 0x000 / f);
3821
3822 if (type & MSR_TYPE_W)
3823 /* write-low */
3824 __set_bit(msr, msr_bitmap + 0x800 / f);
3825
3826 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3827 msr &= 0x1fff;
3828 if (type & MSR_TYPE_R)
3829 /* read-high */
3830 __set_bit(msr, msr_bitmap + 0x400 / f);
3831
3832 if (type & MSR_TYPE_W)
3833 /* write-high */
3834 __set_bit(msr, msr_bitmap + 0xc00 / f);
3835
3761 } 3836 }
3762} 3837}
3763 3838
3764static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) 3839static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
3765{ 3840{
3766 if (!longmode_only) 3841 if (!longmode_only)
3767 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); 3842 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
3768 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); 3843 msr, MSR_TYPE_R | MSR_TYPE_W);
3844 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
3845 msr, MSR_TYPE_R | MSR_TYPE_W);
3846}
3847
3848static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
3849{
3850 __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3851 msr, MSR_TYPE_R);
3852 __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3853 msr, MSR_TYPE_R);
3854}
3855
3856static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
3857{
3858 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3859 msr, MSR_TYPE_R);
3860 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3861 msr, MSR_TYPE_R);
3862}
3863
3864static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
3865{
3866 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3867 msr, MSR_TYPE_W);
3868 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3869 msr, MSR_TYPE_W);
3769} 3870}
3770 3871
3771/* 3872/*
@@ -3844,6 +3945,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
3844 return exec_control; 3945 return exec_control;
3845} 3946}
3846 3947
3948static int vmx_vm_has_apicv(struct kvm *kvm)
3949{
3950 return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
3951}
3952
3847static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) 3953static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
3848{ 3954{
3849 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; 3955 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3861,6 +3967,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
3861 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 3967 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
3862 if (!ple_gap) 3968 if (!ple_gap)
3863 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; 3969 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
3970 if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
3971 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
3972 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3973 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
3864 return exec_control; 3974 return exec_control;
3865} 3975}
3866 3976
@@ -3905,6 +4015,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
3905 vmx_secondary_exec_control(vmx)); 4015 vmx_secondary_exec_control(vmx));
3906 } 4016 }
3907 4017
4018 if (enable_apicv_reg_vid) {
4019 vmcs_write64(EOI_EXIT_BITMAP0, 0);
4020 vmcs_write64(EOI_EXIT_BITMAP1, 0);
4021 vmcs_write64(EOI_EXIT_BITMAP2, 0);
4022 vmcs_write64(EOI_EXIT_BITMAP3, 0);
4023
4024 vmcs_write16(GUEST_INTR_STATUS, 0);
4025 }
4026
3908 if (ple_gap) { 4027 if (ple_gap) {
3909 vmcs_write32(PLE_GAP, ple_gap); 4028 vmcs_write32(PLE_GAP, ple_gap);
3910 vmcs_write32(PLE_WINDOW, ple_window); 4029 vmcs_write32(PLE_WINDOW, ple_window);
@@ -3990,14 +4109,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
3990 vmx_segment_cache_clear(vmx); 4109 vmx_segment_cache_clear(vmx);
3991 4110
3992 seg_setup(VCPU_SREG_CS); 4111 seg_setup(VCPU_SREG_CS);
3993 /* 4112 if (kvm_vcpu_is_bsp(&vmx->vcpu))
3994 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
3995 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
3996 */
3997 if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
3998 vmcs_write16(GUEST_CS_SELECTOR, 0xf000); 4113 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
3999 vmcs_writel(GUEST_CS_BASE, 0x000f0000); 4114 else {
4000 } else {
4001 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); 4115 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
4002 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); 4116 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
4003 } 4117 }
@@ -4073,9 +4187,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4073 4187
4074 ret = 0; 4188 ret = 0;
4075 4189
4076 /* HACK: Don't enable emulation on guest boot/reset */
4077 vmx->emulation_required = 0;
4078
4079 return ret; 4190 return ret;
4080} 4191}
4081 4192
@@ -4251,7 +4362,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4251 .flags = 0, 4362 .flags = 0,
4252 }; 4363 };
4253 4364
4254 ret = kvm_set_memory_region(kvm, &tss_mem, 0); 4365 ret = kvm_set_memory_region(kvm, &tss_mem, false);
4255 if (ret) 4366 if (ret)
4256 return ret; 4367 return ret;
4257 kvm->arch.tss_addr = addr; 4368 kvm->arch.tss_addr = addr;
@@ -4261,28 +4372,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4261 return 0; 4372 return 0;
4262} 4373}
4263 4374
4264static int handle_rmode_exception(struct kvm_vcpu *vcpu, 4375static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
4265 int vec, u32 err_code)
4266{ 4376{
4267 /*
4268 * Instruction with address size override prefix opcode 0x67
4269 * Cause the #SS fault with 0 error code in VM86 mode.
4270 */
4271 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
4272 if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
4273 return 1;
4274 /*
4275 * Forward all other exceptions that are valid in real mode.
4276 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
4277 * the required debugging infrastructure rework.
4278 */
4279 switch (vec) { 4377 switch (vec) {
4280 case DB_VECTOR:
4281 if (vcpu->guest_debug &
4282 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
4283 return 0;
4284 kvm_queue_exception(vcpu, vec);
4285 return 1;
4286 case BP_VECTOR: 4378 case BP_VECTOR:
4287 /* 4379 /*
4288 * Update instruction length as we may reinject the exception 4380 * Update instruction length as we may reinject the exception
@@ -4291,7 +4383,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4291 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = 4383 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
4292 vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 4384 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4293 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 4385 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
4294 return 0; 4386 return false;
4387 /* fall through */
4388 case DB_VECTOR:
4389 if (vcpu->guest_debug &
4390 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
4391 return false;
4295 /* fall through */ 4392 /* fall through */
4296 case DE_VECTOR: 4393 case DE_VECTOR:
4297 case OF_VECTOR: 4394 case OF_VECTOR:
@@ -4301,10 +4398,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4301 case SS_VECTOR: 4398 case SS_VECTOR:
4302 case GP_VECTOR: 4399 case GP_VECTOR:
4303 case MF_VECTOR: 4400 case MF_VECTOR:
4304 kvm_queue_exception(vcpu, vec); 4401 return true;
4305 return 1; 4402 break;
4306 } 4403 }
4307 return 0; 4404 return false;
4405}
4406
4407static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4408 int vec, u32 err_code)
4409{
4410 /*
4411	 * An instruction with the address-size override prefix (opcode 0x67)
4412	 * causes an #SS fault with error code 0 in VM86 mode.
4413 */
4414 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
4415 if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
4416 if (vcpu->arch.halt_request) {
4417 vcpu->arch.halt_request = 0;
4418 return kvm_emulate_halt(vcpu);
4419 }
4420 return 1;
4421 }
4422 return 0;
4423 }
4424
4425 /*
4426 * Forward all other exceptions that are valid in real mode.
4427 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
4428 * the required debugging infrastructure rework.
4429 */
4430 kvm_queue_exception(vcpu, vec);
4431 return 1;
4308} 4432}
4309 4433
4310/* 4434/*
@@ -4392,17 +4516,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4392 return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); 4516 return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
4393 } 4517 }
4394 4518
4395 if (vmx->rmode.vm86_active &&
4396 handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
4397 error_code)) {
4398 if (vcpu->arch.halt_request) {
4399 vcpu->arch.halt_request = 0;
4400 return kvm_emulate_halt(vcpu);
4401 }
4402 return 1;
4403 }
4404
4405 ex_no = intr_info & INTR_INFO_VECTOR_MASK; 4519 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
4520
4521 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
4522 return handle_rmode_exception(vcpu, ex_no, error_code);
4523
4406 switch (ex_no) { 4524 switch (ex_no) {
4407 case DB_VECTOR: 4525 case DB_VECTOR:
4408 dr6 = vmcs_readl(EXIT_QUALIFICATION); 4526 dr6 = vmcs_readl(EXIT_QUALIFICATION);
@@ -4820,6 +4938,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
4820 return emulate_instruction(vcpu, 0) == EMULATE_DONE; 4938 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
4821} 4939}
4822 4940
4941static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
4942{
4943 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4944 int vector = exit_qualification & 0xff;
4945
4946 /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
4947 kvm_apic_set_eoi_accelerated(vcpu, vector);
4948 return 1;
4949}
4950
4951static int handle_apic_write(struct kvm_vcpu *vcpu)
4952{
4953 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4954 u32 offset = exit_qualification & 0xfff;
4955
4956 /* APIC-write VM exit is trap-like and thus no need to adjust IP */
4957 kvm_apic_write_nodecode(vcpu, offset);
4958 return 1;
4959}
4960
4823static int handle_task_switch(struct kvm_vcpu *vcpu) 4961static int handle_task_switch(struct kvm_vcpu *vcpu)
4824{ 4962{
4825 struct vcpu_vmx *vmx = to_vmx(vcpu); 4963 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5065,7 +5203,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5065 schedule(); 5203 schedule();
5066 } 5204 }
5067 5205
5068 vmx->emulation_required = !guest_state_valid(vcpu); 5206 vmx->emulation_required = emulation_required(vcpu);
5069out: 5207out:
5070 return ret; 5208 return ret;
5071} 5209}
@@ -5754,6 +5892,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
5754 [EXIT_REASON_VMON] = handle_vmon, 5892 [EXIT_REASON_VMON] = handle_vmon,
5755 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 5893 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
5756 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 5894 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
5895 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
5896 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
5757 [EXIT_REASON_WBINVD] = handle_wbinvd, 5897 [EXIT_REASON_WBINVD] = handle_wbinvd,
5758 [EXIT_REASON_XSETBV] = handle_xsetbv, 5898 [EXIT_REASON_XSETBV] = handle_xsetbv,
5759 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 5899 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
@@ -5780,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
5780 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; 5920 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
5781 gpa_t bitmap; 5921 gpa_t bitmap;
5782 5922
5783 if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS)) 5923 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5784 return 1; 5924 return 1;
5785 5925
5786 /* 5926 /*
@@ -6008,7 +6148,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6008 u32 vectoring_info = vmx->idt_vectoring_info; 6148 u32 vectoring_info = vmx->idt_vectoring_info;
6009 6149
6010 /* If guest state is invalid, start emulating */ 6150 /* If guest state is invalid, start emulating */
6011 if (vmx->emulation_required && emulate_invalid_guest_state) 6151 if (vmx->emulation_required)
6012 return handle_invalid_guest_state(vcpu); 6152 return handle_invalid_guest_state(vcpu);
6013 6153
6014 /* 6154 /*
@@ -6103,6 +6243,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6103 vmcs_write32(TPR_THRESHOLD, irr); 6243 vmcs_write32(TPR_THRESHOLD, irr);
6104} 6244}
6105 6245
6246static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
6247{
6248 u32 sec_exec_control;
6249
6250 /*
6251	 * There is no point in enabling virtualize x2apic mode without
6252	 * apicv enabled.
6253 */
6254 if (!cpu_has_vmx_virtualize_x2apic_mode() ||
6255 !vmx_vm_has_apicv(vcpu->kvm))
6256 return;
6257
6258 if (!vm_need_tpr_shadow(vcpu->kvm))
6259 return;
6260
6261 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6262
6263 if (set) {
6264 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6265 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6266 } else {
6267 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6268 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6269 }
6270 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
6271
6272 vmx_set_msr_bitmap(vcpu);
6273}
6274
6275static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
6276{
6277 u16 status;
6278 u8 old;
6279
6280 if (!vmx_vm_has_apicv(kvm))
6281 return;
6282
6283 if (isr == -1)
6284 isr = 0;
6285
6286 status = vmcs_read16(GUEST_INTR_STATUS);
6287 old = status >> 8;
6288 if (isr != old) {
6289 status &= 0xff;
6290 status |= isr << 8;
6291 vmcs_write16(GUEST_INTR_STATUS, status);
6292 }
6293}
6294
6295static void vmx_set_rvi(int vector)
6296{
6297 u16 status;
6298 u8 old;
6299
6300 status = vmcs_read16(GUEST_INTR_STATUS);
6301 old = (u8)status & 0xff;
6302 if ((u8)vector != old) {
6303 status &= ~0xff;
6304 status |= (u8)vector;
6305 vmcs_write16(GUEST_INTR_STATUS, status);
6306 }
6307}
6308
6309static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6310{
6311 if (max_irr == -1)
6312 return;
6313
6314 vmx_set_rvi(max_irr);
6315}
6316
6317static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6318{
6319 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6320 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6321 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
6322 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
6323}
6324
6106static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) 6325static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
6107{ 6326{
6108 u32 exit_intr_info; 6327 u32 exit_intr_info;
@@ -6291,7 +6510,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6291 6510
6292 /* Don't enter VMX if guest state is invalid, let the exit handler 6511 /* Don't enter VMX if guest state is invalid, let the exit handler
6293 start emulation until we arrive back to a valid state */ 6512 start emulation until we arrive back to a valid state */
6294 if (vmx->emulation_required && emulate_invalid_guest_state) 6513 if (vmx->emulation_required)
6295 return; 6514 return;
6296 6515
6297 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) 6516 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -7366,6 +7585,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
7366 .enable_nmi_window = enable_nmi_window, 7585 .enable_nmi_window = enable_nmi_window,
7367 .enable_irq_window = enable_irq_window, 7586 .enable_irq_window = enable_irq_window,
7368 .update_cr8_intercept = update_cr8_intercept, 7587 .update_cr8_intercept = update_cr8_intercept,
7588 .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
7589 .vm_has_apicv = vmx_vm_has_apicv,
7590 .load_eoi_exitmap = vmx_load_eoi_exitmap,
7591 .hwapic_irr_update = vmx_hwapic_irr_update,
7592 .hwapic_isr_update = vmx_hwapic_isr_update,
7369 7593
7370 .set_tss_addr = vmx_set_tss_addr, 7594 .set_tss_addr = vmx_set_tss_addr,
7371 .get_tdp_level = get_ept_level, 7595 .get_tdp_level = get_ept_level,
@@ -7398,7 +7622,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
7398 7622
7399static int __init vmx_init(void) 7623static int __init vmx_init(void)
7400{ 7624{
7401 int r, i; 7625 int r, i, msr;
7402 7626
7403 rdmsrl_safe(MSR_EFER, &host_efer); 7627 rdmsrl_safe(MSR_EFER, &host_efer);
7404 7628
@@ -7419,11 +7643,19 @@ static int __init vmx_init(void)
7419 if (!vmx_msr_bitmap_legacy) 7643 if (!vmx_msr_bitmap_legacy)
7420 goto out1; 7644 goto out1;
7421 7645
7646 vmx_msr_bitmap_legacy_x2apic =
7647 (unsigned long *)__get_free_page(GFP_KERNEL);
7648 if (!vmx_msr_bitmap_legacy_x2apic)
7649 goto out2;
7422 7650
7423 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); 7651 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
7424 if (!vmx_msr_bitmap_longmode) 7652 if (!vmx_msr_bitmap_longmode)
7425 goto out2; 7653 goto out3;
7426 7654
7655 vmx_msr_bitmap_longmode_x2apic =
7656 (unsigned long *)__get_free_page(GFP_KERNEL);
7657 if (!vmx_msr_bitmap_longmode_x2apic)
7658 goto out4;
7427 7659
7428 /* 7660 /*
7429 * Allow direct access to the PC debug port (it is often used for I/O 7661 * Allow direct access to the PC debug port (it is often used for I/O
@@ -7455,6 +7687,28 @@ static int __init vmx_init(void)
7455 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); 7687 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
7456 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); 7688 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
7457 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); 7689 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
7690 memcpy(vmx_msr_bitmap_legacy_x2apic,
7691 vmx_msr_bitmap_legacy, PAGE_SIZE);
7692 memcpy(vmx_msr_bitmap_longmode_x2apic,
7693 vmx_msr_bitmap_longmode, PAGE_SIZE);
7694
7695 if (enable_apicv_reg_vid) {
7696 for (msr = 0x800; msr <= 0x8ff; msr++)
7697 vmx_disable_intercept_msr_read_x2apic(msr);
7698
7699		/* According to the SDM, in x2apic mode the whole ID register
7700		 * is used, but KVM only uses the highest eight bits, so reads
7701		 * of it still need to be intercepted. */
7702 vmx_enable_intercept_msr_read_x2apic(0x802);
7703 /* TMCCT */
7704 vmx_enable_intercept_msr_read_x2apic(0x839);
7705 /* TPR */
7706 vmx_disable_intercept_msr_write_x2apic(0x808);
7707 /* EOI */
7708 vmx_disable_intercept_msr_write_x2apic(0x80b);
7709 /* SELF-IPI */
7710 vmx_disable_intercept_msr_write_x2apic(0x83f);
7711 }
7458 7712
7459 if (enable_ept) { 7713 if (enable_ept) {
7460 kvm_mmu_set_mask_ptes(0ull, 7714 kvm_mmu_set_mask_ptes(0ull,
@@ -7468,8 +7722,10 @@ static int __init vmx_init(void)
7468 7722
7469 return 0; 7723 return 0;
7470 7724
7471out3: 7725out4:
7472 free_page((unsigned long)vmx_msr_bitmap_longmode); 7726 free_page((unsigned long)vmx_msr_bitmap_longmode);
7727out3:
7728 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
7473out2: 7729out2:
7474 free_page((unsigned long)vmx_msr_bitmap_legacy); 7730 free_page((unsigned long)vmx_msr_bitmap_legacy);
7475out1: 7731out1:
@@ -7481,6 +7737,8 @@ out:
7481 7737
7482static void __exit vmx_exit(void) 7738static void __exit vmx_exit(void)
7483{ 7739{
7740 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
7741 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
7484 free_page((unsigned long)vmx_msr_bitmap_legacy); 7742 free_page((unsigned long)vmx_msr_bitmap_legacy);
7485 free_page((unsigned long)vmx_msr_bitmap_longmode); 7743 free_page((unsigned long)vmx_msr_bitmap_longmode);
7486 free_page((unsigned long)vmx_io_bitmap_b); 7744 free_page((unsigned long)vmx_io_bitmap_b);
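
Note on the MSR-bitmap hunks above: __vmx_disable_intercept_for_msr() and __vmx_enable_intercept_for_msr() treat the 4KB MSR bitmap as four 1KB regions — read-low at byte 0x000, read-high at 0x400, write-low at 0x800 and write-high at 0xc00 — covering MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. A minimal stand-alone sketch of that indexing, using only the offsets visible in the diff (the helper below is illustrative, not a kernel symbol):

/*
 * Sketch only: which byte/bit of the VMX MSR bitmap controls a given
 * MSR access, mirroring the offsets used in the hunks above.
 */
#include <stdbool.h>
#include <stdint.h>

static bool msr_bitmap_slot(uint32_t msr, bool write,
			    uint32_t *byte, uint32_t *bit)
{
	uint32_t base;

	if (msr <= 0x1fff) {
		base = write ? 0x800 : 0x000;	/* write-low / read-low   */
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		base = write ? 0xc00 : 0x400;	/* write-high / read-high */
		msr &= 0x1fff;
	} else {
		return false;			/* not covered: always intercepted */
	}

	*byte = base + msr / 8;
	*bit  = msr % 8;
	return true;
}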
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 76f54461f7cb..f71500af1f81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -120,7 +120,7 @@ struct kvm_shared_msrs {
120}; 120};
121 121
122static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; 122static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
123static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); 123static struct kvm_shared_msrs __percpu *shared_msrs;
124 124
125struct kvm_stats_debugfs_item debugfs_entries[] = { 125struct kvm_stats_debugfs_item debugfs_entries[] = {
126 { "pf_fixed", VCPU_STAT(pf_fixed) }, 126 { "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -191,10 +191,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
191 191
192static void shared_msr_update(unsigned slot, u32 msr) 192static void shared_msr_update(unsigned slot, u32 msr)
193{ 193{
194 struct kvm_shared_msrs *smsr;
195 u64 value; 194 u64 value;
195 unsigned int cpu = smp_processor_id();
196 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
196 197
197 smsr = &__get_cpu_var(shared_msrs);
198 /* only read, and nobody should modify it at this time, 198 /* only read, and nobody should modify it at this time,
199 * so don't need lock */ 199 * so don't need lock */
200 if (slot >= shared_msrs_global.nr) { 200 if (slot >= shared_msrs_global.nr) {
@@ -226,7 +226,8 @@ static void kvm_shared_msr_cpu_online(void)
226 226
227void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 227void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
228{ 228{
229 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 229 unsigned int cpu = smp_processor_id();
230 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
230 231
231 if (((value ^ smsr->values[slot].curr) & mask) == 0) 232 if (((value ^ smsr->values[slot].curr) & mask) == 0)
232 return; 233 return;
@@ -242,7 +243,8 @@ EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
242 243
243static void drop_user_return_notifiers(void *ignore) 244static void drop_user_return_notifiers(void *ignore)
244{ 245{
245 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 246 unsigned int cpu = smp_processor_id();
247 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
246 248
247 if (smsr->registered) 249 if (smsr->registered)
248 kvm_on_user_return(&smsr->urn); 250 kvm_on_user_return(&smsr->urn);
@@ -870,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
870 872
871 kvm_x86_ops->set_efer(vcpu, efer); 873 kvm_x86_ops->set_efer(vcpu, efer);
872 874
873 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
874
875 /* Update reserved bits */ 875 /* Update reserved bits */
876 if ((efer ^ old_efer) & EFER_NX) 876 if ((efer ^ old_efer) & EFER_NX)
877 kvm_mmu_reset_context(vcpu); 877 kvm_mmu_reset_context(vcpu);
@@ -1879,6 +1879,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1879 u64 data = msr_info->data; 1879 u64 data = msr_info->data;
1880 1880
1881 switch (msr) { 1881 switch (msr) {
1882 case MSR_AMD64_NB_CFG:
1883 case MSR_IA32_UCODE_REV:
1884 case MSR_IA32_UCODE_WRITE:
1885 case MSR_VM_HSAVE_PA:
1886 case MSR_AMD64_PATCH_LOADER:
1887 case MSR_AMD64_BU_CFG2:
1888 break;
1889
1882 case MSR_EFER: 1890 case MSR_EFER:
1883 return set_efer(vcpu, data); 1891 return set_efer(vcpu, data);
1884 case MSR_K7_HWCR: 1892 case MSR_K7_HWCR:
@@ -1898,8 +1906,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1898 return 1; 1906 return 1;
1899 } 1907 }
1900 break; 1908 break;
1901 case MSR_AMD64_NB_CFG:
1902 break;
1903 case MSR_IA32_DEBUGCTLMSR: 1909 case MSR_IA32_DEBUGCTLMSR:
1904 if (!data) { 1910 if (!data) {
1905 /* We support the non-activated case already */ 1911 /* We support the non-activated case already */
@@ -1912,11 +1918,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1912 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", 1918 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
1913 __func__, data); 1919 __func__, data);
1914 break; 1920 break;
1915 case MSR_IA32_UCODE_REV:
1916 case MSR_IA32_UCODE_WRITE:
1917 case MSR_VM_HSAVE_PA:
1918 case MSR_AMD64_PATCH_LOADER:
1919 break;
1920 case 0x200 ... 0x2ff: 1921 case 0x200 ... 0x2ff:
1921 return set_msr_mtrr(vcpu, msr, data); 1922 return set_msr_mtrr(vcpu, msr, data);
1922 case MSR_IA32_APICBASE: 1923 case MSR_IA32_APICBASE:
@@ -2251,6 +2252,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2251 case MSR_K8_INT_PENDING_MSG: 2252 case MSR_K8_INT_PENDING_MSG:
2252 case MSR_AMD64_NB_CFG: 2253 case MSR_AMD64_NB_CFG:
2253 case MSR_FAM10H_MMIO_CONF_BASE: 2254 case MSR_FAM10H_MMIO_CONF_BASE:
2255 case MSR_AMD64_BU_CFG2:
2254 data = 0; 2256 data = 0;
2255 break; 2257 break;
2256 case MSR_P6_PERFCTR0: 2258 case MSR_P6_PERFCTR0:
@@ -2518,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2518 r = KVM_MAX_VCPUS; 2520 r = KVM_MAX_VCPUS;
2519 break; 2521 break;
2520 case KVM_CAP_NR_MEMSLOTS: 2522 case KVM_CAP_NR_MEMSLOTS:
2521 r = KVM_MEMORY_SLOTS; 2523 r = KVM_USER_MEM_SLOTS;
2522 break; 2524 break;
2523 case KVM_CAP_PV_MMU: /* obsolete */ 2525 case KVM_CAP_PV_MMU: /* obsolete */
2524 r = 0; 2526 r = 0;
@@ -3270,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3270 return -EINVAL; 3272 return -EINVAL;
3271 3273
3272 mutex_lock(&kvm->slots_lock); 3274 mutex_lock(&kvm->slots_lock);
3273 spin_lock(&kvm->mmu_lock);
3274 3275
3275 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 3276 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3276 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 3277 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3277 3278
3278 spin_unlock(&kvm->mmu_lock);
3279 mutex_unlock(&kvm->slots_lock); 3279 mutex_unlock(&kvm->slots_lock);
3280 return 0; 3280 return 0;
3281} 3281}
@@ -3435,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3435 mutex_lock(&kvm->slots_lock); 3435 mutex_lock(&kvm->slots_lock);
3436 3436
3437 r = -EINVAL; 3437 r = -EINVAL;
3438 if (log->slot >= KVM_MEMORY_SLOTS) 3438 if (log->slot >= KVM_USER_MEM_SLOTS)
3439 goto out; 3439 goto out;
3440 3440
3441 memslot = id_to_memslot(kvm->memslots, log->slot); 3441 memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -4491,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4491 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); 4491 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4492 *selector = var.selector; 4492 *selector = var.selector;
4493 4493
4494 if (var.unusable) 4494 if (var.unusable) {
4495 memset(desc, 0, sizeof(*desc));
4495 return false; 4496 return false;
4497 }
4496 4498
4497 if (var.g) 4499 if (var.g)
4498 var.limit >>= 12; 4500 var.limit >>= 12;
@@ -4753,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4753 return r; 4755 return r;
4754} 4756}
4755 4757
4756static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) 4758static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
4759 bool write_fault_to_shadow_pgtable)
4757{ 4760{
4758 gpa_t gpa; 4761 gpa_t gpa = cr2;
4759 pfn_t pfn; 4762 pfn_t pfn;
4760 4763
4761 if (tdp_enabled) 4764 if (!vcpu->arch.mmu.direct_map) {
4762 return false; 4765 /*
4763 4766 * Write permission should be allowed since only
4764 /* 4767 * write access need to be emulated.
4765 * if emulation was due to access to shadowed page table 4768 */
4766 * and it failed try to unshadow page and re-enter the 4769 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4767 * guest to let CPU execute the instruction.
4768 */
4769 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4770 return true;
4771
4772 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4773 4770
4774 if (gpa == UNMAPPED_GVA) 4771 /*
4775 return true; /* let cpu generate fault */ 4772 * If the mapping is invalid in guest, let cpu retry
4773 * it to generate fault.
4774 */
4775 if (gpa == UNMAPPED_GVA)
4776 return true;
4777 }
4776 4778
4777 /* 4779 /*
4778 * Do not retry the unhandleable instruction if it faults on the 4780 * Do not retry the unhandleable instruction if it faults on the
@@ -4781,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4781 * instruction -> ... 4783 * instruction -> ...
4782 */ 4784 */
4783 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); 4785 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4784 if (!is_error_noslot_pfn(pfn)) { 4786
4785 kvm_release_pfn_clean(pfn); 4787 /*
4788 * If the instruction failed on the error pfn, it can not be fixed,
4789 * report the error to userspace.
4790 */
4791 if (is_error_noslot_pfn(pfn))
4792 return false;
4793
4794 kvm_release_pfn_clean(pfn);
4795
4796 /* The instructions are well-emulated on direct mmu. */
4797 if (vcpu->arch.mmu.direct_map) {
4798 unsigned int indirect_shadow_pages;
4799
4800 spin_lock(&vcpu->kvm->mmu_lock);
4801 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
4802 spin_unlock(&vcpu->kvm->mmu_lock);
4803
4804 if (indirect_shadow_pages)
4805 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4806
4786 return true; 4807 return true;
4787 } 4808 }
4788 4809
4789 return false; 4810 /*
4811 * if emulation was due to access to shadowed page table
4812 * and it failed try to unshadow page and re-enter the
4813 * guest to let CPU execute the instruction.
4814 */
4815 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4816
4817 /*
4818	 * If the access faults on its own page table, it cannot
4819	 * be fixed by unprotecting the shadow page, so it should
4820	 * be reported to userspace.
4821 */
4822 return !write_fault_to_shadow_pgtable;
4790} 4823}
4791 4824
4792static bool retry_instruction(struct x86_emulate_ctxt *ctxt, 4825static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@@ -4828,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4828 if (!vcpu->arch.mmu.direct_map) 4861 if (!vcpu->arch.mmu.direct_map)
4829 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); 4862 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4830 4863
4831 kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 4864 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4832 4865
4833 return true; 4866 return true;
4834} 4867}
@@ -4845,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4845 int r; 4878 int r;
4846 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 4879 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4847 bool writeback = true; 4880 bool writeback = true;
4881 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
4848 4882
4883 /*
4884 * Clear write_fault_to_shadow_pgtable here to ensure it is
4885 * never reused.
4886 */
4887 vcpu->arch.write_fault_to_shadow_pgtable = false;
4849 kvm_clear_exception_queue(vcpu); 4888 kvm_clear_exception_queue(vcpu);
4850 4889
4851 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 4890 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
@@ -4864,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4864 if (r != EMULATION_OK) { 4903 if (r != EMULATION_OK) {
4865 if (emulation_type & EMULTYPE_TRAP_UD) 4904 if (emulation_type & EMULTYPE_TRAP_UD)
4866 return EMULATE_FAIL; 4905 return EMULATE_FAIL;
4867 if (reexecute_instruction(vcpu, cr2)) 4906 if (reexecute_instruction(vcpu, cr2,
4907 write_fault_to_spt))
4868 return EMULATE_DONE; 4908 return EMULATE_DONE;
4869 if (emulation_type & EMULTYPE_SKIP) 4909 if (emulation_type & EMULTYPE_SKIP)
4870 return EMULATE_FAIL; 4910 return EMULATE_FAIL;
@@ -4894,7 +4934,7 @@ restart:
4894 return EMULATE_DONE; 4934 return EMULATE_DONE;
4895 4935
4896 if (r == EMULATION_FAILED) { 4936 if (r == EMULATION_FAILED) {
4897 if (reexecute_instruction(vcpu, cr2)) 4937 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
4898 return EMULATE_DONE; 4938 return EMULATE_DONE;
4899 4939
4900 return handle_emulation_failure(vcpu); 4940 return handle_emulation_failure(vcpu);
@@ -5233,9 +5273,16 @@ int kvm_arch_init(void *opaque)
5233 goto out; 5273 goto out;
5234 } 5274 }
5235 5275
5276 r = -ENOMEM;
5277 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
5278 if (!shared_msrs) {
5279 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
5280 goto out;
5281 }
5282
5236 r = kvm_mmu_module_init(); 5283 r = kvm_mmu_module_init();
5237 if (r) 5284 if (r)
5238 goto out; 5285 goto out_free_percpu;
5239 5286
5240 kvm_set_mmio_spte_mask(); 5287 kvm_set_mmio_spte_mask();
5241 kvm_init_msr_list(); 5288 kvm_init_msr_list();
@@ -5258,6 +5305,8 @@ int kvm_arch_init(void *opaque)
5258 5305
5259 return 0; 5306 return 0;
5260 5307
5308out_free_percpu:
5309 free_percpu(shared_msrs);
5261out: 5310out:
5262 return r; 5311 return r;
5263} 5312}
@@ -5275,6 +5324,7 @@ void kvm_arch_exit(void)
5275#endif 5324#endif
5276 kvm_x86_ops = NULL; 5325 kvm_x86_ops = NULL;
5277 kvm_mmu_module_exit(); 5326 kvm_mmu_module_exit();
5327 free_percpu(shared_msrs);
5278} 5328}
5279 5329
5280int kvm_emulate_halt(struct kvm_vcpu *vcpu) 5330int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -5527,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
5527 vcpu->arch.nmi_injected = true; 5577 vcpu->arch.nmi_injected = true;
5528 kvm_x86_ops->set_nmi(vcpu); 5578 kvm_x86_ops->set_nmi(vcpu);
5529 } 5579 }
5530 } else if (kvm_cpu_has_interrupt(vcpu)) { 5580 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
5531 if (kvm_x86_ops->interrupt_allowed(vcpu)) { 5581 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5532 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), 5582 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5533 false); 5583 false);
@@ -5595,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
5595#endif 5645#endif
5596} 5646}
5597 5647
5648static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
5649{
5650 u64 eoi_exit_bitmap[4];
5651
5652 memset(eoi_exit_bitmap, 0, 32);
5653
5654 kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
5655 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
5656}
5657
5598static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5658static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5599{ 5659{
5600 int r; 5660 int r;
@@ -5648,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5648 kvm_handle_pmu_event(vcpu); 5708 kvm_handle_pmu_event(vcpu);
5649 if (kvm_check_request(KVM_REQ_PMI, vcpu)) 5709 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5650 kvm_deliver_pmi(vcpu); 5710 kvm_deliver_pmi(vcpu);
5711 if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
5712 update_eoi_exitmap(vcpu);
5651 } 5713 }
5652 5714
5653 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 5715 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5656,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5656 /* enable NMI/IRQ window open exits if needed */ 5718 /* enable NMI/IRQ window open exits if needed */
5657 if (vcpu->arch.nmi_pending) 5719 if (vcpu->arch.nmi_pending)
5658 kvm_x86_ops->enable_nmi_window(vcpu); 5720 kvm_x86_ops->enable_nmi_window(vcpu);
5659 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) 5721 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
5660 kvm_x86_ops->enable_irq_window(vcpu); 5722 kvm_x86_ops->enable_irq_window(vcpu);
5661 5723
5662 if (kvm_lapic_enabled(vcpu)) { 5724 if (kvm_lapic_enabled(vcpu)) {
5725 /*
5726 * Update architecture specific hints for APIC
5727 * virtual interrupt delivery.
5728 */
5729 if (kvm_x86_ops->hwapic_irr_update)
5730 kvm_x86_ops->hwapic_irr_update(vcpu,
5731 kvm_lapic_find_highest_irr(vcpu));
5663 update_cr8_intercept(vcpu); 5732 update_cr8_intercept(vcpu);
5664 kvm_lapic_sync_to_vapic(vcpu); 5733 kvm_lapic_sync_to_vapic(vcpu);
5665 } 5734 }
@@ -6839,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
6839 struct kvm_memory_slot *memslot, 6908 struct kvm_memory_slot *memslot,
6840 struct kvm_memory_slot old, 6909 struct kvm_memory_slot old,
6841 struct kvm_userspace_memory_region *mem, 6910 struct kvm_userspace_memory_region *mem,
6842 int user_alloc) 6911 bool user_alloc)
6843{ 6912{
6844 int npages = memslot->npages; 6913 int npages = memslot->npages;
6845 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
6846
6847 /* Prevent internal slot pages from being moved by fork()/COW. */
6848 if (memslot->id >= KVM_MEMORY_SLOTS)
6849 map_flags = MAP_SHARED | MAP_ANONYMOUS;
6850 6914
6851 /*To keep backward compatibility with older userspace, 6915 /*
6852 *x86 needs to handle !user_alloc case. 6916 * Only private memory slots need to be mapped here since
6917 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
6853 */ 6918 */
6854 if (!user_alloc) { 6919 if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
6855 if (npages && !old.npages) { 6920 unsigned long userspace_addr;
6856 unsigned long userspace_addr;
6857 6921
6858 userspace_addr = vm_mmap(NULL, 0, 6922 /*
6859 npages * PAGE_SIZE, 6923 * MAP_SHARED to prevent internal slot pages from being moved
6860 PROT_READ | PROT_WRITE, 6924 * by fork()/COW.
6861 map_flags, 6925 */
6862 0); 6926 userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
6927 PROT_READ | PROT_WRITE,
6928 MAP_SHARED | MAP_ANONYMOUS, 0);
6863 6929
6864 if (IS_ERR((void *)userspace_addr)) 6930 if (IS_ERR((void *)userspace_addr))
6865 return PTR_ERR((void *)userspace_addr); 6931 return PTR_ERR((void *)userspace_addr);
6866 6932
6867 memslot->userspace_addr = userspace_addr; 6933 memslot->userspace_addr = userspace_addr;
6868 }
6869 } 6934 }
6870 6935
6871
6872 return 0; 6936 return 0;
6873} 6937}
6874 6938
6875void kvm_arch_commit_memory_region(struct kvm *kvm, 6939void kvm_arch_commit_memory_region(struct kvm *kvm,
6876 struct kvm_userspace_memory_region *mem, 6940 struct kvm_userspace_memory_region *mem,
6877 struct kvm_memory_slot old, 6941 struct kvm_memory_slot old,
6878 int user_alloc) 6942 bool user_alloc)
6879{ 6943{
6880 6944
6881 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; 6945 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6882 6946
6883 if (!user_alloc && !old.user_alloc && old.npages && !npages) { 6947 if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
6884 int ret; 6948 int ret;
6885 6949
6886 ret = vm_munmap(old.userspace_addr, 6950 ret = vm_munmap(old.userspace_addr,
@@ -6894,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
6894 if (!kvm->arch.n_requested_mmu_pages) 6958 if (!kvm->arch.n_requested_mmu_pages)
6895 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 6959 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
6896 6960
6897 spin_lock(&kvm->mmu_lock);
6898 if (nr_mmu_pages) 6961 if (nr_mmu_pages)
6899 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); 6962 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6900 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 6963 /*
6901 spin_unlock(&kvm->mmu_lock); 6964 * Write protect all pages for dirty logging.
6965 * Existing largepage mappings are destroyed here and new ones will
6966 * not be created until the end of the logging.
6967 */
6968 if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
6969 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6902 /* 6970 /*
6903 * If memory slot is created, or moved, we need to clear all 6971 * If memory slot is created, or moved, we need to clear all
6904 * mmio sptes. 6972 * mmio sptes.
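
Note on the shared_msrs hunks above: the statically defined per-cpu variable is replaced by one allocated in kvm_arch_init() and freed in kvm_arch_exit(), with lookups switched to per_cpu_ptr(). A condensed sketch of that pattern, kept to the calls that actually appear in the diff (the struct body and function names here are placeholders, not KVM symbols):

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <linux/errno.h>

/* Placeholder payload; the real struct kvm_shared_msrs lives in x86.c. */
struct example_msrs {
	u64 values[4];
};

static struct example_msrs __percpu *example_msrs;

static int example_init(void)
{
	example_msrs = alloc_percpu(struct example_msrs);
	if (!example_msrs)
		return -ENOMEM;		/* kvm_arch_init() now fails the same way */
	return 0;
}

static void example_update(void)
{
	/* Callers are non-preemptible, so smp_processor_id() is safe here. */
	unsigned int cpu = smp_processor_id();
	struct example_msrs *p = per_cpu_ptr(example_msrs, cpu);

	p->values[0] = 0;		/* stand-in for the real MSR bookkeeping */
}

static void example_exit(void)
{
	free_percpu(example_msrs);
}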
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 7872a3330fb5..29043d2048a0 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -2,6 +2,7 @@ config LGUEST_GUEST
2 bool "Lguest guest support" 2 bool "Lguest guest support"
3 select PARAVIRT 3 select PARAVIRT
4 depends on X86_32 4 depends on X86_32
5 select TTY
5 select VIRTUALIZATION 6 select VIRTUALIZATION
6 select VIRTIO 7 select VIRTIO
7 select VIRTIO_CONSOLE 8 select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index df4176cdbb32..1cbd89ca5569 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -552,7 +552,8 @@ static void lguest_write_cr3(unsigned long cr3)
552 current_cr3 = cr3; 552 current_cr3 = cr3;
553 553
554 /* These two page tables are simple, linear, and used during boot */ 554 /* These two page tables are simple, linear, and used during boot */
555 if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) 555 if (cr3 != __pa_symbol(swapper_pg_dir) &&
556 cr3 != __pa_symbol(initial_page_table))
556 cr3_changed = true; 557 cr3_changed = true;
557} 558}
558 559
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index e395693abdb1..7c3bee636e2f 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -98,7 +98,7 @@ void use_tsc_delay(void)
98 delay_fn = delay_tsc; 98 delay_fn = delay_tsc;
99} 99}
100 100
101int __devinit read_current_timer(unsigned long *timer_val) 101int read_current_timer(unsigned long *timer_val)
102{ 102{
103 if (delay_fn == delay_tsc) { 103 if (delay_fn == delay_tsc) {
104 rdtscll(*timer_val); 104 rdtscll(*timer_val);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 156b9c804670..a4512359656a 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -15,11 +15,10 @@
15 * __get_user_X 15 * __get_user_X
16 * 16 *
17 * Inputs: %[r|e]ax contains the address. 17 * Inputs: %[r|e]ax contains the address.
18 * The register is modified, but all changes are undone
19 * before returning because the C code doesn't know about it.
20 * 18 *
21 * Outputs: %[r|e]ax is error code (0 or -EFAULT) 19 * Outputs: %[r|e]ax is error code (0 or -EFAULT)
22 * %[r|e]dx contains zero-extended value 20 * %[r|e]dx contains zero-extended value
21 * %ecx contains the high half for 32-bit __get_user_8
23 * 22 *
24 * 23 *
25 * These functions should not modify any other registers, 24 * These functions should not modify any other registers,
@@ -42,7 +41,7 @@ ENTRY(__get_user_1)
42 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 41 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
43 jae bad_get_user 42 jae bad_get_user
44 ASM_STAC 43 ASM_STAC
451: movzb (%_ASM_AX),%edx 441: movzbl (%_ASM_AX),%edx
46 xor %eax,%eax 45 xor %eax,%eax
47 ASM_CLAC 46 ASM_CLAC
48 ret 47 ret
@@ -72,29 +71,42 @@ ENTRY(__get_user_4)
72 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 71 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
73 jae bad_get_user 72 jae bad_get_user
74 ASM_STAC 73 ASM_STAC
753: mov -3(%_ASM_AX),%edx 743: movl -3(%_ASM_AX),%edx
76 xor %eax,%eax 75 xor %eax,%eax
77 ASM_CLAC 76 ASM_CLAC
78 ret 77 ret
79 CFI_ENDPROC 78 CFI_ENDPROC
80ENDPROC(__get_user_4) 79ENDPROC(__get_user_4)
81 80
82#ifdef CONFIG_X86_64
83ENTRY(__get_user_8) 81ENTRY(__get_user_8)
84 CFI_STARTPROC 82 CFI_STARTPROC
83#ifdef CONFIG_X86_64
85 add $7,%_ASM_AX 84 add $7,%_ASM_AX
86 jc bad_get_user 85 jc bad_get_user
87 GET_THREAD_INFO(%_ASM_DX) 86 GET_THREAD_INFO(%_ASM_DX)
88 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 87 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
89 jae bad_get_user 88 jae bad_get_user
90 ASM_STAC 89 ASM_STAC
914: movq -7(%_ASM_AX),%_ASM_DX 904: movq -7(%_ASM_AX),%rdx
92 xor %eax,%eax 91 xor %eax,%eax
93 ASM_CLAC 92 ASM_CLAC
94 ret 93 ret
94#else
95 add $7,%_ASM_AX
96 jc bad_get_user_8
97 GET_THREAD_INFO(%_ASM_DX)
98 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
99 jae bad_get_user_8
100 ASM_STAC
1014: movl -7(%_ASM_AX),%edx
1025: movl -3(%_ASM_AX),%ecx
103 xor %eax,%eax
104 ASM_CLAC
105 ret
106#endif
95 CFI_ENDPROC 107 CFI_ENDPROC
96ENDPROC(__get_user_8) 108ENDPROC(__get_user_8)
97#endif 109
98 110
99bad_get_user: 111bad_get_user:
100 CFI_STARTPROC 112 CFI_STARTPROC
@@ -105,9 +117,24 @@ bad_get_user:
105 CFI_ENDPROC 117 CFI_ENDPROC
106END(bad_get_user) 118END(bad_get_user)
107 119
120#ifdef CONFIG_X86_32
121bad_get_user_8:
122 CFI_STARTPROC
123 xor %edx,%edx
124 xor %ecx,%ecx
125 mov $(-EFAULT),%_ASM_AX
126 ASM_CLAC
127 ret
128 CFI_ENDPROC
129END(bad_get_user_8)
130#endif
131
108 _ASM_EXTABLE(1b,bad_get_user) 132 _ASM_EXTABLE(1b,bad_get_user)
109 _ASM_EXTABLE(2b,bad_get_user) 133 _ASM_EXTABLE(2b,bad_get_user)
110 _ASM_EXTABLE(3b,bad_get_user) 134 _ASM_EXTABLE(3b,bad_get_user)
111#ifdef CONFIG_X86_64 135#ifdef CONFIG_X86_64
112 _ASM_EXTABLE(4b,bad_get_user) 136 _ASM_EXTABLE(4b,bad_get_user)
137#else
138 _ASM_EXTABLE(4b,bad_get_user_8)
139 _ASM_EXTABLE(5b,bad_get_user_8)
113#endif 140#endif
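
Note on the getuser.S hunk above: on 32-bit, the new __get_user_8 path does two 4-byte loads and hands the value back split across %edx (low half) and %ecx (high half), with %eax carrying 0 or -EFAULT and bad_get_user_8 zeroing both halves. A rough C rendering of those semantics, ignoring the register-based calling convention (the struct and helper name are illustrative only):

#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/errno.h>

/* Illustrative only: what the 32-bit __get_user_8 path computes. */
struct get_user_8_sketch {
	int err;	/* returned in %eax: 0 or -EFAULT		*/
	u32 lo;		/* returned in %edx: low 32 bits (0 on fault)	*/
	u32 hi;		/* returned in %ecx: high 32 bits (0 on fault)	*/
};

static struct get_user_8_sketch get_user_8_c(const u64 __user *p)
{
	struct get_user_8_sketch r = { .err = -EFAULT, .lo = 0, .hi = 0 };
	u64 val;

	if (!access_ok(VERIFY_READ, p, sizeof(*p)))
		return r;	/* mirrors the addr_limit check -> bad_get_user_8 */
	if (copy_from_user(&val, p, sizeof(val)))
		return r;	/* mirrors the exception-table fixup */

	r.lo  = (u32)val;
	r.hi  = (u32)(val >> 32);
	r.err = 0;
	return r;
}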
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 027088f2f7dd..fb674fd3fc22 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -748,13 +748,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
748 return; 748 return;
749 } 749 }
750#endif 750#endif
751 /* Kernel addresses are always protection faults: */
752 if (address >= TASK_SIZE)
753 error_code |= PF_PROT;
751 754
752 if (unlikely(show_unhandled_signals)) 755 if (likely(show_unhandled_signals))
753 show_signal_msg(regs, error_code, address, tsk); 756 show_signal_msg(regs, error_code, address, tsk);
754 757
755 /* Kernel addresses are always protection faults: */
756 tsk->thread.cr2 = address; 758 tsk->thread.cr2 = address;
757 tsk->thread.error_code = error_code | (address >= TASK_SIZE); 759 tsk->thread.error_code = error_code;
758 tsk->thread.trap_nr = X86_TRAP_PF; 760 tsk->thread.trap_nr = X86_TRAP_PF;
759 761
760 force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); 762 force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
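
Note on the fault.c hunk above: the "kernel addresses are always protection faults" adjustment is now applied by OR-ing PF_PROT into error_code before the signal message and the stored thread.error_code, instead of OR-ing the raw boolean (address >= TASK_SIZE) into the stored value afterwards. For reference, the hardware error-code bits involved, as fault.c of this era defines them (PF_PROT is bit 0, so both forms set the same bit):

enum x86_pf_error_code {
	PF_PROT		= 1 << 0,	/* protection violation, not a not-present page */
	PF_WRITE	= 1 << 1,	/* access was a write */
	PF_USER		= 1 << 2,	/* access from user mode */
	PF_RSVD		= 1 << 3,	/* reserved bit set in a paging entry */
	PF_INSTR	= 1 << 4,	/* access was an instruction fetch */
};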
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d7aea41563b3..4903a03ae876 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -16,87 +16,134 @@
16#include <asm/tlb.h> 16#include <asm/tlb.h>
17#include <asm/proto.h> 17#include <asm/proto.h>
18#include <asm/dma.h> /* for MAX_DMA_PFN */ 18#include <asm/dma.h> /* for MAX_DMA_PFN */
19#include <asm/microcode.h>
19 20
20unsigned long __initdata pgt_buf_start; 21#include "mm_internal.h"
21unsigned long __meminitdata pgt_buf_end;
22unsigned long __meminitdata pgt_buf_top;
23 22
24int after_bootmem; 23static unsigned long __initdata pgt_buf_start;
24static unsigned long __initdata pgt_buf_end;
25static unsigned long __initdata pgt_buf_top;
25 26
26int direct_gbpages 27static unsigned long min_pfn_mapped;
27#ifdef CONFIG_DIRECT_GBPAGES
28 = 1
29#endif
30;
31 28
32struct map_range { 29static bool __initdata can_use_brk_pgt = true;
33 unsigned long start;
34 unsigned long end;
35 unsigned page_size_mask;
36};
37 30
38/* 31/*
39 * First calculate space needed for kernel direct mapping page tables to cover 32 * Pages returned are already directly mapped.
40 * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB 33 *
41 * pages. Then find enough contiguous space for those page tables. 34 * Changing that is likely to break Xen, see commit:
35 *
36 * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve
37 *
38 * for detailed information.
42 */ 39 */
43static void __init find_early_table_space(struct map_range *mr, int nr_range) 40__ref void *alloc_low_pages(unsigned int num)
44{ 41{
42 unsigned long pfn;
45 int i; 43 int i;
46 unsigned long puds = 0, pmds = 0, ptes = 0, tables;
47 unsigned long start = 0, good_end;
48 phys_addr_t base;
49 44
50 for (i = 0; i < nr_range; i++) { 45 if (after_bootmem) {
51 unsigned long range, extra; 46 unsigned int order;
52 47
53 range = mr[i].end - mr[i].start; 48 order = get_order((unsigned long)num << PAGE_SHIFT);
54 puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; 49 return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK |
50 __GFP_ZERO, order);
51 }
55 52
56 if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { 53 if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
57 extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); 54 unsigned long ret;
58 pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; 55 if (min_pfn_mapped >= max_pfn_mapped)
59 } else { 56 panic("alloc_low_page: ran out of memory");
60 pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; 57 ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
61 } 58 max_pfn_mapped << PAGE_SHIFT,
59 PAGE_SIZE * num , PAGE_SIZE);
60 if (!ret)
61 panic("alloc_low_page: can not alloc memory");
62 memblock_reserve(ret, PAGE_SIZE * num);
63 pfn = ret >> PAGE_SHIFT;
64 } else {
65 pfn = pgt_buf_end;
66 pgt_buf_end += num;
67 printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
68 pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
69 }
62 70
63 if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { 71 for (i = 0; i < num; i++) {
64 extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); 72 void *adr;
65#ifdef CONFIG_X86_32 73
66 extra += PMD_SIZE; 74 adr = __va((pfn + i) << PAGE_SHIFT);
67#endif 75 clear_page(adr);
68 ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
69 } else {
70 ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT;
71 }
72 } 76 }
73 77
74 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); 78 return __va(pfn << PAGE_SHIFT);
75 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); 79}
76 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
77 80
78#ifdef CONFIG_X86_32 81/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */
79 /* for fixmap */ 82#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE)
80 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); 83RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
81#endif 84void __init early_alloc_pgt_buf(void)
82 good_end = max_pfn_mapped << PAGE_SHIFT; 85{
86 unsigned long tables = INIT_PGT_BUF_SIZE;
87 phys_addr_t base;
83 88
84 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); 89 base = __pa(extend_brk(tables, PAGE_SIZE));
85 if (!base)
86 panic("Cannot find space for the kernel page tables");
87 90
88 pgt_buf_start = base >> PAGE_SHIFT; 91 pgt_buf_start = base >> PAGE_SHIFT;
89 pgt_buf_end = pgt_buf_start; 92 pgt_buf_end = pgt_buf_start;
90 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); 93 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
94}
91 95
92 printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", 96int after_bootmem;
93 mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, 97
94 (pgt_buf_top << PAGE_SHIFT) - 1); 98int direct_gbpages
99#ifdef CONFIG_DIRECT_GBPAGES
100 = 1
101#endif
102;
103
104static void __init init_gbpages(void)
105{
106#ifdef CONFIG_X86_64
107 if (direct_gbpages && cpu_has_gbpages)
108 printk(KERN_INFO "Using GB pages for direct mapping\n");
109 else
110 direct_gbpages = 0;
111#endif
95} 112}
96 113
97void __init native_pagetable_reserve(u64 start, u64 end) 114struct map_range {
115 unsigned long start;
116 unsigned long end;
117 unsigned page_size_mask;
118};
119
120static int page_size_mask;
121
122static void __init probe_page_size_mask(void)
98{ 123{
99 memblock_reserve(start, end - start); 124 init_gbpages();
125
126#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
127 /*
128 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
129 * This will simplify cpa(), which otherwise needs to support splitting
130 * large pages into small in interrupt context, etc.
131 */
132 if (direct_gbpages)
133 page_size_mask |= 1 << PG_LEVEL_1G;
134 if (cpu_has_pse)
135 page_size_mask |= 1 << PG_LEVEL_2M;
136#endif
137
138 /* Enable PSE if available */
139 if (cpu_has_pse)
140 set_in_cr4(X86_CR4_PSE);
141
142 /* Enable PGE if available */
143 if (cpu_has_pge) {
144 set_in_cr4(X86_CR4_PGE);
145 __supported_pte_mask |= _PAGE_GLOBAL;
146 }
100} 147}
101 148
102#ifdef CONFIG_X86_32 149#ifdef CONFIG_X86_32
@@ -122,58 +169,51 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
122} 169}
123 170
124/* 171/*
125 * Setup the direct mapping of the physical memory at PAGE_OFFSET. 172 * Adjust the page_size_mask so that a small range uses a
126 * This runs before bootmem is initialized and gets pages directly from 173 * big page size instead of a small one when the nearby memory is RAM too.
127 * the physical memory. To access them they are temporarily mapped.
128 */ 174 */
129unsigned long __init_refok init_memory_mapping(unsigned long start, 175static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
130 unsigned long end) 176 int nr_range)
131{ 177{
132 unsigned long page_size_mask = 0; 178 int i;
133 unsigned long start_pfn, end_pfn;
134 unsigned long ret = 0;
135 unsigned long pos;
136
137 struct map_range mr[NR_RANGE_MR];
138 int nr_range, i;
139 int use_pse, use_gbpages;
140 179
141 printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n", 180 for (i = 0; i < nr_range; i++) {
142 start, end - 1); 181 if ((page_size_mask & (1<<PG_LEVEL_2M)) &&
182 !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) {
183 unsigned long start = round_down(mr[i].start, PMD_SIZE);
184 unsigned long end = round_up(mr[i].end, PMD_SIZE);
143 185
144#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) 186#ifdef CONFIG_X86_32
145 /* 187 if ((end >> PAGE_SHIFT) > max_low_pfn)
146 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. 188 continue;
147 * This will simplify cpa(), which otherwise needs to support splitting
148 * large pages into small in interrupt context, etc.
149 */
150 use_pse = use_gbpages = 0;
151#else
152 use_pse = cpu_has_pse;
153 use_gbpages = direct_gbpages;
154#endif 189#endif
155 190
156 /* Enable PSE if available */ 191 if (memblock_is_region_memory(start, end - start))
157 if (cpu_has_pse) 192 mr[i].page_size_mask |= 1<<PG_LEVEL_2M;
158 set_in_cr4(X86_CR4_PSE); 193 }
194 if ((page_size_mask & (1<<PG_LEVEL_1G)) &&
195 !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) {
196 unsigned long start = round_down(mr[i].start, PUD_SIZE);
197 unsigned long end = round_up(mr[i].end, PUD_SIZE);
159 198
160 /* Enable PGE if available */ 199 if (memblock_is_region_memory(start, end - start))
161 if (cpu_has_pge) { 200 mr[i].page_size_mask |= 1<<PG_LEVEL_1G;
162 set_in_cr4(X86_CR4_PGE); 201 }
163 __supported_pte_mask |= _PAGE_GLOBAL;
164 } 202 }
203}
165 204
166 if (use_gbpages) 205static int __meminit split_mem_range(struct map_range *mr, int nr_range,
167 page_size_mask |= 1 << PG_LEVEL_1G; 206 unsigned long start,
168 if (use_pse) 207 unsigned long end)
169 page_size_mask |= 1 << PG_LEVEL_2M; 208{
209 unsigned long start_pfn, end_pfn, limit_pfn;
210 unsigned long pfn;
211 int i;
170 212
171 memset(mr, 0, sizeof(mr)); 213 limit_pfn = PFN_DOWN(end);
172 nr_range = 0;
173 214
174 /* head if not big page alignment ? */ 215 /* head if not big page alignment ? */
175 start_pfn = start >> PAGE_SHIFT; 216 pfn = start_pfn = PFN_DOWN(start);
176 pos = start_pfn << PAGE_SHIFT;
177#ifdef CONFIG_X86_32 217#ifdef CONFIG_X86_32
178 /* 218 /*
179 * Don't use a large page for the first 2/4MB of memory 219 * Don't use a large page for the first 2/4MB of memory
@@ -181,66 +221,60 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
181 * and overlapping MTRRs into large pages can cause 221 * and overlapping MTRRs into large pages can cause
182 * slowdowns. 222 * slowdowns.
183 */ 223 */
184 if (pos == 0) 224 if (pfn == 0)
185 end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); 225 end_pfn = PFN_DOWN(PMD_SIZE);
186 else 226 else
187 end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) 227 end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
188 << (PMD_SHIFT - PAGE_SHIFT);
189#else /* CONFIG_X86_64 */ 228#else /* CONFIG_X86_64 */
190 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) 229 end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
191 << (PMD_SHIFT - PAGE_SHIFT);
192#endif 230#endif
193 if (end_pfn > (end >> PAGE_SHIFT)) 231 if (end_pfn > limit_pfn)
194 end_pfn = end >> PAGE_SHIFT; 232 end_pfn = limit_pfn;
195 if (start_pfn < end_pfn) { 233 if (start_pfn < end_pfn) {
196 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 234 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
197 pos = end_pfn << PAGE_SHIFT; 235 pfn = end_pfn;
198 } 236 }
199 237
200 /* big page (2M) range */ 238 /* big page (2M) range */
201 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) 239 start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
202 << (PMD_SHIFT - PAGE_SHIFT);
203#ifdef CONFIG_X86_32 240#ifdef CONFIG_X86_32
204 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); 241 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
205#else /* CONFIG_X86_64 */ 242#else /* CONFIG_X86_64 */
206 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) 243 end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
207 << (PUD_SHIFT - PAGE_SHIFT); 244 if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE)))
208 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) 245 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
209 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
210#endif 246#endif
211 247
212 if (start_pfn < end_pfn) { 248 if (start_pfn < end_pfn) {
213 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 249 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
214 page_size_mask & (1<<PG_LEVEL_2M)); 250 page_size_mask & (1<<PG_LEVEL_2M));
215 pos = end_pfn << PAGE_SHIFT; 251 pfn = end_pfn;
216 } 252 }
217 253
218#ifdef CONFIG_X86_64 254#ifdef CONFIG_X86_64
219 /* big page (1G) range */ 255 /* big page (1G) range */
220 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) 256 start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
221 << (PUD_SHIFT - PAGE_SHIFT); 257 end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE));
222 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
223 if (start_pfn < end_pfn) { 258 if (start_pfn < end_pfn) {
224 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 259 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
225 page_size_mask & 260 page_size_mask &
226 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); 261 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
227 pos = end_pfn << PAGE_SHIFT; 262 pfn = end_pfn;
228 } 263 }
229 264
230 /* tail is not big page (1G) alignment */ 265 /* tail is not big page (1G) alignment */
231 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) 266 start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
232 << (PMD_SHIFT - PAGE_SHIFT); 267 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
233 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
234 if (start_pfn < end_pfn) { 268 if (start_pfn < end_pfn) {
235 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 269 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
236 page_size_mask & (1<<PG_LEVEL_2M)); 270 page_size_mask & (1<<PG_LEVEL_2M));
237 pos = end_pfn << PAGE_SHIFT; 271 pfn = end_pfn;
238 } 272 }
239#endif 273#endif
240 274
241 /* tail is not big page (2M) alignment */ 275 /* tail is not big page (2M) alignment */
242 start_pfn = pos>>PAGE_SHIFT; 276 start_pfn = pfn;
243 end_pfn = end>>PAGE_SHIFT; 277 end_pfn = limit_pfn;
244 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 278 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
245 279
246 /* try to merge same page size and continuous */ 280 /* try to merge same page size and continuous */
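
Aside: the new split_mem_range() expresses the head/tail alignment in pfn units with round_up()/round_down() instead of the old shift arithmetic on byte addresses. A minimal standalone check that the two forms agree (simplified constants and a division-based round_up are assumed here, not the kernel's headers):

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12UL
#define PMD_SHIFT  21UL
#define PMD_SIZE   (1UL << PMD_SHIFT)
#define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
#define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	/* compare the old shift expression with the new pfn-based rounding */
	for (unsigned long pfn = 0; pfn < (1UL << 18); pfn += 7) {
		unsigned long pos = pfn << PAGE_SHIFT;
		unsigned long old_way = ((pos + PMD_SIZE - 1) >> PMD_SHIFT)
					<< (PMD_SHIFT - PAGE_SHIFT);
		unsigned long new_way = round_up(pfn, PFN_DOWN(PMD_SIZE));

		assert(old_way == new_way);
	}
	printf("round_up(pfn, PFN_DOWN(PMD_SIZE)) matches the old arithmetic\n");
	return 0;
}
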
@@ -257,59 +291,169 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
257 nr_range--; 291 nr_range--;
258 } 292 }
259 293
294 if (!after_bootmem)
295 adjust_range_page_size_mask(mr, nr_range);
296
260 for (i = 0; i < nr_range; i++) 297 for (i = 0; i < nr_range; i++)
261 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", 298 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
262 mr[i].start, mr[i].end - 1, 299 mr[i].start, mr[i].end - 1,
263 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( 300 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
264 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); 301 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
265 302
266 /* 303 return nr_range;
267 * Find space for the kernel direct mapping tables. 304}
268 * 305
269 * Later we should allocate these tables in the local node of the 306struct range pfn_mapped[E820_X_MAX];
270 * memory mapped. Unfortunately this is done currently before the 307int nr_pfn_mapped;
271 * nodes are discovered. 308
272 */ 309static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
273 if (!after_bootmem) 310{
274 find_early_table_space(mr, nr_range); 311 nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
312 nr_pfn_mapped, start_pfn, end_pfn);
313 nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX);
314
315 max_pfn_mapped = max(max_pfn_mapped, end_pfn);
316
317 if (start_pfn < (1UL<<(32-PAGE_SHIFT)))
318 max_low_pfn_mapped = max(max_low_pfn_mapped,
319 min(end_pfn, 1UL<<(32-PAGE_SHIFT)));
320}
321
322bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
323{
324 int i;
325
326 for (i = 0; i < nr_pfn_mapped; i++)
327 if ((start_pfn >= pfn_mapped[i].start) &&
328 (end_pfn <= pfn_mapped[i].end))
329 return true;
330
331 return false;
332}
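
Aside: add_pfn_range_mapped() and pfn_range_is_mapped() just maintain a small array of merged [start, end) pfn ranges. A userspace sketch of the same bookkeeping; insert_range() is a hypothetical, deliberately naive stand-in for add_range_with_merge() plus clean_sort_range():

#include <assert.h>
#include <stdbool.h>

#define MAX_RANGES 16

struct range { unsigned long start, end; };	/* pfn range, [start, end) */

static struct range mapped[MAX_RANGES];
static int nr_mapped;

/* hypothetical helper: add a range, merging it into an overlapping entry */
static void insert_range(unsigned long start, unsigned long end)
{
	for (int i = 0; i < nr_mapped; i++) {
		if (start <= mapped[i].end && end >= mapped[i].start) {
			if (start < mapped[i].start)
				mapped[i].start = start;
			if (end > mapped[i].end)
				mapped[i].end = end;
			return;
		}
	}
	mapped[nr_mapped++] = (struct range){ start, end };
}

/* same containment test as pfn_range_is_mapped() */
static bool range_is_mapped(unsigned long start, unsigned long end)
{
	for (int i = 0; i < nr_mapped; i++)
		if (start >= mapped[i].start && end <= mapped[i].end)
			return true;
	return false;
}

int main(void)
{
	insert_range(0x0, 0x100);	/* ISA range mapped first */
	insert_range(0x100, 0x800);	/* a later chunk merges with it */

	assert(range_is_mapped(0x10, 0x700));
	assert(!range_is_mapped(0x700, 0x900));
	return 0;
}
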
333
334/*
335 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
336 * This runs before bootmem is initialized and gets pages directly from
337 * the physical memory. To access them they are temporarily mapped.
338 */
339unsigned long __init_refok init_memory_mapping(unsigned long start,
340 unsigned long end)
341{
342 struct map_range mr[NR_RANGE_MR];
343 unsigned long ret = 0;
344 int nr_range, i;
345
346 pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n",
347 start, end - 1);
348
349 memset(mr, 0, sizeof(mr));
350 nr_range = split_mem_range(mr, 0, start, end);
275 351
276 for (i = 0; i < nr_range; i++) 352 for (i = 0; i < nr_range; i++)
277 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, 353 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
278 mr[i].page_size_mask); 354 mr[i].page_size_mask);
279 355
280#ifdef CONFIG_X86_32 356 add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
281 early_ioremap_page_table_range_init();
282 357
283 load_cr3(swapper_pg_dir); 358 return ret >> PAGE_SHIFT;
284#endif 359}
285 360
286 __flush_tlb_all(); 361/*
362 * The range may have holes in the middle or at the ends; only the RAM parts will be mapped.
363 */
364static unsigned long __init init_range_memory_mapping(
365 unsigned long r_start,
366 unsigned long r_end)
367{
368 unsigned long start_pfn, end_pfn;
369 unsigned long mapped_ram_size = 0;
370 int i;
287 371
288 /* 372 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
289 * Reserve the kernel pagetable pages we used (pgt_buf_start - 373 u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end);
290 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) 374 u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end);
291 * so that they can be reused for other purposes. 375 if (start >= end)
292 * 376 continue;
293 * On native it just means calling memblock_reserve, on Xen it also
294 * means marking RW the pagetable pages that we allocated before
295 * but that haven't been used.
296 *
297 * In fact on xen we mark RO the whole range pgt_buf_start -
298 * pgt_buf_top, because we have to make sure that when
299 * init_memory_mapping reaches the pagetable pages area, it maps
300 * RO all the pagetable pages, including the ones that are beyond
301 * pgt_buf_end at that time.
302 */
303 if (!after_bootmem && pgt_buf_end > pgt_buf_start)
304 x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
305 PFN_PHYS(pgt_buf_end));
306 377
307 if (!after_bootmem) 378 /*
308 early_memtest(start, end); 379 * If the range overlaps the brk page tables, we need to
380 * allocate the pgt buffer from memblock instead.
381 */
382 can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
383 min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
384 init_memory_mapping(start, end);
385 mapped_ram_size += end - start;
386 can_use_brk_pgt = true;
387 }
309 388
310 return ret >> PAGE_SHIFT; 389 return mapped_ram_size;
311} 390}
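
Aside: the can_use_brk_pgt expression is the usual half-open-interval disjointness test: [start, end) misses the pgt_buf window exactly when max(start, buf_start) >= min(end, buf_end). A small illustration with made-up addresses:

#include <assert.h>
#include <stdbool.h>

/* [a0, a1) and [b0, b1) are disjoint exactly when the larger start
 * is at or past the smaller end - the max(...) >= min(...) test above */
static bool disjoint(unsigned long a0, unsigned long a1,
		     unsigned long b0, unsigned long b1)
{
	unsigned long lo = a0 > b0 ? a0 : b0;
	unsigned long hi = a1 < b1 ? a1 : b1;

	return lo >= hi;
}

int main(void)
{
	assert(disjoint(0x1000, 0x2000, 0x2000, 0x3000));	/* touching: no overlap */
	assert(!disjoint(0x1000, 0x2000, 0x1800, 0x3000));	/* overlapping */
	return 0;
}
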
312 391
392/* (PUD_SHIFT-PMD_SHIFT)/2 */
393#define STEP_SIZE_SHIFT 5
394void __init init_mem_mapping(void)
395{
396 unsigned long end, real_end, start, last_start;
397 unsigned long step_size;
398 unsigned long addr;
399 unsigned long mapped_ram_size = 0;
400 unsigned long new_mapped_ram_size;
401
402 probe_page_size_mask();
403
404#ifdef CONFIG_X86_64
405 end = max_pfn << PAGE_SHIFT;
406#else
407 end = max_low_pfn << PAGE_SHIFT;
408#endif
409
410 /* the ISA range is always mapped regardless of memory holes */
411 init_memory_mapping(0, ISA_END_ADDRESS);
412
413 /* Xen has a big reserved range near the end of RAM; skip it at first */
414 addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE,
415 PAGE_SIZE);
416 real_end = addr + PMD_SIZE;
417
418 /* step_size needs to be small so the pgt_buf from BRK can cover it */
419 step_size = PMD_SIZE;
420 max_pfn_mapped = 0; /* will get exact value next */
421 min_pfn_mapped = real_end >> PAGE_SHIFT;
422 last_start = start = real_end;
423 while (last_start > ISA_END_ADDRESS) {
424 if (last_start > step_size) {
425 start = round_down(last_start - 1, step_size);
426 if (start < ISA_END_ADDRESS)
427 start = ISA_END_ADDRESS;
428 } else
429 start = ISA_END_ADDRESS;
430 new_mapped_ram_size = init_range_memory_mapping(start,
431 last_start);
432 last_start = start;
433 min_pfn_mapped = last_start >> PAGE_SHIFT;
434 /* only increase step_size after a big range gets mapped */
435 if (new_mapped_ram_size > mapped_ram_size)
436 step_size <<= STEP_SIZE_SHIFT;
437 mapped_ram_size += new_mapped_ram_size;
438 }
439
440 if (real_end < end)
441 init_range_memory_mapping(real_end, end);
442
443#ifdef CONFIG_X86_64
444 if (max_pfn > max_low_pfn) {
445 /* can we preserve max_low_pfn? */
446 max_low_pfn = max_pfn;
447 }
448#else
449 early_ioremap_page_table_range_init();
450#endif
451
452 load_cr3(swapper_pg_dir);
453 __flush_tlb_all();
454
455 early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
456}
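
Aside: init_mem_mapping() walks memory top-down in chunks whose size grows geometrically; the first chunk is PMD_SIZE so the brk-resident pgt_buf can cover its page tables, and step_size is multiplied by 2^STEP_SIZE_SHIFT whenever a chunk larger than everything mapped so far succeeds. A standalone sketch that only prints the chunk boundaries such a loop would visit (addresses are illustrative, not from a real machine):

#include <stdio.h>

#define ISA_END_ADDRESS	0x100000ULL		/* 1 MiB */
#define PMD_SIZE	(1ULL << 21)		/* 2 MiB */
#define STEP_SIZE_SHIFT	5

static unsigned long long round_down_to(unsigned long long x,
					 unsigned long long align)
{
	return x - (x % align);
}

int main(void)
{
	unsigned long long real_end = 1ULL << 32;	/* pretend 4 GiB of RAM */
	unsigned long long last_start = real_end, start;
	unsigned long long step_size = PMD_SIZE;
	unsigned long long mapped = 0, chunk;

	while (last_start > ISA_END_ADDRESS) {
		if (last_start > step_size) {
			start = round_down_to(last_start - 1, step_size);
			if (start < ISA_END_ADDRESS)
				start = ISA_END_ADDRESS;
		} else {
			start = ISA_END_ADDRESS;
		}

		chunk = last_start - start;
		printf("map [%#llx, %#llx)\n", start, last_start);

		/* grow the step only once a chunk bigger than all previously
		 * mapped memory has been handled, as in init_mem_mapping() */
		if (chunk > mapped)
			step_size <<= STEP_SIZE_SHIFT;
		mapped += chunk;
		last_start = start;
	}
	return 0;
}
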
313 457
314/* 458/*
315 * devmem_is_allowed() checks to see if /dev/mem access to a certain address 459 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
@@ -391,6 +535,15 @@ void free_initmem(void)
391#ifdef CONFIG_BLK_DEV_INITRD 535#ifdef CONFIG_BLK_DEV_INITRD
392void __init free_initrd_mem(unsigned long start, unsigned long end) 536void __init free_initrd_mem(unsigned long start, unsigned long end)
393{ 537{
538#ifdef CONFIG_MICROCODE_EARLY
539 /*
540 * Remember, initrd memory may contain microcode or other useful things.
541 * Before we lose initrd mem, we need to find a place to hold them
542 * now that normal virtual memory is enabled.
543 */
544 save_microcode_in_initrd();
545#endif
546
394 /* 547 /*
395 * end could be not aligned, and We can not align that, 548 * end could be not aligned, and We can not align that,
396 * decompresser could be confused by aligned initrd_end 549 * decompresser could be confused by aligned initrd_end
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 745d66b843c8..2d19001151d5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -53,25 +53,14 @@
53#include <asm/page_types.h> 53#include <asm/page_types.h>
54#include <asm/init.h> 54#include <asm/init.h>
55 55
56#include "mm_internal.h"
57
56unsigned long highstart_pfn, highend_pfn; 58unsigned long highstart_pfn, highend_pfn;
57 59
58static noinline int do_test_wp_bit(void); 60static noinline int do_test_wp_bit(void);
59 61
60bool __read_mostly __vmalloc_start_set = false; 62bool __read_mostly __vmalloc_start_set = false;
61 63
62static __init void *alloc_low_page(void)
63{
64 unsigned long pfn = pgt_buf_end++;
65 void *adr;
66
67 if (pfn >= pgt_buf_top)
68 panic("alloc_low_page: ran out of memory");
69
70 adr = __va(pfn * PAGE_SIZE);
71 clear_page(adr);
72 return adr;
73}
74
75/* 64/*
76 * Creates a middle page table and puts a pointer to it in the 65 * Creates a middle page table and puts a pointer to it in the
77 * given global directory entry. This only returns the gd entry 66 * given global directory entry. This only returns the gd entry
@@ -84,10 +73,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
84 73
85#ifdef CONFIG_X86_PAE 74#ifdef CONFIG_X86_PAE
86 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 75 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
87 if (after_bootmem) 76 pmd_table = (pmd_t *)alloc_low_page();
88 pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
89 else
90 pmd_table = (pmd_t *)alloc_low_page();
91 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 77 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
92 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 78 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
93 pud = pud_offset(pgd, 0); 79 pud = pud_offset(pgd, 0);
@@ -109,17 +95,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
109static pte_t * __init one_page_table_init(pmd_t *pmd) 95static pte_t * __init one_page_table_init(pmd_t *pmd)
110{ 96{
111 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { 97 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
112 pte_t *page_table = NULL; 98 pte_t *page_table = (pte_t *)alloc_low_page();
113
114 if (after_bootmem) {
115#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
116 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
117#endif
118 if (!page_table)
119 page_table =
120 (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
121 } else
122 page_table = (pte_t *)alloc_low_page();
123 99
124 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); 100 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
125 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 101 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -146,8 +122,39 @@ pte_t * __init populate_extra_pte(unsigned long vaddr)
146 return one_page_table_init(pmd) + pte_idx; 122 return one_page_table_init(pmd) + pte_idx;
147} 123}
148 124
125static unsigned long __init
126page_table_range_init_count(unsigned long start, unsigned long end)
127{
128 unsigned long count = 0;
129#ifdef CONFIG_HIGHMEM
130 int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
131 int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
132 int pgd_idx, pmd_idx;
133 unsigned long vaddr;
134
135 if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
136 return 0;
137
138 vaddr = start;
139 pgd_idx = pgd_index(vaddr);
140 pmd_idx = pmd_index(vaddr);
141 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
142 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
143 pmd_idx++) {
144 if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
145 (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
146 count++;
147 vaddr += PMD_SIZE;
148 }
149 pmd_idx = 0;
150 }
151#endif
152 return count;
153}
154
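
Aside: page_table_range_init_count() exists so that page_table_range_init() can fetch every PTE page it will need for the kmap fixup in a single alloc_low_pages(count) call and then hand them out through the adr cursor. The count-then-allocate pattern in isolation (userspace sketch; alloc_pages_once() is a stand-in for alloc_low_pages()):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

/* stand-in for alloc_low_pages(count): one contiguous allocation up front */
static void *alloc_pages_once(unsigned long count)
{
	return calloc(count, PAGE_SIZE);
}

/* consumer peels the next page off the cursor, like the *adr argument */
static void *take_page(void **adr)
{
	void *page = *adr;

	*adr = (char *)page + PAGE_SIZE;
	return page;
}

int main(void)
{
	unsigned long count = 3;	/* what the counting pass computed */
	void *base = alloc_pages_once(count);
	void *adr = base;

	if (!base)
		return 1;

	for (unsigned long i = 0; i < count; i++) {
		void *page = take_page(&adr);

		memset(page, 0, PAGE_SIZE);
		printf("pte page %lu at %p\n", i, page);
	}
	free(base);
	return 0;
}
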
149static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, 155static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
150 unsigned long vaddr, pte_t *lastpte) 156 unsigned long vaddr, pte_t *lastpte,
157 void **adr)
151{ 158{
152#ifdef CONFIG_HIGHMEM 159#ifdef CONFIG_HIGHMEM
153 /* 160 /*
@@ -161,16 +168,15 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
161 168
162 if (pmd_idx_kmap_begin != pmd_idx_kmap_end 169 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
163 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin 170 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
164 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end 171 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
165 && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
166 || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
167 pte_t *newpte; 172 pte_t *newpte;
168 int i; 173 int i;
169 174
170 BUG_ON(after_bootmem); 175 BUG_ON(after_bootmem);
171 newpte = alloc_low_page(); 176 newpte = *adr;
172 for (i = 0; i < PTRS_PER_PTE; i++) 177 for (i = 0; i < PTRS_PER_PTE; i++)
173 set_pte(newpte + i, pte[i]); 178 set_pte(newpte + i, pte[i]);
179 *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
174 180
175 paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); 181 paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
176 set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); 182 set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
@@ -204,6 +210,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
204 pgd_t *pgd; 210 pgd_t *pgd;
205 pmd_t *pmd; 211 pmd_t *pmd;
206 pte_t *pte = NULL; 212 pte_t *pte = NULL;
213 unsigned long count = page_table_range_init_count(start, end);
214 void *adr = NULL;
215
216 if (count)
217 adr = alloc_low_pages(count);
207 218
208 vaddr = start; 219 vaddr = start;
209 pgd_idx = pgd_index(vaddr); 220 pgd_idx = pgd_index(vaddr);
@@ -216,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
216 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); 227 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
217 pmd++, pmd_idx++) { 228 pmd++, pmd_idx++) {
218 pte = page_table_kmap_check(one_page_table_init(pmd), 229 pte = page_table_kmap_check(one_page_table_init(pmd),
219 pmd, vaddr, pte); 230 pmd, vaddr, pte, &adr);
220 231
221 vaddr += PMD_SIZE; 232 vaddr += PMD_SIZE;
222 } 233 }
@@ -310,6 +321,7 @@ repeat:
310 __pgprot(PTE_IDENT_ATTR | 321 __pgprot(PTE_IDENT_ATTR |
311 _PAGE_PSE); 322 _PAGE_PSE);
312 323
324 pfn &= PMD_MASK >> PAGE_SHIFT;
313 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + 325 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
314 PAGE_OFFSET + PAGE_SIZE-1; 326 PAGE_OFFSET + PAGE_SIZE-1;
315 327
@@ -455,9 +467,14 @@ void __init native_pagetable_init(void)
455 467
456 /* 468 /*
457 * Remove any mappings which extend past the end of physical 469 * Remove any mappings which extend past the end of physical
458 * memory from the boot time page table: 470 * memory from the boot time page table.
471 * In the virtual address space we should have at least two pages
472 * between VMALLOC_END and pkmap or fixmap, per the VMALLOC_END
473 * definition, and max_low_pfn corresponds to the VMALLOC_END physical
474 * address. If the initial memory mapping did its job, ptes should be
475 * in use near max_low_pfn, or else the pmd there is not present.
459 */ 476 */
460 for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { 477 for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
461 va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); 478 va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
462 pgd = base + pgd_index(va); 479 pgd = base + pgd_index(va);
463 if (!pgd_present(*pgd)) 480 if (!pgd_present(*pgd))
@@ -468,10 +485,19 @@ void __init native_pagetable_init(void)
468 if (!pmd_present(*pmd)) 485 if (!pmd_present(*pmd))
469 break; 486 break;
470 487
488 /* should not be large page here */
489 if (pmd_large(*pmd)) {
490 pr_warn("trying to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is a large page and is not using a pte!\n",
491 pfn, pmd, __pa(pmd));
492 BUG_ON(1);
493 }
494
471 pte = pte_offset_kernel(pmd, va); 495 pte = pte_offset_kernel(pmd, va);
472 if (!pte_present(*pte)) 496 if (!pte_present(*pte))
473 break; 497 break;
474 498
499 printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n",
500 pfn, pmd, __pa(pmd), pte, __pa(pte));
475 pte_clear(NULL, va, pte); 501 pte_clear(NULL, va, pte);
476 } 502 }
477 paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); 503 paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
@@ -550,7 +576,7 @@ early_param("highmem", parse_highmem);
550 * artificially via the highmem=x boot parameter then create 576 * artificially via the highmem=x boot parameter then create
551 * it: 577 * it:
552 */ 578 */
553void __init lowmem_pfn_init(void) 579static void __init lowmem_pfn_init(void)
554{ 580{
555 /* max_low_pfn is 0, we already have early_res support */ 581 /* max_low_pfn is 0, we already have early_res support */
556 max_low_pfn = max_pfn; 582 max_low_pfn = max_pfn;
@@ -586,7 +612,7 @@ void __init lowmem_pfn_init(void)
586 * We have more RAM than fits into lowmem - we try to put it into 612 * We have more RAM than fits into lowmem - we try to put it into
587 * highmem, also taking the highmem=x boot parameter into account: 613 * highmem, also taking the highmem=x boot parameter into account:
588 */ 614 */
589void __init highmem_pfn_init(void) 615static void __init highmem_pfn_init(void)
590{ 616{
591 max_low_pfn = MAXMEM_PFN; 617 max_low_pfn = MAXMEM_PFN;
592 618
@@ -669,8 +695,6 @@ void __init setup_bootmem_allocator(void)
669 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 695 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
670 max_pfn_mapped<<PAGE_SHIFT); 696 max_pfn_mapped<<PAGE_SHIFT);
671 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); 697 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
672
673 after_bootmem = 1;
674} 698}
675 699
676/* 700/*
@@ -753,6 +777,8 @@ void __init mem_init(void)
753 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 777 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
754 reservedpages++; 778 reservedpages++;
755 779
780 after_bootmem = 1;
781
756 codesize = (unsigned long) &_etext - (unsigned long) &_text; 782 codesize = (unsigned long) &_etext - (unsigned long) &_text;
757 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 783 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
758 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 784 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
@@ -836,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
836 862
837 return __add_pages(nid, zone, start_pfn, nr_pages); 863 return __add_pages(nid, zone, start_pfn, nr_pages);
838} 864}
865
866#ifdef CONFIG_MEMORY_HOTREMOVE
867int arch_remove_memory(u64 start, u64 size)
868{
869 unsigned long start_pfn = start >> PAGE_SHIFT;
870 unsigned long nr_pages = size >> PAGE_SHIFT;
871 struct zone *zone;
872
873 zone = page_zone(pfn_to_page(start_pfn));
874 return __remove_pages(zone, start_pfn, nr_pages);
875}
876#endif
839#endif 877#endif
840 878
841/* 879/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2ead3c8a4c84..474e28f10815 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,82 @@
54#include <asm/uv/uv.h> 54#include <asm/uv/uv.h>
55#include <asm/setup.h> 55#include <asm/setup.h>
56 56
57#include "mm_internal.h"
58
59static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
60 unsigned long addr, unsigned long end)
61{
62 addr &= PMD_MASK;
63 for (; addr < end; addr += PMD_SIZE) {
64 pmd_t *pmd = pmd_page + pmd_index(addr);
65
66 if (!pmd_present(*pmd))
67 set_pmd(pmd, __pmd(addr | pmd_flag));
68 }
69}
70static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
71 unsigned long addr, unsigned long end)
72{
73 unsigned long next;
74
75 for (; addr < end; addr = next) {
76 pud_t *pud = pud_page + pud_index(addr);
77 pmd_t *pmd;
78
79 next = (addr & PUD_MASK) + PUD_SIZE;
80 if (next > end)
81 next = end;
82
83 if (pud_present(*pud)) {
84 pmd = pmd_offset(pud, 0);
85 ident_pmd_init(info->pmd_flag, pmd, addr, next);
86 continue;
87 }
88 pmd = (pmd_t *)info->alloc_pgt_page(info->context);
89 if (!pmd)
90 return -ENOMEM;
91 ident_pmd_init(info->pmd_flag, pmd, addr, next);
92 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
93 }
94
95 return 0;
96}
97
98int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
99 unsigned long addr, unsigned long end)
100{
101 unsigned long next;
102 int result;
103 int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
104
105 for (; addr < end; addr = next) {
106 pgd_t *pgd = pgd_page + pgd_index(addr) + off;
107 pud_t *pud;
108
109 next = (addr & PGDIR_MASK) + PGDIR_SIZE;
110 if (next > end)
111 next = end;
112
113 if (pgd_present(*pgd)) {
114 pud = pud_offset(pgd, 0);
115 result = ident_pud_init(info, pud, addr, next);
116 if (result)
117 return result;
118 continue;
119 }
120
121 pud = (pud_t *)info->alloc_pgt_page(info->context);
122 if (!pud)
123 return -ENOMEM;
124 result = ident_pud_init(info, pud, addr, next);
125 if (result)
126 return result;
127 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
128 }
129
130 return 0;
131}
132
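
Aside: kernel_ident_mapping_init() stays allocator-agnostic by taking its page-table pages from the caller-supplied alloc_pgt_page() callback and context pointer, so the same walker can serve kexec, boot code, or anything else. A much-simplified userspace analogue of that shape; struct mapping_info, pool_alloc() and ident_init() below are toy constructs, not the kernel's x86_mapping_info API:

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define ENTRIES 8

/* toy analogue of the info structure: an allocator callback plus context */
struct mapping_info {
	void *(*alloc_pgt_page)(void *context);	/* supplies zeroed table pages */
	void *context;				/* opaque data for the callback */
	unsigned long pmd_flag;			/* flags OR'ed into leaf entries */
};

struct pool { unsigned long buf[4][ENTRIES]; int used; };

static void *pool_alloc(void *context)
{
	struct pool *p = context;

	if (p->used >= 4)
		return NULL;
	memset(p->buf[p->used], 0, sizeof(p->buf[0]));
	return p->buf[p->used++];
}

/* walk a toy two-level table, allocating lower tables through the callback
 * (pointers are stored as unsigned long; assumes an LP64-style platform) */
static int ident_init(struct mapping_info *info, unsigned long *top,
		      unsigned long start, unsigned long end)
{
	for (unsigned long idx = start; idx < end; idx++) {
		unsigned long *pmd;

		if (!top[idx / ENTRIES]) {
			pmd = info->alloc_pgt_page(info->context);
			if (!pmd)
				return -1;
			top[idx / ENTRIES] = (unsigned long)pmd;
		}
		pmd = (unsigned long *)top[idx / ENTRIES];
		pmd[idx % ENTRIES] = idx | info->pmd_flag;
	}
	return 0;
}

int main(void)
{
	struct pool pool = { .used = 0 };
	struct mapping_info info = {
		.alloc_pgt_page	= pool_alloc,
		.context	= &pool,
		.pmd_flag	= 0x63,	/* pretend "present, writable, large" */
	};
	unsigned long top[ENTRIES] = { 0 };

	assert(ident_init(&info, top, 3, 20) == 0);
	printf("used %d table pages\n", pool.used);
	return 0;
}
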
57static int __init parse_direct_gbpages_off(char *arg) 133static int __init parse_direct_gbpages_off(char *arg)
58{ 134{
59 direct_gbpages = 0; 135 direct_gbpages = 0;
@@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
302void __init cleanup_highmap(void) 378void __init cleanup_highmap(void)
303{ 379{
304 unsigned long vaddr = __START_KERNEL_map; 380 unsigned long vaddr = __START_KERNEL_map;
305 unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); 381 unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
306 unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; 382 unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
307 pmd_t *pmd = level2_kernel_pgt; 383 pmd_t *pmd = level2_kernel_pgt;
308 384
385 /*
386 * Native path, max_pfn_mapped is not set yet.
387 * Xen has valid max_pfn_mapped set in
388 * arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
389 */
390 if (max_pfn_mapped)
391 vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
392
309 for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { 393 for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
310 if (pmd_none(*pmd)) 394 if (pmd_none(*pmd))
311 continue; 395 continue;
@@ -314,69 +398,24 @@ void __init cleanup_highmap(void)
314 } 398 }
315} 399}
316 400
317static __ref void *alloc_low_page(unsigned long *phys)
318{
319 unsigned long pfn = pgt_buf_end++;
320 void *adr;
321
322 if (after_bootmem) {
323 adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
324 *phys = __pa(adr);
325
326 return adr;
327 }
328
329 if (pfn >= pgt_buf_top)
330 panic("alloc_low_page: ran out of memory");
331
332 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
333 clear_page(adr);
334 *phys = pfn * PAGE_SIZE;
335 return adr;
336}
337
338static __ref void *map_low_page(void *virt)
339{
340 void *adr;
341 unsigned long phys, left;
342
343 if (after_bootmem)
344 return virt;
345
346 phys = __pa(virt);
347 left = phys & (PAGE_SIZE - 1);
348 adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
349 adr = (void *)(((unsigned long)adr) | left);
350
351 return adr;
352}
353
354static __ref void unmap_low_page(void *adr)
355{
356 if (after_bootmem)
357 return;
358
359 early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
360}
361
362static unsigned long __meminit 401static unsigned long __meminit
363phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, 402phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
364 pgprot_t prot) 403 pgprot_t prot)
365{ 404{
366 unsigned pages = 0; 405 unsigned long pages = 0, next;
367 unsigned long last_map_addr = end; 406 unsigned long last_map_addr = end;
368 int i; 407 int i;
369 408
370 pte_t *pte = pte_page + pte_index(addr); 409 pte_t *pte = pte_page + pte_index(addr);
371 410
372 for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { 411 for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
373 412 next = (addr & PAGE_MASK) + PAGE_SIZE;
374 if (addr >= end) { 413 if (addr >= end) {
375 if (!after_bootmem) { 414 if (!after_bootmem &&
376 for(; i < PTRS_PER_PTE; i++, pte++) 415 !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
377 set_pte(pte, __pte(0)); 416 !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
378 } 417 set_pte(pte, __pte(0));
379 break; 418 continue;
380 } 419 }
381 420
382 /* 421 /*
@@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
414 int i = pmd_index(address); 453 int i = pmd_index(address);
415 454
416 for (; i < PTRS_PER_PMD; i++, address = next) { 455 for (; i < PTRS_PER_PMD; i++, address = next) {
417 unsigned long pte_phys;
418 pmd_t *pmd = pmd_page + pmd_index(address); 456 pmd_t *pmd = pmd_page + pmd_index(address);
419 pte_t *pte; 457 pte_t *pte;
420 pgprot_t new_prot = prot; 458 pgprot_t new_prot = prot;
421 459
460 next = (address & PMD_MASK) + PMD_SIZE;
422 if (address >= end) { 461 if (address >= end) {
423 if (!after_bootmem) { 462 if (!after_bootmem &&
424 for (; i < PTRS_PER_PMD; i++, pmd++) 463 !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
425 set_pmd(pmd, __pmd(0)); 464 !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
426 } 465 set_pmd(pmd, __pmd(0));
427 break; 466 continue;
428 } 467 }
429 468
430 next = (address & PMD_MASK) + PMD_SIZE;
431
432 if (pmd_val(*pmd)) { 469 if (pmd_val(*pmd)) {
433 if (!pmd_large(*pmd)) { 470 if (!pmd_large(*pmd)) {
434 spin_lock(&init_mm.page_table_lock); 471 spin_lock(&init_mm.page_table_lock);
435 pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd)); 472 pte = (pte_t *)pmd_page_vaddr(*pmd);
436 last_map_addr = phys_pte_init(pte, address, 473 last_map_addr = phys_pte_init(pte, address,
437 end, prot); 474 end, prot);
438 unmap_low_page(pte);
439 spin_unlock(&init_mm.page_table_lock); 475 spin_unlock(&init_mm.page_table_lock);
440 continue; 476 continue;
441 } 477 }
@@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
464 pages++; 500 pages++;
465 spin_lock(&init_mm.page_table_lock); 501 spin_lock(&init_mm.page_table_lock);
466 set_pte((pte_t *)pmd, 502 set_pte((pte_t *)pmd,
467 pfn_pte(address >> PAGE_SHIFT, 503 pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
468 __pgprot(pgprot_val(prot) | _PAGE_PSE))); 504 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
469 spin_unlock(&init_mm.page_table_lock); 505 spin_unlock(&init_mm.page_table_lock);
470 last_map_addr = next; 506 last_map_addr = next;
471 continue; 507 continue;
472 } 508 }
473 509
474 pte = alloc_low_page(&pte_phys); 510 pte = alloc_low_page();
475 last_map_addr = phys_pte_init(pte, address, end, new_prot); 511 last_map_addr = phys_pte_init(pte, address, end, new_prot);
476 unmap_low_page(pte);
477 512
478 spin_lock(&init_mm.page_table_lock); 513 spin_lock(&init_mm.page_table_lock);
479 pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); 514 pmd_populate_kernel(&init_mm, pmd, pte);
480 spin_unlock(&init_mm.page_table_lock); 515 spin_unlock(&init_mm.page_table_lock);
481 } 516 }
482 update_page_count(PG_LEVEL_2M, pages); 517 update_page_count(PG_LEVEL_2M, pages);
@@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
492 int i = pud_index(addr); 527 int i = pud_index(addr);
493 528
494 for (; i < PTRS_PER_PUD; i++, addr = next) { 529 for (; i < PTRS_PER_PUD; i++, addr = next) {
495 unsigned long pmd_phys;
496 pud_t *pud = pud_page + pud_index(addr); 530 pud_t *pud = pud_page + pud_index(addr);
497 pmd_t *pmd; 531 pmd_t *pmd;
498 pgprot_t prot = PAGE_KERNEL; 532 pgprot_t prot = PAGE_KERNEL;
499 533
500 if (addr >= end)
501 break;
502
503 next = (addr & PUD_MASK) + PUD_SIZE; 534 next = (addr & PUD_MASK) + PUD_SIZE;
504 535 if (addr >= end) {
505 if (!after_bootmem && !e820_any_mapped(addr, next, 0)) { 536 if (!after_bootmem &&
506 set_pud(pud, __pud(0)); 537 !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
538 !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
539 set_pud(pud, __pud(0));
507 continue; 540 continue;
508 } 541 }
509 542
510 if (pud_val(*pud)) { 543 if (pud_val(*pud)) {
511 if (!pud_large(*pud)) { 544 if (!pud_large(*pud)) {
512 pmd = map_low_page(pmd_offset(pud, 0)); 545 pmd = pmd_offset(pud, 0);
513 last_map_addr = phys_pmd_init(pmd, addr, end, 546 last_map_addr = phys_pmd_init(pmd, addr, end,
514 page_size_mask, prot); 547 page_size_mask, prot);
515 unmap_low_page(pmd);
516 __flush_tlb_all(); 548 __flush_tlb_all();
517 continue; 549 continue;
518 } 550 }
@@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
541 pages++; 573 pages++;
542 spin_lock(&init_mm.page_table_lock); 574 spin_lock(&init_mm.page_table_lock);
543 set_pte((pte_t *)pud, 575 set_pte((pte_t *)pud,
544 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 576 pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
577 PAGE_KERNEL_LARGE));
545 spin_unlock(&init_mm.page_table_lock); 578 spin_unlock(&init_mm.page_table_lock);
546 last_map_addr = next; 579 last_map_addr = next;
547 continue; 580 continue;
548 } 581 }
549 582
550 pmd = alloc_low_page(&pmd_phys); 583 pmd = alloc_low_page();
551 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, 584 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
552 prot); 585 prot);
553 unmap_low_page(pmd);
554 586
555 spin_lock(&init_mm.page_table_lock); 587 spin_lock(&init_mm.page_table_lock);
556 pud_populate(&init_mm, pud, __va(pmd_phys)); 588 pud_populate(&init_mm, pud, pmd);
557 spin_unlock(&init_mm.page_table_lock); 589 spin_unlock(&init_mm.page_table_lock);
558 } 590 }
559 __flush_tlb_all(); 591 __flush_tlb_all();
@@ -578,34 +610,29 @@ kernel_physical_mapping_init(unsigned long start,
578 610
579 for (; start < end; start = next) { 611 for (; start < end; start = next) {
580 pgd_t *pgd = pgd_offset_k(start); 612 pgd_t *pgd = pgd_offset_k(start);
581 unsigned long pud_phys;
582 pud_t *pud; 613 pud_t *pud;
583 614
584 next = (start + PGDIR_SIZE) & PGDIR_MASK; 615 next = (start & PGDIR_MASK) + PGDIR_SIZE;
585 if (next > end)
586 next = end;
587 616
588 if (pgd_val(*pgd)) { 617 if (pgd_val(*pgd)) {
589 pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd)); 618 pud = (pud_t *)pgd_page_vaddr(*pgd);
590 last_map_addr = phys_pud_init(pud, __pa(start), 619 last_map_addr = phys_pud_init(pud, __pa(start),
591 __pa(end), page_size_mask); 620 __pa(end), page_size_mask);
592 unmap_low_page(pud);
593 continue; 621 continue;
594 } 622 }
595 623
596 pud = alloc_low_page(&pud_phys); 624 pud = alloc_low_page();
597 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), 625 last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
598 page_size_mask); 626 page_size_mask);
599 unmap_low_page(pud);
600 627
601 spin_lock(&init_mm.page_table_lock); 628 spin_lock(&init_mm.page_table_lock);
602 pgd_populate(&init_mm, pgd, __va(pud_phys)); 629 pgd_populate(&init_mm, pgd, pud);
603 spin_unlock(&init_mm.page_table_lock); 630 spin_unlock(&init_mm.page_table_lock);
604 pgd_changed = true; 631 pgd_changed = true;
605 } 632 }
606 633
607 if (pgd_changed) 634 if (pgd_changed)
608 sync_global_pgds(addr, end); 635 sync_global_pgds(addr, end - 1);
609 636
610 __flush_tlb_all(); 637 __flush_tlb_all();
611 638
@@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size)
664{ 691{
665 struct pglist_data *pgdat = NODE_DATA(nid); 692 struct pglist_data *pgdat = NODE_DATA(nid);
666 struct zone *zone = pgdat->node_zones + ZONE_NORMAL; 693 struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
667 unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT; 694 unsigned long start_pfn = start >> PAGE_SHIFT;
668 unsigned long nr_pages = size >> PAGE_SHIFT; 695 unsigned long nr_pages = size >> PAGE_SHIFT;
669 int ret; 696 int ret;
670 697
671 last_mapped_pfn = init_memory_mapping(start, start + size); 698 init_memory_mapping(start, start + size);
672 if (last_mapped_pfn > max_pfn_mapped)
673 max_pfn_mapped = last_mapped_pfn;
674 699
675 ret = __add_pages(nid, zone, start_pfn, nr_pages); 700 ret = __add_pages(nid, zone, start_pfn, nr_pages);
676 WARN_ON_ONCE(ret); 701 WARN_ON_ONCE(ret);
@@ -682,10 +707,357 @@ int arch_add_memory(int nid, u64 start, u64 size)
682} 707}
683EXPORT_SYMBOL_GPL(arch_add_memory); 708EXPORT_SYMBOL_GPL(arch_add_memory);
684 709
710#define PAGE_INUSE 0xFD
711
712static void __meminit free_pagetable(struct page *page, int order)
713{
714 struct zone *zone;
715 bool bootmem = false;
716 unsigned long magic;
717 unsigned int nr_pages = 1 << order;
718
719 /* bootmem page has reserved flag */
720 if (PageReserved(page)) {
721 __ClearPageReserved(page);
722 bootmem = true;
723
724 magic = (unsigned long)page->lru.next;
725 if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
726 while (nr_pages--)
727 put_page_bootmem(page++);
728 } else
729 __free_pages_bootmem(page, order);
730 } else
731 free_pages((unsigned long)page_address(page), order);
732
733 /*
734 * SECTION_INFO pages and MIX_SECTION_INFO pages
735 * are all allocated by bootmem.
736 */
737 if (bootmem) {
738 zone = page_zone(page);
739 zone_span_writelock(zone);
740 zone->present_pages += nr_pages;
741 zone_span_writeunlock(zone);
742 totalram_pages += nr_pages;
743 }
744}
745
746static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
747{
748 pte_t *pte;
749 int i;
750
751 for (i = 0; i < PTRS_PER_PTE; i++) {
752 pte = pte_start + i;
753 if (pte_val(*pte))
754 return;
755 }
756
757 /* free a pte table */
758 free_pagetable(pmd_page(*pmd), 0);
759 spin_lock(&init_mm.page_table_lock);
760 pmd_clear(pmd);
761 spin_unlock(&init_mm.page_table_lock);
762}
763
764static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
765{
766 pmd_t *pmd;
767 int i;
768
769 for (i = 0; i < PTRS_PER_PMD; i++) {
770 pmd = pmd_start + i;
771 if (pmd_val(*pmd))
772 return;
773 }
774
775 /* free a pmd table */
776 free_pagetable(pud_page(*pud), 0);
777 spin_lock(&init_mm.page_table_lock);
778 pud_clear(pud);
779 spin_unlock(&init_mm.page_table_lock);
780}
781
782/* Return true if pgd is changed, otherwise return false. */
783static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
784{
785 pud_t *pud;
786 int i;
787
788 for (i = 0; i < PTRS_PER_PUD; i++) {
789 pud = pud_start + i;
790 if (pud_val(*pud))
791 return false;
792 }
793
794 /* free a pud table */
795 free_pagetable(pgd_page(*pgd), 0);
796 spin_lock(&init_mm.page_table_lock);
797 pgd_clear(pgd);
798 spin_unlock(&init_mm.page_table_lock);
799
800 return true;
801}
802
803static void __meminit
804remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
805 bool direct)
806{
807 unsigned long next, pages = 0;
808 pte_t *pte;
809 void *page_addr;
810 phys_addr_t phys_addr;
811
812 pte = pte_start + pte_index(addr);
813 for (; addr < end; addr = next, pte++) {
814 next = (addr + PAGE_SIZE) & PAGE_MASK;
815 if (next > end)
816 next = end;
817
818 if (!pte_present(*pte))
819 continue;
820
821 /*
822 * We mapped [0,1G) memory as identity mapping when
823 * initializing, in arch/x86/kernel/head_64.S. These
824 * pagetables cannot be removed.
825 */
826 phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
827 if (phys_addr < (phys_addr_t)0x40000000)
828 return;
829
830 if (IS_ALIGNED(addr, PAGE_SIZE) &&
831 IS_ALIGNED(next, PAGE_SIZE)) {
832 /*
833 * Do not free direct mapping pages since they were
834 * freed when offlining, or simply not in use.
835 */
836 if (!direct)
837 free_pagetable(pte_page(*pte), 0);
838
839 spin_lock(&init_mm.page_table_lock);
840 pte_clear(&init_mm, addr, pte);
841 spin_unlock(&init_mm.page_table_lock);
842
843 /* For non-direct mapping, pages means nothing. */
844 pages++;
845 } else {
846 /*
847 * If we are here, we are freeing vmemmap pages since
848 * direct mapped memory ranges to be freed are aligned.
849 *
850 * If we are not removing the whole page, it means
851 * other page structs in this page are being used and
852 * we cannot remove them. So fill the unused page_structs
853 * with 0xFD, and remove the page when it is wholly
854 * filled with 0xFD.
855 */
856 memset((void *)addr, PAGE_INUSE, next - addr);
857
858 page_addr = page_address(pte_page(*pte));
859 if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
860 free_pagetable(pte_page(*pte), 0);
861
862 spin_lock(&init_mm.page_table_lock);
863 pte_clear(&init_mm, addr, pte);
864 spin_unlock(&init_mm.page_table_lock);
865 }
866 }
867 }
868
869 /* Call free_pte_table() in remove_pmd_table(). */
870 flush_tlb_all();
871 if (direct)
872 update_page_count(PG_LEVEL_4K, -pages);
873}
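
Aside: the PAGE_INUSE (0xFD) trick works like this: when only part of a vmemmap page stops being used, that part is memset() to 0xFD, and the backing page is freed only once memchr_inv() reports the whole page is 0xFD. The idea in a standalone sketch (userspace; all_poisoned() stands in for the !memchr_inv() test):

#include <assert.h>
#include <stdbool.h>
#include <string.h>

#define PAGE_SIZE  4096
#define PAGE_INUSE 0xFD

static unsigned char page[PAGE_SIZE];

/* stand-in for !memchr_inv(page, PAGE_INUSE, PAGE_SIZE) */
static bool all_poisoned(const unsigned char *p, unsigned long len)
{
	for (unsigned long i = 0; i < len; i++)
		if (p[i] != PAGE_INUSE)
			return false;
	return true;
}

int main(void)
{
	/* the first half becomes unused: poison it, but keep the page */
	memset(page, PAGE_INUSE, PAGE_SIZE / 2);
	assert(!all_poisoned(page, PAGE_SIZE));

	/* once the second half is unused as well, the page can be freed */
	memset(page + PAGE_SIZE / 2, PAGE_INUSE, PAGE_SIZE / 2);
	assert(all_poisoned(page, PAGE_SIZE));
	return 0;
}
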
874
875static void __meminit
876remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
877 bool direct)
878{
879 unsigned long next, pages = 0;
880 pte_t *pte_base;
881 pmd_t *pmd;
882 void *page_addr;
883
884 pmd = pmd_start + pmd_index(addr);
885 for (; addr < end; addr = next, pmd++) {
886 next = pmd_addr_end(addr, end);
887
888 if (!pmd_present(*pmd))
889 continue;
890
891 if (pmd_large(*pmd)) {
892 if (IS_ALIGNED(addr, PMD_SIZE) &&
893 IS_ALIGNED(next, PMD_SIZE)) {
894 if (!direct)
895 free_pagetable(pmd_page(*pmd),
896 get_order(PMD_SIZE));
897
898 spin_lock(&init_mm.page_table_lock);
899 pmd_clear(pmd);
900 spin_unlock(&init_mm.page_table_lock);
901 pages++;
902 } else {
903 /* If here, we are freeing vmemmap pages. */
904 memset((void *)addr, PAGE_INUSE, next - addr);
905
906 page_addr = page_address(pmd_page(*pmd));
907 if (!memchr_inv(page_addr, PAGE_INUSE,
908 PMD_SIZE)) {
909 free_pagetable(pmd_page(*pmd),
910 get_order(PMD_SIZE));
911
912 spin_lock(&init_mm.page_table_lock);
913 pmd_clear(pmd);
914 spin_unlock(&init_mm.page_table_lock);
915 }
916 }
917
918 continue;
919 }
920
921 pte_base = (pte_t *)pmd_page_vaddr(*pmd);
922 remove_pte_table(pte_base, addr, next, direct);
923 free_pte_table(pte_base, pmd);
924 }
925
926 /* Call free_pmd_table() in remove_pud_table(). */
927 if (direct)
928 update_page_count(PG_LEVEL_2M, -pages);
929}
930
931static void __meminit
932remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
933 bool direct)
934{
935 unsigned long next, pages = 0;
936 pmd_t *pmd_base;
937 pud_t *pud;
938 void *page_addr;
939
940 pud = pud_start + pud_index(addr);
941 for (; addr < end; addr = next, pud++) {
942 next = pud_addr_end(addr, end);
943
944 if (!pud_present(*pud))
945 continue;
946
947 if (pud_large(*pud)) {
948 if (IS_ALIGNED(addr, PUD_SIZE) &&
949 IS_ALIGNED(next, PUD_SIZE)) {
950 if (!direct)
951 free_pagetable(pud_page(*pud),
952 get_order(PUD_SIZE));
953
954 spin_lock(&init_mm.page_table_lock);
955 pud_clear(pud);
956 spin_unlock(&init_mm.page_table_lock);
957 pages++;
958 } else {
959 /* If here, we are freeing vmemmap pages. */
960 memset((void *)addr, PAGE_INUSE, next - addr);
961
962 page_addr = page_address(pud_page(*pud));
963 if (!memchr_inv(page_addr, PAGE_INUSE,
964 PUD_SIZE)) {
965 free_pagetable(pud_page(*pud),
966 get_order(PUD_SIZE));
967
968 spin_lock(&init_mm.page_table_lock);
969 pud_clear(pud);
970 spin_unlock(&init_mm.page_table_lock);
971 }
972 }
973
974 continue;
975 }
976
977 pmd_base = (pmd_t *)pud_page_vaddr(*pud);
978 remove_pmd_table(pmd_base, addr, next, direct);
979 free_pmd_table(pmd_base, pud);
980 }
981
982 if (direct)
983 update_page_count(PG_LEVEL_1G, -pages);
984}
985
986/* start and end are both virtual addresses. */
987static void __meminit
988remove_pagetable(unsigned long start, unsigned long end, bool direct)
989{
990 unsigned long next;
991 pgd_t *pgd;
992 pud_t *pud;
993 bool pgd_changed = false;
994
995 for (; start < end; start = next) {
996 next = pgd_addr_end(start, end);
997
998 pgd = pgd_offset_k(start);
999 if (!pgd_present(*pgd))
1000 continue;
1001
1002 pud = (pud_t *)pgd_page_vaddr(*pgd);
1003 remove_pud_table(pud, start, next, direct);
1004 if (free_pud_table(pud, pgd))
1005 pgd_changed = true;
1006 }
1007
1008 if (pgd_changed)
1009 sync_global_pgds(start, end - 1);
1010
1011 flush_tlb_all();
1012}
1013
1014void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
1015{
1016 unsigned long start = (unsigned long)memmap;
1017 unsigned long end = (unsigned long)(memmap + nr_pages);
1018
1019 remove_pagetable(start, end, false);
1020}
1021
1022static void __meminit
1023kernel_physical_mapping_remove(unsigned long start, unsigned long end)
1024{
1025 start = (unsigned long)__va(start);
1026 end = (unsigned long)__va(end);
1027
1028 remove_pagetable(start, end, true);
1029}
1030
1031#ifdef CONFIG_MEMORY_HOTREMOVE
1032int __ref arch_remove_memory(u64 start, u64 size)
1033{
1034 unsigned long start_pfn = start >> PAGE_SHIFT;
1035 unsigned long nr_pages = size >> PAGE_SHIFT;
1036 struct zone *zone;
1037 int ret;
1038
1039 zone = page_zone(pfn_to_page(start_pfn));
1040 kernel_physical_mapping_remove(start, start + size);
1041 ret = __remove_pages(zone, start_pfn, nr_pages);
1042 WARN_ON_ONCE(ret);
1043
1044 return ret;
1045}
1046#endif
685#endif /* CONFIG_MEMORY_HOTPLUG */ 1047#endif /* CONFIG_MEMORY_HOTPLUG */
686 1048
687static struct kcore_list kcore_vsyscall; 1049static struct kcore_list kcore_vsyscall;
688 1050
1051static void __init register_page_bootmem_info(void)
1052{
1053#ifdef CONFIG_NUMA
1054 int i;
1055
1056 for_each_online_node(i)
1057 register_page_bootmem_info_node(NODE_DATA(i));
1058#endif
1059}
1060
689void __init mem_init(void) 1061void __init mem_init(void)
690{ 1062{
691 long codesize, reservedpages, datasize, initsize; 1063 long codesize, reservedpages, datasize, initsize;
@@ -698,11 +1070,8 @@ void __init mem_init(void)
698 reservedpages = 0; 1070 reservedpages = 0;
699 1071
700 /* this will put all low memory onto the freelists */ 1072 /* this will put all low memory onto the freelists */
701#ifdef CONFIG_NUMA 1073 register_page_bootmem_info();
702 totalram_pages = numa_free_all_bootmem();
703#else
704 totalram_pages = free_all_bootmem(); 1074 totalram_pages = free_all_bootmem();
705#endif
706 1075
707 absent_pages = absent_pages_in_range(0, max_pfn); 1076 absent_pages = absent_pages_in_range(0, max_pfn);
708 reservedpages = max_pfn - totalram_pages - absent_pages; 1077 reservedpages = max_pfn - totalram_pages - absent_pages;
@@ -772,12 +1141,11 @@ void set_kernel_text_ro(void)
772void mark_rodata_ro(void) 1141void mark_rodata_ro(void)
773{ 1142{
774 unsigned long start = PFN_ALIGN(_text); 1143 unsigned long start = PFN_ALIGN(_text);
775 unsigned long rodata_start = 1144 unsigned long rodata_start = PFN_ALIGN(__start_rodata);
776 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
777 unsigned long end = (unsigned long) &__end_rodata_hpage_align; 1145 unsigned long end = (unsigned long) &__end_rodata_hpage_align;
778 unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); 1146 unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
779 unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); 1147 unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
780 unsigned long data_start = (unsigned long) &_sdata; 1148 unsigned long all_end = PFN_ALIGN(&_end);
781 1149
782 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 1150 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
783 (end - start) >> 10); 1151 (end - start) >> 10);
@@ -786,10 +1154,10 @@ void mark_rodata_ro(void)
786 kernel_set_to_readonly = 1; 1154 kernel_set_to_readonly = 1;
787 1155
788 /* 1156 /*
789 * The rodata section (but not the kernel text!) should also be 1157 * The rodata/data/bss/brk section (but not the kernel text!)
790 * not-executable. 1158 * should also be not-executable.
791 */ 1159 */
792 set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); 1160 set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
793 1161
794 rodata_test(); 1162 rodata_test();
795 1163
@@ -802,12 +1170,12 @@ void mark_rodata_ro(void)
802#endif 1170#endif
803 1171
804 free_init_pages("unused kernel memory", 1172 free_init_pages("unused kernel memory",
805 (unsigned long) page_address(virt_to_page(text_end)), 1173 (unsigned long) __va(__pa_symbol(text_end)),
806 (unsigned long) 1174 (unsigned long) __va(__pa_symbol(rodata_start)));
807 page_address(virt_to_page(rodata_start))); 1175
808 free_init_pages("unused kernel memory", 1176 free_init_pages("unused kernel memory",
809 (unsigned long) page_address(virt_to_page(rodata_end)), 1177 (unsigned long) __va(__pa_symbol(rodata_end)),
810 (unsigned long) page_address(virt_to_page(data_start))); 1178 (unsigned long) __va(__pa_symbol(_sdata)));
811} 1179}
812 1180
813#endif 1181#endif
@@ -831,6 +1199,9 @@ int kern_addr_valid(unsigned long addr)
831 if (pud_none(*pud)) 1199 if (pud_none(*pud))
832 return 0; 1200 return 0;
833 1201
1202 if (pud_large(*pud))
1203 return pfn_valid(pud_pfn(*pud));
1204
834 pmd = pmd_offset(pud, addr); 1205 pmd = pmd_offset(pud, addr);
835 if (pmd_none(*pmd)) 1206 if (pmd_none(*pmd))
836 return 0; 1207 return 0;
@@ -981,10 +1352,70 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
981 } 1352 }
982 1353
983 } 1354 }
984 sync_global_pgds((unsigned long)start_page, end); 1355 sync_global_pgds((unsigned long)start_page, end - 1);
985 return 0; 1356 return 0;
986} 1357}
987 1358
1359#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
1360void register_page_bootmem_memmap(unsigned long section_nr,
1361 struct page *start_page, unsigned long size)
1362{
1363 unsigned long addr = (unsigned long)start_page;
1364 unsigned long end = (unsigned long)(start_page + size);
1365 unsigned long next;
1366 pgd_t *pgd;
1367 pud_t *pud;
1368 pmd_t *pmd;
1369 unsigned int nr_pages;
1370 struct page *page;
1371
1372 for (; addr < end; addr = next) {
1373 pte_t *pte = NULL;
1374
1375 pgd = pgd_offset_k(addr);
1376 if (pgd_none(*pgd)) {
1377 next = (addr + PAGE_SIZE) & PAGE_MASK;
1378 continue;
1379 }
1380 get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
1381
1382 pud = pud_offset(pgd, addr);
1383 if (pud_none(*pud)) {
1384 next = (addr + PAGE_SIZE) & PAGE_MASK;
1385 continue;
1386 }
1387 get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
1388
1389 if (!cpu_has_pse) {
1390 next = (addr + PAGE_SIZE) & PAGE_MASK;
1391 pmd = pmd_offset(pud, addr);
1392 if (pmd_none(*pmd))
1393 continue;
1394 get_page_bootmem(section_nr, pmd_page(*pmd),
1395 MIX_SECTION_INFO);
1396
1397 pte = pte_offset_kernel(pmd, addr);
1398 if (pte_none(*pte))
1399 continue;
1400 get_page_bootmem(section_nr, pte_page(*pte),
1401 SECTION_INFO);
1402 } else {
1403 next = pmd_addr_end(addr, end);
1404
1405 pmd = pmd_offset(pud, addr);
1406 if (pmd_none(*pmd))
1407 continue;
1408
1409 nr_pages = 1 << (get_order(PMD_SIZE));
1410 page = pmd_page(*pmd);
1411 while (nr_pages--)
1412 get_page_bootmem(section_nr, page++,
1413 SECTION_INFO);
1414 }
1415 }
1416}
1417#endif
1418
988void __meminit vmemmap_populate_print_last(void) 1419void __meminit vmemmap_populate_print_last(void)
989{ 1420{
990 if (p_start) { 1421 if (p_start) {
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index c80b9fb95734..8dabbed409ee 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -9,6 +9,7 @@
9#include <linux/memblock.h> 9#include <linux/memblock.h>
10 10
11static u64 patterns[] __initdata = { 11static u64 patterns[] __initdata = {
12 /* The first entry has to be 0 to leave memtest with zeroed memory */
12 0, 13 0,
13 0xffffffffffffffffULL, 14 0xffffffffffffffffULL,
14 0x5555555555555555ULL, 15 0x5555555555555555ULL,
@@ -110,15 +111,8 @@ void __init early_memtest(unsigned long start, unsigned long end)
110 return; 111 return;
111 112
112 printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); 113 printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
113 for (i = 0; i < memtest_pattern; i++) { 114 for (i = memtest_pattern-1; i < UINT_MAX; --i) {
114 idx = i % ARRAY_SIZE(patterns); 115 idx = i % ARRAY_SIZE(patterns);
115 do_one_pass(patterns[idx], start, end); 116 do_one_pass(patterns[idx], start, end);
116 } 117 }
117
118 if (idx > 0) {
119 printk(KERN_INFO "early_memtest: wipe out "
120 "test pattern from memory\n");
121 /* additional test with pattern 0 will do this */
122 do_one_pass(0, start, end);
123 }
124} 118}
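
Aside: the rewritten loop relies on unsigned wrap-around as its stop condition: when i is 0 and gets decremented it wraps to UINT_MAX, failing i < UINT_MAX, so index 0 (the all-zero pattern) is always the final pass and the explicit wipe-out pass is no longer needed. A short demonstration of the visiting order (the pattern count below is illustrative):

#include <limits.h>
#include <stdio.h>

#define NR_PATTERNS 17	/* roughly ARRAY_SIZE(patterns); exact count not important here */

int main(void)
{
	unsigned int memtest_pattern = 4;	/* e.g. memtest=4 on the command line */

	for (unsigned int i = memtest_pattern - 1; i < UINT_MAX; --i)
		printf("pass with patterns[%u]\n", i % NR_PATTERNS);
	/* prints indices 3, 2, 1, 0: the all-zero pattern always runs last */
	return 0;
}
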
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
new file mode 100644
index 000000000000..6b563a118891
--- /dev/null
+++ b/arch/x86/mm/mm_internal.h
@@ -0,0 +1,19 @@
1#ifndef __X86_MM_INTERNAL_H
2#define __X86_MM_INTERNAL_H
3
4void *alloc_low_pages(unsigned int num);
5static inline void *alloc_low_page(void)
6{
7 return alloc_low_pages(1);
8}
9
10void early_ioremap_page_table_range_init(void);
11
12unsigned long kernel_physical_mapping_init(unsigned long start,
13 unsigned long end,
14 unsigned long page_size_mask);
15void zone_sizes_init(void);
16
17extern int after_bootmem;
18
19#endif /* __X86_MM_INTERNAL_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 2d125be1bae9..dfd30259eb89 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -56,7 +56,7 @@ early_param("numa", numa_setup);
56/* 56/*
57 * apicid, cpu, node mappings 57 * apicid, cpu, node mappings
58 */ 58 */
59s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { 59s16 __apicid_to_node[MAX_LOCAL_APIC] = {
60 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE 60 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
61}; 61};
62 62
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
78DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 78DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
79EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 79EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
80 80
81void __cpuinit numa_set_node(int cpu, int node) 81void numa_set_node(int cpu, int node)
82{ 82{
83 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 83 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
84 84
@@ -101,7 +101,7 @@ void __cpuinit numa_set_node(int cpu, int node)
101 set_cpu_numa_node(cpu, node); 101 set_cpu_numa_node(cpu, node);
102} 102}
103 103
104void __cpuinit numa_clear_node(int cpu) 104void numa_clear_node(int cpu)
105{ 105{
106 numa_set_node(cpu, NUMA_NO_NODE); 106 numa_set_node(cpu, NUMA_NO_NODE);
107} 107}
@@ -193,7 +193,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
193static void __init setup_node_data(int nid, u64 start, u64 end) 193static void __init setup_node_data(int nid, u64 start, u64 end)
194{ 194{
195 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 195 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
196 bool remapped = false;
197 u64 nd_pa; 196 u64 nd_pa;
198 void *nd; 197 void *nd;
199 int tnid; 198 int tnid;
@@ -205,37 +204,27 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
205 if (end && (end - start) < NODE_MIN_SIZE) 204 if (end && (end - start) < NODE_MIN_SIZE)
206 return; 205 return;
207 206
208 /* initialize remap allocator before aligning to ZONE_ALIGN */
209 init_alloc_remap(nid, start, end);
210
211 start = roundup(start, ZONE_ALIGN); 207 start = roundup(start, ZONE_ALIGN);
212 208
213 printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", 209 printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
214 nid, start, end - 1); 210 nid, start, end - 1);
215 211
216 /* 212 /*
217 * Allocate node data. Try remap allocator first, node-local 213 * Allocate node data. Try node-local memory and then any node.
218 * memory and then any node. Never allocate in DMA zone. 214 * Never allocate in DMA zone.
219 */ 215 */
220 nd = alloc_remap(nid, nd_size); 216 nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
221 if (nd) { 217 if (!nd_pa) {
222 nd_pa = __pa(nd); 218 pr_err("Cannot find %zu bytes in any node\n", nd_size);
223 remapped = true; 219 return;
224 } else {
225 nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
226 if (!nd_pa) {
227 pr_err("Cannot find %zu bytes in node %d\n",
228 nd_size, nid);
229 return;
230 }
231 nd = __va(nd_pa);
232 } 220 }
221 nd = __va(nd_pa);
233 222
234 /* report and initialize */ 223 /* report and initialize */
235 printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n", 224 printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n",
236 nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); 225 nd_pa, nd_pa + nd_size - 1);
237 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 226 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
238 if (!remapped && tnid != nid) 227 if (tnid != nid)
239 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); 228 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);
240 229
241 node_data[nid] = nd; 230 node_data[nid] = nd;
@@ -571,10 +560,12 @@ static int __init numa_init(int (*init_func)(void))
571 for (i = 0; i < MAX_LOCAL_APIC; i++) 560 for (i = 0; i < MAX_LOCAL_APIC; i++)
572 set_apicid_to_node(i, NUMA_NO_NODE); 561 set_apicid_to_node(i, NUMA_NO_NODE);
573 562
574 nodes_clear(numa_nodes_parsed); 563 /*
564 * Do not clear numa_nodes_parsed or zero numa_meminfo here, because
565 * SRAT was parsed earlier in early_parse_srat().
566 */
575 nodes_clear(node_possible_map); 567 nodes_clear(node_possible_map);
576 nodes_clear(node_online_map); 568 nodes_clear(node_online_map);
577 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
578 WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); 569 WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
579 numa_reset_distance(); 570 numa_reset_distance();
580 571
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 534255a36b6b..73a6d7395bd3 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
73 73
74extern unsigned long highend_pfn, highstart_pfn; 74extern unsigned long highend_pfn, highstart_pfn;
75 75
76#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
77
78static void *node_remap_start_vaddr[MAX_NUMNODES];
79void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
80
81/*
82 * Remap memory allocator
83 */
84static unsigned long node_remap_start_pfn[MAX_NUMNODES];
85static void *node_remap_end_vaddr[MAX_NUMNODES];
86static void *node_remap_alloc_vaddr[MAX_NUMNODES];
87
88/**
89 * alloc_remap - Allocate remapped memory
90 * @nid: NUMA node to allocate memory from
91 * @size: The size of allocation
92 *
93 * Allocate @size bytes from the remap area of NUMA node @nid. The
94 * size of the remap area is predetermined by init_alloc_remap() and
95 * only the callers considered there should call this function. For
96 * more info, please read the comment on top of init_alloc_remap().
97 *
98 * The caller must be ready to handle allocation failure from this
99 * function and fall back to regular memory allocator in such cases.
100 *
101 * CONTEXT:
102 * Single CPU early boot context.
103 *
104 * RETURNS:
105 * Pointer to the allocated memory on success, %NULL on failure.
106 */
107void *alloc_remap(int nid, unsigned long size)
108{
109 void *allocation = node_remap_alloc_vaddr[nid];
110
111 size = ALIGN(size, L1_CACHE_BYTES);
112
113 if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
114 return NULL;
115
116 node_remap_alloc_vaddr[nid] += size;
117 memset(allocation, 0, size);
118
119 return allocation;
120}
121
122#ifdef CONFIG_HIBERNATION
123/**
124 * resume_map_numa_kva - add KVA mapping to the temporary page tables created
125 * during resume from hibernation
126 * @pgd_base - temporary resume page directory
127 */
128void resume_map_numa_kva(pgd_t *pgd_base)
129{
130 int node;
131
132 for_each_online_node(node) {
133 unsigned long start_va, start_pfn, nr_pages, pfn;
134
135 start_va = (unsigned long)node_remap_start_vaddr[node];
136 start_pfn = node_remap_start_pfn[node];
137 nr_pages = (node_remap_end_vaddr[node] -
138 node_remap_start_vaddr[node]) >> PAGE_SHIFT;
139
140 printk(KERN_DEBUG "%s: node %d\n", __func__, node);
141
142 for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
143 unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
144 pgd_t *pgd = pgd_base + pgd_index(vaddr);
145 pud_t *pud = pud_offset(pgd, vaddr);
146 pmd_t *pmd = pmd_offset(pud, vaddr);
147
148 set_pmd(pmd, pfn_pmd(start_pfn + pfn,
149 PAGE_KERNEL_LARGE_EXEC));
150
151 printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
152 __func__, vaddr, start_pfn + pfn);
153 }
154 }
155}
156#endif
157
158/**
159 * init_alloc_remap - Initialize remap allocator for a NUMA node
 160 * @nid: NUMA node to initialize remap allocator for
161 *
162 * NUMA nodes may end up without any lowmem. As allocating pgdat and
163 * memmap on a different node with lowmem is inefficient, a special
164 * remap allocator is implemented which can be used by alloc_remap().
165 *
166 * For each node, the amount of memory which will be necessary for
167 * pgdat and memmap is calculated and two memory areas of the size are
168 * allocated - one in the node and the other in lowmem; then, the area
169 * in the node is remapped to the lowmem area.
170 *
171 * As pgdat and memmap must be allocated in lowmem anyway, this
172 * doesn't waste lowmem address space; however, the actual lowmem
173 * which gets remapped over is wasted. The amount shouldn't be
174 * problematic on machines this feature will be used.
175 *
176 * Initialization failure isn't fatal. alloc_remap() is used
177 * opportunistically and the callers will fall back to other memory
178 * allocation mechanisms on failure.
179 */
180void __init init_alloc_remap(int nid, u64 start, u64 end)
181{
182 unsigned long start_pfn = start >> PAGE_SHIFT;
183 unsigned long end_pfn = end >> PAGE_SHIFT;
184 unsigned long size, pfn;
185 u64 node_pa, remap_pa;
186 void *remap_va;
187
188 /*
 189 * The acpi/srat node info can show hot-add memory zones where
190 * memory could be added but not currently present.
191 */
192 printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
193 nid, start_pfn, end_pfn);
194
195 /* calculate the necessary space aligned to large page size */
196 size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
197 size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
198 size = ALIGN(size, LARGE_PAGE_BYTES);
199
200 /* allocate node memory and the lowmem remap area */
201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
202 if (!node_pa) {
203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
204 size, nid);
205 return;
206 }
207 memblock_reserve(node_pa, size);
208
209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
210 max_low_pfn << PAGE_SHIFT,
211 size, LARGE_PAGE_BYTES);
212 if (!remap_pa) {
213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
214 size, nid);
215 memblock_free(node_pa, size);
216 return;
217 }
218 memblock_reserve(remap_pa, size);
219 remap_va = phys_to_virt(remap_pa);
220
221 /* perform actual remap */
222 for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
223 set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
224 (node_pa >> PAGE_SHIFT) + pfn,
225 PAGE_KERNEL_LARGE);
226
227 /* initialize remap allocator parameters */
228 node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
229 node_remap_start_vaddr[nid] = remap_va;
230 node_remap_end_vaddr[nid] = remap_va + size;
231 node_remap_alloc_vaddr[nid] = remap_va;
232
233 printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
234 nid, node_pa, node_pa + size, remap_va, remap_va + size);
235}
236
237void __init initmem_init(void) 76void __init initmem_init(void)
238{ 77{
239 x86_numa_init(); 78 x86_numa_init();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 92e27119ee1a..9405ffc91502 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -10,16 +10,3 @@ void __init initmem_init(void)
10{ 10{
11 x86_numa_init(); 11 x86_numa_init();
12} 12}
13
14unsigned long __init numa_free_all_bootmem(void)
15{
16 unsigned long pages = 0;
17 int i;
18
19 for_each_online_node(i)
20 pages += free_all_bootmem_node(NODE_DATA(i));
21
22 pages += free_low_memory_core_early(MAX_NUMNODES);
23
24 return pages;
25}
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index 7178c3afe05e..ad86ec91e640 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -21,12 +21,6 @@ void __init numa_reset_distance(void);
21 21
22void __init x86_numa_init(void); 22void __init x86_numa_init(void);
23 23
24#ifdef CONFIG_X86_64
25static inline void init_alloc_remap(int nid, u64 start, u64 end) { }
26#else
27void __init init_alloc_remap(int nid, u64 start, u64 end);
28#endif
29
30#ifdef CONFIG_NUMA_EMU 24#ifdef CONFIG_NUMA_EMU
31void __init numa_emulation(struct numa_meminfo *numa_meminfo, 25void __init numa_emulation(struct numa_meminfo *numa_meminfo,
32 int numa_dist_cnt); 26 int numa_dist_cnt);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d23503..ca1f1c2bb7be 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -94,12 +94,12 @@ static inline void split_page_count(int level) { }
94 94
95static inline unsigned long highmap_start_pfn(void) 95static inline unsigned long highmap_start_pfn(void)
96{ 96{
97 return __pa(_text) >> PAGE_SHIFT; 97 return __pa_symbol(_text) >> PAGE_SHIFT;
98} 98}
99 99
100static inline unsigned long highmap_end_pfn(void) 100static inline unsigned long highmap_end_pfn(void)
101{ 101{
102 return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; 102 return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
103} 103}
104 104
105#endif 105#endif
@@ -276,8 +276,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
276 * The .rodata section needs to be read-only. Using the pfn 276 * The .rodata section needs to be read-only. Using the pfn
277 * catches all aliases. 277 * catches all aliases.
278 */ 278 */
279 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, 279 if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
280 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) 280 __pa_symbol(__end_rodata) >> PAGE_SHIFT))
281 pgprot_val(forbidden) |= _PAGE_RW; 281 pgprot_val(forbidden) |= _PAGE_RW;
282 282
283#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) 283#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
@@ -364,6 +364,37 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
364EXPORT_SYMBOL_GPL(lookup_address); 364EXPORT_SYMBOL_GPL(lookup_address);
365 365
366/* 366/*
367 * This is necessary because __pa() does not work on some
368 * kinds of memory, like vmalloc() or the alloc_remap()
369 * areas on 32-bit NUMA systems. The percpu areas can
370 * end up in this kind of memory, for instance.
371 *
372 * This could be optimized, but it is only intended to be
 373 * used at initialization time, and keeping it
374 * unoptimized should increase the testing coverage for
375 * the more obscure platforms.
376 */
377phys_addr_t slow_virt_to_phys(void *__virt_addr)
378{
379 unsigned long virt_addr = (unsigned long)__virt_addr;
380 phys_addr_t phys_addr;
381 unsigned long offset;
382 enum pg_level level;
383 unsigned long psize;
384 unsigned long pmask;
385 pte_t *pte;
386
387 pte = lookup_address(virt_addr, &level);
388 BUG_ON(!pte);
389 psize = page_level_size(level);
390 pmask = page_level_mask(level);
391 offset = virt_addr & ~pmask;
392 phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
393 return (phys_addr | offset);
394}
395EXPORT_SYMBOL_GPL(slow_virt_to_phys);
396
397/*
367 * Set the new pmd in all the pgds we know about: 398 * Set the new pmd in all the pgds we know about:
368 */ 399 */
369static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 400static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
@@ -396,7 +427,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
396 pte_t new_pte, old_pte, *tmp; 427 pte_t new_pte, old_pte, *tmp;
397 pgprot_t old_prot, new_prot, req_prot; 428 pgprot_t old_prot, new_prot, req_prot;
398 int i, do_split = 1; 429 int i, do_split = 1;
399 unsigned int level; 430 enum pg_level level;
400 431
401 if (cpa->force_split) 432 if (cpa->force_split)
402 return 1; 433 return 1;
@@ -412,15 +443,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
412 443
413 switch (level) { 444 switch (level) {
414 case PG_LEVEL_2M: 445 case PG_LEVEL_2M:
415 psize = PMD_PAGE_SIZE;
416 pmask = PMD_PAGE_MASK;
417 break;
418#ifdef CONFIG_X86_64 446#ifdef CONFIG_X86_64
419 case PG_LEVEL_1G: 447 case PG_LEVEL_1G:
420 psize = PUD_PAGE_SIZE;
421 pmask = PUD_PAGE_MASK;
422 break;
423#endif 448#endif
449 psize = page_level_size(level);
450 pmask = page_level_mask(level);
451 break;
424 default: 452 default:
425 do_split = -EINVAL; 453 do_split = -EINVAL;
426 goto out_unlock; 454 goto out_unlock;
@@ -501,21 +529,13 @@ out_unlock:
501 return do_split; 529 return do_split;
502} 530}
503 531
504static int split_large_page(pte_t *kpte, unsigned long address) 532int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
505{ 533{
506 unsigned long pfn, pfninc = 1; 534 unsigned long pfn, pfninc = 1;
507 unsigned int i, level; 535 unsigned int i, level;
508 pte_t *pbase, *tmp; 536 pte_t *tmp;
509 pgprot_t ref_prot; 537 pgprot_t ref_prot;
510 struct page *base; 538 struct page *base = virt_to_page(pbase);
511
512 if (!debug_pagealloc)
513 spin_unlock(&cpa_lock);
514 base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
515 if (!debug_pagealloc)
516 spin_lock(&cpa_lock);
517 if (!base)
518 return -ENOMEM;
519 539
520 spin_lock(&pgd_lock); 540 spin_lock(&pgd_lock);
521 /* 541 /*
@@ -523,10 +543,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
523 * up for us already: 543 * up for us already:
524 */ 544 */
525 tmp = lookup_address(address, &level); 545 tmp = lookup_address(address, &level);
526 if (tmp != kpte) 546 if (tmp != kpte) {
527 goto out_unlock; 547 spin_unlock(&pgd_lock);
548 return 1;
549 }
528 550
529 pbase = (pte_t *)page_address(base);
530 paravirt_alloc_pte(&init_mm, page_to_pfn(base)); 551 paravirt_alloc_pte(&init_mm, page_to_pfn(base));
531 ref_prot = pte_pgprot(pte_clrhuge(*kpte)); 552 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
532 /* 553 /*
@@ -551,16 +572,10 @@ static int split_large_page(pte_t *kpte, unsigned long address)
551 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) 572 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
552 set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); 573 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
553 574
554 if (address >= (unsigned long)__va(0) && 575 if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
555 address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) 576 PFN_DOWN(__pa(address)) + 1))
556 split_page_count(level); 577 split_page_count(level);
557 578
558#ifdef CONFIG_X86_64
559 if (address >= (unsigned long)__va(1UL<<32) &&
560 address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
561 split_page_count(level);
562#endif
563
564 /* 579 /*
565 * Install the new, split up pagetable. 580 * Install the new, split up pagetable.
566 * 581 *
@@ -579,17 +594,27 @@ static int split_large_page(pte_t *kpte, unsigned long address)
579 * going on. 594 * going on.
580 */ 595 */
581 __flush_tlb_all(); 596 __flush_tlb_all();
597 spin_unlock(&pgd_lock);
598
599 return 0;
600}
582 601
583 base = NULL; 602static int split_large_page(pte_t *kpte, unsigned long address)
603{
604 pte_t *pbase;
605 struct page *base;
584 606
585out_unlock: 607 if (!debug_pagealloc)
586 /* 608 spin_unlock(&cpa_lock);
587 * If we dropped out via the lookup_address check under 609 base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
588 * pgd_lock then stick the page back into the pool: 610 if (!debug_pagealloc)
589 */ 611 spin_lock(&cpa_lock);
590 if (base) 612 if (!base)
613 return -ENOMEM;
614
615 pbase = (pte_t *)page_address(base);
616 if (__split_large_page(kpte, address, pbase))
591 __free_page(base); 617 __free_page(base);
592 spin_unlock(&pgd_lock);
593 618
594 return 0; 619 return 0;
595} 620}
@@ -729,13 +754,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
729 unsigned long vaddr; 754 unsigned long vaddr;
730 int ret; 755 int ret;
731 756
732 if (cpa->pfn >= max_pfn_mapped) 757 if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
733 return 0; 758 return 0;
734 759
735#ifdef CONFIG_X86_64
736 if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
737 return 0;
738#endif
739 /* 760 /*
740 * No need to redo, when the primary call touched the direct 761 * No need to redo, when the primary call touched the direct
741 * mapping already: 762 * mapping already:
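Note on the new slow_virt_to_phys() above: it recovers a physical address by walking the page tables with lookup_address() and then combining the PTE's pfn with the offset of the virtual address inside the mapping, where the offset width depends on the page level (4 KiB, 2 MiB, or on 64-bit 1 GiB). A self-contained sketch of just that final mask-and-combine arithmetic, with x86-like constants assumed for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* x86-style sizes, used purely for the arithmetic demonstration. */
    #define PAGE_SHIFT  12
    #define PAGE_SIZE   (1ULL << PAGE_SHIFT)
    #define PMD_SIZE    (1ULL << 21)

    /* Same computation slow_virt_to_phys() performs once it knows the level:
     * shift the pfn from the pte up to a physical base and OR in the part of
     * the virtual address below the mapping's size.
     */
    static uint64_t virt_to_phys_at_level(uint64_t vaddr, uint64_t pte_pfn,
                                          uint64_t level_size)
    {
            uint64_t level_mask = ~(level_size - 1);
            uint64_t offset = vaddr & ~level_mask;
            return (pte_pfn << PAGE_SHIFT) | offset;
    }

    int main(void)
    {
            uint64_t vaddr = 0xffff880012345678ULL;

            /* 4 KiB mapping: the low 12 bits come from the virtual address. */
            printf("4K : %#llx\n", (unsigned long long)
                   virt_to_phys_at_level(vaddr, 0x12345, PAGE_SIZE));

            /* 2 MiB mapping: the low 21 bits come from the virtual address. */
            printf("2M : %#llx\n", (unsigned long long)
                   virt_to_phys_at_level(vaddr, 0x12200, PMD_SIZE));
            return 0;
    }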
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 0eb572eda406..2610bd93c896 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -560,10 +560,10 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
560{ 560{
561 unsigned long id_sz; 561 unsigned long id_sz;
562 562
563 if (base >= __pa(high_memory)) 563 if (base > __pa(high_memory-1))
564 return 0; 564 return 0;
565 565
566 id_sz = (__pa(high_memory) < base + size) ? 566 id_sz = (__pa(high_memory-1) <= base + size) ?
567 __pa(high_memory) - base : 567 __pa(high_memory) - base :
568 size; 568 size;
569 569
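Note on the pat.c hunk above: the checks now compare against __pa(high_memory - 1), the last byte of the direct mapping, rather than __pa(high_memory), which points one byte past it and may not translate cleanly (notably under the stricter __phys_addr() checks added elsewhere in this series). The sketch below only illustrates the general point that end-exclusive limits which can wrap are fragile while last-byte comparisons are not; the 32-bit wraparound scenario is an assumed example, not the kernel's exact configuration.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Pretend the mapped range ends at the top of a 32-bit space:
             * the last valid byte is 0xffffffff and "one past the end"
             * wraps to 0.
             */
            uint32_t last_valid = 0xffffffffu;
            uint32_t one_past_end = last_valid + 1;     /* wraps to 0 */

            uint32_t base = 0x10000000u;

            /* "base >= end" fires for every base once the end has wrapped,
             * so all of memory would look out of range ...
             */
            printf("base >= one_past_end : %d (wrong: always 1)\n",
                   base >= one_past_end);

            /* ... while comparing against the last valid byte still works. */
            printf("base >  last_valid   : %d (correct: base is in range)\n",
                   base > last_valid);
            return 0;
    }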
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e27fbf887f3b..193350b51f90 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -334,7 +334,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
334 if (changed && dirty) { 334 if (changed && dirty) {
335 *pmdp = entry; 335 *pmdp = entry;
336 pmd_update_defer(vma->vm_mm, address, pmdp); 336 pmd_update_defer(vma->vm_mm, address, pmdp);
337 flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); 337 /*
338 * We had a write-protection fault here and changed the pmd
 339 * to be more permissive. No need to flush the TLB for that,
340 * #PF is architecturally guaranteed to do that and in the
341 * worst-case we'll generate a spurious fault.
342 */
338 } 343 }
339 344
340 return changed; 345 return changed;
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index d2e2735327b4..e666cbbb9261 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -1,3 +1,4 @@
1#include <linux/bootmem.h>
1#include <linux/mmdebug.h> 2#include <linux/mmdebug.h>
2#include <linux/module.h> 3#include <linux/module.h>
3#include <linux/mm.h> 4#include <linux/mm.h>
@@ -8,33 +9,54 @@
8 9
9#ifdef CONFIG_X86_64 10#ifdef CONFIG_X86_64
10 11
12#ifdef CONFIG_DEBUG_VIRTUAL
11unsigned long __phys_addr(unsigned long x) 13unsigned long __phys_addr(unsigned long x)
12{ 14{
13 if (x >= __START_KERNEL_map) { 15 unsigned long y = x - __START_KERNEL_map;
14 x -= __START_KERNEL_map; 16
15 VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); 17 /* use the carry flag to determine if x was < __START_KERNEL_map */
16 x += phys_base; 18 if (unlikely(x > y)) {
19 x = y + phys_base;
20
21 VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
17 } else { 22 } else {
18 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 23 x = y + (__START_KERNEL_map - PAGE_OFFSET);
19 x -= PAGE_OFFSET; 24
20 VIRTUAL_BUG_ON(!phys_addr_valid(x)); 25 /* carry flag will be set if starting x was >= PAGE_OFFSET */
26 VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x));
21 } 27 }
28
22 return x; 29 return x;
23} 30}
24EXPORT_SYMBOL(__phys_addr); 31EXPORT_SYMBOL(__phys_addr);
25 32
33unsigned long __phys_addr_symbol(unsigned long x)
34{
35 unsigned long y = x - __START_KERNEL_map;
36
37 /* only check upper bounds since lower bounds will trigger carry */
38 VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
39
40 return y + phys_base;
41}
42EXPORT_SYMBOL(__phys_addr_symbol);
43#endif
44
26bool __virt_addr_valid(unsigned long x) 45bool __virt_addr_valid(unsigned long x)
27{ 46{
28 if (x >= __START_KERNEL_map) { 47 unsigned long y = x - __START_KERNEL_map;
29 x -= __START_KERNEL_map; 48
30 if (x >= KERNEL_IMAGE_SIZE) 49 /* use the carry flag to determine if x was < __START_KERNEL_map */
50 if (unlikely(x > y)) {
51 x = y + phys_base;
52
53 if (y >= KERNEL_IMAGE_SIZE)
31 return false; 54 return false;
32 x += phys_base;
33 } else { 55 } else {
34 if (x < PAGE_OFFSET) 56 x = y + (__START_KERNEL_map - PAGE_OFFSET);
35 return false; 57
36 x -= PAGE_OFFSET; 58 /* carry flag will be set if starting x was >= PAGE_OFFSET */
37 if (!phys_addr_valid(x)) 59 if ((x > y) || !phys_addr_valid(x))
38 return false; 60 return false;
39 } 61 }
40 62
@@ -47,10 +69,16 @@ EXPORT_SYMBOL(__virt_addr_valid);
47#ifdef CONFIG_DEBUG_VIRTUAL 69#ifdef CONFIG_DEBUG_VIRTUAL
48unsigned long __phys_addr(unsigned long x) 70unsigned long __phys_addr(unsigned long x)
49{ 71{
72 unsigned long phys_addr = x - PAGE_OFFSET;
50 /* VMALLOC_* aren't constants */ 73 /* VMALLOC_* aren't constants */
51 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 74 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
52 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); 75 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
53 return x - PAGE_OFFSET; 76 /* max_low_pfn is set early, but not _that_ early */
77 if (max_low_pfn) {
78 VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn);
79 BUG_ON(slow_virt_to_phys((void *)x) != phys_addr);
80 }
81 return phys_addr;
54} 82}
55EXPORT_SYMBOL(__phys_addr); 83EXPORT_SYMBOL(__phys_addr);
56#endif 84#endif
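Note on the rewritten __phys_addr()/__virt_addr_valid() above: they rely on unsigned wraparound. After y = x - __START_KERNEL_map, the subtraction borrows exactly when x was below the base, so the single test "x > y" answers "x >= __START_KERNEL_map" and the compiler can derive it from the carry flag of the subtraction. A small userspace demonstration of that property (the base value is an arbitrary stand-in):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Arbitrary stand-in for __START_KERNEL_map; only "non-zero
             * base" matters for the trick.
             */
            const uint64_t base = 0xffffffff80000000ULL;

            uint64_t above = base + 0x1000; /* address inside the mapping   */
            uint64_t below = base - 0x1000; /* address below the map's base */
            uint64_t y;

            /* y = x - base wraps exactly when x < base, which makes y
             * larger than x; so "x > y" answers "x >= base" in one test.
             */
            y = above - base;
            assert(above > y);              /* no borrow: x was >= base */

            y = below - base;
            assert(!(below > y));           /* borrow: x was < base */

            puts("carry-flag style range check behaves as expected");
            return 0;
    }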
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 4ddf497ca65b..79836d01f789 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -141,47 +141,167 @@ static inline int save_add_info(void) {return 1;}
141static inline int save_add_info(void) {return 0;} 141static inline int save_add_info(void) {return 0;}
142#endif 142#endif
143 143
144#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
145static void __init
146handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
147{
148 int overlap, i;
149 unsigned long start_pfn, end_pfn;
150
151 start_pfn = PFN_DOWN(start);
152 end_pfn = PFN_UP(end);
153
154 /*
155 * For movablemem_map=acpi:
156 *
157 * SRAT: |_____| |_____| |_________| |_________| ......
158 * node id: 0 1 1 2
159 * hotpluggable: n y y n
160 * movablemem_map: |_____| |_________|
161 *
162 * Using movablemem_map, we can prevent memblock from allocating memory
163 * on ZONE_MOVABLE at boot time.
164 *
 165 * Before parsing SRAT, memblock has already reserved some memory ranges
 166 * for other purposes, such as the kernel image. We cannot prevent the
 167 * kernel from using this memory, so we need to exclude it
168 * even if it is hotpluggable.
169 * Furthermore, to ensure the kernel has enough memory to boot, we make
170 * all the memory on the node which the kernel resides in
171 * un-hotpluggable.
172 */
173 if (hotpluggable && movablemem_map.acpi) {
174 /* Exclude ranges reserved by memblock. */
175 struct memblock_type *rgn = &memblock.reserved;
176
177 for (i = 0; i < rgn->cnt; i++) {
178 if (end <= rgn->regions[i].base ||
179 start >= rgn->regions[i].base +
180 rgn->regions[i].size)
181 continue;
182
183 /*
184 * If the memory range overlaps the memory reserved by
185 * memblock, then the kernel resides in this node.
186 */
187 node_set(node, movablemem_map.numa_nodes_kernel);
188
189 goto out;
190 }
191
192 /*
193 * If the kernel resides in this node, then the whole node
194 * should not be hotpluggable.
195 */
196 if (node_isset(node, movablemem_map.numa_nodes_kernel))
197 goto out;
198
199 insert_movablemem_map(start_pfn, end_pfn);
200
201 /*
202 * numa_nodes_hotplug nodemask represents which nodes are put
203 * into movablemem_map.map[].
204 */
205 node_set(node, movablemem_map.numa_nodes_hotplug);
206 goto out;
207 }
208
209 /*
210 * For movablemem_map=nn[KMG]@ss[KMG]:
211 *
212 * SRAT: |_____| |_____| |_________| |_________| ......
213 * node id: 0 1 1 2
214 * user specified: |__| |___|
215 * movablemem_map: |___| |_________| |______| ......
216 *
217 * Using movablemem_map, we can prevent memblock from allocating memory
218 * on ZONE_MOVABLE at boot time.
219 *
 220 * NOTE: In this case, SRAT info will be ignored.
221 */
222 overlap = movablemem_map_overlap(start_pfn, end_pfn);
223 if (overlap >= 0) {
224 /*
 225 * If part of this range is in movablemem_map, we need to
 226 * add the rest of the range after it, extending the range to
 227 * the end of the node, because everything from the min address
 228 * specified to the end of the node will be ZONE_MOVABLE.
229 */
230 start_pfn = max(start_pfn,
231 movablemem_map.map[overlap].start_pfn);
232 insert_movablemem_map(start_pfn, end_pfn);
233
234 /*
235 * Set the nodemask, so that if the address range on one node
 236 * is not contiguous, we can add the subsequent ranges on the
237 * same node into movablemem_map.
238 */
239 node_set(node, movablemem_map.numa_nodes_hotplug);
240 } else {
241 if (node_isset(node, movablemem_map.numa_nodes_hotplug))
242 /*
243 * Insert the range if we already have movable ranges
244 * on the same node.
245 */
246 insert_movablemem_map(start_pfn, end_pfn);
247 }
248out:
249 return;
250}
251#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
252static inline void
253handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
254{
255}
256#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
257
144/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ 258/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
145int __init 259int __init
146acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) 260acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
147{ 261{
148 u64 start, end; 262 u64 start, end;
263 u32 hotpluggable;
149 int node, pxm; 264 int node, pxm;
150 265
151 if (srat_disabled()) 266 if (srat_disabled())
152 return -1; 267 goto out_err;
153 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) { 268 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
154 bad_srat(); 269 goto out_err_bad_srat;
155 return -1;
156 }
157 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) 270 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
158 return -1; 271 goto out_err;
272 hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
273 if (hotpluggable && !save_add_info())
274 goto out_err;
159 275
160 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
161 return -1;
162 start = ma->base_address; 276 start = ma->base_address;
163 end = start + ma->length; 277 end = start + ma->length;
164 pxm = ma->proximity_domain; 278 pxm = ma->proximity_domain;
165 if (acpi_srat_revision <= 1) 279 if (acpi_srat_revision <= 1)
166 pxm &= 0xff; 280 pxm &= 0xff;
281
167 node = setup_node(pxm); 282 node = setup_node(pxm);
168 if (node < 0) { 283 if (node < 0) {
169 printk(KERN_ERR "SRAT: Too many proximity domains.\n"); 284 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
170 bad_srat(); 285 goto out_err_bad_srat;
171 return -1;
172 } 286 }
173 287
174 if (numa_add_memblk(node, start, end) < 0) { 288 if (numa_add_memblk(node, start, end) < 0)
175 bad_srat(); 289 goto out_err_bad_srat;
176 return -1;
177 }
178 290
179 node_set(node, numa_nodes_parsed); 291 node_set(node, numa_nodes_parsed);
180 292
181 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", 293 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
182 node, pxm, 294 node, pxm,
183 (unsigned long long) start, (unsigned long long) end - 1); 295 (unsigned long long) start, (unsigned long long) end - 1,
296 hotpluggable ? "Hot Pluggable": "");
297
298 handle_movablemem(node, start, end, hotpluggable);
299
184 return 0; 300 return 0;
301out_err_bad_srat:
302 bad_srat();
303out_err:
304 return -1;
185} 305}
186 306
187void __init acpi_numa_arch_fixup(void) {} 307void __init acpi_numa_arch_fixup(void) {}
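Note on handle_movablemem() above: hotpluggable SRAT ranges that overlap anything already in memblock.reserved are skipped (the loop only continues when end <= base or start >= base + size), so memory carrying the kernel image never becomes hotpluggable. A sketch of that half-open range overlap test, with hypothetical numbers:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Half-open ranges [start, end): same test the hunk above applies to
     * each memblock.reserved region.
     */
    static bool ranges_overlap(uint64_t start, uint64_t end,
                               uint64_t base, uint64_t size)
    {
            return !(end <= base || start >= base + size);
    }

    int main(void)
    {
            /* Hypothetical reserved region standing in for the kernel image. */
            uint64_t krn_base = 0x01000000ULL, krn_size = 0x02000000ULL;

            /* Two hypothetical hotpluggable SRAT ranges. */
            uint64_t lo_start = 0x000000000ULL, lo_end = 0x100000000ULL;
            uint64_t hi_start = 0x100000000ULL, hi_end = 0x200000000ULL;

            printf("low range : %s\n",
                   ranges_overlap(lo_start, lo_end, krn_base, krn_size)
                   ? "overlaps the kernel image, keep it un-hotpluggable"
                   : "no overlap, may become ZONE_MOVABLE");
            printf("high range: %s\n",
                   ranges_overlap(hi_start, hi_end, krn_base, krn_size)
                   ? "overlaps the kernel image, keep it un-hotpluggable"
                   : "no overlap, may become ZONE_MOVABLE");
            return 0;
    }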
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 13a6b29e2e5d..282375f13c7e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -335,7 +335,7 @@ static const struct file_operations fops_tlbflush = {
335 .llseek = default_llseek, 335 .llseek = default_llseek,
336}; 336};
337 337
338static int __cpuinit create_tlb_flushall_shift(void) 338static int __init create_tlb_flushall_shift(void)
339{ 339{
340 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, 340 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
341 arch_debugfs_dir, NULL, &fops_tlbflush); 341 arch_debugfs_dir, NULL, &fops_tlbflush);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d11a47099d33..3cbe45381bbb 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,6 +1,6 @@
1/* bpf_jit_comp.c : BPF JIT compiler 1/* bpf_jit_comp.c : BPF JIT compiler
2 * 2 *
3 * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) 3 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License 6 * modify it under the terms of the GNU General Public License
@@ -124,6 +124,26 @@ static inline void bpf_flush_icache(void *start, void *end)
124#define CHOOSE_LOAD_FUNC(K, func) \ 124#define CHOOSE_LOAD_FUNC(K, func) \
125 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) 125 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
126 126
127/* Helper to find the offset of pkt_type in sk_buff
 128 * We want to make sure it's still a 3-bit field starting at a byte boundary.
129 */
130#define PKT_TYPE_MAX 7
131static int pkt_type_offset(void)
132{
133 struct sk_buff skb_probe = {
134 .pkt_type = ~0,
135 };
136 char *ct = (char *)&skb_probe;
137 unsigned int off;
138
139 for (off = 0; off < sizeof(struct sk_buff); off++) {
140 if (ct[off] == PKT_TYPE_MAX)
141 return off;
142 }
143 pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
144 return -1;
145}
146
127void bpf_jit_compile(struct sk_filter *fp) 147void bpf_jit_compile(struct sk_filter *fp)
128{ 148{
129 u8 temp[64]; 149 u8 temp[64];
@@ -216,6 +236,7 @@ void bpf_jit_compile(struct sk_filter *fp)
216 case BPF_S_ANC_VLAN_TAG: 236 case BPF_S_ANC_VLAN_TAG:
217 case BPF_S_ANC_VLAN_TAG_PRESENT: 237 case BPF_S_ANC_VLAN_TAG_PRESENT:
218 case BPF_S_ANC_QUEUE: 238 case BPF_S_ANC_QUEUE:
239 case BPF_S_ANC_PKTTYPE:
219 case BPF_S_LD_W_ABS: 240 case BPF_S_LD_W_ABS:
220 case BPF_S_LD_H_ABS: 241 case BPF_S_LD_H_ABS:
221 case BPF_S_LD_B_ABS: 242 case BPF_S_LD_B_ABS:
@@ -536,6 +557,23 @@ void bpf_jit_compile(struct sk_filter *fp)
536 EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */ 557 EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */
537 } 558 }
538 break; 559 break;
560 case BPF_S_ANC_PKTTYPE:
561 {
562 int off = pkt_type_offset();
563
564 if (off < 0)
565 goto out;
566 if (is_imm8(off)) {
567 /* movzbl off8(%rdi),%eax */
568 EMIT4(0x0f, 0xb6, 0x47, off);
569 } else {
 570 /* movzbl off32(%rdi),%eax */
571 EMIT3(0x0f, 0xb6, 0x87);
572 EMIT(off, 4);
573 }
574 EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */
575 break;
576 }
539 case BPF_S_LD_W_ABS: 577 case BPF_S_LD_W_ABS:
540 func = CHOOSE_LOAD_FUNC(K, sk_load_word); 578 func = CHOOSE_LOAD_FUNC(K, sk_load_word);
541common_load: seen |= SEEN_DATAREF; 579common_load: seen |= SEEN_DATAREF;
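Note on pkt_type_offset() above: it discovers where sk_buff's pkt_type bitfield lives by zero-initializing a probe skb, setting pkt_type to all ones, and scanning for the byte equal to PKT_TYPE_MAX; the JIT then emits a movzbl from that offset followed by an and with 0x7. The same probing trick on a made-up struct, as a runnable sketch (field layout assumes a typical little-endian GCC ABI where the first bitfield starts at a byte boundary):

    #include <stdio.h>

    /* Made-up struct standing in for sk_buff; 'kind' plays the role of
     * pkt_type and, like pkt_type, is assumed to start at a byte boundary.
     */
    struct probe_demo {
            long header[3];
            unsigned int kind:3,    /* the field whose byte offset we want */
                         other:5;
            long trailer[2];
    };

    #define KIND_MAX 7      /* all ones for a 3-bit field */

    static int kind_offset(void)
    {
            struct probe_demo probe = { .kind = ~0 };   /* everything else 0 */
            const unsigned char *bytes = (const unsigned char *)&probe;
            unsigned int off;

            for (off = 0; off < sizeof(probe); off++) {
                    if (bytes[off] == KIND_MAX)
                            return (int)off;    /* byte holding exactly the field */
            }
            return -1;      /* field no longer sits alone at a byte boundary */
    }

    int main(void)
    {
            printf("kind lives at byte offset %d\n", kind_offset());
            return 0;
    }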
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 3d49094ed3e8..3e724256dbee 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -145,7 +145,7 @@ void __init pci_acpi_crs_quirks(void)
145} 145}
146 146
147#ifdef CONFIG_PCI_MMCONFIG 147#ifdef CONFIG_PCI_MMCONFIG
148static int __devinit check_segment(u16 seg, struct device *dev, char *estr) 148static int check_segment(u16 seg, struct device *dev, char *estr)
149{ 149{
150 if (seg) { 150 if (seg) {
151 dev_err(dev, 151 dev_err(dev,
@@ -168,9 +168,8 @@ static int __devinit check_segment(u16 seg, struct device *dev, char *estr)
168 return 0; 168 return 0;
169} 169}
170 170
171static int __devinit setup_mcfg_map(struct pci_root_info *info, 171static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start,
172 u16 seg, u8 start, u8 end, 172 u8 end, phys_addr_t addr)
173 phys_addr_t addr)
174{ 173{
175 int result; 174 int result;
176 struct device *dev = &info->bridge->dev; 175 struct device *dev = &info->bridge->dev;
@@ -208,7 +207,7 @@ static void teardown_mcfg_map(struct pci_root_info *info)
208 } 207 }
209} 208}
210#else 209#else
211static int __devinit setup_mcfg_map(struct pci_root_info *info, 210static int setup_mcfg_map(struct pci_root_info *info,
212 u16 seg, u8 start, u8 end, 211 u16 seg, u8 start, u8 end,
213 phys_addr_t addr) 212 phys_addr_t addr)
214{ 213{
@@ -474,7 +473,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
474 info); 473 info);
475} 474}
476 475
477struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) 476struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
478{ 477{
479 struct acpi_device *device = root->device; 478 struct acpi_device *device = root->device;
480 struct pci_root_info *info = NULL; 479 struct pci_root_info *info = NULL;
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index d37e2fec97e5..c2735feb2508 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -93,8 +93,8 @@ struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max,
93 return info; 93 return info;
94} 94}
95 95
96void __devinit update_res(struct pci_root_info *info, resource_size_t start, 96void update_res(struct pci_root_info *info, resource_size_t start,
97 resource_size_t end, unsigned long flags, int merge) 97 resource_size_t end, unsigned long flags, int merge)
98{ 98{
99 struct resource *res; 99 struct resource *res;
100 struct pci_root_res *root_res; 100 struct pci_root_res *root_res;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 505731b139f4..901177d75ff5 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -80,14 +80,14 @@ struct pci_ops pci_root_ops = {
80 */ 80 */
81DEFINE_RAW_SPINLOCK(pci_config_lock); 81DEFINE_RAW_SPINLOCK(pci_config_lock);
82 82
83static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) 83static int can_skip_ioresource_align(const struct dmi_system_id *d)
84{ 84{
85 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; 85 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
86 printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident); 86 printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
87 return 0; 87 return 0;
88} 88}
89 89
90static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitconst = { 90static const struct dmi_system_id can_skip_pciprobe_dmi_table[] = {
91/* 91/*
92 * Systems where PCI IO resource ISA alignment can be skipped 92 * Systems where PCI IO resource ISA alignment can be skipped
93 * when the ISA enable bit in the bridge control is not set 93 * when the ISA enable bit in the bridge control is not set
@@ -124,7 +124,7 @@ void __init dmi_check_skip_isa_align(void)
124 dmi_check_system(can_skip_pciprobe_dmi_table); 124 dmi_check_system(can_skip_pciprobe_dmi_table);
125} 125}
126 126
127static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) 127static void pcibios_fixup_device_resources(struct pci_dev *dev)
128{ 128{
129 struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; 129 struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
130 struct resource *bar_r; 130 struct resource *bar_r;
@@ -161,7 +161,7 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
161 * are examined. 161 * are examined.
162 */ 162 */
163 163
164void __devinit pcibios_fixup_bus(struct pci_bus *b) 164void pcibios_fixup_bus(struct pci_bus *b)
165{ 165{
166 struct pci_dev *dev; 166 struct pci_dev *dev;
167 167
@@ -175,7 +175,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *b)
175 * on the kernel command line (which was parsed earlier). 175 * on the kernel command line (which was parsed earlier).
176 */ 176 */
177 177
178static int __devinit set_bf_sort(const struct dmi_system_id *d) 178static int set_bf_sort(const struct dmi_system_id *d)
179{ 179{
180 if (pci_bf_sort == pci_bf_sort_default) { 180 if (pci_bf_sort == pci_bf_sort_default) {
181 pci_bf_sort = pci_dmi_bf; 181 pci_bf_sort = pci_dmi_bf;
@@ -184,7 +184,7 @@ static int __devinit set_bf_sort(const struct dmi_system_id *d)
184 return 0; 184 return 0;
185} 185}
186 186
187static void __devinit read_dmi_type_b1(const struct dmi_header *dm, 187static void read_dmi_type_b1(const struct dmi_header *dm,
188 void *private_data) 188 void *private_data)
189{ 189{
190 u8 *d = (u8 *)dm + 4; 190 u8 *d = (u8 *)dm + 4;
@@ -206,7 +206,7 @@ static void __devinit read_dmi_type_b1(const struct dmi_header *dm,
206 } 206 }
207} 207}
208 208
209static int __devinit find_sort_method(const struct dmi_system_id *d) 209static int find_sort_method(const struct dmi_system_id *d)
210{ 210{
211 dmi_walk(read_dmi_type_b1, NULL); 211 dmi_walk(read_dmi_type_b1, NULL);
212 212
@@ -221,7 +221,7 @@ static int __devinit find_sort_method(const struct dmi_system_id *d)
221 * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) 221 * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus)
222 */ 222 */
223#ifdef __i386__ 223#ifdef __i386__
224static int __devinit assign_all_busses(const struct dmi_system_id *d) 224static int assign_all_busses(const struct dmi_system_id *d)
225{ 225{
226 pci_probe |= PCI_ASSIGN_ALL_BUSSES; 226 pci_probe |= PCI_ASSIGN_ALL_BUSSES;
227 printk(KERN_INFO "%s detected: enabling PCI bus# renumbering" 227 printk(KERN_INFO "%s detected: enabling PCI bus# renumbering"
@@ -230,7 +230,7 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)
230} 230}
231#endif 231#endif
232 232
233static int __devinit set_scan_all(const struct dmi_system_id *d) 233static int set_scan_all(const struct dmi_system_id *d)
234{ 234{
235 printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n", 235 printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n",
236 d->ident); 236 d->ident);
@@ -238,7 +238,7 @@ static int __devinit set_scan_all(const struct dmi_system_id *d)
238 return 0; 238 return 0;
239} 239}
240 240
241static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { 241static const struct dmi_system_id pciprobe_dmi_table[] = {
242#ifdef __i386__ 242#ifdef __i386__
243/* 243/*
244 * Laptops which need pci=assign-busses to see Cardbus cards 244 * Laptops which need pci=assign-busses to see Cardbus cards
@@ -445,7 +445,7 @@ void __init dmi_check_pciprobe(void)
445 dmi_check_system(pciprobe_dmi_table); 445 dmi_check_system(pciprobe_dmi_table);
446} 446}
447 447
448struct pci_bus * __devinit pcibios_scan_root(int busnum) 448struct pci_bus *pcibios_scan_root(int busnum)
449{ 449{
450 struct pci_bus *bus = NULL; 450 struct pci_bus *bus = NULL;
451 451
@@ -664,7 +664,7 @@ int pci_ext_cfg_avail(void)
664 return 0; 664 return 0;
665} 665}
666 666
667struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) 667struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
668{ 668{
669 LIST_HEAD(resources); 669 LIST_HEAD(resources);
670 struct pci_bus *bus = NULL; 670 struct pci_bus *bus = NULL;
@@ -692,7 +692,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops,
692 return bus; 692 return bus;
693} 693}
694 694
695struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno) 695struct pci_bus *pci_scan_bus_with_sysdata(int busno)
696{ 696{
697 return pci_scan_bus_on_node(busno, &pci_root_ops, -1); 697 return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
698} 698}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index af8a224db216..f5809fa2753e 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -9,7 +9,7 @@
9#include <linux/vgaarb.h> 9#include <linux/vgaarb.h>
10#include <asm/pci_x86.h> 10#include <asm/pci_x86.h>
11 11
12static void __devinit pci_fixup_i450nx(struct pci_dev *d) 12static void pci_fixup_i450nx(struct pci_dev *d)
13{ 13{
14 /* 14 /*
15 * i450NX -- Find and scan all secondary buses on all PXB's. 15 * i450NX -- Find and scan all secondary buses on all PXB's.
@@ -34,7 +34,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
34} 34}
35DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); 35DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
36 36
37static void __devinit pci_fixup_i450gx(struct pci_dev *d) 37static void pci_fixup_i450gx(struct pci_dev *d)
38{ 38{
39 /* 39 /*
40 * i450GX and i450KX -- Find and scan all secondary buses. 40 * i450GX and i450KX -- Find and scan all secondary buses.
@@ -48,7 +48,7 @@ static void __devinit pci_fixup_i450gx(struct pci_dev *d)
48} 48}
49DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); 49DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx);
50 50
51static void __devinit pci_fixup_umc_ide(struct pci_dev *d) 51static void pci_fixup_umc_ide(struct pci_dev *d)
52{ 52{
53 /* 53 /*
54 * UM8886BF IDE controller sets region type bits incorrectly, 54 * UM8886BF IDE controller sets region type bits incorrectly,
@@ -62,7 +62,7 @@ static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
62} 62}
63DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide); 63DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide);
64 64
65static void __devinit pci_fixup_ncr53c810(struct pci_dev *d) 65static void pci_fixup_ncr53c810(struct pci_dev *d)
66{ 66{
67 /* 67 /*
68 * NCR 53C810 returns class code 0 (at least on some systems). 68 * NCR 53C810 returns class code 0 (at least on some systems).
@@ -75,7 +75,7 @@ static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
75} 75}
76DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810); 76DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810);
77 77
78static void __devinit pci_fixup_latency(struct pci_dev *d) 78static void pci_fixup_latency(struct pci_dev *d)
79{ 79{
80 /* 80 /*
81 * SiS 5597 and 5598 chipsets require latency timer set to 81 * SiS 5597 and 5598 chipsets require latency timer set to
@@ -87,7 +87,7 @@ static void __devinit pci_fixup_latency(struct pci_dev *d)
87DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency); 87DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency);
88DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency); 88DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency);
89 89
90static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d) 90static void pci_fixup_piix4_acpi(struct pci_dev *d)
91{ 91{
92 /* 92 /*
93 * PIIX4 ACPI device: hardwired IRQ9 93 * PIIX4 ACPI device: hardwired IRQ9
@@ -163,7 +163,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_
163 * system to PCI bus no matter what are their window settings, so they are 163 * system to PCI bus no matter what are their window settings, so they are
164 * "transparent" (or subtractive decoding) from programmers point of view. 164 * "transparent" (or subtractive decoding) from programmers point of view.
165 */ 165 */
166static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) 166static void pci_fixup_transparent_bridge(struct pci_dev *dev)
167{ 167{
168 if ((dev->device & 0xff00) == 0x2400) 168 if ((dev->device & 0xff00) == 0x2400)
169 dev->transparent = 1; 169 dev->transparent = 1;
@@ -317,7 +317,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_r
317 * video device at this point. 317 * video device at this point.
318 */ 318 */
319 319
320static void __devinit pci_fixup_video(struct pci_dev *pdev) 320static void pci_fixup_video(struct pci_dev *pdev)
321{ 321{
322 struct pci_dev *bridge; 322 struct pci_dev *bridge;
323 struct pci_bus *bus; 323 struct pci_bus *bus;
@@ -357,7 +357,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
357 PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); 357 PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
358 358
359 359
360static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { 360static const struct dmi_system_id msi_k8t_dmi_table[] = {
361 { 361 {
362 .ident = "MSI-K8T-Neo2Fir", 362 .ident = "MSI-K8T-Neo2Fir",
363 .matches = { 363 .matches = {
@@ -378,7 +378,7 @@ static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
 378 * The soundcard is only enabled, if the mainboard is identified 378 * The soundcard is only enabled, if the mainboard is identified
379 * via DMI-tables and the soundcard is detected to be off. 379 * via DMI-tables and the soundcard is detected to be off.
380 */ 380 */
381static void __devinit pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev) 381static void pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev)
382{ 382{
383 unsigned char val; 383 unsigned char val;
384 if (!dmi_check_system(msi_k8t_dmi_table)) 384 if (!dmi_check_system(msi_k8t_dmi_table))
@@ -414,7 +414,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
414 */ 414 */
415static u16 toshiba_line_size; 415static u16 toshiba_line_size;
416 416
417static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] = { 417static const struct dmi_system_id toshiba_ohci1394_dmi_table[] = {
418 { 418 {
419 .ident = "Toshiba PS5 based laptop", 419 .ident = "Toshiba PS5 based laptop",
420 .matches = { 420 .matches = {
@@ -439,7 +439,7 @@ static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] =
439 { } 439 { }
440}; 440};
441 441
442static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) 442static void pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)
443{ 443{
444 if (!dmi_check_system(toshiba_ohci1394_dmi_table)) 444 if (!dmi_check_system(toshiba_ohci1394_dmi_table))
445 return; /* only applies to certain Toshibas (so far) */ 445 return; /* only applies to certain Toshibas (so far) */
@@ -450,7 +450,7 @@ static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)
450DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032, 450DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032,
451 pci_pre_fixup_toshiba_ohci1394); 451 pci_pre_fixup_toshiba_ohci1394);
452 452
453static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) 453static void pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev)
454{ 454{
455 if (!dmi_check_system(toshiba_ohci1394_dmi_table)) 455 if (!dmi_check_system(toshiba_ohci1394_dmi_table))
456 return; /* only applies to certain Toshibas (so far) */ 456 return; /* only applies to certain Toshibas (so far) */
@@ -488,7 +488,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY,
488 * Siemens Nixdorf AG FSC Multiprocessor Interrupt Controller: 488 * Siemens Nixdorf AG FSC Multiprocessor Interrupt Controller:
489 * prevent update of the BAR0, which doesn't look like a normal BAR. 489 * prevent update of the BAR0, which doesn't look like a normal BAR.
490 */ 490 */
491static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) 491static void pci_siemens_interrupt_controller(struct pci_dev *dev)
492{ 492{
493 dev->resource[0].flags |= IORESOURCE_PCI_FIXED; 493 dev->resource[0].flags |= IORESOURCE_PCI_FIXED;
494} 494}
@@ -531,7 +531,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
531 * 531 *
532 * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor) 532 * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor)
533 */ 533 */
534static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev) 534static void twinhead_reserve_killing_zone(struct pci_dev *dev)
535{ 535{
536 if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) { 536 if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) {
537 pr_info("Reserving memory on Twinhead H12Y\n"); 537 pr_info("Reserving memory on Twinhead H12Y\n");
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index a9e83083fb85..4db96fb1c232 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -10,7 +10,7 @@
10 * Discover remaining PCI buses in case there are peer host bridges. 10 * Discover remaining PCI buses in case there are peer host bridges.
11 * We use the number of last PCI bus provided by the PCI BIOS. 11 * We use the number of last PCI bus provided by the PCI BIOS.
12 */ 12 */
13static void __devinit pcibios_fixup_peer_bridges(void) 13static void pcibios_fixup_peer_bridges(void)
14{ 14{
15 int n; 15 int n;
16 16
@@ -34,7 +34,7 @@ int __init pci_legacy_init(void)
34 return 0; 34 return 0;
35} 35}
36 36
37void __devinit pcibios_scan_specific_bus(int busn) 37void pcibios_scan_specific_bus(int busn)
38{ 38{
39 int devfn; 39 int devfn;
40 long node; 40 long node;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 704b9ec043d7..082e88129712 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -49,7 +49,7 @@ static __init void free_all_mmcfg(void)
49 pci_mmconfig_remove(cfg); 49 pci_mmconfig_remove(cfg);
50} 50}
51 51
52static __devinit void list_add_sorted(struct pci_mmcfg_region *new) 52static void list_add_sorted(struct pci_mmcfg_region *new)
53{ 53{
54 struct pci_mmcfg_region *cfg; 54 struct pci_mmcfg_region *cfg;
55 55
@@ -65,9 +65,8 @@ static __devinit void list_add_sorted(struct pci_mmcfg_region *new)
65 list_add_tail_rcu(&new->list, &pci_mmcfg_list); 65 list_add_tail_rcu(&new->list, &pci_mmcfg_list);
66} 66}
67 67
68static __devinit struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, 68static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
69 int start, 69 int end, u64 addr)
70 int end, u64 addr)
71{ 70{
72 struct pci_mmcfg_region *new; 71 struct pci_mmcfg_region *new;
73 struct resource *res; 72 struct resource *res;
@@ -371,8 +370,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
371 return !list_empty(&pci_mmcfg_list); 370 return !list_empty(&pci_mmcfg_list);
372} 371}
373 372
374static acpi_status __devinit check_mcfg_resource(struct acpi_resource *res, 373static acpi_status check_mcfg_resource(struct acpi_resource *res, void *data)
375 void *data)
376{ 374{
377 struct resource *mcfg_res = data; 375 struct resource *mcfg_res = data;
378 struct acpi_resource_address64 address; 376 struct acpi_resource_address64 address;
@@ -408,8 +406,8 @@ static acpi_status __devinit check_mcfg_resource(struct acpi_resource *res,
408 return AE_OK; 406 return AE_OK;
409} 407}
410 408
411static acpi_status __devinit find_mboard_resource(acpi_handle handle, u32 lvl, 409static acpi_status find_mboard_resource(acpi_handle handle, u32 lvl,
412 void *context, void **rv) 410 void *context, void **rv)
413{ 411{
414 struct resource *mcfg_res = context; 412 struct resource *mcfg_res = context;
415 413
@@ -422,7 +420,7 @@ static acpi_status __devinit find_mboard_resource(acpi_handle handle, u32 lvl,
422 return AE_OK; 420 return AE_OK;
423} 421}
424 422
425static int __devinit is_acpi_reserved(u64 start, u64 end, unsigned not_used) 423static int is_acpi_reserved(u64 start, u64 end, unsigned not_used)
426{ 424{
427 struct resource mcfg_res; 425 struct resource mcfg_res;
428 426
@@ -550,8 +548,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
550 if (cfg->address < 0xFFFFFFFF) 548 if (cfg->address < 0xFFFFFFFF)
551 return 0; 549 return 0;
552 550
553 if (!strcmp(mcfg->header.oem_id, "SGI") || 551 if (!strncmp(mcfg->header.oem_id, "SGI", 3))
554 !strcmp(mcfg->header.oem_id, "SGI2"))
555 return 0; 552 return 0;
556 553
557 if (mcfg->header.revision >= 1) { 554 if (mcfg->header.revision >= 1) {
@@ -693,9 +690,8 @@ static int __init pci_mmcfg_late_insert_resources(void)
693late_initcall(pci_mmcfg_late_insert_resources); 690late_initcall(pci_mmcfg_late_insert_resources);
694 691
695/* Add MMCFG information for host bridges */ 692/* Add MMCFG information for host bridges */
696int __devinit pci_mmconfig_insert(struct device *dev, 693int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
697 u16 seg, u8 start, u8 end, 694 phys_addr_t addr)
698 phys_addr_t addr)
699{ 695{
700 int rc; 696 int rc;
701 struct resource *tmp = NULL; 697 struct resource *tmp = NULL;
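Note on the acpi_mcfg_check_entry() change above: two exact strcmp() matches become a single strncmp(oem_id, "SGI", 3), which treats any OEM ID starting with "SGI" (so both "SGI" and "SGI2") as SGI and never reads past the fixed-width header field, which is not guaranteed to be NUL-terminated. A quick userspace illustration with made-up OEM ID values:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* ACPI header OEM IDs are fixed-width fields rather than C
             * strings; the values here are illustrative.
             */
            const char oem_a[6] = { 'S', 'G', 'I', ' ', ' ', ' ' };
            const char oem_b[6] = { 'S', 'G', 'I', '2', ' ', ' ' };
            const char oem_c[6] = { 'I', 'N', 'T', 'E', 'L', ' ' };

            const char *ids[] = { oem_a, oem_b, oem_c };
            for (int i = 0; i < 3; i++)
                    printf("%.6s -> %s\n", ids[i],
                           !strncmp(ids[i], "SGI", 3) ? "matched" : "not matched");
            return 0;
    }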
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index db63ac23e3d9..5c90975cdf0f 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -142,7 +142,7 @@ void __init pci_mmcfg_arch_free(void)
142{ 142{
143} 143}
144 144
145int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) 145int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
146{ 146{
147 return 0; 147 return 0;
148} 148}
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index d4ebd07c306d..bea52496aea6 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -95,7 +95,7 @@ const struct pci_raw_ops pci_mmcfg = {
95 .write = pci_mmcfg_write, 95 .write = pci_mmcfg_write,
96}; 96};
97 97
98static void __iomem * __devinit mcfg_ioremap(struct pci_mmcfg_region *cfg) 98static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg)
99{ 99{
100 void __iomem *addr; 100 void __iomem *addr;
101 u64 start, size; 101 u64 start, size;
@@ -133,7 +133,7 @@ void __init pci_mmcfg_arch_free(void)
133 pci_mmcfg_arch_unmap(cfg); 133 pci_mmcfg_arch_unmap(cfg);
134} 134}
135 135
136int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) 136int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
137{ 137{
138 cfg->virt = mcfg_ioremap(cfg); 138 cfg->virt = mcfg_ioremap(cfg);
139 if (!cfg->virt) { 139 if (!cfg->virt) {
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index e14a2ff708b5..6eb18c42a28a 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -247,7 +247,7 @@ int __init pci_mrst_init(void)
247/* Langwell devices are not true pci devices, they are not subject to 10 ms 247/* Langwell devices are not true pci devices, they are not subject to 10 ms
248 * d3 to d0 delay required by pci spec. 248 * d3 to d0 delay required by pci spec.
249 */ 249 */
250static void __devinit pci_d3delay_fixup(struct pci_dev *dev) 250static void pci_d3delay_fixup(struct pci_dev *dev)
251{ 251{
252 /* PCI fixups are effectively decided compile time. If we have a dual 252 /* PCI fixups are effectively decided compile time. If we have a dual
253 SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */ 253 SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */
@@ -262,7 +262,7 @@ static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
262} 262}
263DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); 263DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
264 264
265static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev) 265static void mrst_power_off_unused_dev(struct pci_dev *dev)
266{ 266{
267 pci_set_power_state(dev, PCI_D3hot); 267 pci_set_power_state(dev, PCI_D3hot);
268} 268}
@@ -275,7 +275,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev);
275/* 275/*
276 * Langwell devices reside at fixed offsets, don't try to move them. 276 * Langwell devices reside at fixed offsets, don't try to move them.
277 */ 277 */
278static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) 278static void pci_fixed_bar_fixup(struct pci_dev *dev)
279{ 279{
280 unsigned long offset; 280 unsigned long offset;
281 u32 size; 281 u32 size;
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 00edfe652b72..72c229f9ebcf 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -116,7 +116,7 @@ static const struct pci_raw_ops pci_direct_conf1_mq = {
116}; 116};
117 117
118 118
119static void __devinit pci_fixup_i450nx(struct pci_dev *d) 119static void pci_fixup_i450nx(struct pci_dev *d)
120{ 120{
121 /* 121 /*
122 * i450NX -- Find and scan all secondary buses on all PXB's. 122 * i450NX -- Find and scan all secondary buses on all PXB's.
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index da8fe0535ff4..c77b24a8b2da 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -124,7 +124,7 @@ static struct {
124 124
125static int pci_bios_present; 125static int pci_bios_present;
126 126
127static int __devinit check_pcibios(void) 127static int check_pcibios(void)
128{ 128{
129 u32 signature, eax, ebx, ecx; 129 u32 signature, eax, ebx, ecx;
130 u8 status, major_ver, minor_ver, hw_mech; 130 u8 status, major_ver, minor_ver, hw_mech;
@@ -312,7 +312,7 @@ static const struct pci_raw_ops pci_bios_access = {
312 * Try to find PCI BIOS. 312 * Try to find PCI BIOS.
313 */ 313 */
314 314
315static const struct pci_raw_ops * __devinit pci_find_bios(void) 315static const struct pci_raw_ops *pci_find_bios(void)
316{ 316{
317 union bios32 *check; 317 union bios32 *check;
318 unsigned char sum; 318 unsigned char sum;
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 8d874396cb29..01e0231a113e 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -2,10 +2,12 @@
2obj-y += ce4100/ 2obj-y += ce4100/
3obj-y += efi/ 3obj-y += efi/
4obj-y += geode/ 4obj-y += geode/
5obj-y += goldfish/
5obj-y += iris/ 6obj-y += iris/
6obj-y += mrst/ 7obj-y += mrst/
7obj-y += olpc/ 8obj-y += olpc/
8obj-y += scx200/ 9obj-y += scx200/
9obj-y += sfi/ 10obj-y += sfi/
11obj-y += ts5500/
10obj-y += visws/ 12obj-y += visws/
11obj-y += uv/ 13obj-y += uv/
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d9c1b95af17c..7145ec63c520 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -11,20 +11,21 @@
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
12 */ 12 */
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/init.h>
14#include <linux/acpi.h> 15#include <linux/acpi.h>
15#include <linux/efi.h> 16#include <linux/efi.h>
16#include <linux/efi-bgrt.h> 17#include <linux/efi-bgrt.h>
17 18
18struct acpi_table_bgrt *bgrt_tab; 19struct acpi_table_bgrt *bgrt_tab;
19void *bgrt_image; 20void *__initdata bgrt_image;
20size_t bgrt_image_size; 21size_t __initdata bgrt_image_size;
21 22
22struct bmp_header { 23struct bmp_header {
23 u16 id; 24 u16 id;
24 u32 size; 25 u32 size;
25} __packed; 26} __packed;
26 27
27void efi_bgrt_init(void) 28void __init efi_bgrt_init(void)
28{ 29{
29 acpi_status status; 30 acpi_status status;
30 void __iomem *image; 31 void __iomem *image;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad4439145f85..70b2a3a305d6 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,9 +51,6 @@
51 51
52#define EFI_DEBUG 1 52#define EFI_DEBUG 1
53 53
54int efi_enabled;
55EXPORT_SYMBOL(efi_enabled);
56
57struct efi __read_mostly efi = { 54struct efi __read_mostly efi = {
58 .mps = EFI_INVALID_TABLE_ADDR, 55 .mps = EFI_INVALID_TABLE_ADDR,
59 .acpi = EFI_INVALID_TABLE_ADDR, 56 .acpi = EFI_INVALID_TABLE_ADDR,
@@ -69,19 +66,28 @@ EXPORT_SYMBOL(efi);
69 66
70struct efi_memory_map memmap; 67struct efi_memory_map memmap;
71 68
72bool efi_64bit;
73
74static struct efi efi_phys __initdata; 69static struct efi efi_phys __initdata;
75static efi_system_table_t efi_systab __initdata; 70static efi_system_table_t efi_systab __initdata;
76 71
77static inline bool efi_is_native(void) 72static inline bool efi_is_native(void)
78{ 73{
79 return IS_ENABLED(CONFIG_X86_64) == efi_64bit; 74 return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
75}
76
77unsigned long x86_efi_facility;
78
79/*
80 * Returns 1 if 'facility' is enabled, 0 otherwise.
81 */
82int efi_enabled(int facility)
83{
84 return test_bit(facility, &x86_efi_facility) != 0;
80} 85}
86EXPORT_SYMBOL(efi_enabled);
81 87
82static int __init setup_noefi(char *arg) 88static int __init setup_noefi(char *arg)
83{ 89{
84 efi_enabled = 0; 90 clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
85 return 0; 91 return 0;
86} 92}
87early_param("noefi", setup_noefi); 93early_param("noefi", setup_noefi);
@@ -410,8 +416,8 @@ void __init efi_reserve_boot_services(void)
410 * - Not within any part of the kernel 416 * - Not within any part of the kernel
411 * - Not the bios reserved area 417 * - Not the bios reserved area
412 */ 418 */
413 if ((start+size >= virt_to_phys(_text) 419 if ((start+size >= __pa_symbol(_text)
414 && start <= virt_to_phys(_end)) || 420 && start <= __pa_symbol(_end)) ||
415 !e820_all_mapped(start, start+size, E820_RAM) || 421 !e820_all_mapped(start, start+size, E820_RAM) ||
416 memblock_is_region_reserved(start, size)) { 422 memblock_is_region_reserved(start, size)) {
417 /* Could not reserve, skip it */ 423 /* Could not reserve, skip it */
@@ -426,6 +432,7 @@ void __init efi_reserve_boot_services(void)
426 432
427void __init efi_unmap_memmap(void) 433void __init efi_unmap_memmap(void)
428{ 434{
435 clear_bit(EFI_MEMMAP, &x86_efi_facility);
429 if (memmap.map) { 436 if (memmap.map) {
430 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); 437 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
431 memmap.map = NULL; 438 memmap.map = NULL;
@@ -460,7 +467,7 @@ void __init efi_free_boot_services(void)
460 467
461static int __init efi_systab_init(void *phys) 468static int __init efi_systab_init(void *phys)
462{ 469{
463 if (efi_64bit) { 470 if (efi_enabled(EFI_64BIT)) {
464 efi_system_table_64_t *systab64; 471 efi_system_table_64_t *systab64;
465 u64 tmp = 0; 472 u64 tmp = 0;
466 473
@@ -552,7 +559,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
552 void *config_tables, *tablep; 559 void *config_tables, *tablep;
553 int i, sz; 560 int i, sz;
554 561
555 if (efi_64bit) 562 if (efi_enabled(EFI_64BIT))
556 sz = sizeof(efi_config_table_64_t); 563 sz = sizeof(efi_config_table_64_t);
557 else 564 else
558 sz = sizeof(efi_config_table_32_t); 565 sz = sizeof(efi_config_table_32_t);
@@ -572,7 +579,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
572 efi_guid_t guid; 579 efi_guid_t guid;
573 unsigned long table; 580 unsigned long table;
574 581
575 if (efi_64bit) { 582 if (efi_enabled(EFI_64BIT)) {
576 u64 table64; 583 u64 table64;
577 guid = ((efi_config_table_64_t *)tablep)->guid; 584 guid = ((efi_config_table_64_t *)tablep)->guid;
578 table64 = ((efi_config_table_64_t *)tablep)->table; 585 table64 = ((efi_config_table_64_t *)tablep)->table;
@@ -684,7 +691,6 @@ void __init efi_init(void)
684 if (boot_params.efi_info.efi_systab_hi || 691 if (boot_params.efi_info.efi_systab_hi ||
685 boot_params.efi_info.efi_memmap_hi) { 692 boot_params.efi_info.efi_memmap_hi) {
686 pr_info("Table located above 4GB, disabling EFI.\n"); 693 pr_info("Table located above 4GB, disabling EFI.\n");
687 efi_enabled = 0;
688 return; 694 return;
689 } 695 }
690 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; 696 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
@@ -694,10 +700,10 @@ void __init efi_init(void)
694 ((__u64)boot_params.efi_info.efi_systab_hi<<32)); 700 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
695#endif 701#endif
696 702
697 if (efi_systab_init(efi_phys.systab)) { 703 if (efi_systab_init(efi_phys.systab))
698 efi_enabled = 0;
699 return; 704 return;
700 } 705
706 set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
701 707
702 /* 708 /*
703 * Show what we know for posterity 709 * Show what we know for posterity
@@ -715,10 +721,10 @@ void __init efi_init(void)
715 efi.systab->hdr.revision >> 16, 721 efi.systab->hdr.revision >> 16,
716 efi.systab->hdr.revision & 0xffff, vendor); 722 efi.systab->hdr.revision & 0xffff, vendor);
717 723
718 if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) { 724 if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
719 efi_enabled = 0;
720 return; 725 return;
721 } 726
727 set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
722 728
723 /* 729 /*
724 * Note: We currently don't support runtime services on an EFI 730 * Note: We currently don't support runtime services on an EFI
@@ -727,15 +733,17 @@ void __init efi_init(void)
727 733
728 if (!efi_is_native()) 734 if (!efi_is_native())
729 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); 735 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
730 else if (efi_runtime_init()) { 736 else {
731 efi_enabled = 0; 737 if (efi_runtime_init())
732 return; 738 return;
739 set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
733 } 740 }
734 741
735 if (efi_memmap_init()) { 742 if (efi_memmap_init())
736 efi_enabled = 0;
737 return; 743 return;
738 } 744
745 set_bit(EFI_MEMMAP, &x86_efi_facility);
746
739#ifdef CONFIG_X86_32 747#ifdef CONFIG_X86_32
740 if (efi_is_native()) { 748 if (efi_is_native()) {
741 x86_platform.get_wallclock = efi_get_time; 749 x86_platform.get_wallclock = efi_get_time;
@@ -835,7 +843,7 @@ void __init efi_enter_virtual_mode(void)
835 efi_memory_desc_t *md, *prev_md = NULL; 843 efi_memory_desc_t *md, *prev_md = NULL;
836 efi_status_t status; 844 efi_status_t status;
837 unsigned long size; 845 unsigned long size;
838 u64 end, systab, end_pfn; 846 u64 end, systab, start_pfn, end_pfn;
839 void *p, *va, *new_memmap = NULL; 847 void *p, *va, *new_memmap = NULL;
840 int count = 0; 848 int count = 0;
841 849
@@ -888,10 +896,9 @@ void __init efi_enter_virtual_mode(void)
888 size = md->num_pages << EFI_PAGE_SHIFT; 896 size = md->num_pages << EFI_PAGE_SHIFT;
889 end = md->phys_addr + size; 897 end = md->phys_addr + size;
890 898
899 start_pfn = PFN_DOWN(md->phys_addr);
891 end_pfn = PFN_UP(end); 900 end_pfn = PFN_UP(end);
892 if (end_pfn <= max_low_pfn_mapped 901 if (pfn_range_is_mapped(start_pfn, end_pfn)) {
893 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
894 && end_pfn <= max_pfn_mapped)) {
895 va = __va(md->phys_addr); 902 va = __va(md->phys_addr);
896 903
897 if (!(md->attribute & EFI_MEMORY_WB)) 904 if (!(md->attribute & EFI_MEMORY_WB))
@@ -941,7 +948,7 @@ void __init efi_enter_virtual_mode(void)
941 * 948 *
942 * Call EFI services through wrapper functions. 949 * Call EFI services through wrapper functions.
943 */ 950 */
944 efi.runtime_version = efi_systab.fw_revision; 951 efi.runtime_version = efi_systab.hdr.revision;
945 efi.get_time = virt_efi_get_time; 952 efi.get_time = virt_efi_get_time;
946 efi.set_time = virt_efi_set_time; 953 efi.set_time = virt_efi_set_time;
947 efi.get_wakeup_time = virt_efi_get_wakeup_time; 954 efi.get_wakeup_time = virt_efi_get_wakeup_time;
@@ -969,6 +976,9 @@ u32 efi_mem_type(unsigned long phys_addr)
969 efi_memory_desc_t *md; 976 efi_memory_desc_t *md;
970 void *p; 977 void *p;
971 978
979 if (!efi_enabled(EFI_MEMMAP))
980 return 0;
981
972 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 982 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
973 md = p; 983 md = p;
974 if ((md->phys_addr <= phys_addr) && 984 if ((md->phys_addr <= phys_addr) &&
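The efi.c hunks above retire the single global efi_enabled flag in favor of the x86_efi_facility bitmask, so individual capabilities (system tables, config tables, runtime services, memory map) can be set or withdrawn independently with set_bit()/clear_bit() and queried through efi_enabled(facility). A minimal userspace sketch of the same bookkeeping pattern, using hypothetical facility numbers rather than the kernel's EFI_* constants:

#include <stdio.h>

/* Hypothetical facility numbers; stand-ins for the kernel's EFI_* constants. */
enum { FAC_SYSTEM_TABLES, FAC_CONFIG_TABLES, FAC_RUNTIME_SERVICES, FAC_MEMMAP };

static unsigned long facility;				/* one bit per facility */

static void facility_set(int f)     { facility |= 1UL << f; }
static void facility_clear(int f)   { facility &= ~(1UL << f); }
static int  facility_enabled(int f) { return (facility >> f) & 1; }

int main(void)
{
	facility_set(FAC_SYSTEM_TABLES);
	facility_set(FAC_MEMMAP);
	facility_clear(FAC_MEMMAP);		/* mirrors efi_unmap_memmap() clearing EFI_MEMMAP */
	printf("memmap enabled: %d\n", facility_enabled(FAC_MEMMAP));	/* prints 0 */
	return 0;
}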
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 95fd505dfeb6..2b2003860615 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,7 +38,7 @@
38#include <asm/cacheflush.h> 38#include <asm/cacheflush.h>
39#include <asm/fixmap.h> 39#include <asm/fixmap.h>
40 40
41static pgd_t save_pgd __initdata; 41static pgd_t *save_pgd __initdata;
42static unsigned long efi_flags __initdata; 42static unsigned long efi_flags __initdata;
43 43
44static void __init early_code_mapping_set_exec(int executable) 44static void __init early_code_mapping_set_exec(int executable)
@@ -61,12 +61,20 @@ static void __init early_code_mapping_set_exec(int executable)
61void __init efi_call_phys_prelog(void) 61void __init efi_call_phys_prelog(void)
62{ 62{
63 unsigned long vaddress; 63 unsigned long vaddress;
64 int pgd;
65 int n_pgds;
64 66
65 early_code_mapping_set_exec(1); 67 early_code_mapping_set_exec(1);
66 local_irq_save(efi_flags); 68 local_irq_save(efi_flags);
67 vaddress = (unsigned long)__va(0x0UL); 69
68 save_pgd = *pgd_offset_k(0x0UL); 70 n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
69 set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); 71 save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
72
73 for (pgd = 0; pgd < n_pgds; pgd++) {
74 save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
75 vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
76 set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
77 }
70 __flush_tlb_all(); 78 __flush_tlb_all();
71} 79}
72 80
@@ -75,7 +83,11 @@ void __init efi_call_phys_epilog(void)
75 /* 83 /*
76 * After the lock is released, the original page table is restored. 84 * After the lock is released, the original page table is restored.
77 */ 85 */
78 set_pgd(pgd_offset_k(0x0UL), save_pgd); 86 int pgd;
87 int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
88 for (pgd = 0; pgd < n_pgds; pgd++)
89 set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
90 kfree(save_pgd);
79 __flush_tlb_all(); 91 __flush_tlb_all();
80 local_irq_restore(efi_flags); 92 local_irq_restore(efi_flags);
81 early_code_mapping_set_exec(0); 93 early_code_mapping_set_exec(0);
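The efi_call_phys_prelog()/epilog() change above stops assuming that physical memory fits under a single PGD entry: it now saves and rebuilds one entry per PGDIR_SIZE chunk up to max_pfn before making physical-mode EFI calls, and restores them all afterwards. A standalone sketch of the n_pgds arithmetic, assuming the usual x86-64 constants (4 KB pages, 512 GB per PGD entry with 4-level paging) and a made-up 8 GB machine:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PGDIR_SIZE	(1ULL << 39)			/* 512 GB per PGD entry */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long long max_pfn = 8ULL << (30 - PAGE_SHIFT);	/* 8 GB of RAM, hypothetical */
	unsigned long long n_pgds  = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, PGDIR_SIZE);

	printf("PGD entries to save/restore: %llu\n", n_pgds);		/* 1 for anything up to 512 GB */
	return 0;
}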
diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile
new file mode 100644
index 000000000000..f030b532fdf3
--- /dev/null
+++ b/arch/x86/platform/goldfish/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_GOLDFISH) += goldfish.o
diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c
new file mode 100644
index 000000000000..1693107a518e
--- /dev/null
+++ b/arch/x86/platform/goldfish/goldfish.c
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2007 Google, Inc.
3 * Copyright (C) 2011 Intel, Inc.
4 * Copyright (C) 2013 Intel, Inc.
5 *
6 * This software is licensed under the terms of the GNU General Public
7 * License version 2, as published by the Free Software Foundation, and
8 * may be copied, distributed, and modified under those terms.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */
16
17#include <linux/kernel.h>
18#include <linux/irq.h>
19#include <linux/platform_device.h>
20
21/*
22 * Where in virtual device memory the IO devices (timers, system controllers
23 * and so on)
24 */
25
26#define GOLDFISH_PDEV_BUS_BASE (0xff001000)
27#define GOLDFISH_PDEV_BUS_END (0xff7fffff)
28#define GOLDFISH_PDEV_BUS_IRQ (4)
29
30#define GOLDFISH_TTY_BASE (0x2000)
31
32static struct resource goldfish_pdev_bus_resources[] = {
33 {
34 .start = GOLDFISH_PDEV_BUS_BASE,
35 .end = GOLDFISH_PDEV_BUS_END,
36 .flags = IORESOURCE_MEM,
37 },
38 {
39 .start = GOLDFISH_PDEV_BUS_IRQ,
40 .end = GOLDFISH_PDEV_BUS_IRQ,
41 .flags = IORESOURCE_IRQ,
42 }
43};
44
45static int __init goldfish_init(void)
46{
47 platform_device_register_simple("goldfish_pdev_bus", -1,
48 goldfish_pdev_bus_resources, 2);
49 return 0;
50}
51device_initcall(goldfish_init);
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index fd41a9262d65..e31bcd8f2eee 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
782EXPORT_SYMBOL_GPL(intel_scu_notifier); 782EXPORT_SYMBOL_GPL(intel_scu_notifier);
783 783
784/* Called by IPC driver */ 784/* Called by IPC driver */
785void __devinit intel_scu_devices_create(void) 785void intel_scu_devices_create(void)
786{ 786{
787 int i; 787 int i;
788 788
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index d75582d1aa55..ff0174dda810 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -121,7 +121,7 @@ static const struct platform_suspend_ops xo1_suspend_ops = {
121 .enter = xo1_power_state_enter, 121 .enter = xo1_power_state_enter,
122}; 122};
123 123
124static int __devinit xo1_pm_probe(struct platform_device *pdev) 124static int xo1_pm_probe(struct platform_device *pdev)
125{ 125{
126 struct resource *res; 126 struct resource *res;
127 int err; 127 int err;
@@ -154,7 +154,7 @@ static int __devinit xo1_pm_probe(struct platform_device *pdev)
154 return 0; 154 return 0;
155} 155}
156 156
157static int __devexit xo1_pm_remove(struct platform_device *pdev) 157static int xo1_pm_remove(struct platform_device *pdev)
158{ 158{
159 mfd_cell_disable(pdev); 159 mfd_cell_disable(pdev);
160 160
@@ -173,7 +173,7 @@ static struct platform_driver cs5535_pms_driver = {
173 .owner = THIS_MODULE, 173 .owner = THIS_MODULE,
174 }, 174 },
175 .probe = xo1_pm_probe, 175 .probe = xo1_pm_probe,
176 .remove = __devexit_p(xo1_pm_remove), 176 .remove = xo1_pm_remove,
177}; 177};
178 178
179static struct platform_driver cs5535_acpi_driver = { 179static struct platform_driver cs5535_acpi_driver = {
@@ -182,7 +182,7 @@ static struct platform_driver cs5535_acpi_driver = {
182 .owner = THIS_MODULE, 182 .owner = THIS_MODULE,
183 }, 183 },
184 .probe = xo1_pm_probe, 184 .probe = xo1_pm_probe,
185 .remove = __devexit_p(xo1_pm_remove), 185 .remove = xo1_pm_remove,
186}; 186};
187 187
188static int __init xo1_pm_init(void) 188static int __init xo1_pm_init(void)
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 63d4aa40956e..74704be7b1fe 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -309,7 +309,7 @@ static int xo1_sci_resume(struct platform_device *pdev)
309 return 0; 309 return 0;
310} 310}
311 311
312static int __devinit setup_sci_interrupt(struct platform_device *pdev) 312static int setup_sci_interrupt(struct platform_device *pdev)
313{ 313{
314 u32 lo, hi; 314 u32 lo, hi;
315 u32 sts; 315 u32 sts;
@@ -351,7 +351,7 @@ static int __devinit setup_sci_interrupt(struct platform_device *pdev)
351 return r; 351 return r;
352} 352}
353 353
354static int __devinit setup_ec_sci(void) 354static int setup_ec_sci(void)
355{ 355{
356 int r; 356 int r;
357 357
@@ -395,7 +395,7 @@ static void free_ec_sci(void)
395 gpio_free(OLPC_GPIO_ECSCI); 395 gpio_free(OLPC_GPIO_ECSCI);
396} 396}
397 397
398static int __devinit setup_lid_events(void) 398static int setup_lid_events(void)
399{ 399{
400 int r; 400 int r;
401 401
@@ -432,7 +432,7 @@ static void free_lid_events(void)
432 gpio_free(OLPC_GPIO_LID); 432 gpio_free(OLPC_GPIO_LID);
433} 433}
434 434
435static int __devinit setup_power_button(struct platform_device *pdev) 435static int setup_power_button(struct platform_device *pdev)
436{ 436{
437 int r; 437 int r;
438 438
@@ -463,7 +463,7 @@ static void free_power_button(void)
463 input_free_device(power_button_idev); 463 input_free_device(power_button_idev);
464} 464}
465 465
466static int __devinit setup_ebook_switch(struct platform_device *pdev) 466static int setup_ebook_switch(struct platform_device *pdev)
467{ 467{
468 int r; 468 int r;
469 469
@@ -494,7 +494,7 @@ static void free_ebook_switch(void)
494 input_free_device(ebook_switch_idev); 494 input_free_device(ebook_switch_idev);
495} 495}
496 496
497static int __devinit setup_lid_switch(struct platform_device *pdev) 497static int setup_lid_switch(struct platform_device *pdev)
498{ 498{
499 int r; 499 int r;
500 500
@@ -538,7 +538,7 @@ static void free_lid_switch(void)
538 input_free_device(lid_switch_idev); 538 input_free_device(lid_switch_idev);
539} 539}
540 540
541static int __devinit xo1_sci_probe(struct platform_device *pdev) 541static int xo1_sci_probe(struct platform_device *pdev)
542{ 542{
543 struct resource *res; 543 struct resource *res;
544 int r; 544 int r;
@@ -613,7 +613,7 @@ err_ebook:
613 return r; 613 return r;
614} 614}
615 615
616static int __devexit xo1_sci_remove(struct platform_device *pdev) 616static int xo1_sci_remove(struct platform_device *pdev)
617{ 617{
618 mfd_cell_disable(pdev); 618 mfd_cell_disable(pdev);
619 free_irq(sci_irq, pdev); 619 free_irq(sci_irq, pdev);
@@ -632,7 +632,7 @@ static struct platform_driver xo1_sci_driver = {
632 .name = "olpc-xo1-sci-acpi", 632 .name = "olpc-xo1-sci-acpi",
633 }, 633 },
634 .probe = xo1_sci_probe, 634 .probe = xo1_sci_probe,
635 .remove = __devexit_p(xo1_sci_remove), 635 .remove = xo1_sci_remove,
636 .suspend = xo1_sci_suspend, 636 .suspend = xo1_sci_suspend,
637 .resume = xo1_sci_resume, 637 .resume = xo1_sci_resume,
638}; 638};
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 2fdca25905ae..fef7d0ba7e3a 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -195,7 +195,7 @@ err_sysfs:
195 return r; 195 return r;
196} 196}
197 197
198static int xo15_sci_remove(struct acpi_device *device, int type) 198static int xo15_sci_remove(struct acpi_device *device)
199{ 199{
200 acpi_disable_gpe(NULL, xo15_sci_gpe); 200 acpi_disable_gpe(NULL, xo15_sci_gpe);
201 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler); 201 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 7a9ad30d6c9f..3dc9aee41d91 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -35,7 +35,7 @@ static struct pci_device_id scx200_tbl[] = {
35}; 35};
36MODULE_DEVICE_TABLE(pci,scx200_tbl); 36MODULE_DEVICE_TABLE(pci,scx200_tbl);
37 37
38static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *); 38static int scx200_probe(struct pci_dev *, const struct pci_device_id *);
39 39
40static struct pci_driver scx200_pci_driver = { 40static struct pci_driver scx200_pci_driver = {
41 .name = "scx200", 41 .name = "scx200",
@@ -45,7 +45,7 @@ static struct pci_driver scx200_pci_driver = {
45 45
46static DEFINE_MUTEX(scx200_gpio_config_lock); 46static DEFINE_MUTEX(scx200_gpio_config_lock);
47 47
48static void __devinit scx200_init_shadow(void) 48static void scx200_init_shadow(void)
49{ 49{
50 int bank; 50 int bank;
51 51
@@ -54,7 +54,7 @@ static void __devinit scx200_init_shadow(void)
54 scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); 54 scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
55} 55}
56 56
57static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 57static int scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
58{ 58{
59 unsigned base; 59 unsigned base;
60 60
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index 7785b72ecc3a..bcd1a703e3e6 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -35,7 +35,7 @@
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36 36
37/* All CPUs enumerated by SFI must be present and enabled */ 37/* All CPUs enumerated by SFI must be present and enabled */
38static void __cpuinit mp_sfi_register_lapic(u8 id) 38static void __init mp_sfi_register_lapic(u8 id)
39{ 39{
40 if (MAX_LOCAL_APIC - id <= 0) { 40 if (MAX_LOCAL_APIC - id <= 0) {
41 pr_warning("Processor #%d invalid (max %d)\n", 41 pr_warning("Processor #%d invalid (max %d)\n",
diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile
new file mode 100644
index 000000000000..c54e348c96a7
--- /dev/null
+++ b/arch/x86/platform/ts5500/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_TS5500) += ts5500.o
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
new file mode 100644
index 000000000000..39febb214e8c
--- /dev/null
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -0,0 +1,339 @@
1/*
2 * Technologic Systems TS-5500 Single Board Computer support
3 *
4 * Copyright (C) 2013 Savoir-faire Linux Inc.
5 * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option) any later
10 * version.
11 *
12 *
13 * This driver registers the Technologic Systems TS-5500 Single Board Computer
14 * (SBC) and its devices, and exposes information to userspace such as jumpers'
15 * state or available options. For further information about sysfs entries, see
16 * Documentation/ABI/testing/sysfs-platform-ts5500.
17 *
18 * This code actually supports the TS-5500 platform, but it may be extended to
19 * support similar Technologic Systems x86-based platforms, such as the TS-5600.
20 */
21
22#include <linux/delay.h>
23#include <linux/io.h>
24#include <linux/kernel.h>
25#include <linux/leds.h>
26#include <linux/module.h>
27#include <linux/platform_data/gpio-ts5500.h>
28#include <linux/platform_data/max197.h>
29#include <linux/platform_device.h>
30#include <linux/slab.h>
31
32/* Product code register */
33#define TS5500_PRODUCT_CODE_ADDR 0x74
34#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */
35
36/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */
37#define TS5500_SRAM_RS485_ADC_ADDR 0x75
38#define TS5500_SRAM BIT(0) /* SRAM option */
39#define TS5500_RS485 BIT(1) /* RS-485 option */
40#define TS5500_ADC BIT(2) /* A/D converter option */
41#define TS5500_RS485_RTS BIT(6) /* RTS for RS-485 */
42#define TS5500_RS485_AUTO BIT(7) /* Automatic RS-485 */
43
44/* External Reset/Industrial Temperature Range options register */
45#define TS5500_ERESET_ITR_ADDR 0x76
46#define TS5500_ERESET BIT(0) /* External Reset option */
47#define TS5500_ITR BIT(1) /* Indust. Temp. Range option */
48
49/* LED/Jumpers register */
50#define TS5500_LED_JP_ADDR 0x77
51#define TS5500_LED BIT(0) /* LED flag */
52#define TS5500_JP1 BIT(1) /* Automatic CMOS */
53#define TS5500_JP2 BIT(2) /* Enable Serial Console */
54#define TS5500_JP3 BIT(3) /* Write Enable Drive A */
55#define TS5500_JP4 BIT(4) /* Fast Console (115K baud) */
56#define TS5500_JP5 BIT(5) /* User Jumper */
57#define TS5500_JP6 BIT(6) /* Console on COM1 (req. JP2) */
58#define TS5500_JP7 BIT(7) /* Undocumented (Unused) */
59
60/* A/D Converter registers */
61#define TS5500_ADC_CONV_BUSY_ADDR 0x195 /* Conversion state register */
62#define TS5500_ADC_CONV_BUSY BIT(0)
63#define TS5500_ADC_CONV_INIT_LSB_ADDR 0x196 /* Start conv. / LSB register */
64#define TS5500_ADC_CONV_MSB_ADDR 0x197 /* MSB register */
65#define TS5500_ADC_CONV_DELAY 12 /* usec */
66
67/**
68 * struct ts5500_sbc - TS-5500 board description
69 * @id: Board product ID.
70 * @sram: Flag for SRAM option.
71 * @rs485: Flag for RS-485 option.
72 * @adc: Flag for Analog/Digital converter option.
73 * @ereset: Flag for External Reset option.
74 * @itr: Flag for Industrial Temperature Range option.
75 * @jumpers: Bitfield for jumpers' state.
76 */
77struct ts5500_sbc {
78 int id;
79 bool sram;
80 bool rs485;
81 bool adc;
82 bool ereset;
83 bool itr;
84 u8 jumpers;
85};
86
87/* Board signatures in BIOS shadow RAM */
88static const struct {
89 const char * const string;
90 const ssize_t offset;
91} ts5500_signatures[] __initdata = {
92 { "TS-5x00 AMD Elan", 0xb14 },
93};
94
95static int __init ts5500_check_signature(void)
96{
97 void __iomem *bios;
98 int i, ret = -ENODEV;
99
100 bios = ioremap(0xf0000, 0x10000);
101 if (!bios)
102 return -ENOMEM;
103
104 for (i = 0; i < ARRAY_SIZE(ts5500_signatures); i++) {
105 if (check_signature(bios + ts5500_signatures[i].offset,
106 ts5500_signatures[i].string,
107 strlen(ts5500_signatures[i].string))) {
108 ret = 0;
109 break;
110 }
111 }
112
113 iounmap(bios);
114 return ret;
115}
116
117static int __init ts5500_detect_config(struct ts5500_sbc *sbc)
118{
119 u8 tmp;
120 int ret = 0;
121
122 if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500"))
123 return -EBUSY;
124
125 tmp = inb(TS5500_PRODUCT_CODE_ADDR);
126 if (tmp != TS5500_PRODUCT_CODE) {
127 pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp);
128 ret = -ENODEV;
129 goto cleanup;
130 }
131 sbc->id = tmp;
132
133 tmp = inb(TS5500_SRAM_RS485_ADC_ADDR);
134 sbc->sram = tmp & TS5500_SRAM;
135 sbc->rs485 = tmp & TS5500_RS485;
136 sbc->adc = tmp & TS5500_ADC;
137
138 tmp = inb(TS5500_ERESET_ITR_ADDR);
139 sbc->ereset = tmp & TS5500_ERESET;
140 sbc->itr = tmp & TS5500_ITR;
141
142 tmp = inb(TS5500_LED_JP_ADDR);
143 sbc->jumpers = tmp & ~TS5500_LED;
144
145cleanup:
146 release_region(TS5500_PRODUCT_CODE_ADDR, 4);
147 return ret;
148}
149
150static ssize_t ts5500_show_id(struct device *dev,
151 struct device_attribute *attr, char *buf)
152{
153 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
154
155 return sprintf(buf, "0x%.2x\n", sbc->id);
156}
157
158static ssize_t ts5500_show_jumpers(struct device *dev,
159 struct device_attribute *attr,
160 char *buf)
161{
162 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
163
164 return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1);
165}
166
167#define TS5500_SHOW(field) \
168 static ssize_t ts5500_show_##field(struct device *dev, \
169 struct device_attribute *attr, \
170 char *buf) \
171 { \
172 struct ts5500_sbc *sbc = dev_get_drvdata(dev); \
173 return sprintf(buf, "%d\n", sbc->field); \
174 }
175
176TS5500_SHOW(sram)
177TS5500_SHOW(rs485)
178TS5500_SHOW(adc)
179TS5500_SHOW(ereset)
180TS5500_SHOW(itr)
181
182static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL);
183static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL);
184static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL);
185static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL);
186static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL);
187static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL);
188static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL);
189
190static struct attribute *ts5500_attributes[] = {
191 &dev_attr_id.attr,
192 &dev_attr_jumpers.attr,
193 &dev_attr_sram.attr,
194 &dev_attr_rs485.attr,
195 &dev_attr_adc.attr,
196 &dev_attr_ereset.attr,
197 &dev_attr_itr.attr,
198 NULL
199};
200
201static const struct attribute_group ts5500_attr_group = {
202 .attrs = ts5500_attributes,
203};
204
205static struct resource ts5500_dio1_resource[] = {
206 DEFINE_RES_IRQ_NAMED(7, "DIO1 interrupt"),
207};
208
209static struct platform_device ts5500_dio1_pdev = {
210 .name = "ts5500-dio1",
211 .id = -1,
212 .resource = ts5500_dio1_resource,
213 .num_resources = 1,
214};
215
216static struct resource ts5500_dio2_resource[] = {
217 DEFINE_RES_IRQ_NAMED(6, "DIO2 interrupt"),
218};
219
220static struct platform_device ts5500_dio2_pdev = {
221 .name = "ts5500-dio2",
222 .id = -1,
223 .resource = ts5500_dio2_resource,
224 .num_resources = 1,
225};
226
227static void ts5500_led_set(struct led_classdev *led_cdev,
228 enum led_brightness brightness)
229{
230 outb(!!brightness, TS5500_LED_JP_ADDR);
231}
232
233static enum led_brightness ts5500_led_get(struct led_classdev *led_cdev)
234{
235 return (inb(TS5500_LED_JP_ADDR) & TS5500_LED) ? LED_FULL : LED_OFF;
236}
237
238static struct led_classdev ts5500_led_cdev = {
239 .name = "ts5500:green:",
240 .brightness_set = ts5500_led_set,
241 .brightness_get = ts5500_led_get,
242};
243
244static int ts5500_adc_convert(u8 ctrl)
245{
246 u8 lsb, msb;
247
248 /* Start conversion (ensure the 3 MSB are set to 0) */
249 outb(ctrl & 0x1f, TS5500_ADC_CONV_INIT_LSB_ADDR);
250
251 /*
252 * The platform has CPLD logic driving the A/D converter.
253 * The conversion must complete within 11 microseconds,
254 * otherwise we have to re-initiate a conversion.
255 */
256 udelay(TS5500_ADC_CONV_DELAY);
257 if (inb(TS5500_ADC_CONV_BUSY_ADDR) & TS5500_ADC_CONV_BUSY)
258 return -EBUSY;
259
260 /* Read the raw data */
261 lsb = inb(TS5500_ADC_CONV_INIT_LSB_ADDR);
262 msb = inb(TS5500_ADC_CONV_MSB_ADDR);
263
264 return (msb << 8) | lsb;
265}
266
267static struct max197_platform_data ts5500_adc_pdata = {
268 .convert = ts5500_adc_convert,
269};
270
271static struct platform_device ts5500_adc_pdev = {
272 .name = "max197",
273 .id = -1,
274 .dev = {
275 .platform_data = &ts5500_adc_pdata,
276 },
277};
278
279static int __init ts5500_init(void)
280{
281 struct platform_device *pdev;
282 struct ts5500_sbc *sbc;
283 int err;
284
285 /*
286 * There is no DMI available or PCI bridge subvendor info,
287 * only the BIOS provides a 16-bit identification call.
288 * It is safer to find a signature in the BIOS shadow RAM.
289 */
290 err = ts5500_check_signature();
291 if (err)
292 return err;
293
294 pdev = platform_device_register_simple("ts5500", -1, NULL, 0);
295 if (IS_ERR(pdev))
296 return PTR_ERR(pdev);
297
298 sbc = devm_kzalloc(&pdev->dev, sizeof(struct ts5500_sbc), GFP_KERNEL);
299 if (!sbc) {
300 err = -ENOMEM;
301 goto error;
302 }
303
304 err = ts5500_detect_config(sbc);
305 if (err)
306 goto error;
307
308 platform_set_drvdata(pdev, sbc);
309
310 err = sysfs_create_group(&pdev->dev.kobj, &ts5500_attr_group);
311 if (err)
312 goto error;
313
314 ts5500_dio1_pdev.dev.parent = &pdev->dev;
315 if (platform_device_register(&ts5500_dio1_pdev))
316 dev_warn(&pdev->dev, "DIO1 block registration failed\n");
317 ts5500_dio2_pdev.dev.parent = &pdev->dev;
318 if (platform_device_register(&ts5500_dio2_pdev))
319 dev_warn(&pdev->dev, "DIO2 block registration failed\n");
320
321 if (led_classdev_register(&pdev->dev, &ts5500_led_cdev))
322 dev_warn(&pdev->dev, "LED registration failed\n");
323
324 if (sbc->adc) {
325 ts5500_adc_pdev.dev.parent = &pdev->dev;
326 if (platform_device_register(&ts5500_adc_pdev))
327 dev_warn(&pdev->dev, "ADC registration failed\n");
328 }
329
330 return 0;
331error:
332 platform_device_unregister(pdev);
333 return err;
334}
335device_initcall(ts5500_init);
336
337MODULE_LICENSE("GPL");
338MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>");
339MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver");
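The new ts5500.c driver publishes the detected board options as read-only sysfs attributes on the platform device it registers. A small userspace reader is sketched below; the /sys/devices/platform/ts5500/ path is an assumption inferred from the platform_device_register_simple("ts5500", -1, ...) call and the sysfs-platform-ts5500 ABI document, not something this diff spells out:

#include <stdio.h>

int main(void)
{
	/* Attribute names follow the DEVICE_ATTR() declarations in ts5500.c. */
	static const char * const attrs[] = { "id", "jumpers", "sram", "rs485",
					      "adc", "ereset", "itr" };
	char path[128], buf[32];
	unsigned int i;

	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++) {
		snprintf(path, sizeof(path),
			 "/sys/devices/platform/ts5500/%s", attrs[i]);	/* assumed path */
		FILE *f = fopen(path, "r");
		if (!f)
			continue;		/* not a TS-5500, or attribute absent */
		if (fgets(buf, sizeof(buf), f))
			printf("%s: %s", attrs[i], buf);
		fclose(f);
	}
	return 0;
}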
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index b8b3a37c80cd..0f92173a12b6 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1034,7 +1034,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
1034 * globally purge translation cache of a virtual address or all TLB's 1034 * globally purge translation cache of a virtual address or all TLB's
1035 * @cpumask: mask of all cpu's in which the address is to be removed 1035 * @cpumask: mask of all cpu's in which the address is to be removed
1036 * @mm: mm_struct containing virtual address range 1036 * @mm: mm_struct containing virtual address range
1037 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) 1037 * @start: start virtual address to be removed from TLB
 1038 * @end: end virtual address to be removed from TLB
1038 * @cpu: the current cpu 1039 * @cpu: the current cpu
1039 * 1040 *
1040 * This is the entry point for initiating any UV global TLB shootdown. 1041 * This is the entry point for initiating any UV global TLB shootdown.
@@ -1056,7 +1057,7 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
1056 */ 1057 */
1057const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 1058const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
1058 struct mm_struct *mm, unsigned long start, 1059 struct mm_struct *mm, unsigned long start,
1059 unsigned end, unsigned int cpu) 1060 unsigned long end, unsigned int cpu)
1060{ 1061{
1061 int locals = 0; 1062 int locals = 0;
1062 int remotes = 0; 1063 int remotes = 0;
@@ -1113,7 +1114,10 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
1113 1114
1114 record_send_statistics(stat, locals, hubs, remotes, bau_desc); 1115 record_send_statistics(stat, locals, hubs, remotes, bau_desc);
1115 1116
1116 bau_desc->payload.address = start; 1117 if (!end || (end - start) <= PAGE_SIZE)
1118 bau_desc->payload.address = start;
1119 else
1120 bau_desc->payload.address = TLB_FLUSH_ALL;
1117 bau_desc->payload.sending_cpu = cpu; 1121 bau_desc->payload.sending_cpu = cpu;
1118 /* 1122 /*
1119 * uv_flush_send_and_wait returns 0 if all cpu's were messaged, 1123 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
@@ -1463,7 +1467,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
1463 } 1467 }
1464 1468
1465 if (input_arg == 0) { 1469 if (input_arg == 0) {
1466 elements = sizeof(stat_description)/sizeof(*stat_description); 1470 elements = ARRAY_SIZE(stat_description);
1467 printk(KERN_DEBUG "# cpu: cpu number\n"); 1471 printk(KERN_DEBUG "# cpu: cpu number\n");
1468 printk(KERN_DEBUG "Sender statistics:\n"); 1472 printk(KERN_DEBUG "Sender statistics:\n");
1469 for (i = 0; i < elements; i++) 1473 for (i = 0; i < elements; i++)
@@ -1504,7 +1508,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr,
1504 char *q; 1508 char *q;
1505 int cnt = 0; 1509 int cnt = 0;
1506 int val; 1510 int val;
1507 int e = sizeof(tunables) / sizeof(*tunables); 1511 int e = ARRAY_SIZE(tunables);
1508 1512
1509 p = instr + strspn(instr, WHITESPACE); 1513 p = instr + strspn(instr, WHITESPACE);
1510 q = p; 1514 q = p;
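The two tlb_uv.c hunks that swap the open-coded sizeof division for ARRAY_SIZE() are behaviour-preserving: the macro yields the same element count and, in the kernel, additionally refuses to compile when handed a pointer instead of an array. A simplified standalone equivalent (the type-check is omitted, and the sample strings are made up for illustration):

#include <stdio.h>

#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))

int main(void)
{
	static const char * const stat_description[] = { "sent", "retried", "timed out" };

	printf("%zu == %zu\n",
	       sizeof(stat_description) / sizeof(*stat_description),
	       ARRAY_SIZE(stat_description));		/* both print 3 */
	return 0;
}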
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 5032e0d19b86..98718f604eb6 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -15,7 +15,7 @@
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 * 17 *
18 * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. 18 * Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Dimitri Sivanich 19 * Copyright (c) Dimitri Sivanich
20 */ 20 */
21#include <linux/clockchips.h> 21#include <linux/clockchips.h>
@@ -102,9 +102,10 @@ static int uv_intr_pending(int pnode)
102 if (is_uv1_hub()) 102 if (is_uv1_hub())
103 return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & 103 return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
104 UV1H_EVENT_OCCURRED0_RTC1_MASK; 104 UV1H_EVENT_OCCURRED0_RTC1_MASK;
105 else 105 else if (is_uvx_hub())
106 return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) & 106 return uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) &
107 UV2H_EVENT_OCCURRED2_RTC_1_MASK; 107 UVXH_EVENT_OCCURRED2_RTC_1_MASK;
108 return 0;
108} 109}
109 110
110/* Setup interrupt and return non-zero if early expiration occurred. */ 111/* Setup interrupt and return non-zero if early expiration occurred. */
@@ -122,8 +123,8 @@ static int uv_setup_intr(int cpu, u64 expires)
122 uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, 123 uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
123 UV1H_EVENT_OCCURRED0_RTC1_MASK); 124 UV1H_EVENT_OCCURRED0_RTC1_MASK);
124 else 125 else
125 uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS, 126 uv_write_global_mmr64(pnode, UVXH_EVENT_OCCURRED2_ALIAS,
126 UV2H_EVENT_OCCURRED2_RTC_1_MASK); 127 UVXH_EVENT_OCCURRED2_RTC_1_MASK);
127 128
128 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | 129 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
129 ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); 130 ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 74202c1910cd..7d28c885d238 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -129,8 +129,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
129 } 129 }
130 } 130 }
131 131
132 resume_map_numa_kva(pgd_base);
133
134 return 0; 132 return 0;
135} 133}
136 134
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 460f314d13e5..a0fde91c16cf 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -11,6 +11,8 @@
11#include <linux/gfp.h> 11#include <linux/gfp.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/suspend.h> 13#include <linux/suspend.h>
14
15#include <asm/init.h>
14#include <asm/proto.h> 16#include <asm/proto.h>
15#include <asm/page.h> 17#include <asm/page.h>
16#include <asm/pgtable.h> 18#include <asm/pgtable.h>
@@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt;
39 41
40void *relocated_restore_code; 42void *relocated_restore_code;
41 43
42static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) 44static void *alloc_pgt_page(void *context)
43{ 45{
44 long i, j; 46 return (void *)get_safe_page(GFP_ATOMIC);
45
46 i = pud_index(address);
47 pud = pud + i;
48 for (; i < PTRS_PER_PUD; pud++, i++) {
49 unsigned long paddr;
50 pmd_t *pmd;
51
52 paddr = address + i*PUD_SIZE;
53 if (paddr >= end)
54 break;
55
56 pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
57 if (!pmd)
58 return -ENOMEM;
59 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
60 for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
61 unsigned long pe;
62
63 if (paddr >= end)
64 break;
65 pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
66 pe &= __supported_pte_mask;
67 set_pmd(pmd, __pmd(pe));
68 }
69 }
70 return 0;
71} 47}
72 48
73static int set_up_temporary_mappings(void) 49static int set_up_temporary_mappings(void)
74{ 50{
75 unsigned long start, end, next; 51 struct x86_mapping_info info = {
76 int error; 52 .alloc_pgt_page = alloc_pgt_page,
53 .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
54 .kernel_mapping = true,
55 };
56 unsigned long mstart, mend;
57 int result;
58 int i;
77 59
78 temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); 60 temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
79 if (!temp_level4_pgt) 61 if (!temp_level4_pgt)
@@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void)
84 init_level4_pgt[pgd_index(__START_KERNEL_map)]); 66 init_level4_pgt[pgd_index(__START_KERNEL_map)]);
85 67
86 /* Set up the direct mapping from scratch */ 68 /* Set up the direct mapping from scratch */
87 start = (unsigned long)pfn_to_kaddr(0); 69 for (i = 0; i < nr_pfn_mapped; i++) {
88 end = (unsigned long)pfn_to_kaddr(max_pfn); 70 mstart = pfn_mapped[i].start << PAGE_SHIFT;
89 71 mend = pfn_mapped[i].end << PAGE_SHIFT;
90 for (; start < end; start = next) { 72
91 pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); 73 result = kernel_ident_mapping_init(&info, temp_level4_pgt,
92 if (!pud) 74 mstart, mend);
93 return -ENOMEM; 75
94 next = start + PGDIR_SIZE; 76 if (result)
95 if (next > end) 77 return result;
96 next = end;
97 if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
98 return error;
99 set_pgd(temp_level4_pgt + pgd_index(start),
100 mk_kernel_pgd(__pa(pud)));
101 } 78 }
79
102 return 0; 80 return 0;
103} 81}
104 82
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index cbca565af5bd..a44f457e70a1 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -8,9 +8,26 @@
8struct real_mode_header *real_mode_header; 8struct real_mode_header *real_mode_header;
9u32 *trampoline_cr4_features; 9u32 *trampoline_cr4_features;
10 10
11void __init setup_real_mode(void) 11void __init reserve_real_mode(void)
12{ 12{
13 phys_addr_t mem; 13 phys_addr_t mem;
14 unsigned char *base;
15 size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
16
17 /* Has to be under 1M so we can execute real-mode AP code. */
18 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
19 if (!mem)
20 panic("Cannot allocate trampoline\n");
21
22 base = __va(mem);
23 memblock_reserve(mem, size);
24 real_mode_header = (struct real_mode_header *) base;
25 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
26 base, (unsigned long long)mem, size);
27}
28
29void __init setup_real_mode(void)
30{
14 u16 real_mode_seg; 31 u16 real_mode_seg;
15 u32 *rel; 32 u32 *rel;
16 u32 count; 33 u32 count;
@@ -25,16 +42,7 @@ void __init setup_real_mode(void)
25 u64 efer; 42 u64 efer;
26#endif 43#endif
27 44
28 /* Has to be in very low memory so we can execute real-mode AP code. */ 45 base = (unsigned char *)real_mode_header;
29 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
30 if (!mem)
31 panic("Cannot allocate trampoline\n");
32
33 base = __va(mem);
34 memblock_reserve(mem, size);
35 real_mode_header = (struct real_mode_header *) base;
36 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
37 base, (unsigned long long)mem, size);
38 46
39 memcpy(base, real_mode_blob, size); 47 memcpy(base, real_mode_blob, size);
40 48
@@ -62,9 +70,9 @@ void __init setup_real_mode(void)
62 __va(real_mode_header->trampoline_header); 70 __va(real_mode_header->trampoline_header);
63 71
64#ifdef CONFIG_X86_32 72#ifdef CONFIG_X86_32
65 trampoline_header->start = __pa(startup_32_smp); 73 trampoline_header->start = __pa_symbol(startup_32_smp);
66 trampoline_header->gdt_limit = __BOOT_DS + 7; 74 trampoline_header->gdt_limit = __BOOT_DS + 7;
67 trampoline_header->gdt_base = __pa(boot_gdt); 75 trampoline_header->gdt_base = __pa_symbol(boot_gdt);
68#else 76#else
69 /* 77 /*
70 * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR 78 * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR
@@ -78,16 +86,18 @@ void __init setup_real_mode(void)
78 *trampoline_cr4_features = read_cr4(); 86 *trampoline_cr4_features = read_cr4();
79 87
80 trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); 88 trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
81 trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; 89 trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
82 trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; 90 trampoline_pgd[511] = init_level4_pgt[511].pgd;
83#endif 91#endif
84} 92}
85 93
86/* 94/*
87 * set_real_mode_permissions() gets called very early, to guarantee the 95 * reserve_real_mode() gets called very early, to guarantee the
88 * availability of low memory. This is before the proper kernel page 96 * availability of low memory. This is before the proper kernel page
89 * tables are set up, so we cannot set page permissions in that 97 * tables are set up, so we cannot set page permissions in that
90 * function. Thus, we use an arch_initcall instead. 98 * function. Also trampoline code will be executed by APs so we
99 * need to mark it executable at do_pre_smp_initcalls() at least,
 100 * thus run it as an early_initcall().
91 */ 101 */
92static int __init set_real_mode_permissions(void) 102static int __init set_real_mode_permissions(void)
93{ 103{
@@ -111,5 +121,4 @@ static int __init set_real_mode_permissions(void)
111 121
112 return 0; 122 return 0;
113} 123}
114 124early_initcall(set_real_mode_permissions);
115arch_initcall(set_real_mode_permissions);
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 28e3fa9056ea..f2fe78ff22cc 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -73,12 +73,12 @@
7364 i386 getppid sys_getppid 7364 i386 getppid sys_getppid
7465 i386 getpgrp sys_getpgrp 7465 i386 getpgrp sys_getpgrp
7566 i386 setsid sys_setsid 7566 i386 setsid sys_setsid
7667 i386 sigaction sys_sigaction sys32_sigaction 7667 i386 sigaction sys_sigaction compat_sys_sigaction
7768 i386 sgetmask sys_sgetmask 7768 i386 sgetmask sys_sgetmask
7869 i386 ssetmask sys_ssetmask 7869 i386 ssetmask sys_ssetmask
7970 i386 setreuid sys_setreuid16 7970 i386 setreuid sys_setreuid16
8071 i386 setregid sys_setregid16 8071 i386 setregid sys_setregid16
8172 i386 sigsuspend sys_sigsuspend sys32_sigsuspend 8172 i386 sigsuspend sys_sigsuspend sys_sigsuspend
8273 i386 sigpending sys_sigpending compat_sys_sigpending 8273 i386 sigpending sys_sigpending compat_sys_sigpending
8374 i386 sethostname sys_sethostname 8374 i386 sethostname sys_sethostname
8475 i386 setrlimit sys_setrlimit compat_sys_setrlimit 8475 i386 setrlimit sys_setrlimit compat_sys_setrlimit
@@ -116,16 +116,16 @@
116107 i386 lstat sys_newlstat compat_sys_newlstat 116107 i386 lstat sys_newlstat compat_sys_newlstat
117108 i386 fstat sys_newfstat compat_sys_newfstat 117108 i386 fstat sys_newfstat compat_sys_newfstat
118109 i386 olduname sys_uname 118109 i386 olduname sys_uname
119110 i386 iopl ptregs_iopl stub32_iopl 119110 i386 iopl sys_iopl
120111 i386 vhangup sys_vhangup 120111 i386 vhangup sys_vhangup
121112 i386 idle 121112 i386 idle
122113 i386 vm86old ptregs_vm86old sys32_vm86_warning 122113 i386 vm86old sys_vm86old sys32_vm86_warning
123114 i386 wait4 sys_wait4 compat_sys_wait4 123114 i386 wait4 sys_wait4 compat_sys_wait4
124115 i386 swapoff sys_swapoff 124115 i386 swapoff sys_swapoff
125116 i386 sysinfo sys_sysinfo compat_sys_sysinfo 125116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
126117 i386 ipc sys_ipc sys32_ipc 126117 i386 ipc sys_ipc sys32_ipc
127118 i386 fsync sys_fsync 127118 i386 fsync sys_fsync
128119 i386 sigreturn ptregs_sigreturn stub32_sigreturn 128119 i386 sigreturn sys_sigreturn stub32_sigreturn
129120 i386 clone sys_clone stub32_clone 129120 i386 clone sys_clone stub32_clone
130121 i386 setdomainname sys_setdomainname 130121 i386 setdomainname sys_setdomainname
131122 i386 uname sys_newuname 131122 i386 uname sys_newuname
@@ -167,24 +167,24 @@
167158 i386 sched_yield sys_sched_yield 167158 i386 sched_yield sys_sched_yield
168159 i386 sched_get_priority_max sys_sched_get_priority_max 168159 i386 sched_get_priority_max sys_sched_get_priority_max
169160 i386 sched_get_priority_min sys_sched_get_priority_min 169160 i386 sched_get_priority_min sys_sched_get_priority_min
170161 i386 sched_rr_get_interval sys_sched_rr_get_interval sys32_sched_rr_get_interval 170161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
171162 i386 nanosleep sys_nanosleep compat_sys_nanosleep 171162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
172163 i386 mremap sys_mremap 172163 i386 mremap sys_mremap
173164 i386 setresuid sys_setresuid16 173164 i386 setresuid sys_setresuid16
174165 i386 getresuid sys_getresuid16 174165 i386 getresuid sys_getresuid16
175166 i386 vm86 ptregs_vm86 sys32_vm86_warning 175166 i386 vm86 sys_vm86 sys32_vm86_warning
176167 i386 query_module 176167 i386 query_module
177168 i386 poll sys_poll 177168 i386 poll sys_poll
178169 i386 nfsservctl 178169 i386 nfsservctl
179170 i386 setresgid sys_setresgid16 179170 i386 setresgid sys_setresgid16
180171 i386 getresgid sys_getresgid16 180171 i386 getresgid sys_getresgid16
181172 i386 prctl sys_prctl 181172 i386 prctl sys_prctl
182173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn 182173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn
183174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction 183174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
184175 i386 rt_sigprocmask sys_rt_sigprocmask 184175 i386 rt_sigprocmask sys_rt_sigprocmask
185176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending 185176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
186177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 186177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
187178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo 187178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
188179 i386 rt_sigsuspend sys_rt_sigsuspend 188179 i386 rt_sigsuspend sys_rt_sigsuspend
189180 i386 pread64 sys_pread64 sys32_pread 189180 i386 pread64 sys_pread64 sys32_pread
190181 i386 pwrite64 sys_pwrite64 sys32_pwrite 190181 i386 pwrite64 sys_pwrite64 sys32_pwrite
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index dc97328bd90a..38ae65dfd14f 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,7 +325,7 @@
325# x32-specific system call numbers start at 512 to avoid cache impact 325# x32-specific system call numbers start at 512 to avoid cache impact
326# for native 64-bit operation. 326# for native 64-bit operation.
327# 327#
328512 x32 rt_sigaction sys32_rt_sigaction 328512 x32 rt_sigaction compat_sys_rt_sigaction
329513 x32 rt_sigreturn stub_x32_rt_sigreturn 329513 x32 rt_sigreturn stub_x32_rt_sigreturn
330514 x32 ioctl compat_sys_ioctl 330514 x32 ioctl compat_sys_ioctl
331515 x32 readv compat_sys_readv 331515 x32 readv compat_sys_readv
@@ -335,9 +335,9 @@
335519 x32 recvmsg compat_sys_recvmsg 335519 x32 recvmsg compat_sys_recvmsg
336520 x32 execve stub_x32_execve 336520 x32 execve stub_x32_execve
337521 x32 ptrace compat_sys_ptrace 337521 x32 ptrace compat_sys_ptrace
338522 x32 rt_sigpending sys32_rt_sigpending 338522 x32 rt_sigpending compat_sys_rt_sigpending
339523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait 339523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
340524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo 340524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
341525 x32 sigaltstack compat_sys_sigaltstack 341525 x32 sigaltstack compat_sys_sigaltstack
342526 x32 timer_create compat_sys_timer_create 342526 x32 timer_create compat_sys_timer_create
343527 x32 mq_notify compat_sys_mq_notify 343527 x32 mq_notify compat_sys_mq_notify
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index cc2f8c131286..872eb60e7806 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -55,7 +55,7 @@ static FILE *input_file; /* Input file name */
55static void usage(const char *err) 55static void usage(const char *err)
56{ 56{
57 if (err) 57 if (err)
58 fprintf(stderr, "Error: %s\n\n", err); 58 fprintf(stderr, "%s: Error: %s\n\n", prog, err);
59 fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog); 59 fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
60 fprintf(stderr, "\t-y 64bit mode\n"); 60 fprintf(stderr, "\t-y 64bit mode\n");
61 fprintf(stderr, "\t-n 32bit mode\n"); 61 fprintf(stderr, "\t-n 32bit mode\n");
@@ -269,7 +269,13 @@ int main(int argc, char **argv)
269 insns++; 269 insns++;
270 } 270 }
271 271
272 fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); 272 fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
273 prog,
274 (errors) ? "Failure" : "Success",
275 insns,
276 (input_file) ? "given" : "random",
277 errors,
278 seed);
273 279
274 return errors ? 1 : 0; 280 return errors ? 1 : 0;
275} 281}
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5a1847d61930..79d67bd507fa 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -814,12 +814,14 @@ int main(int argc, char **argv)
814 read_relocs(fp); 814 read_relocs(fp);
815 if (show_absolute_syms) { 815 if (show_absolute_syms) {
816 print_absolute_symbols(); 816 print_absolute_symbols();
817 return 0; 817 goto out;
818 } 818 }
819 if (show_absolute_relocs) { 819 if (show_absolute_relocs) {
820 print_absolute_relocs(); 820 print_absolute_relocs();
821 return 0; 821 goto out;
822 } 822 }
823 emit_relocs(as_text, use_real_mode); 823 emit_relocs(as_text, use_real_mode);
824out:
825 fclose(fp);
824 return 0; 826 return 0;
825} 827}
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 53c90fd412d1..14ef8d1dbc33 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,7 +13,6 @@ endmenu
13config UML_X86 13config UML_X86
14 def_bool y 14 def_bool y
15 select GENERIC_FIND_FIRST_BIT 15 select GENERIC_FIND_FIRST_BIT
16 select GENERIC_SIGALTSTACK
17 16
18config 64BIT 17config 64BIT
19 bool "64-bit kernel" if SUBARCH = "x86" 18 bool "64-bit kernel" if SUBARCH = "x86"
@@ -25,6 +24,8 @@ config X86_32
25 select ARCH_WANT_IPC_PARSE_VERSION 24 select ARCH_WANT_IPC_PARSE_VERSION
26 select MODULES_USE_ELF_REL 25 select MODULES_USE_ELF_REL
27 select CLONE_BACKWARDS 26 select CLONE_BACKWARDS
27 select OLD_SIGSUSPEND3
28 select OLD_SIGACTION
28 29
29config X86_64 30config X86_64
30 def_bool 64BIT 31 def_bool 64BIT
@@ -37,9 +38,8 @@ config RWSEM_GENERIC_SPINLOCK
37 def_bool !RWSEM_XCHGADD_ALGORITHM 38 def_bool !RWSEM_XCHGADD_ALGORITHM
38 39
39config 3_LEVEL_PGTABLES 40config 3_LEVEL_PGTABLES
40 bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT 41 bool "Three-level pagetables" if !64BIT
41 default 64BIT 42 default 64BIT
42 depends on EXPERIMENTAL
43 help 43 help
44 Three-level pagetables will let UML have more than 4G of physical 44 Three-level pagetables will let UML have more than 4G of physical
45 memory. All the memory that can't be mapped directly will be treated 45 memory. All the memory that can't be mapped directly will be treated
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 5d065b2222d3..eafa324eb7a5 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -10,7 +10,7 @@ endif
10 10
11obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ 11obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
12 ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ 12 ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
13 stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \ 13 stub_$(BITS).o stub_segv.o \
14 sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ 14 sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
15 mem_$(BITS).o subarch.o os-$(OS)/ 15 mem_$(BITS).o subarch.o os-$(OS)/
16 16
@@ -25,7 +25,7 @@ subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o
25 25
26else 26else
27 27
28obj-y += vdso/ 28obj-y += syscalls_64.o vdso/
29 29
30subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \ 30subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \
31 ../lib/rwsem.o 31 ../lib/rwsem.o
diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c
index 8784ab30d91b..84ac7f7b0257 100644
--- a/arch/x86/um/fault.c
+++ b/arch/x86/um/fault.c
@@ -20,7 +20,7 @@ int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
20 const struct exception_table_entry *fixup; 20 const struct exception_table_entry *fixup;
21 21
22 fixup = search_exception_tables(address); 22 fixup = search_exception_tables(address);
23 if (fixup != 0) { 23 if (fixup) {
24 UPT_IP(regs) = fixup->fixup; 24 UPT_IP(regs) = fixup->fixup;
25 return 1; 25 return 1;
26 } 26 }
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 8436079be914..68fd2cf526fd 100644
--- a/arch/x86/um/shared/sysdep/syscalls_32.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
@@ -8,11 +8,6 @@
8 8
9typedef long syscall_handler_t(struct pt_regs); 9typedef long syscall_handler_t(struct pt_regs);
10 10
11/* Not declared on x86, incompatible declarations on x86_64, so these have
12 * to go here rather than in sys_call_table.c
13 */
14extern syscall_handler_t sys_rt_sigaction;
15
16extern syscall_handler_t *sys_call_table[]; 11extern syscall_handler_t *sys_call_table[];
17 12
18#define EXECUTE_SYSCALL(syscall, regs) \ 13#define EXECUTE_SYSCALL(syscall, regs) \
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 71cef48ea5cd..ae7319db18ee 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -464,7 +464,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
464 return 0; 464 return 0;
465} 465}
466 466
467long sys_sigreturn(struct pt_regs *regs) 467long sys_sigreturn(void)
468{ 468{
469 unsigned long sp = PT_REGS_SP(&current->thread.regs); 469 unsigned long sp = PT_REGS_SP(&current->thread.regs);
470 struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); 470 struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
@@ -577,7 +577,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
577} 577}
578#endif 578#endif
579 579
580long sys_rt_sigreturn(struct pt_regs *regs) 580long sys_rt_sigreturn(void)
581{ 581{
582 unsigned long sp = PT_REGS_SP(&current->thread.regs); 582 unsigned long sp = PT_REGS_SP(&current->thread.regs);
583 struct rt_sigframe __user *frame = 583 struct rt_sigframe __user *frame =
@@ -601,14 +601,3 @@ long sys_rt_sigreturn(struct pt_regs *regs)
601 force_sig(SIGSEGV, current); 601 force_sig(SIGSEGV, current);
602 return 0; 602 return 0;
603} 603}
604
605#ifdef CONFIG_X86_32
606long ptregs_sigreturn(void)
607{
608 return sys_sigreturn(NULL);
609}
610long ptregs_rt_sigreturn(void)
611{
612 return sys_rt_sigreturn(NULL);
613}
614#endif
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index a0c3b0d1a122..531d4269e2e3 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -24,10 +24,6 @@
24 24
25#define old_mmap sys_old_mmap 25#define old_mmap sys_old_mmap
26 26
27#define ptregs_iopl sys_iopl
28#define ptregs_vm86old sys_vm86old
29#define ptregs_vm86 sys_vm86
30
31#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; 27#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
32#include <asm/syscalls_32.h> 28#include <asm/syscalls_32.h>
33 29
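Note: alongside dropping the ptregs_* aliases, it helps to recall how this file builds the table: the generated 32-bit syscall list is expanded twice with different definitions of __SYSCALL_I386, first to declare every handler and then to populate the array. A simplified sketch of that idiom (the element type, array bound and default entry here are assumptions, not copied from the file):

        #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void);
        #include <asm/syscalls_32.h>            /* pass 1: declare every handler */
        #undef __SYSCALL_I386

        extern asmlinkage void sys_ni_syscall(void);    /* "not implemented" fallback */

        typedef void (*sys_call_ptr_t)(void);           /* assumed element type */

        #define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
        const sys_call_ptr_t sys_call_table[__NR_syscall_max + 1] = {
                [0 ... __NR_syscall_max] = sys_ni_syscall,      /* default entry */
        #include <asm/syscalls_32.h>            /* pass 2: fill in the real entries */
        };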
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
deleted file mode 100644
index e8bcea99acdb..000000000000
--- a/arch/x86/um/syscalls_32.c
+++ /dev/null
@@ -1,38 +0,0 @@
1/*
2 * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <linux/syscalls.h>
7#include <sysdep/syscalls.h>
8
9long sys_sigaction(int sig, const struct old_sigaction __user *act,
10 struct old_sigaction __user *oact)
11{
12 struct k_sigaction new_ka, old_ka;
13 int ret;
14
15 if (act) {
16 old_sigset_t mask;
17 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
18 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
19 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
20 __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
21 __get_user(mask, &act->sa_mask))
22 return -EFAULT;
23 siginitset(&new_ka.sa.sa_mask, mask);
24 }
25
26 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
27
28 if (!ret && oact) {
29 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
30 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
31 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
32 __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
33 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
34 return -EFAULT;
35 }
36
37 return ret;
38}
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 205ad328aa52..c74436e687bf 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -60,7 +60,7 @@ notrace static cycle_t vread_tsc(void)
60 60
61static notrace cycle_t vread_hpet(void) 61static notrace cycle_t vread_hpet(void)
62{ 62{
63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
64} 64}
65 65
66#ifdef CONFIG_PARAVIRT_CLOCK 66#ifdef CONFIG_PARAVIRT_CLOCK
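Note: HPET_COUNTER names the HPET main counter register, which sits at offset 0xf0 of the register block, so this hunk only replaces a magic number with the named constant; the value read by vread_hpet() is unchanged. A minimal sketch of the access pattern (kernel io accessors assumed):

        #define HPET_COUNTER    0x0f0   /* main counter register, per the HPET spec */

        static inline u32 hpet_read_counter(const void __iomem *hpet)
        {
                return readl(hpet + HPET_COUNTER);      /* same read as vread_hpet() above */
        }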
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 138e5667409a..39928d16be3b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1517,72 +1517,51 @@ asmlinkage void __init xen_start_kernel(void)
1517#endif 1517#endif
1518} 1518}
1519 1519
1520#ifdef CONFIG_XEN_PVHVM 1520void __ref xen_hvm_init_shared_info(void)
1521#define HVM_SHARED_INFO_ADDR 0xFE700000UL
1522static struct shared_info *xen_hvm_shared_info;
1523static unsigned long xen_hvm_sip_phys;
1524static int xen_major, xen_minor;
1525
1526static void xen_hvm_connect_shared_info(unsigned long pfn)
1527{ 1521{
1522 int cpu;
1528 struct xen_add_to_physmap xatp; 1523 struct xen_add_to_physmap xatp;
1524 static struct shared_info *shared_info_page = 0;
1529 1525
1526 if (!shared_info_page)
1527 shared_info_page = (struct shared_info *)
1528 extend_brk(PAGE_SIZE, PAGE_SIZE);
1530 xatp.domid = DOMID_SELF; 1529 xatp.domid = DOMID_SELF;
1531 xatp.idx = 0; 1530 xatp.idx = 0;
1532 xatp.space = XENMAPSPACE_shared_info; 1531 xatp.space = XENMAPSPACE_shared_info;
1533 xatp.gpfn = pfn; 1532 xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
1534 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 1533 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
1535 BUG(); 1534 BUG();
1536 1535
1537} 1536 HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
1538static void __init xen_hvm_set_shared_info(struct shared_info *sip)
1539{
1540 int cpu;
1541
1542 HYPERVISOR_shared_info = sip;
1543 1537
1544 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info 1538 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
1545 * page, we use it in the event channel upcall and in some pvclock 1539 * page, we use it in the event channel upcall and in some pvclock
1546 * related functions. We don't need the vcpu_info placement 1540 * related functions. We don't need the vcpu_info placement
1547 * optimizations because we don't use any pv_mmu or pv_irq op on 1541 * optimizations because we don't use any pv_mmu or pv_irq op on
1548 * HVM. */ 1542 * HVM.
1549 for_each_online_cpu(cpu) 1543 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
1544 * online but xen_hvm_init_shared_info is run at resume time too and
1545 * in that case multiple vcpus might be online. */
1546 for_each_online_cpu(cpu) {
1550 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1547 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1551}
1552
1553/* Reconnect the shared_info pfn to a (new) mfn */
1554void xen_hvm_resume_shared_info(void)
1555{
1556 xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
1557}
1558
1559/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage.
1560 * On these old tools the shared info page will be placed in E820_Ram.
1561 * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects
1562 * that nothing is mapped up to HVM_SHARED_INFO_ADDR.
1563 * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used
1564 * here for the shared info page. */
1565static void __init xen_hvm_init_shared_info(void)
1566{
1567 if (xen_major < 4) {
1568 xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
1569 xen_hvm_sip_phys = __pa(xen_hvm_shared_info);
1570 } else {
1571 xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR;
1572 set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys);
1573 xen_hvm_shared_info =
1574 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
1575 } 1548 }
1576 xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
1577 xen_hvm_set_shared_info(xen_hvm_shared_info);
1578} 1549}
1579 1550
1551#ifdef CONFIG_XEN_PVHVM
1580static void __init init_hvm_pv_info(void) 1552static void __init init_hvm_pv_info(void)
1581{ 1553{
1582 uint32_t ecx, edx, pages, msr, base; 1554 int major, minor;
1555 uint32_t eax, ebx, ecx, edx, pages, msr, base;
1583 u64 pfn; 1556 u64 pfn;
1584 1557
1585 base = xen_cpuid_base(); 1558 base = xen_cpuid_base();
1559 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1560
1561 major = eax >> 16;
1562 minor = eax & 0xffff;
1563 printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
1564
1586 cpuid(base + 2, &pages, &msr, &ecx, &edx); 1565 cpuid(base + 2, &pages, &msr, &ecx, &edx);
1587 1566
1588 pfn = __pa(hypercall_page); 1567 pfn = __pa(hypercall_page);
@@ -1633,22 +1612,12 @@ static void __init xen_hvm_guest_init(void)
1633 1612
1634static bool __init xen_hvm_platform(void) 1613static bool __init xen_hvm_platform(void)
1635{ 1614{
1636 uint32_t eax, ebx, ecx, edx, base;
1637
1638 if (xen_pv_domain()) 1615 if (xen_pv_domain())
1639 return false; 1616 return false;
1640 1617
1641 base = xen_cpuid_base(); 1618 if (!xen_cpuid_base())
1642 if (!base)
1643 return false; 1619 return false;
1644 1620
1645 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1646
1647 xen_major = eax >> 16;
1648 xen_minor = eax & 0xffff;
1649
1650 printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor);
1651
1652 return true; 1621 return true;
1653} 1622}
1654 1623
@@ -1668,6 +1637,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
1668 .name = "Xen HVM", 1637 .name = "Xen HVM",
1669 .detect = xen_hvm_platform, 1638 .detect = xen_hvm_platform,
1670 .init_platform = xen_hvm_guest_init, 1639 .init_platform = xen_hvm_guest_init,
1640 .x2apic_available = xen_x2apic_para_available,
1671}; 1641};
1672EXPORT_SYMBOL(x86_hyper_xen_hvm); 1642EXPORT_SYMBOL(x86_hyper_xen_hvm);
1673#endif 1643#endif
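Note: xen_hvm_init_shared_info() now always maps a brk-allocated shared_info page through XENMEM_add_to_physmap and re-points xen_vcpu for every online vcpu, so the same function can be called again at resume time (see the suspend.c hunk below); the Xen version probe moves from xen_hvm_platform() into init_hvm_pv_info(). A condensed sketch of the mapping step, with the field values taken from the hunk above (Xen interface headers assumed; not a drop-in replacement):

        static void hvm_map_shared_info(struct shared_info *page)
        {
                struct xen_add_to_physmap xatp = {
                        .domid = DOMID_SELF,
                        .idx   = 0,
                        .space = XENMAPSPACE_shared_info,
                        .gpfn  = __pa(page) >> PAGE_SHIFT,
                };

                /* error handling reduced to the BUG() used in the diff */
                if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
                        BUG();  /* the guest cannot continue without shared_info */

                HYPERVISOR_shared_info = page;
        }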
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 01de35c77221..e8e34938c57d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1178,20 +1178,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
1178 1178
1179static void xen_post_allocator_init(void); 1179static void xen_post_allocator_init(void);
1180 1180
1181static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1182{
1183 /* reserve the range used */
1184 native_pagetable_reserve(start, end);
1185
1186 /* set as RW the rest */
1187 printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
1188 PFN_PHYS(pgt_buf_top));
1189 while (end < PFN_PHYS(pgt_buf_top)) {
1190 make_lowmem_page_readwrite(__va(end));
1191 end += PAGE_SIZE;
1192 }
1193}
1194
1195#ifdef CONFIG_X86_64 1181#ifdef CONFIG_X86_64
1196static void __init xen_cleanhighmap(unsigned long vaddr, 1182static void __init xen_cleanhighmap(unsigned long vaddr,
1197 unsigned long vaddr_end) 1183 unsigned long vaddr_end)
@@ -1422,7 +1408,6 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
1422 xen_mc_callback(set_current_cr3, (void *)cr3); 1408 xen_mc_callback(set_current_cr3, (void *)cr3);
1423 } 1409 }
1424} 1410}
1425
1426static void xen_write_cr3(unsigned long cr3) 1411static void xen_write_cr3(unsigned long cr3)
1427{ 1412{
1428 BUG_ON(preemptible()); 1413 BUG_ON(preemptible());
@@ -1448,6 +1433,45 @@ static void xen_write_cr3(unsigned long cr3)
1448 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ 1433 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1449} 1434}
1450 1435
1436#ifdef CONFIG_X86_64
1437/*
1438 * At the start of the day - when Xen launches a guest, it has already
1439 * built pagetables for the guest. We diligently look over them
1440 * in xen_setup_kernel_pagetable and graft them, as appropriate, into the

1441 * init_level4_pgt and its friends. Then when we are happy we load
1442 * the new init_level4_pgt - and continue on.
1443 *
1444 * The generic code starts (start_kernel) and 'init_mem_mapping' sets
1445 * up the rest of the pagetables. When it has completed it loads the cr3.
1446 * N.B. that baremetal would start at 'start_kernel' (and the early
1447 * #PF handler would create bootstrap pagetables) - so we are running
1448 * with the same assumptions as what to do when write_cr3 is executed
1449 * at this point.
1450 *
1451 * Since there are no user-page tables at all, we have two variants
1452 * of xen_write_cr3 - the early bootup (this one), and the late one
1453 * (xen_write_cr3). The reason we have to do that is that in 64-bit
1454 * the Linux kernel and user-space are both in ring 3 while the
1455 * hypervisor is in ring 0.
1456 */
1457static void __init xen_write_cr3_init(unsigned long cr3)
1458{
1459 BUG_ON(preemptible());
1460
1461 xen_mc_batch(); /* disables interrupts */
1462
1463 /* Update while interrupts are disabled, so it's atomic with
1464 respect to IPIs */
1465 this_cpu_write(xen_cr3, cr3);
1466
1467 __xen_write_cr3(true, cr3);
1468
1469 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1470
1471 pv_mmu_ops.write_cr3 = &xen_write_cr3;
1472}
1473#endif
1474
1451static int xen_pgd_alloc(struct mm_struct *mm) 1475static int xen_pgd_alloc(struct mm_struct *mm)
1452{ 1476{
1453 pgd_t *pgd = mm->pgd; 1477 pgd_t *pgd = mm->pgd;
@@ -1503,19 +1527,6 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1503#else /* CONFIG_X86_64 */ 1527#else /* CONFIG_X86_64 */
1504static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) 1528static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1505{ 1529{
1506 unsigned long pfn = pte_pfn(pte);
1507
1508 /*
1509 * If the new pfn is within the range of the newly allocated
1510 * kernel pagetable, and it isn't being mapped into an
1511 * early_ioremap fixmap slot as a freshly allocated page, make sure
1512 * it is RO.
1513 */
1514 if (((!is_early_ioremap_ptep(ptep) &&
1515 pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
1516 (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
1517 pte = pte_wrprotect(pte);
1518
1519 return pte; 1530 return pte;
1520} 1531}
1521#endif /* CONFIG_X86_64 */ 1532#endif /* CONFIG_X86_64 */
@@ -2129,11 +2140,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2129 .write_cr2 = xen_write_cr2, 2140 .write_cr2 = xen_write_cr2,
2130 2141
2131 .read_cr3 = xen_read_cr3, 2142 .read_cr3 = xen_read_cr3,
2132#ifdef CONFIG_X86_32
2133 .write_cr3 = xen_write_cr3_init, 2143 .write_cr3 = xen_write_cr3_init,
2134#else
2135 .write_cr3 = xen_write_cr3,
2136#endif
2137 2144
2138 .flush_tlb_user = xen_flush_tlb, 2145 .flush_tlb_user = xen_flush_tlb,
2139 .flush_tlb_kernel = xen_flush_tlb, 2146 .flush_tlb_kernel = xen_flush_tlb,
@@ -2197,7 +2204,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2197 2204
2198void __init xen_init_mmu_ops(void) 2205void __init xen_init_mmu_ops(void)
2199{ 2206{
2200 x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
2201 x86_init.paging.pagetable_init = xen_pagetable_init; 2207 x86_init.paging.pagetable_init = xen_pagetable_init;
2202 pv_mmu_ops = xen_mmu_ops; 2208 pv_mmu_ops = xen_mmu_ops;
2203 2209
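Note: with this change 64-bit boots on xen_write_cr3_init() and switches pv_mmu_ops.write_cr3 over to xen_write_cr3() after the bootstrap load, so the CONFIG_X86_32 #ifdef around the ops table is no longer needed. A self-contained model of that one-shot handoff pattern (illustrative names only, not the kernel's code):

        struct mmu_ops {
                void (*write_cr3)(unsigned long cr3);
        };

        static void write_cr3_early(unsigned long cr3);
        static void write_cr3_late(unsigned long cr3);

        /* boot starts on the early variant */
        static struct mmu_ops mmu_ops = { .write_cr3 = write_cr3_early };

        static void write_cr3_late(unsigned long cr3)
        {
                /* regular implementation, used for the rest of the system's lifetime */
        }

        static void write_cr3_early(unsigned long cr3)
        {
                /* one-time bootstrap work: load cr3 while no user pagetables exist */

                /* from now on, route every call to the regular implementation */
                mmu_ops.write_cr3 = write_cr3_late;
        }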
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8971a26d21ab..94eac5c85cdc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -556,12 +556,9 @@ void __init xen_arch_setup(void)
556 COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); 556 COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
557 557
558 /* Set up idle, making sure it calls safe_halt() pvop */ 558 /* Set up idle, making sure it calls safe_halt() pvop */
559#ifdef CONFIG_X86_32
560 boot_cpu_data.hlt_works_ok = 1;
561#endif
562 disable_cpuidle(); 559 disable_cpuidle();
563 disable_cpufreq(); 560 disable_cpufreq();
564 WARN_ON(set_pm_idle_to_default()); 561 WARN_ON(xen_set_default_idle());
565 fiddle_vdso(); 562 fiddle_vdso();
566#ifdef CONFIG_NUMA 563#ifdef CONFIG_NUMA
567 numa_off = 1; 564 numa_off = 1;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 4f7d2599b484..09ea61d2e02f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -300,8 +300,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
300 gdt = get_cpu_gdt_table(cpu); 300 gdt = get_cpu_gdt_table(cpu);
301 301
302 ctxt->flags = VGCF_IN_KERNEL; 302 ctxt->flags = VGCF_IN_KERNEL;
303 ctxt->user_regs.ds = __USER_DS;
304 ctxt->user_regs.es = __USER_DS;
305 ctxt->user_regs.ss = __KERNEL_DS; 303 ctxt->user_regs.ss = __KERNEL_DS;
306#ifdef CONFIG_X86_32 304#ifdef CONFIG_X86_32
307 ctxt->user_regs.fs = __KERNEL_PERCPU; 305 ctxt->user_regs.fs = __KERNEL_PERCPU;
@@ -310,35 +308,41 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
310 ctxt->gs_base_kernel = per_cpu_offset(cpu); 308 ctxt->gs_base_kernel = per_cpu_offset(cpu);
311#endif 309#endif
312 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; 310 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
313 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
314 311
315 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); 312 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
316 313
317 xen_copy_trap_info(ctxt->trap_ctxt); 314 {
315 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
316 ctxt->user_regs.ds = __USER_DS;
317 ctxt->user_regs.es = __USER_DS;
318 318
319 ctxt->ldt_ents = 0; 319 xen_copy_trap_info(ctxt->trap_ctxt);
320 320
321 BUG_ON((unsigned long)gdt & ~PAGE_MASK); 321 ctxt->ldt_ents = 0;
322 322
323 gdt_mfn = arbitrary_virt_to_mfn(gdt); 323 BUG_ON((unsigned long)gdt & ~PAGE_MASK);
324 make_lowmem_page_readonly(gdt);
325 make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
326 324
327 ctxt->gdt_frames[0] = gdt_mfn; 325 gdt_mfn = arbitrary_virt_to_mfn(gdt);
328 ctxt->gdt_ents = GDT_ENTRIES; 326 make_lowmem_page_readonly(gdt);
327 make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
329 328
330 ctxt->user_regs.cs = __KERNEL_CS; 329 ctxt->gdt_frames[0] = gdt_mfn;
331 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); 330 ctxt->gdt_ents = GDT_ENTRIES;
332 331
333 ctxt->kernel_ss = __KERNEL_DS; 332 ctxt->kernel_ss = __KERNEL_DS;
334 ctxt->kernel_sp = idle->thread.sp0; 333 ctxt->kernel_sp = idle->thread.sp0;
335 334
336#ifdef CONFIG_X86_32 335#ifdef CONFIG_X86_32
337 ctxt->event_callback_cs = __KERNEL_CS; 336 ctxt->event_callback_cs = __KERNEL_CS;
338 ctxt->failsafe_callback_cs = __KERNEL_CS; 337 ctxt->failsafe_callback_cs = __KERNEL_CS;
339#endif 338#endif
340 ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; 339 ctxt->event_callback_eip =
341 ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; 340 (unsigned long)xen_hypervisor_callback;
341 ctxt->failsafe_callback_eip =
342 (unsigned long)xen_failsafe_callback;
343 }
344 ctxt->user_regs.cs = __KERNEL_CS;
345 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
342 346
343 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); 347 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
344 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); 348 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
@@ -432,13 +436,6 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
432 play_dead_common(); 436 play_dead_common();
433 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); 437 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
434 cpu_bringup(); 438 cpu_bringup();
435 /*
436 * Balance out the preempt calls - as we are running in cpu_idle
437 * loop which has been called at bootup from cpu_bringup_and_idle.
438 * The cpucpu_bringup_and_idle called cpu_bringup which made a
439 * preempt_disable() So this preempt_enable will balance it out.
440 */
441 preempt_enable();
442} 439}
443 440
444#else /* !CONFIG_HOTPLUG_CPU */ 441#else /* !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 83e866d714ce..f7a080ef0354 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -328,7 +328,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
328 if (per_cpu(lock_spinners, cpu) == xl) { 328 if (per_cpu(lock_spinners, cpu) == xl) {
329 ADD_STATS(released_slow_kicked, 1); 329 ADD_STATS(released_slow_kicked, 1);
330 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 330 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
331 break;
332 } 331 }
333 } 332 }
334} 333}
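Note: with the break removed, xen_spin_unlock_slow() keeps scanning after the first match, so every CPU recorded in lock_spinners for this lock gets the unlock IPI rather than only the first one found. The resulting loop shape, simplified from the hunk above (stats bookkeeping omitted):

        for_each_online_cpu(cpu) {
                if (per_cpu(lock_spinners, cpu) == xl)  /* cpu is spinning on this lock */
                        xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
        }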
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index ae8a00c39de4..45329c8c226e 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM 31#ifdef CONFIG_XEN_PVHVM
32 int cpu; 32 int cpu;
33 xen_hvm_resume_shared_info(); 33 xen_hvm_init_shared_info();
34 xen_callback_vector(); 34 xen_callback_vector();
35 xen_unplug_emulated_devices(); 35 xen_unplug_emulated_devices();
36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) { 36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index f9643fc50de5..33ca6e42a4ca 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -89,11 +89,11 @@ ENTRY(xen_iret)
89 */ 89 */
90#ifdef CONFIG_SMP 90#ifdef CONFIG_SMP
91 GET_THREAD_INFO(%eax) 91 GET_THREAD_INFO(%eax)
92 movl TI_cpu(%eax), %eax 92 movl %ss:TI_cpu(%eax), %eax
93 movl __per_cpu_offset(,%eax,4), %eax 93 movl %ss:__per_cpu_offset(,%eax,4), %eax
94 mov xen_vcpu(%eax), %eax 94 mov %ss:xen_vcpu(%eax), %eax
95#else 95#else
96 movl xen_vcpu, %eax 96 movl %ss:xen_vcpu, %eax
97#endif 97#endif
98 98
99 /* check IF state we're restoring */ 99 /* check IF state we're restoring */
@@ -106,11 +106,11 @@ ENTRY(xen_iret)
106 * resuming the code, so we don't have to be worried about 106 * resuming the code, so we don't have to be worried about
107 * being preempted to another CPU. 107 * being preempted to another CPU.
108 */ 108 */
109 setz XEN_vcpu_info_mask(%eax) 109 setz %ss:XEN_vcpu_info_mask(%eax)
110xen_iret_start_crit: 110xen_iret_start_crit:
111 111
112 /* check for unmasked and pending */ 112 /* check for unmasked and pending */
113 cmpw $0x0001, XEN_vcpu_info_pending(%eax) 113 cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
114 114
115 /* 115 /*
116 * If there's something pending, mask events again so we can 116 * If there's something pending, mask events again so we can
@@ -118,7 +118,7 @@ xen_iret_start_crit:
118 * touch XEN_vcpu_info_mask. 118 * touch XEN_vcpu_info_mask.
119 */ 119 */
120 jne 1f 120 jne 1f
121 movb $1, XEN_vcpu_info_mask(%eax) 121 movb $1, %ss:XEN_vcpu_info_mask(%eax)
122 122
1231: popl %eax 1231: popl %eax
124 124
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d2e73d19d366..a95b41744ad0 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -40,7 +40,7 @@ void xen_enable_syscall(void);
40void xen_vcpu_restore(void); 40void xen_vcpu_restore(void);
41 41
42void xen_callback_vector(void); 42void xen_callback_vector(void);
43void xen_hvm_resume_shared_info(void); 43void xen_hvm_init_shared_info(void);
44void xen_unplug_emulated_devices(void); 44void xen_unplug_emulated_devices(void);
45 45
46void __init xen_build_dynamic_phys_to_machine(void); 46void __init xen_build_dynamic_phys_to_machine(void);