author     H. Peter Anvin <hpa@linux.intel.com>    2013-04-20 12:16:44 -0400
committer  H. Peter Anvin <hpa@linux.intel.com>    2013-04-20 12:16:44 -0400
commit     f53f292eeaa234615c31a1306babe703fc4263f2 (patch)
tree       707b0933a20f7dc05495e974243a23b5c9f8c918 /arch/x86
parent     15b9c359f288b09003cb70f7ed204affc0c6614d (diff)
parent     a9499fa7cd3fd4824a7202d00c766b269fa3bda6 (diff)
Merge remote-tracking branch 'efi/chainsaw' into x86/efi
Resolved Conflicts:
    drivers/firmware/efivars.c
    fs/efivarfs/file.c

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 92
-rw-r--r--  arch/x86/Makefile | 4
-rw-r--r--  arch/x86/boot/boot.h | 18
-rw-r--r--  arch/x86/boot/cmdline.c | 12
-rw-r--r--  arch/x86/boot/compressed/cmdline.c | 12
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 48
-rw-r--r--  arch/x86/boot/compressed/misc.c | 2
-rw-r--r--  arch/x86/boot/compressed/misc.h | 1
-rw-r--r--  arch/x86/boot/header.S | 10
-rw-r--r--  arch/x86/configs/i386_defconfig | 1
-rw-r--r--  arch/x86/crypto/Makefile | 2
-rw-r--r--  arch/x86/crypto/aes-i586-asm_32.S | 15
-rw-r--r--  arch/x86/crypto/aes-x86_64-asm_64.S | 30
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 23
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 37
-rw-r--r--  arch/x86/crypto/blowfish-x86_64-asm_64.S | 39
-rw-r--r--  arch/x86/crypto/camellia-aesni-avx-asm_64.S | 38
-rw-r--r--  arch/x86/crypto/camellia-x86_64-asm_64.S | 50
-rw-r--r--  arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 48
-rw-r--r--  arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 35
-rw-r--r--  arch/x86/crypto/crc32-pclmul_asm.S | 246
-rw-r--r--  arch/x86/crypto/crc32-pclmul_glue.c | 201
-rw-r--r--  arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 8
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_asm.S | 4
-rw-r--r--  arch/x86/crypto/salsa20-i586-asm_32.S | 28
-rw-r--r--  arch/x86/crypto/salsa20-x86_64-asm_64.S | 28
-rw-r--r--  arch/x86/crypto/salsa20_glue.c | 5
-rw-r--r--  arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 35
-rw-r--r--  arch/x86/crypto/serpent-sse2-i586-asm_32.S | 20
-rw-r--r--  arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 20
-rw-r--r--  arch/x86/crypto/sha1_ssse3_asm.S | 10
-rw-r--r--  arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 35
-rw-r--r--  arch/x86/crypto/twofish-i586-asm_32.S | 11
-rw-r--r--  arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 20
-rw-r--r--  arch/x86/crypto/twofish-x86_64-asm_64.S | 11
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 6
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 50
-rw-r--r--  arch/x86/ia32/ia32entry.S | 16
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 176
-rw-r--r--  arch/x86/include/asm/acpi.h | 4
-rw-r--r--  arch/x86/include/asm/amd_nb.h | 17
-rw-r--r--  arch/x86/include/asm/bootparam_utils.h | 54
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 2
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 5
-rw-r--r--  arch/x86/include/asm/ftrace.h | 25
-rw-r--r--  arch/x86/include/asm/hpet.h | 5
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 13
-rw-r--r--  arch/x86/include/asm/hypervisor.h | 13
-rw-r--r--  arch/x86/include/asm/ia32.h | 15
-rw-r--r--  arch/x86/include/asm/init.h | 28
-rw-r--r--  arch/x86/include/asm/io_apic.h | 28
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 40
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 4
-rw-r--r--  arch/x86/include/asm/kexec.h | 6
-rw-r--r--  arch/x86/include/asm/kprobes.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 30
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 10
-rw-r--r--  arch/x86/include/asm/linkage.h | 18
-rw-r--r--  arch/x86/include/asm/mce.h | 84
-rw-r--r--  arch/x86/include/asm/microcode.h | 14
-rw-r--r--  arch/x86/include/asm/microcode_intel.h | 85
-rw-r--r--  arch/x86/include/asm/mmzone_32.h | 6
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 4
-rw-r--r--  arch/x86/include/asm/mwait.h | 3
-rw-r--r--  arch/x86/include/asm/numa.h | 6
-rw-r--r--  arch/x86/include/asm/numa_64.h | 6
-rw-r--r--  arch/x86/include/asm/page.h | 7
-rw-r--r--  arch/x86/include/asm/page_32.h | 1
-rw-r--r--  arch/x86/include/asm/page_64.h | 36
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 22
-rw-r--r--  arch/x86/include/asm/page_types.h | 2
-rw-r--r--  arch/x86/include/asm/pci.h | 6
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 1
-rw-r--r--  arch/x86/include/asm/perf_event.h | 13
-rw-r--r--  arch/x86/include/asm/pgtable.h | 34
-rw-r--r--  arch/x86/include/asm/pgtable_32.h | 7
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 5
-rw-r--r--  arch/x86/include/asm/processor.h | 29
-rw-r--r--  arch/x86/include/asm/proto.h | 2
-rw-r--r--  arch/x86/include/asm/realmode.h | 3
-rw-r--r--  arch/x86/include/asm/required-features.h | 8
-rw-r--r--  arch/x86/include/asm/signal.h | 22
-rw-r--r--  arch/x86/include/asm/sys_ia32.h | 17
-rw-r--r--  arch/x86/include/asm/syscalls.h | 13
-rw-r--r--  arch/x86/include/asm/thread_info.h | 1
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 18
-rw-r--r--  arch/x86/include/asm/uaccess.h | 55
-rw-r--r--  arch/x86/include/asm/unistd.h | 2
-rw-r--r--  arch/x86/include/asm/uv/uv_hub.h | 44
-rw-r--r--  arch/x86/include/asm/uv/uv_mmrs.h | 1496
-rw-r--r--  arch/x86/include/asm/vmx.h | 18
-rw-r--r--  arch/x86/include/asm/x86_init.h | 39
-rw-r--r--  arch/x86/include/asm/xen/events.h | 3
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h | 4
-rw-r--r--  arch/x86/include/asm/xen/page.h | 2
-rw-r--r--  arch/x86/include/asm/xor.h | 491
-rw-r--r--  arch/x86/include/asm/xor_32.h | 309
-rw-r--r--  arch/x86/include/asm/xor_64.h | 305
-rw-r--r--  arch/x86/include/uapi/asm/mce.h | 87
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 7
-rw-r--r--  arch/x86/include/uapi/asm/signal.h | 8
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h | 9
-rw-r--r--  arch/x86/kernel/Makefile | 6
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 5
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 2
-rw-r--r--  arch/x86/kernel/amd_gart_64.c | 5
-rw-r--r--  arch/x86/kernel/apb_timer.c | 10
-rw-r--r--  arch/x86/kernel/apic/apic.c | 30
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 1
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 457
-rw-r--r--  arch/x86/kernel/apic/ipi.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 21
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 206
-rw-r--r--  arch/x86/kernel/apm_32.c | 68
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 68
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 27
-rw-r--r--  arch/x86/kernel/cpu/common.c | 17
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c | 7
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 3
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 9
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 16
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 54
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 15
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 25
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 322
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 33
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 10
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c | 2
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 2
-rw-r--r--  arch/x86/kernel/cpu/vmware.c | 13
-rw-r--r--  arch/x86/kernel/cpuid.c | 4
-rw-r--r--  arch/x86/kernel/dumpstack.c | 2
-rw-r--r--  arch/x86/kernel/e820.c | 16
-rw-r--r--  arch/x86/kernel/entry_32.S | 54
-rw-r--r--  arch/x86/kernel/entry_64.S | 41
-rw-r--r--  arch/x86/kernel/ftrace.c | 4
-rw-r--r--  arch/x86/kernel/head.c | 53
-rw-r--r--  arch/x86/kernel/head32.c | 21
-rw-r--r--  arch/x86/kernel/head64.c | 146
-rw-r--r--  arch/x86/kernel/head_32.S | 104
-rw-r--r--  arch/x86/kernel/head_64.S | 216
-rw-r--r--  arch/x86/kernel/hpet.c | 2
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c | 1
-rw-r--r--  arch/x86/kernel/ioport.c | 3
-rw-r--r--  arch/x86/kernel/kprobes/Makefile | 7
-rw-r--r--  arch/x86/kernel/kprobes/common.h (renamed from arch/x86/kernel/kprobes-common.h) | 11
-rw-r--r--  arch/x86/kernel/kprobes/core.c (renamed from arch/x86/kernel/kprobes.c) | 89
-rw-r--r--  arch/x86/kernel/kprobes/ftrace.c | 93
-rw-r--r--  arch/x86/kernel/kprobes/opt.c (renamed from arch/x86/kernel/kprobes-opt.c) | 2
-rw-r--r--  arch/x86/kernel/kvm.c | 12
-rw-r--r--  arch/x86/kernel/kvmclock.c | 15
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 171
-rw-r--r--  arch/x86/kernel/microcode_core.c | 7
-rw-r--r--  arch/x86/kernel/microcode_core_early.c | 76
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 198
-rw-r--r--  arch/x86/kernel/microcode_intel_early.c | 796
-rw-r--r--  arch/x86/kernel/microcode_intel_lib.c | 174
-rw-r--r--  arch/x86/kernel/msr.c | 9
-rw-r--r--  arch/x86/kernel/nmi.c | 1
-rw-r--r--  arch/x86/kernel/process.c | 122
-rw-r--r--  arch/x86/kernel/process_64.c | 2
-rw-r--r--  arch/x86/kernel/ptrace.c | 2
-rw-r--r--  arch/x86/kernel/pvclock.c | 2
-rw-r--r--  arch/x86/kernel/rtc.c | 1
-rw-r--r--  arch/x86/kernel/setup.c | 270
-rw-r--r--  arch/x86/kernel/signal.c | 184
-rw-r--r--  arch/x86/kernel/smpboot.c | 5
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 2
-rw-r--r--  arch/x86/kernel/traps.c | 9
-rw-r--r--  arch/x86/kernel/tsc.c | 3
-rw-r--r--  arch/x86/kernel/uprobes.c | 4
-rw-r--r--  arch/x86/kernel/vm86_32.c | 8
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 3
-rw-r--r--  arch/x86/kernel/x86_init.c | 28
-rw-r--r--  arch/x86/kvm/emulate.c | 673
-rw-r--r--  arch/x86/kvm/i8254.c | 1
-rw-r--r--  arch/x86/kvm/i8259.c | 2
-rw-r--r--  arch/x86/kvm/irq.c | 74
-rw-r--r--  arch/x86/kvm/lapic.c | 140
-rw-r--r--  arch/x86/kvm/lapic.h | 34
-rw-r--r--  arch/x86/kvm/mmu.c | 194
-rw-r--r--  arch/x86/kvm/mmutrace.h | 6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 106
-rw-r--r--  arch/x86/kvm/svm.c | 24
-rw-r--r--  arch/x86/kvm/vmx.c | 714
-rw-r--r--  arch/x86/kvm/x86.c | 248
-rw-r--r--  arch/x86/lguest/Kconfig | 1
-rw-r--r--  arch/x86/lguest/boot.c | 3
-rw-r--r--  arch/x86/lib/getuser.S | 43
-rw-r--r--  arch/x86/lib/usercopy_64.c | 4
-rw-r--r--  arch/x86/mm/fault.c | 16
-rw-r--r--  arch/x86/mm/init.c | 468
-rw-r--r--  arch/x86/mm/init_32.c | 118
-rw-r--r--  arch/x86/mm/init_64.c | 659
-rw-r--r--  arch/x86/mm/memtest.c | 10
-rw-r--r--  arch/x86/mm/mm_internal.h | 19
-rw-r--r--  arch/x86/mm/numa.c | 41
-rw-r--r--  arch/x86/mm/numa_32.c | 161
-rw-r--r--  arch/x86/mm/numa_64.c | 13
-rw-r--r--  arch/x86/mm/numa_internal.h | 6
-rw-r--r--  arch/x86/mm/pageattr.c | 163
-rw-r--r--  arch/x86/mm/pat.c | 11
-rw-r--r--  arch/x86/mm/pgtable.c | 7
-rw-r--r--  arch/x86/mm/physaddr.c | 60
-rw-r--r--  arch/x86/mm/srat.c | 29
-rw-r--r--  arch/x86/mm/tlb.c | 2
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 40
-rw-r--r--  arch/x86/pci/acpi.c | 9
-rw-r--r--  arch/x86/pci/common.c | 1
-rw-r--r--  arch/x86/pci/i386.c | 185
-rw-r--r--  arch/x86/pci/legacy.c | 2
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 3
-rw-r--r--  arch/x86/pci/numaq_32.c | 2
-rw-r--r--  arch/x86/pci/xen.c | 9
-rw-r--r--  arch/x86/platform/Makefile | 2
-rw-r--r--  arch/x86/platform/efi/efi-bgrt.c | 7
-rw-r--r--  arch/x86/platform/efi/efi.c | 16
-rw-r--r--  arch/x86/platform/goldfish/Makefile | 1
-rw-r--r--  arch/x86/platform/goldfish/goldfish.c | 51
-rw-r--r--  arch/x86/platform/olpc/olpc-xo15-sci.c | 2
-rw-r--r--  arch/x86/platform/sfi/sfi.c | 2
-rw-r--r--  arch/x86/platform/ts5500/Makefile | 1
-rw-r--r--  arch/x86/platform/ts5500/ts5500.c | 339
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 4
-rw-r--r--  arch/x86/platform/uv/uv_time.c | 13
-rw-r--r--  arch/x86/power/cpu.c | 2
-rw-r--r--  arch/x86/power/hibernate_32.c | 2
-rw-r--r--  arch/x86/power/hibernate_64.c | 66
-rw-r--r--  arch/x86/realmode/init.c | 49
-rw-r--r--  arch/x86/syscalls/syscall_32.tbl | 28
-rw-r--r--  arch/x86/syscalls/syscall_64.tbl | 6
-rw-r--r--  arch/x86/tools/insn_sanity.c | 10
-rw-r--r--  arch/x86/um/Kconfig | 6
-rw-r--r--  arch/x86/um/Makefile | 4
-rw-r--r--  arch/x86/um/fault.c | 2
-rw-r--r--  arch/x86/um/shared/sysdep/syscalls_32.h | 5
-rw-r--r--  arch/x86/um/signal.c | 15
-rw-r--r--  arch/x86/um/sys_call_table_32.c | 4
-rw-r--r--  arch/x86/um/syscalls_32.c | 38
-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 2
-rw-r--r--  arch/x86/xen/enlighten.c | 88
-rw-r--r--  arch/x86/xen/mmu.c | 71
-rw-r--r--  arch/x86/xen/setup.c | 5
-rw-r--r--  arch/x86/xen/smp.c | 42
-rw-r--r--  arch/x86/xen/spinlock.c | 1
-rw-r--r--  arch/x86/xen/suspend.c | 2
-rw-r--r--  arch/x86/xen/xen-asm_32.S | 14
-rw-r--r--  arch/x86/xen/xen-ops.h | 2
254 files changed, 9392 insertions, 5339 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 225543bf45a5..70c0f3da0476 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1,7 +1,7 @@
1# Select 32 or 64 bit 1# Select 32 or 64 bit
2config 64BIT 2config 64BIT
3 bool "64-bit kernel" if ARCH = "x86" 3 bool "64-bit kernel" if ARCH = "x86"
4 default ARCH = "x86_64" 4 default ARCH != "i386"
5 ---help--- 5 ---help---
6 Say yes to build a 64-bit kernel - formerly known as x86_64 6 Say yes to build a 64-bit kernel - formerly known as x86_64
7 Say no to build a 32-bit kernel - formerly known as i386 7 Say no to build a 32-bit kernel - formerly known as i386
@@ -28,7 +28,6 @@ config X86
28 select HAVE_OPROFILE 28 select HAVE_OPROFILE
29 select HAVE_PCSPKR_PLATFORM 29 select HAVE_PCSPKR_PLATFORM
30 select HAVE_PERF_EVENTS 30 select HAVE_PERF_EVENTS
31 select HAVE_IRQ_WORK
32 select HAVE_IOREMAP_PROT 31 select HAVE_IOREMAP_PROT
33 select HAVE_KPROBES 32 select HAVE_KPROBES
34 select HAVE_MEMBLOCK 33 select HAVE_MEMBLOCK
@@ -40,10 +39,12 @@ config X86
40 select HAVE_DMA_CONTIGUOUS if !SWIOTLB 39 select HAVE_DMA_CONTIGUOUS if !SWIOTLB
41 select HAVE_KRETPROBES 40 select HAVE_KRETPROBES
42 select HAVE_OPTPROBES 41 select HAVE_OPTPROBES
42 select HAVE_KPROBES_ON_FTRACE
43 select HAVE_FTRACE_MCOUNT_RECORD 43 select HAVE_FTRACE_MCOUNT_RECORD
44 select HAVE_FENTRY if X86_64 44 select HAVE_FENTRY if X86_64
45 select HAVE_C_RECORDMCOUNT 45 select HAVE_C_RECORDMCOUNT
46 select HAVE_DYNAMIC_FTRACE 46 select HAVE_DYNAMIC_FTRACE
47 select HAVE_DYNAMIC_FTRACE_WITH_REGS
47 select HAVE_FUNCTION_TRACER 48 select HAVE_FUNCTION_TRACER
48 select HAVE_FUNCTION_GRAPH_TRACER 49 select HAVE_FUNCTION_GRAPH_TRACER
49 select HAVE_FUNCTION_GRAPH_FP_TEST 50 select HAVE_FUNCTION_GRAPH_FP_TEST
@@ -106,14 +107,19 @@ config X86
106 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) 107 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
107 select GENERIC_TIME_VSYSCALL if X86_64 108 select GENERIC_TIME_VSYSCALL if X86_64
108 select KTIME_SCALAR if X86_32 109 select KTIME_SCALAR if X86_32
110 select ALWAYS_USE_PERSISTENT_CLOCK
109 select GENERIC_STRNCPY_FROM_USER 111 select GENERIC_STRNCPY_FROM_USER
110 select GENERIC_STRNLEN_USER 112 select GENERIC_STRNLEN_USER
111 select HAVE_CONTEXT_TRACKING if X86_64 113 select HAVE_CONTEXT_TRACKING if X86_64
112 select HAVE_IRQ_TIME_ACCOUNTING 114 select HAVE_IRQ_TIME_ACCOUNTING
115 select VIRT_TO_BUS
113 select MODULES_USE_ELF_REL if X86_32 116 select MODULES_USE_ELF_REL if X86_32
114 select MODULES_USE_ELF_RELA if X86_64 117 select MODULES_USE_ELF_RELA if X86_64
115 select CLONE_BACKWARDS if X86_32 118 select CLONE_BACKWARDS if X86_32
116 select GENERIC_SIGALTSTACK 119 select ARCH_USE_BUILTIN_BSWAP
120 select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
121 select OLD_SIGACTION if X86_32
122 select COMPAT_OLD_SIGACTION if IA32_EMULATION
117 123
118config INSTRUCTION_DECODER 124config INSTRUCTION_DECODER
119 def_bool y 125 def_bool y
@@ -222,7 +228,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
222 228
223config HAVE_INTEL_TXT 229config HAVE_INTEL_TXT
224 def_bool y 230 def_bool y
225 depends on EXPERIMENTAL && INTEL_IOMMU && ACPI 231 depends on INTEL_IOMMU && ACPI
226 232
227config X86_32_SMP 233config X86_32_SMP
228 def_bool y 234 def_bool y
@@ -320,6 +326,10 @@ config X86_BIGSMP
320 ---help--- 326 ---help---
321 This option is needed for the systems that have more than 8 CPUs 327 This option is needed for the systems that have more than 8 CPUs
322 328
329config GOLDFISH
330 def_bool y
331 depends on X86_GOLDFISH
332
323if X86_32 333if X86_32
324config X86_EXTENDED_PLATFORM 334config X86_EXTENDED_PLATFORM
325 bool "Support for extended (non-PC) x86 platforms" 335 bool "Support for extended (non-PC) x86 platforms"
@@ -402,6 +412,14 @@ config X86_UV
402# Following is an alphabetically sorted list of 32 bit extended platforms 412# Following is an alphabetically sorted list of 32 bit extended platforms
403# Please maintain the alphabetic order if and when there are additions 413# Please maintain the alphabetic order if and when there are additions
404 414
415config X86_GOLDFISH
416 bool "Goldfish (Virtual Platform)"
417 depends on X86_32
418 ---help---
419 Enable support for the Goldfish virtual platform used primarily
420 for Android development. Unless you are building for the Android
421 Goldfish emulator say N here.
422
405config X86_INTEL_CE 423config X86_INTEL_CE
406 bool "CE4100 TV platform" 424 bool "CE4100 TV platform"
407 depends on PCI 425 depends on PCI
@@ -454,6 +472,16 @@ config X86_MDFLD
454 472
455endif 473endif
456 474
475config X86_INTEL_LPSS
476 bool "Intel Low Power Subsystem Support"
477 depends on ACPI
478 select COMMON_CLK
479 ---help---
480 Select to build support for Intel Low Power Subsystem such as
481 found on Intel Lynxpoint PCH. Selecting this option enables
482 things like clock tree (common clock framework) which are needed
483 by the LPSS peripheral drivers.
484
457config X86_RDC321X 485config X86_RDC321X
458 bool "RDC R-321x SoC" 486 bool "RDC R-321x SoC"
459 depends on X86_32 487 depends on X86_32
@@ -617,7 +645,7 @@ config PARAVIRT
617 645
618config PARAVIRT_SPINLOCKS 646config PARAVIRT_SPINLOCKS
619 bool "Paravirtualization layer for spinlocks" 647 bool "Paravirtualization layer for spinlocks"
620 depends on PARAVIRT && SMP && EXPERIMENTAL 648 depends on PARAVIRT && SMP
621 ---help--- 649 ---help---
622 Paravirtualized spinlocks allow a pvops backend to replace the 650 Paravirtualized spinlocks allow a pvops backend to replace the
623 spinlock implementation with something virtualization-friendly 651 spinlock implementation with something virtualization-friendly
@@ -729,7 +757,7 @@ config GART_IOMMU
729config CALGARY_IOMMU 757config CALGARY_IOMMU
730 bool "IBM Calgary IOMMU support" 758 bool "IBM Calgary IOMMU support"
731 select SWIOTLB 759 select SWIOTLB
732 depends on X86_64 && PCI && EXPERIMENTAL 760 depends on X86_64 && PCI
733 ---help--- 761 ---help---
734 Support for hardware IOMMUs in IBM's xSeries x366 and x460 762 Support for hardware IOMMUs in IBM's xSeries x366 and x460
735 systems. Needed to run systems with more than 3GB of memory 763 systems. Needed to run systems with more than 3GB of memory
@@ -771,7 +799,7 @@ config IOMMU_HELPER
771 799
772config MAXSMP 800config MAXSMP
773 bool "Enable Maximum number of SMP Processors and NUMA Nodes" 801 bool "Enable Maximum number of SMP Processors and NUMA Nodes"
774 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL 802 depends on X86_64 && SMP && DEBUG_KERNEL
775 select CPUMASK_OFFSTACK 803 select CPUMASK_OFFSTACK
776 ---help--- 804 ---help---
777 Enable maximum number of CPUS and NUMA Nodes for this architecture. 805 Enable maximum number of CPUS and NUMA Nodes for this architecture.
@@ -1029,6 +1057,24 @@ config MICROCODE_OLD_INTERFACE
1029 def_bool y 1057 def_bool y
1030 depends on MICROCODE 1058 depends on MICROCODE
1031 1059
1060config MICROCODE_INTEL_LIB
1061 def_bool y
1062 depends on MICROCODE_INTEL
1063
1064config MICROCODE_INTEL_EARLY
1065 bool "Early load microcode"
1066 depends on MICROCODE_INTEL && BLK_DEV_INITRD
1067 default y
1068 help
1069 This option provides functionality to read additional microcode data
1070 at the beginning of initrd image. The data tells kernel to load
1071 microcode to CPU's as early as possible. No functional change if no
1072 microcode data is glued to the initrd, therefore it's safe to say Y.
1073
1074config MICROCODE_EARLY
1075 def_bool y
1076 depends on MICROCODE_INTEL_EARLY
1077
1032config X86_MSR 1078config X86_MSR
1033 tristate "/dev/cpu/*/msr - Model-specific register support" 1079 tristate "/dev/cpu/*/msr - Model-specific register support"
1034 ---help--- 1080 ---help---
@@ -1107,7 +1153,6 @@ config HIGHMEM64G
1107endchoice 1153endchoice
1108 1154
1109choice 1155choice
1110 depends on EXPERIMENTAL
1111 prompt "Memory split" if EXPERT 1156 prompt "Memory split" if EXPERT
1112 default VMSPLIT_3G 1157 default VMSPLIT_3G
1113 depends on X86_32 1158 depends on X86_32
@@ -1184,7 +1229,7 @@ config DIRECT_GBPAGES
1184config NUMA 1229config NUMA
1185 bool "Numa Memory Allocation and Scheduler Support" 1230 bool "Numa Memory Allocation and Scheduler Support"
1186 depends on SMP 1231 depends on SMP
1187 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) 1232 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI))
1188 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) 1233 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
1189 ---help--- 1234 ---help---
1190 Enable NUMA (Non Uniform Memory Access) support. 1235 Enable NUMA (Non Uniform Memory Access) support.
@@ -1253,10 +1298,6 @@ config NODES_SHIFT
1253 Specify the maximum number of NUMA Nodes available on the target 1298 Specify the maximum number of NUMA Nodes available on the target
1254 system. Increases memory reserved to accommodate various tables. 1299 system. Increases memory reserved to accommodate various tables.
1255 1300
1256config HAVE_ARCH_ALLOC_REMAP
1257 def_bool y
1258 depends on X86_32 && NUMA
1259
1260config ARCH_HAVE_MEMORY_PRESENT 1301config ARCH_HAVE_MEMORY_PRESENT
1261 def_bool y 1302 def_bool y
1262 depends on X86_32 && DISCONTIGMEM 1303 depends on X86_32 && DISCONTIGMEM
@@ -1279,7 +1320,7 @@ config ARCH_DISCONTIGMEM_DEFAULT
1279 1320
1280config ARCH_SPARSEMEM_ENABLE 1321config ARCH_SPARSEMEM_ENABLE
1281 def_bool y 1322 def_bool y
1282 depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD 1323 depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD
1283 select SPARSEMEM_STATIC if X86_32 1324 select SPARSEMEM_STATIC if X86_32
1284 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 1325 select SPARSEMEM_VMEMMAP_ENABLE if X86_64
1285 1326
@@ -1593,8 +1634,7 @@ config CRASH_DUMP
1593 For more details see Documentation/kdump/kdump.txt 1634 For more details see Documentation/kdump/kdump.txt
1594 1635
1595config KEXEC_JUMP 1636config KEXEC_JUMP
1596 bool "kexec jump (EXPERIMENTAL)" 1637 bool "kexec jump"
1597 depends on EXPERIMENTAL
1598 depends on KEXEC && HIBERNATION 1638 depends on KEXEC && HIBERNATION
1599 ---help--- 1639 ---help---
1600 Jump between original kernel and kexeced kernel and invoke 1640 Jump between original kernel and kexeced kernel and invoke
@@ -1699,7 +1739,7 @@ config HOTPLUG_CPU
1699config BOOTPARAM_HOTPLUG_CPU0 1739config BOOTPARAM_HOTPLUG_CPU0
1700 bool "Set default setting of cpu0_hotpluggable" 1740 bool "Set default setting of cpu0_hotpluggable"
1701 default n 1741 default n
1702 depends on HOTPLUG_CPU && EXPERIMENTAL 1742 depends on HOTPLUG_CPU
1703 ---help--- 1743 ---help---
1704 Set whether default state of cpu0_hotpluggable is on or off. 1744 Set whether default state of cpu0_hotpluggable is on or off.
1705 1745
@@ -1728,7 +1768,7 @@ config BOOTPARAM_HOTPLUG_CPU0
1728config DEBUG_HOTPLUG_CPU0 1768config DEBUG_HOTPLUG_CPU0
1729 def_bool n 1769 def_bool n
1730 prompt "Debug CPU0 hotplug" 1770 prompt "Debug CPU0 hotplug"
1731 depends on HOTPLUG_CPU && EXPERIMENTAL 1771 depends on HOTPLUG_CPU
1732 ---help--- 1772 ---help---
1733 Enabling this option offlines CPU0 (if CPU0 can be offlined) as 1773 Enabling this option offlines CPU0 (if CPU0 can be offlined) as
1734 soon as possible and boots up userspace with CPU0 offlined. User 1774 soon as possible and boots up userspace with CPU0 offlined. User
@@ -1912,6 +1952,7 @@ config APM_DO_ENABLE
1912 this feature. 1952 this feature.
1913 1953
1914config APM_CPU_IDLE 1954config APM_CPU_IDLE
1955 depends on CPU_IDLE
1915 bool "Make CPU Idle calls when idle" 1956 bool "Make CPU Idle calls when idle"
1916 ---help--- 1957 ---help---
1917 Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. 1958 Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
@@ -2037,7 +2078,7 @@ config PCI_MMCONFIG
2037 2078
2038config PCI_CNB20LE_QUIRK 2079config PCI_CNB20LE_QUIRK
2039 bool "Read CNB20LE Host Bridge Windows" if EXPERT 2080 bool "Read CNB20LE Host Bridge Windows" if EXPERT
2040 depends on PCI && EXPERIMENTAL 2081 depends on PCI
2041 help 2082 help
2042 Read the PCI windows out of the CNB20LE host bridge. This allows 2083 Read the PCI windows out of the CNB20LE host bridge. This allows
2043 PCI hotplug to work on systems with the CNB20LE chipset which do 2084 PCI hotplug to work on systems with the CNB20LE chipset which do
@@ -2188,6 +2229,15 @@ config GEOS
2188 ---help--- 2229 ---help---
2189 This option enables system support for the Traverse Technologies GEOS. 2230 This option enables system support for the Traverse Technologies GEOS.
2190 2231
2232config TS5500
2233 bool "Technologic Systems TS-5500 platform support"
2234 depends on MELAN
2235 select CHECK_SIGNATURE
2236 select NEW_LEDS
2237 select LEDS_CLASS
2238 ---help---
2239 This option enables system support for the Technologic Systems TS-5500.
2240
2191endif # X86_32 2241endif # X86_32
2192 2242
2193config AMD_NB 2243config AMD_NB
@@ -2232,8 +2282,8 @@ config IA32_AOUT
2232 Support old a.out binaries in the 32bit emulation. 2282 Support old a.out binaries in the 32bit emulation.
2233 2283
2234config X86_X32 2284config X86_X32
2235 bool "x32 ABI for 64-bit mode (EXPERIMENTAL)" 2285 bool "x32 ABI for 64-bit mode"
2236 depends on X86_64 && IA32_EMULATION && EXPERIMENTAL 2286 depends on X86_64 && IA32_EMULATION
2237 ---help--- 2287 ---help---
2238 Include code to run binaries for the x32 native 32-bit ABI 2288 Include code to run binaries for the x32 native 32-bit ABI
2239 for 64-bit processors. An x32 process gets access to the 2289 for 64-bit processors. An x32 process gets access to the
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e71fc4279aab..5c477260294f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -2,7 +2,11 @@
2 2
3# select defconfig based on actual architecture 3# select defconfig based on actual architecture
4ifeq ($(ARCH),x86) 4ifeq ($(ARCH),x86)
5 ifeq ($(shell uname -m),x86_64)
6 KBUILD_DEFCONFIG := x86_64_defconfig
7 else
5 KBUILD_DEFCONFIG := i386_defconfig 8 KBUILD_DEFCONFIG := i386_defconfig
9 endif
6else 10else
7 KBUILD_DEFCONFIG := $(ARCH)_defconfig 11 KBUILD_DEFCONFIG := $(ARCH)_defconfig
8endif 12endif
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 18997e5a1053..5b7531966b84 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -285,16 +285,26 @@ struct biosregs {
285void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); 285void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
286 286
287/* cmdline.c */ 287/* cmdline.c */
288int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize); 288int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
289int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option); 289int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
290static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) 290static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
291{ 291{
292 return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize); 292 unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
293
294 if (cmd_line_ptr >= 0x100000)
295 return -1; /* inaccessible */
296
297 return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
293} 298}
294 299
295static inline int cmdline_find_option_bool(const char *option) 300static inline int cmdline_find_option_bool(const char *option)
296{ 301{
297 return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option); 302 unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
303
304 if (cmd_line_ptr >= 0x100000)
305 return -1; /* inaccessible */
306
307 return __cmdline_find_option_bool(cmd_line_ptr, option);
298} 308}
299 309
300 310
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 6b3b6f708c04..625d21b0cd3f 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -27,7 +27,7 @@ static inline int myisspace(u8 c)
27 * Returns the length of the argument (regardless of if it was 27 * Returns the length of the argument (regardless of if it was
28 * truncated to fit in the buffer), or -1 on not found. 28 * truncated to fit in the buffer), or -1 on not found.
29 */ 29 */
30int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize) 30int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
31{ 31{
32 addr_t cptr; 32 addr_t cptr;
33 char c; 33 char c;
@@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
41 st_bufcpy /* Copying this to buffer */ 41 st_bufcpy /* Copying this to buffer */
42 } state = st_wordstart; 42 } state = st_wordstart;
43 43
44 if (!cmdline_ptr || cmdline_ptr >= 0x100000) 44 if (!cmdline_ptr)
45 return -1; /* No command line, or inaccessible */ 45 return -1; /* No command line */
46 46
47 cptr = cmdline_ptr & 0xf; 47 cptr = cmdline_ptr & 0xf;
48 set_fs(cmdline_ptr >> 4); 48 set_fs(cmdline_ptr >> 4);
@@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
99 * Returns the position of that option (starts counting with 1) 99 * Returns the position of that option (starts counting with 1)
100 * or 0 on not found 100 * or 0 on not found
101 */ 101 */
102int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option) 102int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
103{ 103{
104 addr_t cptr; 104 addr_t cptr;
105 char c; 105 char c;
@@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
111 st_wordskip, /* Miscompare, skip */ 111 st_wordskip, /* Miscompare, skip */
112 } state = st_wordstart; 112 } state = st_wordstart;
113 113
114 if (!cmdline_ptr || cmdline_ptr >= 0x100000) 114 if (!cmdline_ptr)
115 return -1; /* No command line, or inaccessible */ 115 return -1; /* No command line */
116 116
117 cptr = cmdline_ptr & 0xf; 117 cptr = cmdline_ptr & 0xf;
118 set_fs(cmdline_ptr >> 4); 118 set_fs(cmdline_ptr >> 4);
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 10f6b1178c68..bffd73b45b1f 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -13,13 +13,21 @@ static inline char rdfs8(addr_t addr)
13 return *((char *)(fs + addr)); 13 return *((char *)(fs + addr));
14} 14}
15#include "../cmdline.c" 15#include "../cmdline.c"
16static unsigned long get_cmd_line_ptr(void)
17{
18 unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
19
20 cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
21
22 return cmd_line_ptr;
23}
16int cmdline_find_option(const char *option, char *buffer, int bufsize) 24int cmdline_find_option(const char *option, char *buffer, int bufsize)
17{ 25{
18 return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize); 26 return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
19} 27}
20int cmdline_find_option_bool(const char *option) 28int cmdline_find_option_bool(const char *option)
21{ 29{
22 return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option); 30 return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
23} 31}
24 32
25#endif 33#endif
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f5d1aaa0dec8..c1d383d1fb7e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -37,6 +37,12 @@
37 __HEAD 37 __HEAD
38 .code32 38 .code32
39ENTRY(startup_32) 39ENTRY(startup_32)
40 /*
41 * 32bit entry is 0 and it is ABI so immutable!
42 * If we come here directly from a bootloader,
43 * kernel(text+data+bss+brk) ramdisk, zero_page, command line
44 * all need to be under the 4G limit.
45 */
40 cld 46 cld
41 /* 47 /*
42 * Test KEEP_SEGMENTS flag to see if the bootloader is asking 48 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -154,6 +160,12 @@ ENTRY(startup_32)
154 btsl $_EFER_LME, %eax 160 btsl $_EFER_LME, %eax
155 wrmsr 161 wrmsr
156 162
163 /* After gdt is loaded */
164 xorl %eax, %eax
165 lldt %ax
166 movl $0x20, %eax
167 ltr %ax
168
157 /* 169 /*
158 * Setup for the jump to 64bit mode 170 * Setup for the jump to 64bit mode
159 * 171 *
@@ -176,28 +188,18 @@ ENTRY(startup_32)
176 lret 188 lret
177ENDPROC(startup_32) 189ENDPROC(startup_32)
178 190
179no_longmode:
180 /* This isn't an x86-64 CPU so hang */
1811:
182 hlt
183 jmp 1b
184
185#include "../../kernel/verify_cpu.S"
186
187 /*
188 * Be careful here startup_64 needs to be at a predictable
189 * address so I can export it in an ELF header. Bootloaders
190 * should look at the ELF header to find this address, as
191 * it may change in the future.
192 */
193 .code64 191 .code64
194 .org 0x200 192 .org 0x200
195ENTRY(startup_64) 193ENTRY(startup_64)
196 /* 194 /*
195 * 64bit entry is 0x200 and it is ABI so immutable!
197 * We come here either from startup_32 or directly from a 196 * We come here either from startup_32 or directly from a
198 * 64bit bootloader. If we come here from a bootloader we depend on 197 * 64bit bootloader.
199 * an identity mapped page table being provied that maps our 198 * If we come here from a bootloader, kernel(text+data+bss+brk),
200 * entire text+data+bss and hopefully all of memory. 199 * ramdisk, zero_page, command line could be above 4G.
200 * We depend on an identity mapped page table being provided
201 * that maps our entire kernel(text+data+bss+brk), zero page
202 * and command line.
201 */ 203 */
202#ifdef CONFIG_EFI_STUB 204#ifdef CONFIG_EFI_STUB
203 /* 205 /*
@@ -247,9 +249,6 @@ preferred_addr:
247 movl %eax, %ss 249 movl %eax, %ss
248 movl %eax, %fs 250 movl %eax, %fs
249 movl %eax, %gs 251 movl %eax, %gs
250 lldt %ax
251 movl $0x20, %eax
252 ltr %ax
253 252
254 /* 253 /*
255 * Compute the decompressed kernel start address. It is where 254 * Compute the decompressed kernel start address. It is where
@@ -349,6 +348,15 @@ relocated:
349 */ 348 */
350 jmp *%rbp 349 jmp *%rbp
351 350
351 .code32
352no_longmode:
353 /* This isn't an x86-64 CPU so hang */
3541:
355 hlt
356 jmp 1b
357
358#include "../../kernel/verify_cpu.S"
359
352 .data 360 .data
353gdt: 361gdt:
354 .word gdt_end - gdt 362 .word gdt_end - gdt
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 88f7ff6da404..7cb56c6ca351 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
325{ 325{
326 real_mode = rmode; 326 real_mode = rmode;
327 327
328 sanitize_boot_params(real_mode);
329
328 if (real_mode->screen_info.orig_video_mode == 7) { 330 if (real_mode->screen_info.orig_video_mode == 7) {
329 vidmem = (char *) 0xb0000; 331 vidmem = (char *) 0xb0000;
330 vidport = 0x3b4; 332 vidport = 0x3b4;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 0e6dc0ee0eea..674019d8e235 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -18,6 +18,7 @@
18#include <asm/page.h> 18#include <asm/page.h>
19#include <asm/boot.h> 19#include <asm/boot.h>
20#include <asm/bootparam.h> 20#include <asm/bootparam.h>
21#include <asm/bootparam_utils.h>
21 22
22#define BOOT_BOOT_H 23#define BOOT_BOOT_H
23#include "../ctype.h" 24#include "../ctype.h"
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 944ce595f767..9ec06a1f6d61 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -374,6 +374,14 @@ xloadflags:
374#else 374#else
375# define XLF0 0 375# define XLF0 0
376#endif 376#endif
377
378#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
379 /* kernel/boot_param/ramdisk could be loaded above 4g */
380# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
381#else
382# define XLF1 0
383#endif
384
377#ifdef CONFIG_EFI_STUB 385#ifdef CONFIG_EFI_STUB
378# ifdef CONFIG_X86_64 386# ifdef CONFIG_X86_64
379# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ 387# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
@@ -383,7 +391,7 @@ xloadflags:
383#else 391#else
384# define XLF23 0 392# define XLF23 0
385#endif 393#endif
386 .word XLF0 | XLF23 394 .word XLF0 | XLF1 | XLF23
387 395
388cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, 396cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
389 #added with boot protocol 397 #added with boot protocol
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 5598547281a7..94447086e551 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,3 +1,4 @@
1# CONFIG_64BIT is not set
1CONFIG_EXPERIMENTAL=y 2CONFIG_EXPERIMENTAL=y
2# CONFIG_LOCALVERSION_AUTO is not set 3# CONFIG_LOCALVERSION_AUTO is not set
3CONFIG_SYSVIPC=y 4CONFIG_SYSVIPC=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e0ca7c9ac383..63947a8f9f0f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
27 27
28obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o 28obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
29obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o 29obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
30obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
30 31
31aes-i586-y := aes-i586-asm_32.o aes_glue.o 32aes-i586-y := aes-i586-asm_32.o aes_glue.o
32twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o 33twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
@@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
52sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o 53sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
53crc32c-intel-y := crc32c-intel_glue.o 54crc32c-intel-y := crc32c-intel_glue.o
54crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o 55crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
56crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index b949ec2f9af4..2849dbc59e11 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -36,6 +36,7 @@
36.file "aes-i586-asm.S" 36.file "aes-i586-asm.S"
37.text 37.text
38 38
39#include <linux/linkage.h>
39#include <asm/asm-offsets.h> 40#include <asm/asm-offsets.h>
40 41
41#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) 42#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
@@ -219,14 +220,10 @@
219// AES (Rijndael) Encryption Subroutine 220// AES (Rijndael) Encryption Subroutine
220/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ 221/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
221 222
222.global aes_enc_blk
223
224.extern crypto_ft_tab 223.extern crypto_ft_tab
225.extern crypto_fl_tab 224.extern crypto_fl_tab
226 225
227.align 4 226ENTRY(aes_enc_blk)
228
229aes_enc_blk:
230 push %ebp 227 push %ebp
231 mov ctx(%esp),%ebp 228 mov ctx(%esp),%ebp
232 229
@@ -290,18 +287,15 @@ aes_enc_blk:
290 mov %r0,(%ebp) 287 mov %r0,(%ebp)
291 pop %ebp 288 pop %ebp
292 ret 289 ret
290ENDPROC(aes_enc_blk)
293 291
294// AES (Rijndael) Decryption Subroutine 292// AES (Rijndael) Decryption Subroutine
295/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ 293/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
296 294
297.global aes_dec_blk
298
299.extern crypto_it_tab 295.extern crypto_it_tab
300.extern crypto_il_tab 296.extern crypto_il_tab
301 297
302.align 4 298ENTRY(aes_dec_blk)
303
304aes_dec_blk:
305 push %ebp 299 push %ebp
306 mov ctx(%esp),%ebp 300 mov ctx(%esp),%ebp
307 301
@@ -365,3 +359,4 @@ aes_dec_blk:
365 mov %r0,(%ebp) 359 mov %r0,(%ebp)
366 pop %ebp 360 pop %ebp
367 ret 361 ret
362ENDPROC(aes_dec_blk)
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 5b577d5a059b..910565547163 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -15,6 +15,7 @@
15 15
16.text 16.text
17 17
18#include <linux/linkage.h>
18#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
19 20
20#define R1 %rax 21#define R1 %rax
@@ -49,10 +50,8 @@
49#define R11 %r11 50#define R11 %r11
50 51
51#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ 52#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
52 .global FUNC; \ 53 ENTRY(FUNC); \
53 .type FUNC,@function; \ 54 movq r1,r2; \
54 .align 8; \
55FUNC: movq r1,r2; \
56 movq r3,r4; \ 55 movq r3,r4; \
57 leaq KEY+48(r8),r9; \ 56 leaq KEY+48(r8),r9; \
58 movq r10,r11; \ 57 movq r10,r11; \
@@ -71,14 +70,15 @@ FUNC: movq r1,r2; \
71 je B192; \ 70 je B192; \
72 leaq 32(r9),r9; 71 leaq 32(r9),r9;
73 72
74#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ 73#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
75 movq r1,r2; \ 74 movq r1,r2; \
76 movq r3,r4; \ 75 movq r3,r4; \
77 movl r5 ## E,(r9); \ 76 movl r5 ## E,(r9); \
78 movl r6 ## E,4(r9); \ 77 movl r6 ## E,4(r9); \
79 movl r7 ## E,8(r9); \ 78 movl r7 ## E,8(r9); \
80 movl r8 ## E,12(r9); \ 79 movl r8 ## E,12(r9); \
81 ret; 80 ret; \
81 ENDPROC(FUNC);
82 82
83#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ 83#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
84 movzbl r2 ## H,r5 ## E; \ 84 movzbl r2 ## H,r5 ## E; \
@@ -133,7 +133,7 @@ FUNC: movq r1,r2; \
133#define entry(FUNC,KEY,B128,B192) \ 133#define entry(FUNC,KEY,B128,B192) \
134 prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) 134 prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
135 135
136#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) 136#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
137 137
138#define encrypt_round(TAB,OFFSET) \ 138#define encrypt_round(TAB,OFFSET) \
139 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ 139 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
@@ -151,12 +151,12 @@ FUNC: movq r1,r2; \
151 151
152/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ 152/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
153 153
154 entry(aes_enc_blk,0,enc128,enc192) 154 entry(aes_enc_blk,0,.Le128,.Le192)
155 encrypt_round(crypto_ft_tab,-96) 155 encrypt_round(crypto_ft_tab,-96)
156 encrypt_round(crypto_ft_tab,-80) 156 encrypt_round(crypto_ft_tab,-80)
157enc192: encrypt_round(crypto_ft_tab,-64) 157.Le192: encrypt_round(crypto_ft_tab,-64)
158 encrypt_round(crypto_ft_tab,-48) 158 encrypt_round(crypto_ft_tab,-48)
159enc128: encrypt_round(crypto_ft_tab,-32) 159.Le128: encrypt_round(crypto_ft_tab,-32)
160 encrypt_round(crypto_ft_tab,-16) 160 encrypt_round(crypto_ft_tab,-16)
161 encrypt_round(crypto_ft_tab, 0) 161 encrypt_round(crypto_ft_tab, 0)
162 encrypt_round(crypto_ft_tab, 16) 162 encrypt_round(crypto_ft_tab, 16)
@@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32)
166 encrypt_round(crypto_ft_tab, 80) 166 encrypt_round(crypto_ft_tab, 80)
167 encrypt_round(crypto_ft_tab, 96) 167 encrypt_round(crypto_ft_tab, 96)
168 encrypt_final(crypto_fl_tab,112) 168 encrypt_final(crypto_fl_tab,112)
169 return 169 return(aes_enc_blk)
170 170
171/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ 171/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
172 172
173 entry(aes_dec_blk,240,dec128,dec192) 173 entry(aes_dec_blk,240,.Ld128,.Ld192)
174 decrypt_round(crypto_it_tab,-96) 174 decrypt_round(crypto_it_tab,-96)
175 decrypt_round(crypto_it_tab,-80) 175 decrypt_round(crypto_it_tab,-80)
176dec192: decrypt_round(crypto_it_tab,-64) 176.Ld192: decrypt_round(crypto_it_tab,-64)
177 decrypt_round(crypto_it_tab,-48) 177 decrypt_round(crypto_it_tab,-48)
178dec128: decrypt_round(crypto_it_tab,-32) 178.Ld128: decrypt_round(crypto_it_tab,-32)
179 decrypt_round(crypto_it_tab,-16) 179 decrypt_round(crypto_it_tab,-16)
180 decrypt_round(crypto_it_tab, 0) 180 decrypt_round(crypto_it_tab, 0)
181 decrypt_round(crypto_it_tab, 16) 181 decrypt_round(crypto_it_tab, 16)
@@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32)
185 decrypt_round(crypto_it_tab, 80) 185 decrypt_round(crypto_it_tab, 80)
186 decrypt_round(crypto_it_tab, 96) 186 decrypt_round(crypto_it_tab, 96)
187 decrypt_final(crypto_il_tab,112) 187 decrypt_final(crypto_il_tab,112)
188 return 188 return(aes_dec_blk)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3470624d7835..04b797767b9e 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
1262* poly = x^128 + x^127 + x^126 + x^121 + 1 1262* poly = x^128 + x^127 + x^126 + x^121 + 1
1263* 1263*
1264*****************************************************************************/ 1264*****************************************************************************/
1265
1266ENTRY(aesni_gcm_dec) 1265ENTRY(aesni_gcm_dec)
1267 push %r12 1266 push %r12
1268 push %r13 1267 push %r13
@@ -1437,6 +1436,7 @@ _return_T_done_decrypt:
1437 pop %r13 1436 pop %r13
1438 pop %r12 1437 pop %r12
1439 ret 1438 ret
1439ENDPROC(aesni_gcm_dec)
1440 1440
1441 1441
1442/***************************************************************************** 1442/*****************************************************************************
@@ -1700,10 +1700,12 @@ _return_T_done_encrypt:
1700 pop %r13 1700 pop %r13
1701 pop %r12 1701 pop %r12
1702 ret 1702 ret
1703ENDPROC(aesni_gcm_enc)
1703 1704
1704#endif 1705#endif
1705 1706
1706 1707
1708.align 4
1707_key_expansion_128: 1709_key_expansion_128:
1708_key_expansion_256a: 1710_key_expansion_256a:
1709 pshufd $0b11111111, %xmm1, %xmm1 1711 pshufd $0b11111111, %xmm1, %xmm1
@@ -1715,6 +1717,8 @@ _key_expansion_256a:
1715 movaps %xmm0, (TKEYP) 1717 movaps %xmm0, (TKEYP)
1716 add $0x10, TKEYP 1718 add $0x10, TKEYP
1717 ret 1719 ret
1720ENDPROC(_key_expansion_128)
1721ENDPROC(_key_expansion_256a)
1718 1722
1719.align 4 1723.align 4
1720_key_expansion_192a: 1724_key_expansion_192a:
@@ -1739,6 +1743,7 @@ _key_expansion_192a:
1739 movaps %xmm1, 0x10(TKEYP) 1743 movaps %xmm1, 0x10(TKEYP)
1740 add $0x20, TKEYP 1744 add $0x20, TKEYP
1741 ret 1745 ret
1746ENDPROC(_key_expansion_192a)
1742 1747
1743.align 4 1748.align 4
1744_key_expansion_192b: 1749_key_expansion_192b:
@@ -1758,6 +1763,7 @@ _key_expansion_192b:
1758 movaps %xmm0, (TKEYP) 1763 movaps %xmm0, (TKEYP)
1759 add $0x10, TKEYP 1764 add $0x10, TKEYP
1760 ret 1765 ret
1766ENDPROC(_key_expansion_192b)
1761 1767
1762.align 4 1768.align 4
1763_key_expansion_256b: 1769_key_expansion_256b:
@@ -1770,6 +1776,7 @@ _key_expansion_256b:
1770 movaps %xmm2, (TKEYP) 1776 movaps %xmm2, (TKEYP)
1771 add $0x10, TKEYP 1777 add $0x10, TKEYP
1772 ret 1778 ret
1779ENDPROC(_key_expansion_256b)
1773 1780
1774/* 1781/*
1775 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, 1782 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key)
1882 popl KEYP 1889 popl KEYP
1883#endif 1890#endif
1884 ret 1891 ret
1892ENDPROC(aesni_set_key)
1885 1893
1886/* 1894/*
1887 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) 1895 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -1903,6 +1911,7 @@ ENTRY(aesni_enc)
1903 popl KEYP 1911 popl KEYP
1904#endif 1912#endif
1905 ret 1913 ret
1914ENDPROC(aesni_enc)
1906 1915
1907/* 1916/*
1908 * _aesni_enc1: internal ABI 1917 * _aesni_enc1: internal ABI
@@ -1960,6 +1969,7 @@ _aesni_enc1:
1960 movaps 0x70(TKEYP), KEY 1969 movaps 0x70(TKEYP), KEY
1961 AESENCLAST KEY STATE 1970 AESENCLAST KEY STATE
1962 ret 1971 ret
1972ENDPROC(_aesni_enc1)
1963 1973
1964/* 1974/*
1965 * _aesni_enc4: internal ABI 1975 * _aesni_enc4: internal ABI
@@ -2068,6 +2078,7 @@ _aesni_enc4:
2068 AESENCLAST KEY STATE3 2078 AESENCLAST KEY STATE3
2069 AESENCLAST KEY STATE4 2079 AESENCLAST KEY STATE4
2070 ret 2080 ret
2081ENDPROC(_aesni_enc4)
2071 2082
2072/* 2083/*
2073 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) 2084 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -2090,6 +2101,7 @@ ENTRY(aesni_dec)
2090 popl KEYP 2101 popl KEYP
2091#endif 2102#endif
2092 ret 2103 ret
2104ENDPROC(aesni_dec)
2093 2105
2094/* 2106/*
2095 * _aesni_dec1: internal ABI 2107 * _aesni_dec1: internal ABI
@@ -2147,6 +2159,7 @@ _aesni_dec1:
2147 movaps 0x70(TKEYP), KEY 2159 movaps 0x70(TKEYP), KEY
2148 AESDECLAST KEY STATE 2160 AESDECLAST KEY STATE
2149 ret 2161 ret
2162ENDPROC(_aesni_dec1)
2150 2163
2151/* 2164/*
2152 * _aesni_dec4: internal ABI 2165 * _aesni_dec4: internal ABI
@@ -2255,6 +2268,7 @@ _aesni_dec4:
2255 AESDECLAST KEY STATE3 2268 AESDECLAST KEY STATE3
2256 AESDECLAST KEY STATE4 2269 AESDECLAST KEY STATE4
2257 ret 2270 ret
2271ENDPROC(_aesni_dec4)
2258 2272
2259/* 2273/*
2260 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2274 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc)
2312 popl LEN 2326 popl LEN
2313#endif 2327#endif
2314 ret 2328 ret
2329ENDPROC(aesni_ecb_enc)
2315 2330
2316/* 2331/*
2317 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2332 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec)
2370 popl LEN 2385 popl LEN
2371#endif 2386#endif
2372 ret 2387 ret
2388ENDPROC(aesni_ecb_dec)
2373 2389
2374/* 2390/*
2375 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2391 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc)
2411 popl IVP 2427 popl IVP
2412#endif 2428#endif
2413 ret 2429 ret
2430ENDPROC(aesni_cbc_enc)
2414 2431
2415/* 2432/*
2416 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2433 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec)
2501 popl IVP 2518 popl IVP
2502#endif 2519#endif
2503 ret 2520 ret
2521ENDPROC(aesni_cbc_dec)
2504 2522
2505#ifdef __x86_64__ 2523#ifdef __x86_64__
2506.align 16 2524.align 16
@@ -2527,6 +2545,7 @@ _aesni_inc_init:
2527 MOVQ_R64_XMM TCTR_LOW INC 2545 MOVQ_R64_XMM TCTR_LOW INC
2528 MOVQ_R64_XMM CTR TCTR_LOW 2546 MOVQ_R64_XMM CTR TCTR_LOW
2529 ret 2547 ret
2548ENDPROC(_aesni_inc_init)
2530 2549
2531/* 2550/*
2532 * _aesni_inc: internal ABI 2551 * _aesni_inc: internal ABI
@@ -2555,6 +2574,7 @@ _aesni_inc:
2555 movaps CTR, IV 2574 movaps CTR, IV
2556 PSHUFB_XMM BSWAP_MASK IV 2575 PSHUFB_XMM BSWAP_MASK IV
2557 ret 2576 ret
2577ENDPROC(_aesni_inc)
2558 2578
2559/* 2579/*
2560 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2580 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc)
2615 movups IV, (IVP) 2635 movups IV, (IVP)
2616.Lctr_enc_just_ret: 2636.Lctr_enc_just_ret:
2617 ret 2637 ret
2638ENDPROC(aesni_ctr_enc)
2618#endif 2639#endif
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 1b9c22bea8a7..a0795da22c02 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -40,10 +40,6 @@
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/spinlock.h> 41#include <linux/spinlock.h>
42 42
43#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
44#define HAS_CTR
45#endif
46
47#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) 43#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
48#define HAS_PCBC 44#define HAS_PCBC
49#endif 45#endif
@@ -395,12 +391,6 @@ static int ablk_ctr_init(struct crypto_tfm *tfm)
395 return ablk_init_common(tfm, "__driver-ctr-aes-aesni"); 391 return ablk_init_common(tfm, "__driver-ctr-aes-aesni");
396} 392}
397 393
398#ifdef HAS_CTR
399static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
400{
401 return ablk_init_common(tfm, "rfc3686(__driver-ctr-aes-aesni)");
402}
403#endif
404#endif 394#endif
405 395
406#ifdef HAS_PCBC 396#ifdef HAS_PCBC
@@ -1158,33 +1148,6 @@ static struct crypto_alg aesni_algs[] = { {
1158 .maxauthsize = 16, 1148 .maxauthsize = 16,
1159 }, 1149 },
1160 }, 1150 },
1161#ifdef HAS_CTR
1162}, {
1163 .cra_name = "rfc3686(ctr(aes))",
1164 .cra_driver_name = "rfc3686-ctr-aes-aesni",
1165 .cra_priority = 400,
1166 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
1167 .cra_blocksize = 1,
1168 .cra_ctxsize = sizeof(struct async_helper_ctx),
1169 .cra_alignmask = 0,
1170 .cra_type = &crypto_ablkcipher_type,
1171 .cra_module = THIS_MODULE,
1172 .cra_init = ablk_rfc3686_ctr_init,
1173 .cra_exit = ablk_exit,
1174 .cra_u = {
1175 .ablkcipher = {
1176 .min_keysize = AES_MIN_KEY_SIZE +
1177 CTR_RFC3686_NONCE_SIZE,
1178 .max_keysize = AES_MAX_KEY_SIZE +
1179 CTR_RFC3686_NONCE_SIZE,
1180 .ivsize = CTR_RFC3686_IV_SIZE,
1181 .setkey = ablk_set_key,
1182 .encrypt = ablk_encrypt,
1183 .decrypt = ablk_decrypt,
1184 .geniv = "seqiv",
1185 },
1186 },
1187#endif
1188#endif 1151#endif
1189#ifdef HAS_PCBC 1152#ifdef HAS_PCBC
1190}, { 1153}, {
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 391d245dc086..246c67006ed0 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "blowfish-x86_64-asm.S" 25.file "blowfish-x86_64-asm.S"
24.text 26.text
25 27
@@ -116,11 +118,7 @@
116 bswapq RX0; \ 118 bswapq RX0; \
117 xorq RX0, (RIO); 119 xorq RX0, (RIO);
118 120
119.align 8 121ENTRY(__blowfish_enc_blk)
120.global __blowfish_enc_blk
121.type __blowfish_enc_blk,@function;
122
123__blowfish_enc_blk:
124 /* input: 122 /* input:
125 * %rdi: ctx, CTX 123 * %rdi: ctx, CTX
126 * %rsi: dst 124 * %rsi: dst
@@ -148,19 +146,16 @@ __blowfish_enc_blk:
148 146
149 movq %r10, RIO; 147 movq %r10, RIO;
150 test %cl, %cl; 148 test %cl, %cl;
151 jnz __enc_xor; 149 jnz .L__enc_xor;
152 150
153 write_block(); 151 write_block();
154 ret; 152 ret;
155__enc_xor: 153.L__enc_xor:
156 xor_block(); 154 xor_block();
157 ret; 155 ret;
156ENDPROC(__blowfish_enc_blk)
158 157
159.align 8 158ENTRY(blowfish_dec_blk)
160.global blowfish_dec_blk
161.type blowfish_dec_blk,@function;
162
163blowfish_dec_blk:
164 /* input: 159 /* input:
165 * %rdi: ctx, CTX 160 * %rdi: ctx, CTX
166 * %rsi: dst 161 * %rsi: dst
@@ -189,6 +184,7 @@ blowfish_dec_blk:
189 movq %r11, %rbp; 184 movq %r11, %rbp;
190 185
191 ret; 186 ret;
187ENDPROC(blowfish_dec_blk)
192 188
193/********************************************************************** 189/**********************************************************************
194 4-way blowfish, four blocks parallel 190 4-way blowfish, four blocks parallel
@@ -300,11 +296,7 @@ blowfish_dec_blk:
300 bswapq RX3; \ 296 bswapq RX3; \
301 xorq RX3, 24(RIO); 297 xorq RX3, 24(RIO);
302 298
303.align 8 299ENTRY(__blowfish_enc_blk_4way)
304.global __blowfish_enc_blk_4way
305.type __blowfish_enc_blk_4way,@function;
306
307__blowfish_enc_blk_4way:
308 /* input: 300 /* input:
309 * %rdi: ctx, CTX 301 * %rdi: ctx, CTX
310 * %rsi: dst 302 * %rsi: dst
@@ -336,7 +328,7 @@ __blowfish_enc_blk_4way:
336 movq %r11, RIO; 328 movq %r11, RIO;
337 329
338 test %bpl, %bpl; 330 test %bpl, %bpl;
339 jnz __enc_xor4; 331 jnz .L__enc_xor4;
340 332
341 write_block4(); 333 write_block4();
342 334
@@ -344,18 +336,15 @@ __blowfish_enc_blk_4way:
344 popq %rbp; 336 popq %rbp;
345 ret; 337 ret;
346 338
347__enc_xor4: 339.L__enc_xor4:
348 xor_block4(); 340 xor_block4();
349 341
350 popq %rbx; 342 popq %rbx;
351 popq %rbp; 343 popq %rbp;
352 ret; 344 ret;
345ENDPROC(__blowfish_enc_blk_4way)
353 346
354.align 8 347ENTRY(blowfish_dec_blk_4way)
355.global blowfish_dec_blk_4way
356.type blowfish_dec_blk_4way,@function;
357
358blowfish_dec_blk_4way:
359 /* input: 348 /* input:
360 * %rdi: ctx, CTX 349 * %rdi: ctx, CTX
361 * %rsi: dst 350 * %rsi: dst
@@ -387,4 +376,4 @@ blowfish_dec_blk_4way:
387 popq %rbp; 376 popq %rbp;
388 377
389 ret; 378 ret;
390 379ENDPROC(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index 2306d2e4816f..cfc163469c71 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -15,6 +15,8 @@
15 * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz 15 * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
16 */ 16 */
17 17
18#include <linux/linkage.h>
19
18#define CAMELLIA_TABLE_BYTE_LEN 272 20#define CAMELLIA_TABLE_BYTE_LEN 272
19 21
20/* struct camellia_ctx: */ 22/* struct camellia_ctx: */
@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
190 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, 192 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
191 %rcx, (%r9)); 193 %rcx, (%r9));
192 ret; 194 ret;
195ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
193 196
194.align 8 197.align 8
195roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: 198roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
197 %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, 200 %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
198 %rax, (%r9)); 201 %rax, (%r9));
199 ret; 202 ret;
203ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
200 204
201/* 205/*
202 * IN/OUT: 206 * IN/OUT:
@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
709.text 713.text
710 714
711.align 8 715.align 8
712.type __camellia_enc_blk16,@function;
713
714__camellia_enc_blk16: 716__camellia_enc_blk16:
715 /* input: 717 /* input:
716 * %rdi: ctx, CTX 718 * %rdi: ctx, CTX
@@ -793,10 +795,9 @@ __camellia_enc_blk16:
793 %xmm15, %rax, %rcx, 24); 795 %xmm15, %rax, %rcx, 24);
794 796
795 jmp .Lenc_done; 797 jmp .Lenc_done;
798ENDPROC(__camellia_enc_blk16)
796 799
797.align 8 800.align 8
798.type __camellia_dec_blk16,@function;
799
800__camellia_dec_blk16: 801__camellia_dec_blk16:
801 /* input: 802 /* input:
802 * %rdi: ctx, CTX 803 * %rdi: ctx, CTX
@@ -877,12 +878,9 @@ __camellia_dec_blk16:
877 ((key_table + (24) * 8) + 4)(CTX)); 878 ((key_table + (24) * 8) + 4)(CTX));
878 879
879 jmp .Ldec_max24; 880 jmp .Ldec_max24;
881ENDPROC(__camellia_dec_blk16)
880 882
881.align 8 883ENTRY(camellia_ecb_enc_16way)
882.global camellia_ecb_enc_16way
883.type camellia_ecb_enc_16way,@function;
884
885camellia_ecb_enc_16way:
886 /* input: 884 /* input:
887 * %rdi: ctx, CTX 885 * %rdi: ctx, CTX
888 * %rsi: dst (16 blocks) 886 * %rsi: dst (16 blocks)
@@ -903,12 +901,9 @@ camellia_ecb_enc_16way:
903 %xmm8, %rsi); 901 %xmm8, %rsi);
904 902
905 ret; 903 ret;
904ENDPROC(camellia_ecb_enc_16way)
906 905
907.align 8 906ENTRY(camellia_ecb_dec_16way)
908.global camellia_ecb_dec_16way
909.type camellia_ecb_dec_16way,@function;
910
911camellia_ecb_dec_16way:
912 /* input: 907 /* input:
913 * %rdi: ctx, CTX 908 * %rdi: ctx, CTX
914 * %rsi: dst (16 blocks) 909 * %rsi: dst (16 blocks)
@@ -934,12 +929,9 @@ camellia_ecb_dec_16way:
934 %xmm8, %rsi); 929 %xmm8, %rsi);
935 930
936 ret; 931 ret;
932ENDPROC(camellia_ecb_dec_16way)
937 933
938.align 8 934ENTRY(camellia_cbc_dec_16way)
939.global camellia_cbc_dec_16way
940.type camellia_cbc_dec_16way,@function;
941
942camellia_cbc_dec_16way:
943 /* input: 935 /* input:
944 * %rdi: ctx, CTX 936 * %rdi: ctx, CTX
945 * %rsi: dst (16 blocks) 937 * %rsi: dst (16 blocks)
@@ -986,6 +978,7 @@ camellia_cbc_dec_16way:
986 %xmm8, %rsi); 978 %xmm8, %rsi);
987 979
988 ret; 980 ret;
981ENDPROC(camellia_cbc_dec_16way)
989 982
990#define inc_le128(x, minus_one, tmp) \ 983#define inc_le128(x, minus_one, tmp) \
991 vpcmpeqq minus_one, x, tmp; \ 984 vpcmpeqq minus_one, x, tmp; \
@@ -993,11 +986,7 @@ camellia_cbc_dec_16way:
993 vpslldq $8, tmp, tmp; \ 986 vpslldq $8, tmp, tmp; \
994 vpsubq tmp, x, x; 987 vpsubq tmp, x, x;
995 988
996.align 8 989ENTRY(camellia_ctr_16way)
997.global camellia_ctr_16way
998.type camellia_ctr_16way,@function;
999
1000camellia_ctr_16way:
1001 /* input: 990 /* input:
1002 * %rdi: ctx, CTX 991 * %rdi: ctx, CTX
1003 * %rsi: dst (16 blocks) 992 * %rsi: dst (16 blocks)
@@ -1100,3 +1089,4 @@ camellia_ctr_16way:
1100 %xmm8, %rsi); 1089 %xmm8, %rsi);
1101 1090
1102 ret; 1091 ret;
1092ENDPROC(camellia_ctr_16way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 0b3374335fdc..310319c601ed 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "camellia-x86_64-asm_64.S" 25.file "camellia-x86_64-asm_64.S"
24.text 26.text
25 27
@@ -188,10 +190,7 @@
188 bswapq RAB0; \ 190 bswapq RAB0; \
189 movq RAB0, 4*2(RIO); 191 movq RAB0, 4*2(RIO);
190 192
191.global __camellia_enc_blk; 193ENTRY(__camellia_enc_blk)
192.type __camellia_enc_blk,@function;
193
194__camellia_enc_blk:
195 /* input: 194 /* input:
196 * %rdi: ctx, CTX 195 * %rdi: ctx, CTX
197 * %rsi: dst 196 * %rsi: dst
@@ -214,33 +213,31 @@ __camellia_enc_blk:
214 movl $24, RT1d; /* max */ 213 movl $24, RT1d; /* max */
215 214
216 cmpb $16, key_length(CTX); 215 cmpb $16, key_length(CTX);
217 je __enc_done; 216 je .L__enc_done;
218 217
219 enc_fls(24); 218 enc_fls(24);
220 enc_rounds(24); 219 enc_rounds(24);
221 movl $32, RT1d; /* max */ 220 movl $32, RT1d; /* max */
222 221
223__enc_done: 222.L__enc_done:
224 testb RXORbl, RXORbl; 223 testb RXORbl, RXORbl;
225 movq RDST, RIO; 224 movq RDST, RIO;
226 225
227 jnz __enc_xor; 226 jnz .L__enc_xor;
228 227
229 enc_outunpack(mov, RT1); 228 enc_outunpack(mov, RT1);
230 229
231 movq RRBP, %rbp; 230 movq RRBP, %rbp;
232 ret; 231 ret;
233 232
234__enc_xor: 233.L__enc_xor:
235 enc_outunpack(xor, RT1); 234 enc_outunpack(xor, RT1);
236 235
237 movq RRBP, %rbp; 236 movq RRBP, %rbp;
238 ret; 237 ret;
238ENDPROC(__camellia_enc_blk)
239 239
240.global camellia_dec_blk; 240ENTRY(camellia_dec_blk)
241.type camellia_dec_blk,@function;
242
243camellia_dec_blk:
244 /* input: 241 /* input:
245 * %rdi: ctx, CTX 242 * %rdi: ctx, CTX
246 * %rsi: dst 243 * %rsi: dst
@@ -258,12 +255,12 @@ camellia_dec_blk:
258 dec_inpack(RT2); 255 dec_inpack(RT2);
259 256
260 cmpb $24, RT2bl; 257 cmpb $24, RT2bl;
261 je __dec_rounds16; 258 je .L__dec_rounds16;
262 259
263 dec_rounds(24); 260 dec_rounds(24);
264 dec_fls(24); 261 dec_fls(24);
265 262
266__dec_rounds16: 263.L__dec_rounds16:
267 dec_rounds(16); 264 dec_rounds(16);
268 dec_fls(16); 265 dec_fls(16);
269 dec_rounds(8); 266 dec_rounds(8);
@@ -276,6 +273,7 @@ __dec_rounds16:
276 273
277 movq RRBP, %rbp; 274 movq RRBP, %rbp;
278 ret; 275 ret;
276ENDPROC(camellia_dec_blk)
279 277
280/********************************************************************** 278/**********************************************************************
281 2-way camellia 279 2-way camellia
@@ -426,10 +424,7 @@ __dec_rounds16:
426 bswapq RAB1; \ 424 bswapq RAB1; \
427 movq RAB1, 12*2(RIO); 425 movq RAB1, 12*2(RIO);
428 426
429.global __camellia_enc_blk_2way; 427ENTRY(__camellia_enc_blk_2way)
430.type __camellia_enc_blk_2way,@function;
431
432__camellia_enc_blk_2way:
433 /* input: 428 /* input:
434 * %rdi: ctx, CTX 429 * %rdi: ctx, CTX
435 * %rsi: dst 430 * %rsi: dst
@@ -453,16 +448,16 @@ __camellia_enc_blk_2way:
453 movl $24, RT2d; /* max */ 448 movl $24, RT2d; /* max */
454 449
455 cmpb $16, key_length(CTX); 450 cmpb $16, key_length(CTX);
456 je __enc2_done; 451 je .L__enc2_done;
457 452
458 enc_fls2(24); 453 enc_fls2(24);
459 enc_rounds2(24); 454 enc_rounds2(24);
460 movl $32, RT2d; /* max */ 455 movl $32, RT2d; /* max */
461 456
462__enc2_done: 457.L__enc2_done:
463 test RXORbl, RXORbl; 458 test RXORbl, RXORbl;
464 movq RDST, RIO; 459 movq RDST, RIO;
465 jnz __enc2_xor; 460 jnz .L__enc2_xor;
466 461
467 enc_outunpack2(mov, RT2); 462 enc_outunpack2(mov, RT2);
468 463
@@ -470,17 +465,15 @@ __enc2_done:
470 popq %rbx; 465 popq %rbx;
471 ret; 466 ret;
472 467
473__enc2_xor: 468.L__enc2_xor:
474 enc_outunpack2(xor, RT2); 469 enc_outunpack2(xor, RT2);
475 470
476 movq RRBP, %rbp; 471 movq RRBP, %rbp;
477 popq %rbx; 472 popq %rbx;
478 ret; 473 ret;
474ENDPROC(__camellia_enc_blk_2way)
479 475
480.global camellia_dec_blk_2way; 476ENTRY(camellia_dec_blk_2way)
481.type camellia_dec_blk_2way,@function;
482
483camellia_dec_blk_2way:
484 /* input: 477 /* input:
485 * %rdi: ctx, CTX 478 * %rdi: ctx, CTX
486 * %rsi: dst 479 * %rsi: dst
@@ -499,12 +492,12 @@ camellia_dec_blk_2way:
499 dec_inpack2(RT2); 492 dec_inpack2(RT2);
500 493
501 cmpb $24, RT2bl; 494 cmpb $24, RT2bl;
502 je __dec2_rounds16; 495 je .L__dec2_rounds16;
503 496
504 dec_rounds2(24); 497 dec_rounds2(24);
505 dec_fls2(24); 498 dec_fls2(24);
506 499
507__dec2_rounds16: 500.L__dec2_rounds16:
508 dec_rounds2(16); 501 dec_rounds2(16);
509 dec_fls2(16); 502 dec_fls2(16);
510 dec_rounds2(8); 503 dec_rounds2(8);
@@ -518,3 +511,4 @@ __dec2_rounds16:
518 movq RRBP, %rbp; 511 movq RRBP, %rbp;
519 movq RXOR, %rbx; 512 movq RXOR, %rbx;
520 ret; 513 ret;
514ENDPROC(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 15b00ac7cbd3..c35fd5d6ecd2 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -23,6 +23,8 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
27
26.file "cast5-avx-x86_64-asm_64.S" 28.file "cast5-avx-x86_64-asm_64.S"
27 29
28.extern cast_s1 30.extern cast_s1
@@ -211,8 +213,6 @@
211.text 213.text
212 214
213.align 16 215.align 16
214.type __cast5_enc_blk16,@function;
215
216__cast5_enc_blk16: 216__cast5_enc_blk16:
217 /* input: 217 /* input:
218 * %rdi: ctx, CTX 218 * %rdi: ctx, CTX
@@ -263,14 +263,14 @@ __cast5_enc_blk16:
263 263
264 movzbl rr(CTX), %eax; 264 movzbl rr(CTX), %eax;
265 testl %eax, %eax; 265 testl %eax, %eax;
266 jnz __skip_enc; 266 jnz .L__skip_enc;
267 267
268 round(RL, RR, 12, 1); 268 round(RL, RR, 12, 1);
269 round(RR, RL, 13, 2); 269 round(RR, RL, 13, 2);
270 round(RL, RR, 14, 3); 270 round(RL, RR, 14, 3);
271 round(RR, RL, 15, 1); 271 round(RR, RL, 15, 1);
272 272
273__skip_enc: 273.L__skip_enc:
274 popq %rbx; 274 popq %rbx;
275 popq %rbp; 275 popq %rbp;
276 276
@@ -282,10 +282,9 @@ __skip_enc:
282 outunpack_blocks(RR4, RL4, RTMP, RX, RKM); 282 outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
283 283
284 ret; 284 ret;
285ENDPROC(__cast5_enc_blk16)
285 286
286.align 16 287.align 16
287.type __cast5_dec_blk16,@function;
288
289__cast5_dec_blk16: 288__cast5_dec_blk16:
290 /* input: 289 /* input:
291 * %rdi: ctx, CTX 290 * %rdi: ctx, CTX
@@ -323,14 +322,14 @@ __cast5_dec_blk16:
323 322
324 movzbl rr(CTX), %eax; 323 movzbl rr(CTX), %eax;
325 testl %eax, %eax; 324 testl %eax, %eax;
326 jnz __skip_dec; 325 jnz .L__skip_dec;
327 326
328 round(RL, RR, 15, 1); 327 round(RL, RR, 15, 1);
329 round(RR, RL, 14, 3); 328 round(RR, RL, 14, 3);
330 round(RL, RR, 13, 2); 329 round(RL, RR, 13, 2);
331 round(RR, RL, 12, 1); 330 round(RR, RL, 12, 1);
332 331
333__dec_tail: 332.L__dec_tail:
334 round(RL, RR, 11, 3); 333 round(RL, RR, 11, 3);
335 round(RR, RL, 10, 2); 334 round(RR, RL, 10, 2);
336 round(RL, RR, 9, 1); 335 round(RL, RR, 9, 1);
@@ -355,15 +354,12 @@ __dec_tail:
355 354
356 ret; 355 ret;
357 356
358__skip_dec: 357.L__skip_dec:
359 vpsrldq $4, RKR, RKR; 358 vpsrldq $4, RKR, RKR;
360 jmp __dec_tail; 359 jmp .L__dec_tail;
360ENDPROC(__cast5_dec_blk16)
361 361
362.align 16 362ENTRY(cast5_ecb_enc_16way)
363.global cast5_ecb_enc_16way
364.type cast5_ecb_enc_16way,@function;
365
366cast5_ecb_enc_16way:
367 /* input: 363 /* input:
368 * %rdi: ctx, CTX 364 * %rdi: ctx, CTX
369 * %rsi: dst 365 * %rsi: dst
@@ -393,12 +389,9 @@ cast5_ecb_enc_16way:
393 vmovdqu RL4, (7*4*4)(%r11); 389 vmovdqu RL4, (7*4*4)(%r11);
394 390
395 ret; 391 ret;
392ENDPROC(cast5_ecb_enc_16way)
396 393
397.align 16 394ENTRY(cast5_ecb_dec_16way)
398.global cast5_ecb_dec_16way
399.type cast5_ecb_dec_16way,@function;
400
401cast5_ecb_dec_16way:
402 /* input: 395 /* input:
403 * %rdi: ctx, CTX 396 * %rdi: ctx, CTX
404 * %rsi: dst 397 * %rsi: dst
@@ -428,12 +421,9 @@ cast5_ecb_dec_16way:
428 vmovdqu RL4, (7*4*4)(%r11); 421 vmovdqu RL4, (7*4*4)(%r11);
429 422
430 ret; 423 ret;
424ENDPROC(cast5_ecb_dec_16way)
431 425
432.align 16 426ENTRY(cast5_cbc_dec_16way)
433.global cast5_cbc_dec_16way
434.type cast5_cbc_dec_16way,@function;
435
436cast5_cbc_dec_16way:
437 /* input: 427 /* input:
438 * %rdi: ctx, CTX 428 * %rdi: ctx, CTX
439 * %rsi: dst 429 * %rsi: dst
@@ -480,12 +470,9 @@ cast5_cbc_dec_16way:
480 popq %r12; 470 popq %r12;
481 471
482 ret; 472 ret;
473ENDPROC(cast5_cbc_dec_16way)
483 474
484.align 16 475ENTRY(cast5_ctr_16way)
485.global cast5_ctr_16way
486.type cast5_ctr_16way,@function;
487
488cast5_ctr_16way:
489 /* input: 476 /* input:
490 * %rdi: ctx, CTX 477 * %rdi: ctx, CTX
491 * %rsi: dst 478 * %rsi: dst
@@ -556,3 +543,4 @@ cast5_ctr_16way:
556 popq %r12; 543 popq %r12;
557 544
558 ret; 545 ret;
546ENDPROC(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 2569d0da841f..f93b6105a0ce 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -23,6 +23,7 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
26#include "glue_helper-asm-avx.S" 27#include "glue_helper-asm-avx.S"
27 28
28.file "cast6-avx-x86_64-asm_64.S" 29.file "cast6-avx-x86_64-asm_64.S"
@@ -250,8 +251,6 @@
250.text 251.text
251 252
252.align 8 253.align 8
253.type __cast6_enc_blk8,@function;
254
255__cast6_enc_blk8: 254__cast6_enc_blk8:
256 /* input: 255 /* input:
257 * %rdi: ctx, CTX 256 * %rdi: ctx, CTX
@@ -295,10 +294,9 @@ __cast6_enc_blk8:
295 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); 294 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
296 295
297 ret; 296 ret;
297ENDPROC(__cast6_enc_blk8)
298 298
299.align 8 299.align 8
300.type __cast6_dec_blk8,@function;
301
302__cast6_dec_blk8: 300__cast6_dec_blk8:
303 /* input: 301 /* input:
304 * %rdi: ctx, CTX 302 * %rdi: ctx, CTX
@@ -341,12 +339,9 @@ __cast6_dec_blk8:
341 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); 339 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
342 340
343 ret; 341 ret;
342ENDPROC(__cast6_dec_blk8)
344 343
345.align 8 344ENTRY(cast6_ecb_enc_8way)
346.global cast6_ecb_enc_8way
347.type cast6_ecb_enc_8way,@function;
348
349cast6_ecb_enc_8way:
350 /* input: 345 /* input:
351 * %rdi: ctx, CTX 346 * %rdi: ctx, CTX
352 * %rsi: dst 347 * %rsi: dst
@@ -362,12 +357,9 @@ cast6_ecb_enc_8way:
362 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 357 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
363 358
364 ret; 359 ret;
360ENDPROC(cast6_ecb_enc_8way)
365 361
366.align 8 362ENTRY(cast6_ecb_dec_8way)
367.global cast6_ecb_dec_8way
368.type cast6_ecb_dec_8way,@function;
369
370cast6_ecb_dec_8way:
371 /* input: 363 /* input:
372 * %rdi: ctx, CTX 364 * %rdi: ctx, CTX
373 * %rsi: dst 365 * %rsi: dst
@@ -383,12 +375,9 @@ cast6_ecb_dec_8way:
383 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 375 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
384 376
385 ret; 377 ret;
378ENDPROC(cast6_ecb_dec_8way)
386 379
387.align 8 380ENTRY(cast6_cbc_dec_8way)
388.global cast6_cbc_dec_8way
389.type cast6_cbc_dec_8way,@function;
390
391cast6_cbc_dec_8way:
392 /* input: 381 /* input:
393 * %rdi: ctx, CTX 382 * %rdi: ctx, CTX
394 * %rsi: dst 383 * %rsi: dst
@@ -409,12 +398,9 @@ cast6_cbc_dec_8way:
409 popq %r12; 398 popq %r12;
410 399
411 ret; 400 ret;
401ENDPROC(cast6_cbc_dec_8way)
412 402
413.align 8 403ENTRY(cast6_ctr_8way)
414.global cast6_ctr_8way
415.type cast6_ctr_8way,@function;
416
417cast6_ctr_8way:
418 /* input: 404 /* input:
419 * %rdi: ctx, CTX 405 * %rdi: ctx, CTX
420 * %rsi: dst 406 * %rsi: dst
@@ -437,3 +423,4 @@ cast6_ctr_8way:
437 popq %r12; 423 popq %r12;
438 424
439 ret; 425 ret;
426ENDPROC(cast6_ctr_8way)
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
new file mode 100644
index 000000000000..c8335014a044
--- /dev/null
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -0,0 +1,246 @@
1/* GPL HEADER START
2 *
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License version 2 for more details (a copy is included
13 * in the LICENSE file that accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see http://www.gnu.org/licenses
17 *
18 * Please visit http://www.xyratex.com/contact if you need additional
19 * information or have any questions.
20 *
21 * GPL HEADER END
22 */
23
24/*
25 * Copyright 2012 Xyratex Technology Limited
26 *
27 * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
28 * calculation.
 29 * CRC32 polynomial: 0x04c11db7 (BE) / 0xEDB88320 (LE)
30 * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
31 * at:
32 * http://www.intel.com/products/processor/manuals/
33 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
34 * Volume 2B: Instruction Set Reference, N-Z
35 *
36 * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
37 * Alexander Boyko <Alexander_Boyko@xyratex.com>
38 */
39
40#include <linux/linkage.h>
41#include <asm/inst.h>
42
43
44.align 16
45/*
 46 * [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
47 * #define CONSTANT_R1 0x154442bd4LL
48 *
49 * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
50 * #define CONSTANT_R2 0x1c6e41596LL
51 */
52.Lconstant_R2R1:
53 .octa 0x00000001c6e415960000000154442bd4
54/*
55 * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
56 * #define CONSTANT_R3 0x1751997d0LL
57 *
58 * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
59 * #define CONSTANT_R4 0x0ccaa009eLL
60 */
61.Lconstant_R4R3:
62 .octa 0x00000000ccaa009e00000001751997d0
63/*
64 * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
65 * #define CONSTANT_R5 0x163cd6124LL
66 */
67.Lconstant_R5:
68 .octa 0x00000000000000000000000163cd6124
69.Lconstant_mask32:
70 .octa 0x000000000000000000000000FFFFFFFF
71/*
72 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
73 *
74 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
75 * #define CONSTANT_RU 0x1F7011641LL
76 */
77.Lconstant_RUpoly:
78 .octa 0x00000001F701164100000001DB710641
79
80#define CONSTANT %xmm0
81
82#ifdef __x86_64__
83#define BUF %rdi
84#define LEN %rsi
85#define CRC %edx
86#else
87#define BUF %eax
88#define LEN %edx
89#define CRC %ecx
90#endif
91
92
93
94.text
95/**
96 * Calculate crc32
 97 * BUF - buffer (16-byte aligned)
 98 * LEN - size of the buffer (16-byte aligned), LEN should be greater than 63
99 * CRC - initial crc32
100 * return %eax crc32
101 * uint crc32_pclmul_le_16(unsigned char const *buffer,
102 * size_t len, uint crc32)
103 */
104.globl crc32_pclmul_le_16
105.align 4, 0x90
106crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
107 movdqa (BUF), %xmm1
108 movdqa 0x10(BUF), %xmm2
109 movdqa 0x20(BUF), %xmm3
110 movdqa 0x30(BUF), %xmm4
111 movd CRC, CONSTANT
112 pxor CONSTANT, %xmm1
113 sub $0x40, LEN
114 add $0x40, BUF
115#ifndef __x86_64__
116 /* This is for position independent code(-fPIC) support for 32bit */
117 call delta
118delta:
119 pop %ecx
120#endif
121 cmp $0x40, LEN
122 jb less_64
123
124#ifdef __x86_64__
125 movdqa .Lconstant_R2R1(%rip), CONSTANT
126#else
127 movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
128#endif
129
130loop_64:/* 64 bytes Full cache line folding */
131 prefetchnta 0x40(BUF)
132 movdqa %xmm1, %xmm5
133 movdqa %xmm2, %xmm6
134 movdqa %xmm3, %xmm7
135#ifdef __x86_64__
136 movdqa %xmm4, %xmm8
137#endif
138 PCLMULQDQ 00, CONSTANT, %xmm1
139 PCLMULQDQ 00, CONSTANT, %xmm2
140 PCLMULQDQ 00, CONSTANT, %xmm3
141#ifdef __x86_64__
142 PCLMULQDQ 00, CONSTANT, %xmm4
143#endif
144 PCLMULQDQ 0x11, CONSTANT, %xmm5
145 PCLMULQDQ 0x11, CONSTANT, %xmm6
146 PCLMULQDQ 0x11, CONSTANT, %xmm7
147#ifdef __x86_64__
148 PCLMULQDQ 0x11, CONSTANT, %xmm8
149#endif
150 pxor %xmm5, %xmm1
151 pxor %xmm6, %xmm2
152 pxor %xmm7, %xmm3
153#ifdef __x86_64__
154 pxor %xmm8, %xmm4
155#else
156	/* xmm8 is not available in 32-bit builds */
157 movdqa %xmm4, %xmm5
158 PCLMULQDQ 00, CONSTANT, %xmm4
159 PCLMULQDQ 0x11, CONSTANT, %xmm5
160 pxor %xmm5, %xmm4
161#endif
162
163 pxor (BUF), %xmm1
164 pxor 0x10(BUF), %xmm2
165 pxor 0x20(BUF), %xmm3
166 pxor 0x30(BUF), %xmm4
167
168 sub $0x40, LEN
169 add $0x40, BUF
170 cmp $0x40, LEN
171 jge loop_64
172less_64:/* Folding cache line into 128bit */
173#ifdef __x86_64__
174 movdqa .Lconstant_R4R3(%rip), CONSTANT
175#else
176 movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT
177#endif
178 prefetchnta (BUF)
179
180 movdqa %xmm1, %xmm5
181 PCLMULQDQ 0x00, CONSTANT, %xmm1
182 PCLMULQDQ 0x11, CONSTANT, %xmm5
183 pxor %xmm5, %xmm1
184 pxor %xmm2, %xmm1
185
186 movdqa %xmm1, %xmm5
187 PCLMULQDQ 0x00, CONSTANT, %xmm1
188 PCLMULQDQ 0x11, CONSTANT, %xmm5
189 pxor %xmm5, %xmm1
190 pxor %xmm3, %xmm1
191
192 movdqa %xmm1, %xmm5
193 PCLMULQDQ 0x00, CONSTANT, %xmm1
194 PCLMULQDQ 0x11, CONSTANT, %xmm5
195 pxor %xmm5, %xmm1
196 pxor %xmm4, %xmm1
197
198 cmp $0x10, LEN
199 jb fold_64
200loop_16:/* Folding rest buffer into 128bit */
201 movdqa %xmm1, %xmm5
202 PCLMULQDQ 0x00, CONSTANT, %xmm1
203 PCLMULQDQ 0x11, CONSTANT, %xmm5
204 pxor %xmm5, %xmm1
205 pxor (BUF), %xmm1
206 sub $0x10, LEN
207 add $0x10, BUF
208 cmp $0x10, LEN
209 jge loop_16
210
211fold_64:
212 /* perform the last 64 bit fold, also adds 32 zeroes
213 * to the input stream */
214 PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
215 psrldq $0x08, %xmm1
216 pxor CONSTANT, %xmm1
217
218 /* final 32-bit fold */
219 movdqa %xmm1, %xmm2
220#ifdef __x86_64__
221 movdqa .Lconstant_R5(%rip), CONSTANT
222 movdqa .Lconstant_mask32(%rip), %xmm3
223#else
224 movdqa .Lconstant_R5 - delta(%ecx), CONSTANT
225 movdqa .Lconstant_mask32 - delta(%ecx), %xmm3
226#endif
227 psrldq $0x04, %xmm2
228 pand %xmm3, %xmm1
229 PCLMULQDQ 0x00, CONSTANT, %xmm1
230 pxor %xmm2, %xmm1
231
232 /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
233#ifdef __x86_64__
234 movdqa .Lconstant_RUpoly(%rip), CONSTANT
235#else
236 movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT
237#endif
238 movdqa %xmm1, %xmm2
239 pand %xmm3, %xmm1
240 PCLMULQDQ 0x10, CONSTANT, %xmm1
241 pand %xmm3, %xmm1
242 PCLMULQDQ 0x00, CONSTANT, %xmm1
243 pxor %xmm2, %xmm1
244 pextrd $0x01, %xmm1, %eax
245
246 ret
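
The new crc32-pclmul_asm.S routine has to agree bit for bit with the kernel's generic crc32_le(): the .Lconstant_* values are precomputed powers of x modulo the CRC polynomial, so that for a message split as M(x) = A(x)*x^T + B(x) the identity M mod P = (A*(x^T mod P) + B) mod P lets PCLMULQDQ fold 64 input bytes at a time down to a 128-bit remainder, with the final Barrett reduction mapping that to the 32-bit CRC. A minimal userspace-style reference of the same bit-reflected CRC-32 (polynomial 0xEDB88320), handy as a cross-check; the seed/final-XOR convention shown is the usual caller-side one, not something this file imposes:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Bit-reflected CRC-32 (polynomial 0xEDB88320), one bit at a time.
 * Slow, but it defines the value every fast path must reproduce. */
static uint32_t crc32_le_ref(uint32_t crc, const unsigned char *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	/* Standard CRC-32 check value: seed ~0, final XOR with ~0. */
	uint32_t crc = crc32_le_ref(~0u, (const unsigned char *)"123456789", 9);

	crc ^= ~0u;
	assert(crc == 0xCBF43926u);
	printf("crc32(\"123456789\") = 0x%08X\n", crc);
	return 0;
}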
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
new file mode 100644
index 000000000000..9d014a74ef96
--- /dev/null
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -0,0 +1,201 @@
1/* GPL HEADER START
2 *
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License version 2 for more details (a copy is included
13 * in the LICENSE file that accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see http://www.gnu.org/licenses
17 *
18 * Please visit http://www.xyratex.com/contact if you need additional
19 * information or have any questions.
20 *
21 * GPL HEADER END
22 */
23
24/*
25 * Copyright 2012 Xyratex Technology Limited
26 *
 27 * Wrappers for the kernel crypto shash API around the PCLMULQDQ crc32 implementation.
28 */
29#include <linux/init.h>
30#include <linux/module.h>
31#include <linux/string.h>
32#include <linux/kernel.h>
33#include <linux/crc32.h>
34#include <crypto/internal/hash.h>
35
36#include <asm/cpufeature.h>
37#include <asm/cpu_device_id.h>
38#include <asm/i387.h>
39
40#define CHKSUM_BLOCK_SIZE 1
41#define CHKSUM_DIGEST_SIZE 4
42
43#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
44 * for crc32_pclmul_le_16 */
45#define SCALE_F 16L /* size of xmm register */
46#define SCALE_F_MASK (SCALE_F - 1)
47
48u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
49
50static u32 __attribute__((pure))
51 crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
52{
53 unsigned int iquotient;
54 unsigned int iremainder;
55 unsigned int prealign;
56
57 if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
58 return crc32_le(crc, p, len);
59
60 if ((long)p & SCALE_F_MASK) {
61 /* align p to 16 byte */
62 prealign = SCALE_F - ((long)p & SCALE_F_MASK);
63
64 crc = crc32_le(crc, p, prealign);
65 len -= prealign;
66 p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
67 ~SCALE_F_MASK);
68 }
69 iquotient = len & (~SCALE_F_MASK);
70 iremainder = len & SCALE_F_MASK;
71
72 kernel_fpu_begin();
73 crc = crc32_pclmul_le_16(p, iquotient, crc);
74 kernel_fpu_end();
75
76 if (iremainder)
77 crc = crc32_le(crc, p + iquotient, iremainder);
78
79 return crc;
80}
81
82static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
83{
84 u32 *key = crypto_tfm_ctx(tfm);
85
86 *key = 0;
87
88 return 0;
89}
90
91static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
92 unsigned int keylen)
93{
94 u32 *mctx = crypto_shash_ctx(hash);
95
96 if (keylen != sizeof(u32)) {
97 crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
98 return -EINVAL;
99 }
100 *mctx = le32_to_cpup((__le32 *)key);
101 return 0;
102}
103
104static int crc32_pclmul_init(struct shash_desc *desc)
105{
106 u32 *mctx = crypto_shash_ctx(desc->tfm);
107 u32 *crcp = shash_desc_ctx(desc);
108
109 *crcp = *mctx;
110
111 return 0;
112}
113
114static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
115 unsigned int len)
116{
117 u32 *crcp = shash_desc_ctx(desc);
118
119 *crcp = crc32_pclmul_le(*crcp, data, len);
120 return 0;
121}
122
123/* No final XOR 0xFFFFFFFF, like crc32_le */
124static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
125 u8 *out)
126{
127 *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
128 return 0;
129}
130
131static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
132 unsigned int len, u8 *out)
133{
134 return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
135}
136
137static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
138{
139 u32 *crcp = shash_desc_ctx(desc);
140
141 *(__le32 *)out = cpu_to_le32p(crcp);
142 return 0;
143}
144
145static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
146 unsigned int len, u8 *out)
147{
148 return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
149 out);
150}
151
152static struct shash_alg alg = {
153 .setkey = crc32_pclmul_setkey,
154 .init = crc32_pclmul_init,
155 .update = crc32_pclmul_update,
156 .final = crc32_pclmul_final,
157 .finup = crc32_pclmul_finup,
158 .digest = crc32_pclmul_digest,
159 .descsize = sizeof(u32),
160 .digestsize = CHKSUM_DIGEST_SIZE,
161 .base = {
162 .cra_name = "crc32",
163 .cra_driver_name = "crc32-pclmul",
164 .cra_priority = 200,
165 .cra_blocksize = CHKSUM_BLOCK_SIZE,
166 .cra_ctxsize = sizeof(u32),
167 .cra_module = THIS_MODULE,
168 .cra_init = crc32_pclmul_cra_init,
169 }
170};
171
172static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
173 X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
174 {}
175};
176MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
177
178
179static int __init crc32_pclmul_mod_init(void)
180{
181
182 if (!x86_match_cpu(crc32pclmul_cpu_id)) {
183 pr_info("PCLMULQDQ-NI instructions are not detected.\n");
184 return -ENODEV;
185 }
186 return crypto_register_shash(&alg);
187}
188
189static void __exit crc32_pclmul_mod_fini(void)
190{
191 crypto_unregister_shash(&alg);
192}
193
194module_init(crc32_pclmul_mod_init);
195module_exit(crc32_pclmul_mod_fini);
196
197MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
198MODULE_LICENSE("GPL");
199
200MODULE_ALIAS("crc32");
201MODULE_ALIAS("crc32-pclmul");
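
crc32_pclmul_le() only hands the PCLMULQDQ routine a 16-byte-aligned region whose length is a multiple of 16; buffers shorter than PCLMUL_MIN_LEN + SCALE_F_MASK (or calls made when the FPU cannot be used) fall back entirely to crc32_le(), and the misaligned head and sub-16-byte tail of larger buffers go through crc32_le() as well. A small stand-alone sketch of that head/middle/tail split, using the same arithmetic as the glue code with hypothetical test values:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define SCALE_F		16	/* width of an xmm register, as in the glue code */
#define SCALE_F_MASK	(SCALE_F - 1)

/* Mirror of the split in crc32_pclmul_le(): 'prealign' bytes bring the
 * pointer up to a 16-byte boundary, 'iquotient' is the aligned
 * multiple-of-16 middle handled by crc32_pclmul_le_16(), and
 * 'iremainder' is the tail left to the generic crc32_le(). */
static void split(uintptr_t p, size_t len,
		  size_t *prealign, size_t *iquotient, size_t *iremainder)
{
	*prealign = (p & SCALE_F_MASK) ? SCALE_F - (p & SCALE_F_MASK) : 0;
	len -= *prealign;
	*iquotient  = len & ~(size_t)SCALE_F_MASK;
	*iremainder = len & SCALE_F_MASK;
}

int main(void)
{
	size_t pre, quo, rem;

	split(0x1003, 200, &pre, &quo, &rem);	/* hypothetical pointer/length */
	assert(pre == 13 && quo == 176 && rem == 11);
	assert(pre + quo + rem == 200);
	assert(((0x1003 + pre) & SCALE_F_MASK) == 0);	/* middle starts aligned */
	assert((quo % SCALE_F) == 0);			/* and is a multiple of 16 */
	return 0;
}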
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 93c6d39237ac..cf1a7ec4cc3a 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -42,6 +42,8 @@
42 * SOFTWARE. 42 * SOFTWARE.
43 */ 43 */
44 44
45#include <linux/linkage.h>
46
45## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction 47## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
46 48
47.macro LABEL prefix n 49.macro LABEL prefix n
@@ -68,8 +70,7 @@
68 70
69# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); 71# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
70 72
71.global crc_pcl 73ENTRY(crc_pcl)
72crc_pcl:
73#define bufp %rdi 74#define bufp %rdi
74#define bufp_dw %edi 75#define bufp_dw %edi
75#define bufp_w %di 76#define bufp_w %di
@@ -323,6 +324,9 @@ JMPTBL_ENTRY %i
323.noaltmacro 324.noaltmacro
324 i=i+1 325 i=i+1
325.endr 326.endr
327
328ENDPROC(crc_pcl)
329
326 ################################################################ 330 ################################################################
327 ## PCLMULQDQ tables 331 ## PCLMULQDQ tables
328 ## Table is 128 entries x 2 quad words each 332 ## Table is 128 entries x 2 quad words each
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 1eb7f90cb7b9..586f41aac361 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -94,6 +94,7 @@ __clmul_gf128mul_ble:
94 pxor T2, T1 94 pxor T2, T1
95 pxor T1, DATA 95 pxor T1, DATA
96 ret 96 ret
97ENDPROC(__clmul_gf128mul_ble)
97 98
98/* void clmul_ghash_mul(char *dst, const be128 *shash) */ 99/* void clmul_ghash_mul(char *dst, const be128 *shash) */
99ENTRY(clmul_ghash_mul) 100ENTRY(clmul_ghash_mul)
@@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul)
105 PSHUFB_XMM BSWAP DATA 106 PSHUFB_XMM BSWAP DATA
106 movups DATA, (%rdi) 107 movups DATA, (%rdi)
107 ret 108 ret
109ENDPROC(clmul_ghash_mul)
108 110
109/* 111/*
110 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, 112 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
@@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update)
131 movups DATA, (%rdi) 133 movups DATA, (%rdi)
132.Lupdate_just_ret: 134.Lupdate_just_ret:
133 ret 135 ret
136ENDPROC(clmul_ghash_update)
134 137
135/* 138/*
136 * void clmul_ghash_setkey(be128 *shash, const u8 *key); 139 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
@@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey)
155 pxor %xmm1, %xmm0 158 pxor %xmm1, %xmm0
156 movups %xmm0, (%rdi) 159 movups %xmm0, (%rdi)
157 ret 160 ret
161ENDPROC(clmul_ghash_setkey)
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
index 72eb306680b2..329452b8f794 100644
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -2,11 +2,12 @@
2# D. J. Bernstein 2# D. J. Bernstein
3# Public domain. 3# Public domain.
4 4
5# enter ECRYPT_encrypt_bytes 5#include <linux/linkage.h>
6
6.text 7.text
7.p2align 5 8
8.globl ECRYPT_encrypt_bytes 9# enter salsa20_encrypt_bytes
9ECRYPT_encrypt_bytes: 10ENTRY(salsa20_encrypt_bytes)
10 mov %esp,%eax 11 mov %esp,%eax
11 and $31,%eax 12 and $31,%eax
12 add $256,%eax 13 add $256,%eax
@@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes:
933 add $64,%esi 934 add $64,%esi
934 # goto bytesatleast1 935 # goto bytesatleast1
935 jmp ._bytesatleast1 936 jmp ._bytesatleast1
936# enter ECRYPT_keysetup 937ENDPROC(salsa20_encrypt_bytes)
937.text 938
938.p2align 5 939# enter salsa20_keysetup
939.globl ECRYPT_keysetup 940ENTRY(salsa20_keysetup)
940ECRYPT_keysetup:
941 mov %esp,%eax 941 mov %esp,%eax
942 and $31,%eax 942 and $31,%eax
943 add $256,%eax 943 add $256,%eax
@@ -1060,11 +1060,10 @@ ECRYPT_keysetup:
1060 # leave 1060 # leave
1061 add %eax,%esp 1061 add %eax,%esp
1062 ret 1062 ret
1063# enter ECRYPT_ivsetup 1063ENDPROC(salsa20_keysetup)
1064.text 1064
1065.p2align 5 1065# enter salsa20_ivsetup
1066.globl ECRYPT_ivsetup 1066ENTRY(salsa20_ivsetup)
1067ECRYPT_ivsetup:
1068 mov %esp,%eax 1067 mov %esp,%eax
1069 and $31,%eax 1068 and $31,%eax
1070 add $256,%eax 1069 add $256,%eax
@@ -1112,3 +1111,4 @@ ECRYPT_ivsetup:
1112 # leave 1111 # leave
1113 add %eax,%esp 1112 add %eax,%esp
1114 ret 1113 ret
1114ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
index 6214a9b09706..9279e0b2d60e 100644
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
@@ -1,8 +1,7 @@
1# enter ECRYPT_encrypt_bytes 1#include <linux/linkage.h>
2.text 2
3.p2align 5 3# enter salsa20_encrypt_bytes
4.globl ECRYPT_encrypt_bytes 4ENTRY(salsa20_encrypt_bytes)
5ECRYPT_encrypt_bytes:
6 mov %rsp,%r11 5 mov %rsp,%r11
7 and $31,%r11 6 and $31,%r11
8 add $256,%r11 7 add $256,%r11
@@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes:
802 # comment:fp stack unchanged by jump 801 # comment:fp stack unchanged by jump
803 # goto bytesatleast1 802 # goto bytesatleast1
804 jmp ._bytesatleast1 803 jmp ._bytesatleast1
805# enter ECRYPT_keysetup 804ENDPROC(salsa20_encrypt_bytes)
806.text 805
807.p2align 5 806# enter salsa20_keysetup
808.globl ECRYPT_keysetup 807ENTRY(salsa20_keysetup)
809ECRYPT_keysetup:
810 mov %rsp,%r11 808 mov %rsp,%r11
811 and $31,%r11 809 and $31,%r11
812 add $256,%r11 810 add $256,%r11
@@ -892,11 +890,10 @@ ECRYPT_keysetup:
892 mov %rdi,%rax 890 mov %rdi,%rax
893 mov %rsi,%rdx 891 mov %rsi,%rdx
894 ret 892 ret
895# enter ECRYPT_ivsetup 893ENDPROC(salsa20_keysetup)
896.text 894
897.p2align 5 895# enter salsa20_ivsetup
898.globl ECRYPT_ivsetup 896ENTRY(salsa20_ivsetup)
899ECRYPT_ivsetup:
900 mov %rsp,%r11 897 mov %rsp,%r11
901 and $31,%r11 898 and $31,%r11
902 add $256,%r11 899 add $256,%r11
@@ -918,3 +915,4 @@ ECRYPT_ivsetup:
918 mov %rdi,%rax 915 mov %rdi,%rax
919 mov %rsi,%rdx 916 mov %rsi,%rdx
920 ret 917 ret
918ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index a3a3c0205c16..5e8e67739bb5 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -26,11 +26,6 @@
26#define SALSA20_MIN_KEY_SIZE 16U 26#define SALSA20_MIN_KEY_SIZE 16U
27#define SALSA20_MAX_KEY_SIZE 32U 27#define SALSA20_MAX_KEY_SIZE 32U
28 28
29// use the ECRYPT_* function names
30#define salsa20_keysetup ECRYPT_keysetup
31#define salsa20_ivsetup ECRYPT_ivsetup
32#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
33
34struct salsa20_ctx 29struct salsa20_ctx
35{ 30{
36 u32 input[16]; 31 u32 input[16];
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 02b0e9fe997c..43c938612b74 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -24,6 +24,7 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
27#include "glue_helper-asm-avx.S" 28#include "glue_helper-asm-avx.S"
28 29
29.file "serpent-avx-x86_64-asm_64.S" 30.file "serpent-avx-x86_64-asm_64.S"
@@ -566,8 +567,6 @@
566 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) 567 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
567 568
568.align 8 569.align 8
569.type __serpent_enc_blk8_avx,@function;
570
571__serpent_enc_blk8_avx: 570__serpent_enc_blk8_avx:
572 /* input: 571 /* input:
573 * %rdi: ctx, CTX 572 * %rdi: ctx, CTX
@@ -619,10 +618,9 @@ __serpent_enc_blk8_avx:
619 write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); 618 write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
620 619
621 ret; 620 ret;
621ENDPROC(__serpent_enc_blk8_avx)
622 622
623.align 8 623.align 8
624.type __serpent_dec_blk8_avx,@function;
625
626__serpent_dec_blk8_avx: 624__serpent_dec_blk8_avx:
627 /* input: 625 /* input:
628 * %rdi: ctx, CTX 626 * %rdi: ctx, CTX
@@ -674,12 +672,9 @@ __serpent_dec_blk8_avx:
674 write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); 672 write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
675 673
676 ret; 674 ret;
675ENDPROC(__serpent_dec_blk8_avx)
677 676
678.align 8 677ENTRY(serpent_ecb_enc_8way_avx)
679.global serpent_ecb_enc_8way_avx
680.type serpent_ecb_enc_8way_avx,@function;
681
682serpent_ecb_enc_8way_avx:
683 /* input: 678 /* input:
684 * %rdi: ctx, CTX 679 * %rdi: ctx, CTX
685 * %rsi: dst 680 * %rsi: dst
@@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx:
693 store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 688 store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
694 689
695 ret; 690 ret;
691ENDPROC(serpent_ecb_enc_8way_avx)
696 692
697.align 8 693ENTRY(serpent_ecb_dec_8way_avx)
698.global serpent_ecb_dec_8way_avx
699.type serpent_ecb_dec_8way_avx,@function;
700
701serpent_ecb_dec_8way_avx:
702 /* input: 694 /* input:
703 * %rdi: ctx, CTX 695 * %rdi: ctx, CTX
704 * %rsi: dst 696 * %rsi: dst
@@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx:
712 store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 704 store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
713 705
714 ret; 706 ret;
707ENDPROC(serpent_ecb_dec_8way_avx)
715 708
716.align 8 709ENTRY(serpent_cbc_dec_8way_avx)
717.global serpent_cbc_dec_8way_avx
718.type serpent_cbc_dec_8way_avx,@function;
719
720serpent_cbc_dec_8way_avx:
721 /* input: 710 /* input:
722 * %rdi: ctx, CTX 711 * %rdi: ctx, CTX
723 * %rsi: dst 712 * %rsi: dst
@@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx:
731 store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 720 store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
732 721
733 ret; 722 ret;
723ENDPROC(serpent_cbc_dec_8way_avx)
734 724
735.align 8 725ENTRY(serpent_ctr_8way_avx)
736.global serpent_ctr_8way_avx
737.type serpent_ctr_8way_avx,@function;
738
739serpent_ctr_8way_avx:
740 /* input: 726 /* input:
741 * %rdi: ctx, CTX 727 * %rdi: ctx, CTX
742 * %rsi: dst 728 * %rsi: dst
@@ -752,3 +738,4 @@ serpent_ctr_8way_avx:
752 store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 738 store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
753 739
754 ret; 740 ret;
741ENDPROC(serpent_ctr_8way_avx)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index c00053d42f99..d348f1553a79 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -24,6 +24,8 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
28
27.file "serpent-sse2-i586-asm_32.S" 29.file "serpent-sse2-i586-asm_32.S"
28.text 30.text
29 31
@@ -510,11 +512,7 @@
510 pxor t0, x3; \ 512 pxor t0, x3; \
511 movdqu x3, (3*4*4)(out); 513 movdqu x3, (3*4*4)(out);
512 514
513.align 8 515ENTRY(__serpent_enc_blk_4way)
514.global __serpent_enc_blk_4way
515.type __serpent_enc_blk_4way,@function;
516
517__serpent_enc_blk_4way:
518 /* input: 516 /* input:
519 * arg_ctx(%esp): ctx, CTX 517 * arg_ctx(%esp): ctx, CTX
520 * arg_dst(%esp): dst 518 * arg_dst(%esp): dst
@@ -566,22 +564,19 @@ __serpent_enc_blk_4way:
566 movl arg_dst(%esp), %eax; 564 movl arg_dst(%esp), %eax;
567 565
568 cmpb $0, arg_xor(%esp); 566 cmpb $0, arg_xor(%esp);
569 jnz __enc_xor4; 567 jnz .L__enc_xor4;
570 568
571 write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); 569 write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
572 570
573 ret; 571 ret;
574 572
575__enc_xor4: 573.L__enc_xor4:
576 xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); 574 xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
577 575
578 ret; 576 ret;
577ENDPROC(__serpent_enc_blk_4way)
579 578
580.align 8 579ENTRY(serpent_dec_blk_4way)
581.global serpent_dec_blk_4way
582.type serpent_dec_blk_4way,@function;
583
584serpent_dec_blk_4way:
585 /* input: 580 /* input:
586 * arg_ctx(%esp): ctx, CTX 581 * arg_ctx(%esp): ctx, CTX
587 * arg_dst(%esp): dst 582 * arg_dst(%esp): dst
@@ -633,3 +628,4 @@ serpent_dec_blk_4way:
633 write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); 628 write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
634 629
635 ret; 630 ret;
631ENDPROC(serpent_dec_blk_4way)
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 3ee1ff04d3e9..acc066c7c6b2 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -24,6 +24,8 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
28
27.file "serpent-sse2-x86_64-asm_64.S" 29.file "serpent-sse2-x86_64-asm_64.S"
28.text 30.text
29 31
@@ -632,11 +634,7 @@
632 pxor t0, x3; \ 634 pxor t0, x3; \
633 movdqu x3, (3*4*4)(out); 635 movdqu x3, (3*4*4)(out);
634 636
635.align 8 637ENTRY(__serpent_enc_blk_8way)
636.global __serpent_enc_blk_8way
637.type __serpent_enc_blk_8way,@function;
638
639__serpent_enc_blk_8way:
640 /* input: 638 /* input:
641 * %rdi: ctx, CTX 639 * %rdi: ctx, CTX
642 * %rsi: dst 640 * %rsi: dst
@@ -687,24 +685,21 @@ __serpent_enc_blk_8way:
687 leaq (4*4*4)(%rsi), %rax; 685 leaq (4*4*4)(%rsi), %rax;
688 686
689 testb %cl, %cl; 687 testb %cl, %cl;
690 jnz __enc_xor8; 688 jnz .L__enc_xor8;
691 689
692 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); 690 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
693 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); 691 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
694 692
695 ret; 693 ret;
696 694
697__enc_xor8: 695.L__enc_xor8:
698 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); 696 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
699 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); 697 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
700 698
701 ret; 699 ret;
700ENDPROC(__serpent_enc_blk_8way)
702 701
703.align 8 702ENTRY(serpent_dec_blk_8way)
704.global serpent_dec_blk_8way
705.type serpent_dec_blk_8way,@function;
706
707serpent_dec_blk_8way:
708 /* input: 703 /* input:
709 * %rdi: ctx, CTX 704 * %rdi: ctx, CTX
710 * %rsi: dst 705 * %rsi: dst
@@ -756,3 +751,4 @@ serpent_dec_blk_8way:
756 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); 751 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
757 752
758 ret; 753 ret;
754ENDPROC(serpent_dec_blk_8way)
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 49d6987a73d9..a4109506a5e8 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -28,6 +28,8 @@
28 * (at your option) any later version. 28 * (at your option) any later version.
29 */ 29 */
30 30
31#include <linux/linkage.h>
32
31#define CTX %rdi // arg1 33#define CTX %rdi // arg1
32#define BUF %rsi // arg2 34#define BUF %rsi // arg2
33#define CNT %rdx // arg3 35#define CNT %rdx // arg3
@@ -69,10 +71,8 @@
69 * param: function's name 71 * param: function's name
70 */ 72 */
71.macro SHA1_VECTOR_ASM name 73.macro SHA1_VECTOR_ASM name
72 .global \name 74 ENTRY(\name)
73 .type \name, @function 75
74 .align 32
75\name:
76 push %rbx 76 push %rbx
77 push %rbp 77 push %rbp
78 push %r12 78 push %r12
@@ -106,7 +106,7 @@
106 pop %rbx 106 pop %rbx
107 ret 107 ret
108 108
109 .size \name, .-\name 109 ENDPROC(\name)
110.endm 110.endm
111 111
112/* 112/*
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index ebac16bfa830..8d3e113b2c95 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -23,6 +23,7 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
26#include "glue_helper-asm-avx.S" 27#include "glue_helper-asm-avx.S"
27 28
28.file "twofish-avx-x86_64-asm_64.S" 29.file "twofish-avx-x86_64-asm_64.S"
@@ -243,8 +244,6 @@
243 vpxor x3, wkey, x3; 244 vpxor x3, wkey, x3;
244 245
245.align 8 246.align 8
246.type __twofish_enc_blk8,@function;
247
248__twofish_enc_blk8: 247__twofish_enc_blk8:
249 /* input: 248 /* input:
250 * %rdi: ctx, CTX 249 * %rdi: ctx, CTX
@@ -284,10 +283,9 @@ __twofish_enc_blk8:
284 outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); 283 outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
285 284
286 ret; 285 ret;
286ENDPROC(__twofish_enc_blk8)
287 287
288.align 8 288.align 8
289.type __twofish_dec_blk8,@function;
290
291__twofish_dec_blk8: 289__twofish_dec_blk8:
292 /* input: 290 /* input:
293 * %rdi: ctx, CTX 291 * %rdi: ctx, CTX
@@ -325,12 +323,9 @@ __twofish_dec_blk8:
325 outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); 323 outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
326 324
327 ret; 325 ret;
326ENDPROC(__twofish_dec_blk8)
328 327
329.align 8 328ENTRY(twofish_ecb_enc_8way)
330.global twofish_ecb_enc_8way
331.type twofish_ecb_enc_8way,@function;
332
333twofish_ecb_enc_8way:
334 /* input: 329 /* input:
335 * %rdi: ctx, CTX 330 * %rdi: ctx, CTX
336 * %rsi: dst 331 * %rsi: dst
@@ -346,12 +341,9 @@ twofish_ecb_enc_8way:
346 store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); 341 store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
347 342
348 ret; 343 ret;
344ENDPROC(twofish_ecb_enc_8way)
349 345
350.align 8 346ENTRY(twofish_ecb_dec_8way)
351.global twofish_ecb_dec_8way
352.type twofish_ecb_dec_8way,@function;
353
354twofish_ecb_dec_8way:
355 /* input: 347 /* input:
356 * %rdi: ctx, CTX 348 * %rdi: ctx, CTX
357 * %rsi: dst 349 * %rsi: dst
@@ -367,12 +359,9 @@ twofish_ecb_dec_8way:
367 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 359 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
368 360
369 ret; 361 ret;
362ENDPROC(twofish_ecb_dec_8way)
370 363
371.align 8 364ENTRY(twofish_cbc_dec_8way)
372.global twofish_cbc_dec_8way
373.type twofish_cbc_dec_8way,@function;
374
375twofish_cbc_dec_8way:
376 /* input: 365 /* input:
377 * %rdi: ctx, CTX 366 * %rdi: ctx, CTX
378 * %rsi: dst 367 * %rsi: dst
@@ -393,12 +382,9 @@ twofish_cbc_dec_8way:
393 popq %r12; 382 popq %r12;
394 383
395 ret; 384 ret;
385ENDPROC(twofish_cbc_dec_8way)
396 386
397.align 8 387ENTRY(twofish_ctr_8way)
398.global twofish_ctr_8way
399.type twofish_ctr_8way,@function;
400
401twofish_ctr_8way:
402 /* input: 388 /* input:
403 * %rdi: ctx, CTX 389 * %rdi: ctx, CTX
404 * %rsi: dst 390 * %rsi: dst
@@ -421,3 +407,4 @@ twofish_ctr_8way:
421 popq %r12; 407 popq %r12;
422 408
423 ret; 409 ret;
410ENDPROC(twofish_ctr_8way)
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 658af4bb35c9..694ea4587ba7 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -20,6 +20,7 @@
20.file "twofish-i586-asm.S" 20.file "twofish-i586-asm.S"
21.text 21.text
22 22
23#include <linux/linkage.h>
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
24 25
25/* return address at 0 */ 26/* return address at 0 */
@@ -219,11 +220,7 @@
219 xor %esi, d ## D;\ 220 xor %esi, d ## D;\
220 ror $1, d ## D; 221 ror $1, d ## D;
221 222
222.align 4 223ENTRY(twofish_enc_blk)
223.global twofish_enc_blk
224.global twofish_dec_blk
225
226twofish_enc_blk:
227 push %ebp /* save registers according to calling convention*/ 224 push %ebp /* save registers according to calling convention*/
228 push %ebx 225 push %ebx
229 push %esi 226 push %esi
@@ -277,8 +274,9 @@ twofish_enc_blk:
277 pop %ebp 274 pop %ebp
278 mov $1, %eax 275 mov $1, %eax
279 ret 276 ret
277ENDPROC(twofish_enc_blk)
280 278
281twofish_dec_blk: 279ENTRY(twofish_dec_blk)
282 push %ebp /* save registers according to calling convention*/ 280 push %ebp /* save registers according to calling convention*/
283 push %ebx 281 push %ebx
284 push %esi 282 push %esi
@@ -333,3 +331,4 @@ twofish_dec_blk:
333 pop %ebp 331 pop %ebp
334 mov $1, %eax 332 mov $1, %eax
335 ret 333 ret
334ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index 5b012a2c5119..1c3b7ceb36d2 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "twofish-x86_64-asm-3way.S" 25.file "twofish-x86_64-asm-3way.S"
24.text 26.text
25 27
@@ -214,11 +216,7 @@
214 rorq $32, RAB2; \ 216 rorq $32, RAB2; \
215 outunpack3(mov, RIO, 2, RAB, 2); 217 outunpack3(mov, RIO, 2, RAB, 2);
216 218
217.align 8 219ENTRY(__twofish_enc_blk_3way)
218.global __twofish_enc_blk_3way
219.type __twofish_enc_blk_3way,@function;
220
221__twofish_enc_blk_3way:
222 /* input: 220 /* input:
223 * %rdi: ctx, CTX 221 * %rdi: ctx, CTX
224 * %rsi: dst 222 * %rsi: dst
@@ -250,7 +248,7 @@ __twofish_enc_blk_3way:
250 popq %rbp; /* bool xor */ 248 popq %rbp; /* bool xor */
251 249
252 testb %bpl, %bpl; 250 testb %bpl, %bpl;
253 jnz __enc_xor3; 251 jnz .L__enc_xor3;
254 252
255 outunpack_enc3(mov); 253 outunpack_enc3(mov);
256 254
@@ -262,7 +260,7 @@ __twofish_enc_blk_3way:
262 popq %r15; 260 popq %r15;
263 ret; 261 ret;
264 262
265__enc_xor3: 263.L__enc_xor3:
266 outunpack_enc3(xor); 264 outunpack_enc3(xor);
267 265
268 popq %rbx; 266 popq %rbx;
@@ -272,11 +270,9 @@ __enc_xor3:
272 popq %r14; 270 popq %r14;
273 popq %r15; 271 popq %r15;
274 ret; 272 ret;
273ENDPROC(__twofish_enc_blk_3way)
275 274
276.global twofish_dec_blk_3way 275ENTRY(twofish_dec_blk_3way)
277.type twofish_dec_blk_3way,@function;
278
279twofish_dec_blk_3way:
280 /* input: 276 /* input:
281 * %rdi: ctx, CTX 277 * %rdi: ctx, CTX
282 * %rsi: dst 278 * %rsi: dst
@@ -313,4 +309,4 @@ twofish_dec_blk_3way:
313 popq %r14; 309 popq %r14;
314 popq %r15; 310 popq %r15;
315 ret; 311 ret;
316 312ENDPROC(twofish_dec_blk_3way)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 7bcf3fcc3668..a039d21986a2 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -20,6 +20,7 @@
20.file "twofish-x86_64-asm.S" 20.file "twofish-x86_64-asm.S"
21.text 21.text
22 22
23#include <linux/linkage.h>
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
24 25
25#define a_offset 0 26#define a_offset 0
@@ -214,11 +215,7 @@
214 xor %r8d, d ## D;\ 215 xor %r8d, d ## D;\
215 ror $1, d ## D; 216 ror $1, d ## D;
216 217
217.align 8 218ENTRY(twofish_enc_blk)
218.global twofish_enc_blk
219.global twofish_dec_blk
220
221twofish_enc_blk:
222 pushq R1 219 pushq R1
223 220
224 /* %rdi contains the ctx address */ 221 /* %rdi contains the ctx address */
@@ -269,8 +266,9 @@ twofish_enc_blk:
269 popq R1 266 popq R1
270 movq $1,%rax 267 movq $1,%rax
271 ret 268 ret
269ENDPROC(twofish_enc_blk)
272 270
273twofish_dec_blk: 271ENTRY(twofish_dec_blk)
274 pushq R1 272 pushq R1
275 273
276 /* %rdi contains the ctx address */ 274 /* %rdi contains the ctx address */
@@ -320,3 +318,4 @@ twofish_dec_blk:
320 popq R1 318 popq R1
321 movq $1,%rax 319 movq $1,%rax
322 ret 320 ret
321ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a703af19c281..03abf9b70011 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -271,7 +271,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
271 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && 271 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
272 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || 272 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
273 N_TRSIZE(ex) || N_DRSIZE(ex) || 273 N_TRSIZE(ex) || N_DRSIZE(ex) ||
274 i_size_read(bprm->file->f_path.dentry->d_inode) < 274 i_size_read(file_inode(bprm->file)) <
275 ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { 275 ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
276 return -ENOEXEC; 276 return -ENOEXEC;
277 } 277 }
@@ -425,12 +425,10 @@ beyond_if:
425 425
426static int load_aout_library(struct file *file) 426static int load_aout_library(struct file *file)
427{ 427{
428 struct inode *inode;
429 unsigned long bss, start_addr, len, error; 428 unsigned long bss, start_addr, len, error;
430 int retval; 429 int retval;
431 struct exec ex; 430 struct exec ex;
432 431
433 inode = file->f_path.dentry->d_inode;
434 432
435 retval = -ENOEXEC; 433 retval = -ENOEXEC;
436 error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); 434 error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
@@ -440,7 +438,7 @@ static int load_aout_library(struct file *file)
440 /* We come in here for the regular a.out style of shared libraries */ 438 /* We come in here for the regular a.out style of shared libraries */
441 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || 439 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
442 N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || 440 N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
443 i_size_read(inode) < 441 i_size_read(file_inode(file)) <
444 ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { 442 ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
445 goto out; 443 goto out;
446 } 444 }
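
The two ia32_aout.c hunks are part of the tree-wide conversion from open-coded file->f_path.dentry->d_inode chains to the file_inode() accessor, which reads an inode pointer cached in the struct file itself. A toy model of the substitution; the struct definitions below are stand-ins for illustration, not the kernel's:

#include <assert.h>

/* Stand-in structs mirroring only the fields the old and new spellings
 * touch; the real definitions live in <linux/fs.h> and <linux/dcache.h>. */
struct inode  { long i_size; };
struct dentry { struct inode *d_inode; };
struct path   { struct dentry *dentry; };
struct file   { struct path f_path; struct inode *f_inode; };

/* Toy equivalent of the accessor the hunks switch to. */
static inline struct inode *file_inode(const struct file *f)
{
	return f->f_inode;
}

int main(void)
{
	struct inode  ino = { .i_size = 4096 };
	struct dentry d   = { .d_inode = &ino };
	struct file   f   = { .f_path = { .dentry = &d }, .f_inode = &ino };

	/* Old and new spellings reach the same object. */
	assert(file_inode(&f) == f.f_path.dentry->d_inode);
	return 0;
}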
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a1daf4a65009..cf1a471a18a2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -129,13 +129,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
129 return err; 129 return err;
130} 130}
131 131
132asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
133{
134 sigset_t blocked;
135 siginitset(&blocked, mask);
136 return sigsuspend(&blocked);
137}
138
139/* 132/*
140 * Do a signal return; undo the signal stack. 133 * Do a signal return; undo the signal stack.
141 */ 134 */
@@ -215,8 +208,9 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
215 return err; 208 return err;
216} 209}
217 210
218asmlinkage long sys32_sigreturn(struct pt_regs *regs) 211asmlinkage long sys32_sigreturn(void)
219{ 212{
213 struct pt_regs *regs = current_pt_regs();
220 struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); 214 struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
221 sigset_t set; 215 sigset_t set;
222 unsigned int ax; 216 unsigned int ax;
@@ -241,8 +235,9 @@ badframe:
241 return 0; 235 return 0;
242} 236}
243 237
244asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) 238asmlinkage long sys32_rt_sigreturn(void)
245{ 239{
240 struct pt_regs *regs = current_pt_regs();
246 struct rt_sigframe_ia32 __user *frame; 241 struct rt_sigframe_ia32 __user *frame;
247 sigset_t set; 242 sigset_t set;
248 unsigned int ax; 243 unsigned int ax;
@@ -314,7 +309,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
314/* 309/*
315 * Determine which stack to use.. 310 * Determine which stack to use..
316 */ 311 */
317static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, 312static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
318 size_t frame_size, 313 size_t frame_size,
319 void __user **fpstate) 314 void __user **fpstate)
320{ 315{
@@ -324,16 +319,13 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
324 sp = regs->sp; 319 sp = regs->sp;
325 320
326 /* This is the X/Open sanctioned signal stack switching. */ 321 /* This is the X/Open sanctioned signal stack switching. */
327 if (ka->sa.sa_flags & SA_ONSTACK) { 322 if (ksig->ka.sa.sa_flags & SA_ONSTACK)
328 if (sas_ss_flags(sp) == 0) 323 sp = sigsp(sp, ksig);
329 sp = current->sas_ss_sp + current->sas_ss_size;
330 }
331
332 /* This is the legacy signal stack switching. */ 324 /* This is the legacy signal stack switching. */
333 else if ((regs->ss & 0xffff) != __USER32_DS && 325 else if ((regs->ss & 0xffff) != __USER32_DS &&
334 !(ka->sa.sa_flags & SA_RESTORER) && 326 !(ksig->ka.sa.sa_flags & SA_RESTORER) &&
335 ka->sa.sa_restorer) 327 ksig->ka.sa.sa_restorer)
336 sp = (unsigned long) ka->sa.sa_restorer; 328 sp = (unsigned long) ksig->ka.sa.sa_restorer;
337 329
338 if (used_math()) { 330 if (used_math()) {
339 unsigned long fx_aligned, math_size; 331 unsigned long fx_aligned, math_size;
@@ -352,7 +344,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
352 return (void __user *) sp; 344 return (void __user *) sp;
353} 345}
354 346
355int ia32_setup_frame(int sig, struct k_sigaction *ka, 347int ia32_setup_frame(int sig, struct ksignal *ksig,
356 compat_sigset_t *set, struct pt_regs *regs) 348 compat_sigset_t *set, struct pt_regs *regs)
357{ 349{
358 struct sigframe_ia32 __user *frame; 350 struct sigframe_ia32 __user *frame;
@@ -371,7 +363,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
371 0x80cd, /* int $0x80 */ 363 0x80cd, /* int $0x80 */
372 }; 364 };
373 365
374 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 366 frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
375 367
376 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 368 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
377 return -EFAULT; 369 return -EFAULT;
@@ -388,8 +380,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
388 return -EFAULT; 380 return -EFAULT;
389 } 381 }
390 382
391 if (ka->sa.sa_flags & SA_RESTORER) { 383 if (ksig->ka.sa.sa_flags & SA_RESTORER) {
392 restorer = ka->sa.sa_restorer; 384 restorer = ksig->ka.sa.sa_restorer;
393 } else { 385 } else {
394 /* Return stub is in 32bit vsyscall page */ 386 /* Return stub is in 32bit vsyscall page */
395 if (current->mm->context.vdso) 387 if (current->mm->context.vdso)
@@ -414,7 +406,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
414 406
415 /* Set up registers for signal handler */ 407 /* Set up registers for signal handler */
416 regs->sp = (unsigned long) frame; 408 regs->sp = (unsigned long) frame;
417 regs->ip = (unsigned long) ka->sa.sa_handler; 409 regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
418 410
419 /* Make -mregparm=3 work */ 411 /* Make -mregparm=3 work */
420 regs->ax = sig; 412 regs->ax = sig;
@@ -430,7 +422,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
430 return 0; 422 return 0;
431} 423}
432 424
433int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 425int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
434 compat_sigset_t *set, struct pt_regs *regs) 426 compat_sigset_t *set, struct pt_regs *regs)
435{ 427{
436 struct rt_sigframe_ia32 __user *frame; 428 struct rt_sigframe_ia32 __user *frame;
@@ -451,7 +443,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
451 0, 443 0,
452 }; 444 };
453 445
454 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 446 frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
455 447
456 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 448 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
457 return -EFAULT; 449 return -EFAULT;
@@ -469,8 +461,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
469 put_user_ex(0, &frame->uc.uc_link); 461 put_user_ex(0, &frame->uc.uc_link);
470 err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); 462 err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
471 463
472 if (ka->sa.sa_flags & SA_RESTORER) 464 if (ksig->ka.sa.sa_flags & SA_RESTORER)
473 restorer = ka->sa.sa_restorer; 465 restorer = ksig->ka.sa.sa_restorer;
474 else 466 else
475 restorer = VDSO32_SYMBOL(current->mm->context.vdso, 467 restorer = VDSO32_SYMBOL(current->mm->context.vdso,
476 rt_sigreturn); 468 rt_sigreturn);
@@ -483,7 +475,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
483 put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); 475 put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
484 } put_user_catch(err); 476 } put_user_catch(err);
485 477
486 err |= copy_siginfo_to_user32(&frame->info, info); 478 err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
487 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, 479 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
488 regs, set->sig[0]); 480 regs, set->sig[0]);
489 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 481 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -493,7 +485,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
493 485
494 /* Set up registers for signal handler */ 486 /* Set up registers for signal handler */
495 regs->sp = (unsigned long) frame; 487 regs->sp = (unsigned long) frame;
496 regs->ip = (unsigned long) ka->sa.sa_handler; 488 regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
497 489
498 /* Make -mregparm=3 work */ 490 /* Make -mregparm=3 work */
499 regs->ax = sig; 491 regs->ax = sig;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 102ff7cb3e41..474dc1b59f72 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -207,7 +207,7 @@ sysexit_from_sys_call:
207 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 207 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
208 jnz ia32_ret_from_sys_call 208 jnz ia32_ret_from_sys_call
209 TRACE_IRQS_ON 209 TRACE_IRQS_ON
210 sti 210 ENABLE_INTERRUPTS(CLBR_NONE)
211 movl %eax,%esi /* second arg, syscall return value */ 211 movl %eax,%esi /* second arg, syscall return value */
212 cmpl $-MAX_ERRNO,%eax /* is it an error ? */ 212 cmpl $-MAX_ERRNO,%eax /* is it an error ? */
213 jbe 1f 213 jbe 1f
@@ -217,7 +217,7 @@ sysexit_from_sys_call:
217 call __audit_syscall_exit 217 call __audit_syscall_exit
218 movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ 218 movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
219 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 219 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
220 cli 220 DISABLE_INTERRUPTS(CLBR_NONE)
221 TRACE_IRQS_OFF 221 TRACE_IRQS_OFF
222 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 222 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
223 jz \exit 223 jz \exit
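For context (an assumption paraphrasing asm/irqflags.h, not part of this patch): without CONFIG_PARAVIRT these macros expand to the bare instructions, so the two hunks above are behaviourally neutral there, while with paravirt enabled they become pv_irq_ops calls, which is why open-coded sti/cli must not appear on this path.

	/* native (!CONFIG_PARAVIRT) case, roughly: */
	#define ENABLE_INTERRUPTS(x)	sti
	#define DISABLE_INTERRUPTS(x)	cli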
@@ -456,18 +456,16 @@ ia32_badsys:
456 ALIGN 456 ALIGN
457GLOBAL(\label) 457GLOBAL(\label)
458 leaq \func(%rip),%rax 458 leaq \func(%rip),%rax
459 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
460 jmp ia32_ptregs_common 459 jmp ia32_ptregs_common
461 .endm 460 .endm
462 461
463 CFI_STARTPROC32 462 CFI_STARTPROC32
464 463
465 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi 464 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
466 PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi 465 PTREGSCALL stub32_sigreturn, sys32_sigreturn
467 PTREGSCALL stub32_execve, compat_sys_execve, %rcx 466 PTREGSCALL stub32_execve, compat_sys_execve
468 PTREGSCALL stub32_fork, sys_fork, %rdi 467 PTREGSCALL stub32_fork, sys_fork
469 PTREGSCALL stub32_vfork, sys_vfork, %rdi 468 PTREGSCALL stub32_vfork, sys_vfork
470 PTREGSCALL stub32_iopl, sys_iopl, %rsi
471 469
472 ALIGN 470 ALIGN
473GLOBAL(stub32_clone) 471GLOBAL(stub32_clone)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index d0b689ba7be2..ad7a20cbc699 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -172,183 +172,12 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len,
172 return sys_mprotect(start, len, prot); 172 return sys_mprotect(start, len, prot);
173} 173}
174 174
175asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
176 struct sigaction32 __user *oact,
177 unsigned int sigsetsize)
178{
179 struct k_sigaction new_ka, old_ka;
180 int ret;
181 compat_sigset_t set32;
182
183 /* XXX: Don't preclude handling different sized sigset_t's. */
184 if (sigsetsize != sizeof(compat_sigset_t))
185 return -EINVAL;
186
187 if (act) {
188 compat_uptr_t handler, restorer;
189
190 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
191 __get_user(handler, &act->sa_handler) ||
192 __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
193 __get_user(restorer, &act->sa_restorer) ||
194 __copy_from_user(&set32, &act->sa_mask,
195 sizeof(compat_sigset_t)))
196 return -EFAULT;
197 new_ka.sa.sa_handler = compat_ptr(handler);
198 new_ka.sa.sa_restorer = compat_ptr(restorer);
199
200 /*
201 * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
202 * than _NSIG_WORDS << 1
203 */
204 switch (_NSIG_WORDS) {
205 case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
206 | (((long)set32.sig[7]) << 32);
207 case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
208 | (((long)set32.sig[5]) << 32);
209 case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
210 | (((long)set32.sig[3]) << 32);
211 case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
212 | (((long)set32.sig[1]) << 32);
213 }
214 }
215
216 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
217
218 if (!ret && oact) {
219 /*
220 * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
221 * than _NSIG_WORDS << 1
222 */
223 switch (_NSIG_WORDS) {
224 case 4:
225 set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
226 set32.sig[6] = old_ka.sa.sa_mask.sig[3];
227 case 3:
228 set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
229 set32.sig[4] = old_ka.sa.sa_mask.sig[2];
230 case 2:
231 set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
232 set32.sig[2] = old_ka.sa.sa_mask.sig[1];
233 case 1:
234 set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
235 set32.sig[0] = old_ka.sa.sa_mask.sig[0];
236 }
237 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
238 __put_user(ptr_to_compat(old_ka.sa.sa_handler),
239 &oact->sa_handler) ||
240 __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
241 &oact->sa_restorer) ||
242 __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
243 __copy_to_user(&oact->sa_mask, &set32,
244 sizeof(compat_sigset_t)))
245 return -EFAULT;
246 }
247
248 return ret;
249}
250
251asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
252 struct old_sigaction32 __user *oact)
253{
254 struct k_sigaction new_ka, old_ka;
255 int ret;
256
257 if (act) {
258 compat_old_sigset_t mask;
259 compat_uptr_t handler, restorer;
260
261 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
262 __get_user(handler, &act->sa_handler) ||
263 __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
264 __get_user(restorer, &act->sa_restorer) ||
265 __get_user(mask, &act->sa_mask))
266 return -EFAULT;
267
268 new_ka.sa.sa_handler = compat_ptr(handler);
269 new_ka.sa.sa_restorer = compat_ptr(restorer);
270
271 siginitset(&new_ka.sa.sa_mask, mask);
272 }
273
274 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
275
276 if (!ret && oact) {
277 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
278 __put_user(ptr_to_compat(old_ka.sa.sa_handler),
279 &oact->sa_handler) ||
280 __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
281 &oact->sa_restorer) ||
282 __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
283 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
284 return -EFAULT;
285 }
286
287 return ret;
288}
289
290asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, 175asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
291 int options) 176 int options)
292{ 177{
293 return compat_sys_wait4(pid, stat_addr, options, NULL); 178 return compat_sys_wait4(pid, stat_addr, options, NULL);
294} 179}
295 180
296/* 32-bit timeval and related flotsam. */
297
298asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
299 struct compat_timespec __user *interval)
300{
301 struct timespec t;
302 int ret;
303 mm_segment_t old_fs = get_fs();
304
305 set_fs(KERNEL_DS);
306 ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
307 set_fs(old_fs);
308 if (put_compat_timespec(&t, interval))
309 return -EFAULT;
310 return ret;
311}
312
313asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
314 compat_size_t sigsetsize)
315{
316 sigset_t s;
317 compat_sigset_t s32;
318 int ret;
319 mm_segment_t old_fs = get_fs();
320
321 set_fs(KERNEL_DS);
322 ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
323 set_fs(old_fs);
324 if (!ret) {
325 switch (_NSIG_WORDS) {
326 case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
327 case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
328 case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
329 case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
330 }
331 if (copy_to_user(set, &s32, sizeof(compat_sigset_t)))
332 return -EFAULT;
333 }
334 return ret;
335}
336
337asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
338 compat_siginfo_t __user *uinfo)
339{
340 siginfo_t info;
341 int ret;
342 mm_segment_t old_fs = get_fs();
343
344 if (copy_siginfo_from_user32(&info, uinfo))
345 return -EFAULT;
346 set_fs(KERNEL_DS);
347 ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
348 set_fs(old_fs);
349 return ret;
350}
351
352/* warning: next two assume little endian */ 181/* warning: next two assume little endian */
353asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, 182asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
354 u32 poslo, u32 poshi) 183 u32 poslo, u32 poshi)
@@ -389,11 +218,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
389 * Some system calls that need sign extended arguments. This could be 218 * Some system calls that need sign extended arguments. This could be
390 * done by a generic wrapper. 219 * done by a generic wrapper.
391 */ 220 */
392long sys32_lseek(unsigned int fd, int offset, unsigned int whence)
393{
394 return sys_lseek(fd, offset, whence);
395}
396
397long sys32_kill(int pid, int sig) 221long sys32_kill(int pid, int sig)
398{ 222{
399 return sys_kill(pid, sig); 223 return sys_kill(pid, sig);
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 0c44630d1789..b31bf97775fc 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -49,10 +49,6 @@
49 49
50/* Asm macros */ 50/* Asm macros */
51 51
52#define ACPI_ASM_MACROS
53#define BREAKPOINT3
54#define ACPI_DISABLE_IRQS() local_irq_disable()
55#define ACPI_ENABLE_IRQS() local_irq_enable()
56#define ACPI_FLUSH_CPU_CACHE() wbinvd() 52#define ACPI_FLUSH_CPU_CACHE() wbinvd()
57 53
58int __acpi_acquire_global_lock(unsigned int *lock); 54int __acpi_acquire_global_lock(unsigned int *lock);
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9cd8fd..a54ee1d054d9 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
81 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; 81 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
82} 82}
83 83
84static inline u16 amd_get_node_id(struct pci_dev *pdev)
85{
86 struct pci_dev *misc;
87 int i;
88
89 for (i = 0; i != amd_nb_num(); i++) {
90 misc = node_to_amd_nb(i)->misc;
91
92 if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
93 PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
94 return i;
95 }
96
97 WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev));
98 return 0;
99}
100
84#else 101#else
85 102
86#define amd_nb_num(x) 0 103#define amd_nb_num(x) 0
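A hypothetical caller (not part of this patch) would pair the new helper with node_to_amd_nb() to reach the northbridge descriptor that services an arbitrary PCI function:

	static struct amd_northbridge *example_nb_for_device(struct pci_dev *pdev)
	{
		/* amd_get_node_id() matches pdev against each node's misc device */
		return node_to_amd_nb(amd_get_node_id(pdev));
	}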
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
new file mode 100644
index 000000000000..653668d140f9
--- /dev/null
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -0,0 +1,54 @@
1#ifndef _ASM_X86_BOOTPARAM_UTILS_H
2#define _ASM_X86_BOOTPARAM_UTILS_H
3
4#include <asm/bootparam.h>
5
6/*
7 * This file is included from multiple environments. Do not
8 * add completing #includes to make it standalone.
9 */
10
11/*
12 * Deal with bootloaders which fail to initialize unknown fields in
 13 * boot_params to zero. The fields in this list are taken from
14 * analysis of kexec-tools; if other broken bootloaders initialize a
15 * different set of fields we will need to figure out how to disambiguate.
16 *
17 * Note: efi_info is commonly left uninitialized, but that field has a
18 * private magic, so it is better to leave it unchanged.
19 */
20static void sanitize_boot_params(struct boot_params *boot_params)
21{
22 /*
23 * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear
24 * this field. The purpose of this field is to guarantee
25 * compliance with the x86 boot spec located in
26 * Documentation/x86/boot.txt . That spec says that the
27 * *whole* structure should be cleared, after which only the
28 * portion defined by struct setup_header (boot_params->hdr)
29 * should be copied in.
30 *
31 * If you're having an issue because the sentinel is set, you
32 * need to change the whole structure to be cleared, not this
33 * (or any other) individual field, or you will soon have
34 * problems again.
35 */
36 if (boot_params->sentinel) {
37 /* fields in boot_params are left uninitialized, clear them */
38 memset(&boot_params->olpc_ofw_header, 0,
39 (char *)&boot_params->efi_info -
40 (char *)&boot_params->olpc_ofw_header);
41 memset(&boot_params->kbd_status, 0,
42 (char *)&boot_params->hdr -
43 (char *)&boot_params->kbd_status);
44 memset(&boot_params->_pad7[0], 0,
45 (char *)&boot_params->edd_mbr_sig_buffer[0] -
46 (char *)&boot_params->_pad7[0]);
47 memset(&boot_params->_pad8[0], 0,
48 (char *)&boot_params->eddbuf[0] -
49 (char *)&boot_params->_pad8[0]);
50 memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
51 }
52}
53
54#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
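The member-to-member memset arithmetic above clears only the gaps between fields that known bootloaders do fill in. A standalone user-space illustration of the same pattern (toy structure, not kernel code):

	#include <stdio.h>
	#include <string.h>

	struct toy {
		int keep_a;
		int scrub_b;
		int scrub_c;
		int keep_d;
	};

	int main(void)
	{
		struct toy t = { 1, 2, 3, 4 };

		/* zero everything from scrub_b up to (not including) keep_d */
		memset(&t.scrub_b, 0, (char *)&t.keep_d - (char *)&t.scrub_b);

		printf("%d %d %d %d\n", t.keep_a, t.scrub_b, t.scrub_c, t.keep_d);
		return 0;	/* prints: 1 0 0 4 */
	}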
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 2d9075e863a0..93fe929d1cee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
167#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ 167#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
168#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ 168#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
169#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ 169#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
170#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
170 171
171/* 172/*
172 * Auxiliary flags: Linux defined - For features scattered in various 173 * Auxiliary flags: Linux defined - For features scattered in various
@@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32];
309#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 310#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
310#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) 311#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
311#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) 312#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
313#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
312#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) 314#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
313#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) 315#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
314#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) 316#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 41ab26ea6564..e25cc33ec54d 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -26,9 +26,10 @@
26#ifdef CONFIG_X86_64 26#ifdef CONFIG_X86_64
27# include <asm/sigcontext32.h> 27# include <asm/sigcontext32.h>
28# include <asm/user32.h> 28# include <asm/user32.h>
29int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 29struct ksignal;
30int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
30 compat_sigset_t *set, struct pt_regs *regs); 31 compat_sigset_t *set, struct pt_regs *regs);
31int ia32_setup_frame(int sig, struct k_sigaction *ka, 32int ia32_setup_frame(int sig, struct ksignal *ksig,
32 compat_sigset_t *set, struct pt_regs *regs); 33 compat_sigset_t *set, struct pt_regs *regs);
33#else 34#else
34# define user_i387_ia32_struct user_i387_struct 35# define user_i387_ia32_struct user_i387_struct
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b522d377..0525a8bdf65d 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
44 44
45#ifdef CONFIG_DYNAMIC_FTRACE 45#ifdef CONFIG_DYNAMIC_FTRACE
46#define ARCH_SUPPORTS_FTRACE_OPS 1 46#define ARCH_SUPPORTS_FTRACE_OPS 1
47#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
48#endif 47#endif
49 48
50#ifndef __ASSEMBLY__ 49#ifndef __ASSEMBLY__
@@ -73,4 +72,28 @@ int ftrace_int3_handler(struct pt_regs *regs);
73#endif /* __ASSEMBLY__ */ 72#endif /* __ASSEMBLY__ */
74#endif /* CONFIG_FUNCTION_TRACER */ 73#endif /* CONFIG_FUNCTION_TRACER */
75 74
75
76#if !defined(__ASSEMBLY__) && !defined(COMPILE_OFFSETS)
77
78#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION)
79#include <asm/compat.h>
80
81/*
82 * Because ia32 syscalls do not map to x86_64 syscall numbers
 83 * this screws up the trace output when tracing an ia32 task.
84 * Instead of reporting bogus syscalls, just do not trace them.
85 *
 86 * If the user really wants these, then they should use the
87 * raw syscall tracepoints with filtering.
88 */
89#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
90static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
91{
92 if (is_compat_task())
93 return true;
94 return false;
95}
96#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
97#endif /* !__ASSEMBLY__ && !COMPILE_OFFSETS */
98
76#endif /* _ASM_X86_FTRACE_H */ 99#endif /* _ASM_X86_FTRACE_H */
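For example, syscall number 5 is open() in the ia32 table but fstat() in the x86_64 table, so without this check the generic syscall tracepoints would report a 32-bit open() as fstat().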
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 434e2106cc87..b18df579c0e9 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
80extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); 80extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
81 81
82#ifdef CONFIG_PCI_MSI 82#ifdef CONFIG_PCI_MSI
83extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); 83extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
84#else 84#else
85static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 85static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
86{ 86{
87 return -EINVAL; 87 return -EINVAL;
88} 88}
@@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
111static inline int hpet_enable(void) { return 0; } 111static inline int hpet_enable(void) { return 0; }
112static inline int is_hpet_enabled(void) { return 0; } 112static inline int is_hpet_enabled(void) { return 0; }
113#define hpet_readl(a) 0 113#define hpet_readl(a) 0
114#define default_setup_hpet_msi NULL
114 115
115#endif 116#endif
116#endif /* _ASM_X86_HPET_H */ 117#endif /* _ASM_X86_HPET_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6ed2be7..10a78c3d3d5a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
101 irq_attr->polarity = polarity; 101 irq_attr->polarity = polarity;
102} 102}
103 103
104/* Intel specific interrupt remapping information */
104struct irq_2_iommu { 105struct irq_2_iommu {
105 struct intel_iommu *iommu; 106 struct intel_iommu *iommu;
106 u16 irte_index; 107 u16 irte_index;
@@ -108,6 +109,12 @@ struct irq_2_iommu {
108 u8 irte_mask; 109 u8 irte_mask;
109}; 110};
110 111
112/* AMD specific interrupt remapping information */
113struct irq_2_irte {
114 u16 devid; /* Device ID for IRTE table */
 115	u16 index;			/* Index into IRTE table */
116};
117
111/* 118/*
112 * This is performance-critical, we want to do it O(1) 119 * This is performance-critical, we want to do it O(1)
113 * 120 *
@@ -120,7 +127,11 @@ struct irq_cfg {
120 u8 vector; 127 u8 vector;
121 u8 move_in_progress : 1; 128 u8 move_in_progress : 1;
122#ifdef CONFIG_IRQ_REMAP 129#ifdef CONFIG_IRQ_REMAP
123 struct irq_2_iommu irq_2_iommu; 130 u8 remapped : 1;
131 union {
132 struct irq_2_iommu irq_2_iommu;
133 struct irq_2_irte irq_2_irte;
134 };
124#endif 135#endif
125}; 136};
126 137
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index b518c7509933..86095ed14135 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -25,6 +25,7 @@
25 25
26extern void init_hypervisor(struct cpuinfo_x86 *c); 26extern void init_hypervisor(struct cpuinfo_x86 *c);
27extern void init_hypervisor_platform(void); 27extern void init_hypervisor_platform(void);
28extern bool hypervisor_x2apic_available(void);
28 29
29/* 30/*
30 * x86 hypervisor information 31 * x86 hypervisor information
@@ -41,6 +42,9 @@ struct hypervisor_x86 {
41 42
42 /* Platform setup (run once per boot) */ 43 /* Platform setup (run once per boot) */
43 void (*init_platform)(void); 44 void (*init_platform)(void);
45
46 /* X2APIC detection (run once per boot) */
47 bool (*x2apic_available)(void);
44}; 48};
45 49
46extern const struct hypervisor_x86 *x86_hyper; 50extern const struct hypervisor_x86 *x86_hyper;
@@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
51extern const struct hypervisor_x86 x86_hyper_xen_hvm; 55extern const struct hypervisor_x86 x86_hyper_xen_hvm;
52extern const struct hypervisor_x86 x86_hyper_kvm; 56extern const struct hypervisor_x86 x86_hyper_kvm;
53 57
54static inline bool hypervisor_x2apic_available(void)
55{
56 if (kvm_para_available())
57 return true;
58 if (xen_x2apic_para_available())
59 return true;
60 return false;
61}
62
63#endif 58#endif
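The inline helper above is replaced by an out-of-line version that asks the detected hypervisor itself through the new callback. A sketch of the likely replacement (an assumption for illustration; the real body lives in arch/x86/kernel/cpu/hypervisor.c, not in this header):

	bool hypervisor_x2apic_available(void)
	{
		return x86_hyper && x86_hyper->x2apic_available &&
		       x86_hyper->x2apic_available();
	}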
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 4c6da2e4bb1d..d0e8e0141041 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -13,21 +13,6 @@
13#include <asm/sigcontext32.h> 13#include <asm/sigcontext32.h>
14 14
15/* signal.h */ 15/* signal.h */
16struct sigaction32 {
17 unsigned int sa_handler; /* Really a pointer, but need to deal
18 with 32 bits */
19 unsigned int sa_flags;
20 unsigned int sa_restorer; /* Another 32 bit pointer */
21 compat_sigset_t sa_mask; /* A 32 bit mask */
22};
23
24struct old_sigaction32 {
25 unsigned int sa_handler; /* Really a pointer, but need to deal
26 with 32 bits */
27 compat_old_sigset_t sa_mask; /* A 32 bit mask */
28 unsigned int sa_flags;
29 unsigned int sa_restorer; /* Another 32 bit pointer */
30};
31 16
32struct ucontext_ia32 { 17struct ucontext_ia32 {
33 unsigned int uc_flags; 18 unsigned int uc_flags;
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index adcc0ae73d09..223042086f4e 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -1,20 +1,14 @@
1#ifndef _ASM_X86_INIT_32_H 1#ifndef _ASM_X86_INIT_H
2#define _ASM_X86_INIT_32_H 2#define _ASM_X86_INIT_H
3 3
4#ifdef CONFIG_X86_32 4struct x86_mapping_info {
5extern void __init early_ioremap_page_table_range_init(void); 5 void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
6#endif 6 void *context; /* context for alloc_pgt_page */
7 unsigned long pmd_flag; /* page flag for PMD entry */
8 bool kernel_mapping; /* kernel mapping or ident mapping */
9};
7 10
8extern void __init zone_sizes_init(void); 11int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
12 unsigned long addr, unsigned long end);
9 13
10extern unsigned long __init 14#endif /* _ASM_X86_INIT_H */
11kernel_physical_mapping_init(unsigned long start,
12 unsigned long end,
13 unsigned long page_size_mask);
14
15
16extern unsigned long __initdata pgt_buf_start;
17extern unsigned long __meminitdata pgt_buf_end;
18extern unsigned long __meminitdata pgt_buf_top;
19
20#endif /* _ASM_X86_INIT_32_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 73d8c5398ea9..459e50a424d1 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -144,11 +144,24 @@ extern int timer_through_8259;
144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) 144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
145 145
146struct io_apic_irq_attr; 146struct io_apic_irq_attr;
147struct irq_cfg;
147extern int io_apic_set_pci_routing(struct device *dev, int irq, 148extern int io_apic_set_pci_routing(struct device *dev, int irq,
148 struct io_apic_irq_attr *irq_attr); 149 struct io_apic_irq_attr *irq_attr);
149void setup_IO_APIC_irq_extra(u32 gsi); 150void setup_IO_APIC_irq_extra(u32 gsi);
150extern void ioapic_insert_resources(void); 151extern void ioapic_insert_resources(void);
151 152
153extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
154 unsigned int, int,
155 struct io_apic_irq_attr *);
156extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
157 unsigned int, int,
158 struct io_apic_irq_attr *);
159extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
160
161extern void native_compose_msi_msg(struct pci_dev *pdev,
162 unsigned int irq, unsigned int dest,
163 struct msi_msg *msg, u8 hpet_id);
164extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
152int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); 165int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
153 166
154extern int save_ioapic_entries(void); 167extern int save_ioapic_entries(void);
@@ -179,6 +192,12 @@ extern void __init native_io_apic_init_mappings(void);
179extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); 192extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
180extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); 193extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
181extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); 194extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
195extern void native_disable_io_apic(void);
196extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
197extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
198extern int native_ioapic_set_affinity(struct irq_data *,
199 const struct cpumask *,
200 bool);
182 201
183static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 202static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
184{ 203{
@@ -193,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
193{ 212{
194 x86_io_apic_ops.modify(apic, reg, value); 213 x86_io_apic_ops.modify(apic, reg, value);
195} 214}
215
216extern void io_apic_eoi(unsigned int apic, unsigned int vector);
217
196#else /* !CONFIG_X86_IO_APIC */ 218#else /* !CONFIG_X86_IO_APIC */
197 219
198#define io_apic_assign_pci_irqs 0 220#define io_apic_assign_pci_irqs 0
@@ -223,6 +245,12 @@ static inline void disable_ioapic_support(void) { }
223#define native_io_apic_read NULL 245#define native_io_apic_read NULL
224#define native_io_apic_write NULL 246#define native_io_apic_write NULL
225#define native_io_apic_modify NULL 247#define native_io_apic_modify NULL
248#define native_disable_io_apic NULL
249#define native_io_apic_print_entries NULL
250#define native_ioapic_set_affinity NULL
251#define native_setup_ioapic_entry NULL
252#define native_compose_msi_msg NULL
253#define native_eoi_ioapic_pin NULL
226#endif 254#endif
227 255
228#endif /* _ASM_X86_IO_APIC_H */ 256#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f14..95fd3527f632 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,8 +26,6 @@
26 26
27#ifdef CONFIG_IRQ_REMAP 27#ifdef CONFIG_IRQ_REMAP
28 28
29extern int irq_remapping_enabled;
30
31extern void setup_irq_remapping_ops(void); 29extern void setup_irq_remapping_ops(void);
32extern int irq_remapping_supported(void); 30extern int irq_remapping_supported(void);
33extern int irq_remapping_prepare(void); 31extern int irq_remapping_prepare(void);
@@ -40,21 +38,19 @@ extern int setup_ioapic_remapped_entry(int irq,
40 unsigned int destination, 38 unsigned int destination,
41 int vector, 39 int vector,
42 struct io_apic_irq_attr *attr); 40 struct io_apic_irq_attr *attr);
43extern int set_remapped_irq_affinity(struct irq_data *data,
44 const struct cpumask *mask,
45 bool force);
46extern void free_remapped_irq(int irq); 41extern void free_remapped_irq(int irq);
47extern void compose_remapped_msi_msg(struct pci_dev *pdev, 42extern void compose_remapped_msi_msg(struct pci_dev *pdev,
48 unsigned int irq, unsigned int dest, 43 unsigned int irq, unsigned int dest,
49 struct msi_msg *msg, u8 hpet_id); 44 struct msi_msg *msg, u8 hpet_id);
50extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
51extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
52 int index, int sub_handle);
53extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); 45extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
46extern void panic_if_irq_remap(const char *msg);
47extern bool setup_remapped_irq(int irq,
48 struct irq_cfg *cfg,
49 struct irq_chip *chip);
54 50
55#else /* CONFIG_IRQ_REMAP */ 51void irq_remap_modify_chip_defaults(struct irq_chip *chip);
56 52
57#define irq_remapping_enabled 0 53#else /* CONFIG_IRQ_REMAP */
58 54
59static inline void setup_irq_remapping_ops(void) { } 55static inline void setup_irq_remapping_ops(void) { }
60static inline int irq_remapping_supported(void) { return 0; } 56static inline int irq_remapping_supported(void) { return 0; }
@@ -71,30 +67,30 @@ static inline int setup_ioapic_remapped_entry(int irq,
71{ 67{
72 return -ENODEV; 68 return -ENODEV;
73} 69}
74static inline int set_remapped_irq_affinity(struct irq_data *data,
75 const struct cpumask *mask,
76 bool force)
77{
78 return 0;
79}
80static inline void free_remapped_irq(int irq) { } 70static inline void free_remapped_irq(int irq) { }
81static inline void compose_remapped_msi_msg(struct pci_dev *pdev, 71static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
82 unsigned int irq, unsigned int dest, 72 unsigned int irq, unsigned int dest,
83 struct msi_msg *msg, u8 hpet_id) 73 struct msi_msg *msg, u8 hpet_id)
84{ 74{
85} 75}
86static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) 76static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
87{ 77{
88 return -ENODEV; 78 return -ENODEV;
89} 79}
90static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, 80
91 int index, int sub_handle) 81static inline void panic_if_irq_remap(const char *msg)
82{
83}
84
85static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
92{ 86{
93 return -ENODEV;
94} 87}
95static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) 88
89static inline bool setup_remapped_irq(int irq,
90 struct irq_cfg *cfg,
91 struct irq_chip *chip)
96{ 92{
97 return -ENODEV; 93 return false;
98} 94}
99#endif /* CONFIG_IRQ_REMAP */ 95#endif /* CONFIG_IRQ_REMAP */
100 96
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 1508e518c7e3..aac5fa62a86c 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,8 +109,8 @@
109 109
110#define UV_BAU_MESSAGE 0xf5 110#define UV_BAU_MESSAGE 0xf5
111 111
112/* Xen vector callback to receive events in a HVM domain */ 112/* Vector on which hypervisor callbacks will be delivered */
113#define XEN_HVM_EVTCHN_CALLBACK 0xf3 113#define HYPERVISOR_CALLBACK_VECTOR 0xf3
114 114
115/* 115/*
116 * Local APIC timer IRQ vector is on a different priority level, 116 * Local APIC timer IRQ vector is on a different priority level,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 6080d2694bad..17483a492f18 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -48,11 +48,11 @@
48# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) 48# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
49#else 49#else
50/* Maximum physical address we can use pages from */ 50/* Maximum physical address we can use pages from */
51# define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) 51# define KEXEC_SOURCE_MEMORY_LIMIT (MAXMEM-1)
52/* Maximum address we can reach in physical address mode */ 52/* Maximum address we can reach in physical address mode */
53# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) 53# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1)
54/* Maximum address we can use for the control pages */ 54/* Maximum address we can use for the control pages */
55# define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) 55# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
56 56
57/* Allocate one page for the pdp and the second for the code */ 57/* Allocate one page for the pdp and the second for the code */
58# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) 58# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index d3ddd17405d0..5a6d2873f80e 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -77,6 +77,7 @@ struct arch_specific_insn {
77 * a post_handler or break_handler). 77 * a post_handler or break_handler).
78 */ 78 */
79 int boostable; 79 int boostable;
80 bool if_modifier;
80}; 81};
81 82
82struct arch_optimized_insn { 83struct arch_optimized_insn {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dc87b65e9c3a..4979778cc7fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,10 +33,10 @@
33 33
34#define KVM_MAX_VCPUS 254 34#define KVM_MAX_VCPUS 254
35#define KVM_SOFT_MAX_VCPUS 160 35#define KVM_SOFT_MAX_VCPUS 160
36#define KVM_MEMORY_SLOTS 32 36#define KVM_USER_MEM_SLOTS 125
37/* memory slots that does not exposed to userspace */ 37/* memory slots that are not exposed to userspace */
38#define KVM_PRIVATE_MEM_SLOTS 4 38#define KVM_PRIVATE_MEM_SLOTS 3
39#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 39#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
40 40
41#define KVM_MMIO_SIZE 16 41#define KVM_MMIO_SIZE 16
42 42
@@ -219,11 +219,6 @@ struct kvm_mmu_page {
219 u64 *spt; 219 u64 *spt;
220 /* hold the gfn of each spte inside spt */ 220 /* hold the gfn of each spte inside spt */
221 gfn_t *gfns; 221 gfn_t *gfns;
222 /*
223 * One bit set per slot which has memory
224 * in this shadow page.
225 */
226 DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM);
227 bool unsync; 222 bool unsync;
228 int root_count; /* Currently serving as active root */ 223 int root_count; /* Currently serving as active root */
229 unsigned int unsync_children; 224 unsigned int unsync_children;
@@ -419,8 +414,8 @@ struct kvm_vcpu_arch {
419 gpa_t time; 414 gpa_t time;
420 struct pvclock_vcpu_time_info hv_clock; 415 struct pvclock_vcpu_time_info hv_clock;
421 unsigned int hw_tsc_khz; 416 unsigned int hw_tsc_khz;
422 unsigned int time_offset; 417 struct gfn_to_hva_cache pv_time;
423 struct page *time_page; 418 bool pv_time_enabled;
424 /* set guest stopped flag in pvclock flags field */ 419 /* set guest stopped flag in pvclock flags field */
425 bool pvclock_set_guest_stopped_request; 420 bool pvclock_set_guest_stopped_request;
426 421
@@ -502,6 +497,13 @@ struct kvm_vcpu_arch {
502 u64 msr_val; 497 u64 msr_val;
503 struct gfn_to_hva_cache data; 498 struct gfn_to_hva_cache data;
504 } pv_eoi; 499 } pv_eoi;
500
501 /*
 502	 * Indicates whether the access faulted on its page table in the
 503	 * guest; set when fixing a page fault and used to detect an
 504	 * unhandleable instruction.
505 */
506 bool write_fault_to_shadow_pgtable;
505}; 507};
506 508
507struct kvm_lpage_info { 509struct kvm_lpage_info {
@@ -697,6 +699,11 @@ struct kvm_x86_ops {
697 void (*enable_nmi_window)(struct kvm_vcpu *vcpu); 699 void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
698 void (*enable_irq_window)(struct kvm_vcpu *vcpu); 700 void (*enable_irq_window)(struct kvm_vcpu *vcpu);
699 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); 701 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
702 int (*vm_has_apicv)(struct kvm *kvm);
703 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
704 void (*hwapic_isr_update)(struct kvm *kvm, int isr);
705 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
706 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
700 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 707 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
701 int (*get_tdp_level)(void); 708 int (*get_tdp_level)(void);
702 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 709 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -991,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva);
991int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); 998int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
992void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); 999void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
993int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 1000int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
1001int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
994int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 1002int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
995int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); 1003int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
996int kvm_cpu_get_interrupt(struct kvm_vcpu *v); 1004int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed1f16187be..695399f2d5eb 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -27,7 +27,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
27 * 27 *
28 * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. 28 * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
29 * The hypercall number should be placed in rax and the return value will be 29 * The hypercall number should be placed in rax and the return value will be
30 * placed in rax. No other registers will be clobbered unless explicited 30 * placed in rax. No other registers will be clobbered unless explicitly
31 * noted by the particular hypercall. 31 * noted by the particular hypercall.
32 */ 32 */
33 33
@@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
85 return ret; 85 return ret;
86} 86}
87 87
88static inline int kvm_para_available(void) 88static inline bool kvm_para_available(void)
89{ 89{
90 unsigned int eax, ebx, ecx, edx; 90 unsigned int eax, ebx, ecx, edx;
91 char signature[13]; 91 char signature[13];
92 92
93 if (boot_cpu_data.cpuid_level < 0) 93 if (boot_cpu_data.cpuid_level < 0)
94 return 0; /* So we don't blow up on old processors */ 94 return false; /* So we don't blow up on old processors */
95 95
96 if (cpu_has_hypervisor) { 96 if (cpu_has_hypervisor) {
97 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); 97 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
@@ -101,10 +101,10 @@ static inline int kvm_para_available(void)
101 signature[12] = 0; 101 signature[12] = 0;
102 102
103 if (strcmp(signature, "KVMKVMKVM") == 0) 103 if (strcmp(signature, "KVMKVMKVM") == 0)
104 return 1; 104 return true;
105 } 105 }
106 106
107 return 0; 107 return false;
108} 108}
109 109
110static inline unsigned int kvm_arch_para_features(void) 110static inline unsigned int kvm_arch_para_features(void)
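The signature probe can be reproduced from user space for illustration (not kernel code; KVM_CPUID_SIGNATURE is leaf 0x40000000, the twelve signature bytes come back in ebx, ecx and edx, and the result is only meaningful when actually running under a hypervisor):

	#include <cpuid.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;
		char sig[13];

		__cpuid(0x40000000, eax, ebx, ecx, edx);
		memcpy(sig + 0, &ebx, 4);
		memcpy(sig + 4, &ecx, 4);
		memcpy(sig + 8, &edx, 4);
		sig[12] = 0;

		printf("hypervisor signature: \"%s\"\n", sig);
		return strcmp(sig, "KVMKVMKVM") ? 1 : 0;
	}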
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 48142971b25d..79327e9483a3 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -27,20 +27,20 @@
27#define __asmlinkage_protect0(ret) \ 27#define __asmlinkage_protect0(ret) \
28 __asmlinkage_protect_n(ret) 28 __asmlinkage_protect_n(ret)
29#define __asmlinkage_protect1(ret, arg1) \ 29#define __asmlinkage_protect1(ret, arg1) \
30 __asmlinkage_protect_n(ret, "g" (arg1)) 30 __asmlinkage_protect_n(ret, "m" (arg1))
31#define __asmlinkage_protect2(ret, arg1, arg2) \ 31#define __asmlinkage_protect2(ret, arg1, arg2) \
32 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) 32 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
33#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ 33#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
34 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) 34 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
35#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ 35#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
36 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 36 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
37 "g" (arg4)) 37 "m" (arg4))
38#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ 38#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
39 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 39 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
40 "g" (arg4), "g" (arg5)) 40 "m" (arg4), "m" (arg5))
41#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ 41#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
42 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 42 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
43 "g" (arg4), "g" (arg5), "g" (arg6)) 43 "m" (arg4), "m" (arg5), "m" (arg6))
44 44
45#endif /* CONFIG_X86_32 */ 45#endif /* CONFIG_X86_32 */
46 46
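The constraint change matters because GCC's "g" constraint also accepts a register or an immediate, which does not force the argument to stay in its stack slot, whereas "m" requires a memory operand, so the compiler must keep every argument live in memory across the protected call. The underlying macro (shown here as an assumption for illustration only) is just an empty volatile asm that names the arguments as operands:

	#define __asmlinkage_protect_n(ret, args...) \
		__asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args)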
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ecdfee60ee4a..f4076af1f4ed 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -3,6 +3,90 @@
3 3
4#include <uapi/asm/mce.h> 4#include <uapi/asm/mce.h>
5 5
6/*
7 * Machine Check support for x86
8 */
9
10/* MCG_CAP register defines */
11#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
12#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
13#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
14#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
16#define MCG_EXT_CNT_SHIFT 16
17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
18#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
19
20/* MCG_STATUS register defines */
21#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
22#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
23#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
24
25/* MCi_STATUS register defines */
26#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
27#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
28#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
29#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
30#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
31#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
32#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
33#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
34#define MCI_STATUS_AR (1ULL<<55) /* Action required */
35#define MCACOD 0xffff /* MCA Error Code */
36
37/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
38#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
39#define MCACOD_SCRUBMSK 0xfff0
40#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
41#define MCACOD_DATA 0x0134 /* Data Load */
42#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
43
44/* MCi_MISC register defines */
45#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
46#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
47#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
48#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
49#define MCI_MISC_ADDR_PHYS 2 /* physical address */
50#define MCI_MISC_ADDR_MEM 3 /* memory address */
51#define MCI_MISC_ADDR_GENERIC 7 /* generic */
52
53/* CTL2 register defines */
54#define MCI_CTL2_CMCI_EN (1ULL << 30)
55#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
56
57#define MCJ_CTX_MASK 3
58#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
59#define MCJ_CTX_RANDOM 0 /* inject context: random */
60#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
61#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
62#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
63#define MCJ_EXCEPTION 0x8 /* raise as exception */
64#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
65
66#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
67
68/* Software defined banks */
69#define MCE_EXTENDED_BANK 128
70#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
71#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
72
73#define MCE_LOG_LEN 32
74#define MCE_LOG_SIGNATURE "MACHINECHECK"
75
76/*
77 * This structure contains all data related to the MCE log. Also
78 * carries a signature to make it easier to find from external
79 * debugging tools. Each entry is only valid when its finished flag
80 * is set.
81 */
82struct mce_log {
83 char signature[12]; /* "MACHINECHECK" */
84 unsigned len; /* = MCE_LOG_LEN */
85 unsigned next;
86 unsigned flags;
87 unsigned recordlen; /* length of struct mce */
88 struct mce entry[MCE_LOG_LEN];
89};
6 90
7struct mca_config { 91struct mca_config {
8 bool dont_log_ce; 92 bool dont_log_ce;
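As a standalone illustration of the field macros above (made-up register value, ordinary user-space C):

	#include <stdio.h>

	#define MCG_BANKCNT_MASK	0xff
	#define MCG_CTL_P		(1ULL << 8)
	#define MCG_CMCI_P		(1ULL << 10)
	#define MCG_EXT_CNT_MASK	0xff0000
	#define MCG_EXT_CNT_SHIFT	16
	#define MCG_EXT_CNT(c)	(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)

	int main(void)
	{
		unsigned long long cap = MCG_CTL_P | MCG_CMCI_P |
					 (10ULL << MCG_EXT_CNT_SHIFT) | 9;

		printf("banks: %llu\n", cap & MCG_BANKCNT_MASK);	/* 9 */
		printf("extended regs: %llu\n", MCG_EXT_CNT(cap));	/* 10 */
		printf("CMCI: %s\n", (cap & MCG_CMCI_P) ? "yes" : "no");
		return 0;
	}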
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 43d921b4752c..6825e2efd1b4 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -57,4 +57,18 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
57static inline void __exit exit_amd_microcode(void) {} 57static inline void __exit exit_amd_microcode(void) {}
58#endif 58#endif
59 59
60#ifdef CONFIG_MICROCODE_EARLY
61#define MAX_UCODE_COUNT 128
62extern void __init load_ucode_bsp(void);
63extern __init void load_ucode_ap(void);
64extern int __init save_microcode_in_initrd(void);
65#else
66static inline void __init load_ucode_bsp(void) {}
67static inline __init void load_ucode_ap(void) {}
68static inline int __init save_microcode_in_initrd(void)
69{
70 return 0;
71}
72#endif
73
60#endif /* _ASM_X86_MICROCODE_H */ 74#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
new file mode 100644
index 000000000000..5356f927d411
--- /dev/null
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -0,0 +1,85 @@
1#ifndef _ASM_X86_MICROCODE_INTEL_H
2#define _ASM_X86_MICROCODE_INTEL_H
3
4#include <asm/microcode.h>
5
6struct microcode_header_intel {
7 unsigned int hdrver;
8 unsigned int rev;
9 unsigned int date;
10 unsigned int sig;
11 unsigned int cksum;
12 unsigned int ldrver;
13 unsigned int pf;
14 unsigned int datasize;
15 unsigned int totalsize;
16 unsigned int reserved[3];
17};
18
19struct microcode_intel {
20 struct microcode_header_intel hdr;
21 unsigned int bits[0];
22};
23
24/* microcode format is extended from prescott processors */
25struct extended_signature {
26 unsigned int sig;
27 unsigned int pf;
28 unsigned int cksum;
29};
30
31struct extended_sigtable {
32 unsigned int count;
33 unsigned int cksum;
34 unsigned int reserved[3];
35 struct extended_signature sigs[0];
36};
37
38#define DEFAULT_UCODE_DATASIZE (2000)
39#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
40#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
41#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
42#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
43#define DWSIZE (sizeof(u32))
44
45#define get_totalsize(mc) \
46 (((struct microcode_intel *)mc)->hdr.totalsize ? \
47 ((struct microcode_intel *)mc)->hdr.totalsize : \
48 DEFAULT_UCODE_TOTALSIZE)
49
50#define get_datasize(mc) \
51 (((struct microcode_intel *)mc)->hdr.datasize ? \
52 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
53
54#define sigmatch(s1, s2, p1, p2) \
55 (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
56
57#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
58
59extern int
60get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
61extern int microcode_sanity_check(void *mc, int print_err);
62extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
63extern int
64update_match_revision(struct microcode_header_intel *mc_header, int rev);
65
66#ifdef CONFIG_MICROCODE_INTEL_EARLY
67extern void __init load_ucode_intel_bsp(void);
68extern void __cpuinit load_ucode_intel_ap(void);
69extern void show_ucode_info_early(void);
70#else
71static inline __init void load_ucode_intel_bsp(void) {}
72static inline __cpuinit void load_ucode_intel_ap(void) {}
73static inline void show_ucode_info_early(void) {}
74#endif
75
76#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
77extern int save_mc_for_early(u8 *mc);
78#else
79static inline int save_mc_for_early(u8 *mc)
80{
81 return 0;
82}
83#endif
84
85#endif /* _ASM_X86_MICROCODE_INTEL_H */
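A standalone illustration of how sigmatch() pairs a CPU's signature and platform flags with a microcode header entry (made-up values, ordinary user-space C):

	#include <stdio.h>

	#define sigmatch(s1, s2, p1, p2) \
		(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))

	int main(void)
	{
		unsigned int cpu_sig = 0x306a9;	/* hypothetical CPUID(1).EAX */
		unsigned int cpu_pf  = 1 << 1;	/* hypothetical platform flag */

		printf("%d\n", sigmatch(cpu_sig, 0x306a9, cpu_pf, 0x12));	/* 1: sig match, pf overlap */
		printf("%d\n", sigmatch(cpu_sig, 0x306a9, cpu_pf, 0x01));	/* 0: no common platform bit */
		printf("%d\n", sigmatch(cpu_sig, 0x206a7, cpu_pf, 0x12));	/* 0: signature differs */
		return 0;
	}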
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index eb05fb3b02fb..8a9b3e288cb4 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[];
14 14
15#include <asm/numaq.h> 15#include <asm/numaq.h>
16 16
17extern void resume_map_numa_kva(pgd_t *pgd);
18
19#else /* !CONFIG_NUMA */
20
21static inline void resume_map_numa_kva(pgd_t *pgd) {}
22
23#endif /* CONFIG_NUMA */ 17#endif /* CONFIG_NUMA */
24 18
25#ifdef CONFIG_DISCONTIGMEM 19#ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 79ce5685ab64..c2934be2446a 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -11,4 +11,8 @@ struct ms_hyperv_info {
11 11
12extern struct ms_hyperv_info ms_hyperv; 12extern struct ms_hyperv_info ms_hyperv;
13 13
14void hyperv_callback_vector(void);
15void hyperv_vector_handler(struct pt_regs *regs);
16void hv_register_vmbus_handler(int irq, irq_handler_t handler);
17
14#endif 18#endif
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index bcdff997668c..2f366d0ac6b4 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -4,7 +4,8 @@
4#define MWAIT_SUBSTATE_MASK 0xf 4#define MWAIT_SUBSTATE_MASK 0xf
5#define MWAIT_CSTATE_MASK 0xf 5#define MWAIT_CSTATE_MASK 0xf
6#define MWAIT_SUBSTATE_SIZE 4 6#define MWAIT_SUBSTATE_SIZE 4
7#define MWAIT_MAX_NUM_CSTATES 8 7#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
8#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
8 9
9#define CPUID_MWAIT_LEAF 5 10#define CPUID_MWAIT_LEAF 5
10#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 11#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
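As a worked example, a hint of 0x21 decodes to MWAIT_HINT2CSTATE(0x21) == 2 and MWAIT_HINT2SUBSTATE(0x21) == 1: the upper nibble of the hint selects the C-state field and the lower nibble the sub-state.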
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 49119fcea2dc..1b99ee5c9f00 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -54,13 +54,11 @@ static inline int numa_cpu_node(int cpu)
54 54
55#ifdef CONFIG_X86_32 55#ifdef CONFIG_X86_32
56# include <asm/numa_32.h> 56# include <asm/numa_32.h>
57#else
58# include <asm/numa_64.h>
59#endif 57#endif
60 58
61#ifdef CONFIG_NUMA 59#ifdef CONFIG_NUMA
62extern void __cpuinit numa_set_node(int cpu, int node); 60extern void numa_set_node(int cpu, int node);
63extern void __cpuinit numa_clear_node(int cpu); 61extern void numa_clear_node(int cpu);
64extern void __init init_cpu_to_node(void); 62extern void __init init_cpu_to_node(void);
65extern void __cpuinit numa_add_cpu(int cpu); 63extern void __cpuinit numa_add_cpu(int cpu);
66extern void __cpuinit numa_remove_cpu(int cpu); 64extern void __cpuinit numa_remove_cpu(int cpu);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
deleted file mode 100644
index 0c05f7ae46e8..000000000000
--- a/arch/x86/include/asm/numa_64.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_X86_NUMA_64_H
2#define _ASM_X86_NUMA_64_H
3
4extern unsigned long numa_free_all_bootmem(void);
5
6#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 8ca82839288a..c87892442e53 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -17,6 +17,10 @@
17 17
18struct page; 18struct page;
19 19
20#include <linux/range.h>
21extern struct range pfn_mapped[];
22extern int nr_pfn_mapped;
23
20static inline void clear_user_page(void *page, unsigned long vaddr, 24static inline void clear_user_page(void *page, unsigned long vaddr,
21 struct page *pg) 25 struct page *pg)
22{ 26{
@@ -44,7 +48,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
44 * case properly. Once all supported versions of gcc understand it, we can 48 * case properly. Once all supported versions of gcc understand it, we can
45 * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) 49 * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated)
46 */ 50 */
47#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) 51#define __pa_symbol(x) \
52 __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
48 53
49#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) 54#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
50 55
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index da4e762406f7..4d550d04b609 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long);
15#else 15#else
16#define __phys_addr(x) __phys_addr_nodebug(x) 16#define __phys_addr(x) __phys_addr_nodebug(x)
17#endif 17#endif
18#define __phys_addr_symbol(x) __phys_addr(x)
18#define __phys_reloc_hide(x) RELOC_HIDE((x), 0) 19#define __phys_reloc_hide(x) RELOC_HIDE((x), 0)
19 20
20#ifdef CONFIG_FLATMEM 21#ifdef CONFIG_FLATMEM
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 072694ed81a5..0f1ddee6a0ce 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -3,4 +3,40 @@
3 3
4#include <asm/page_64_types.h> 4#include <asm/page_64_types.h>
5 5
6#ifndef __ASSEMBLY__
7
8/* duplicated to the one in bootmem.h */
9extern unsigned long max_pfn;
10extern unsigned long phys_base;
11
12static inline unsigned long __phys_addr_nodebug(unsigned long x)
13{
14 unsigned long y = x - __START_KERNEL_map;
15
16 /* use the carry flag to determine if x was < __START_KERNEL_map */
17 x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
18
19 return x;
20}
21
22#ifdef CONFIG_DEBUG_VIRTUAL
23extern unsigned long __phys_addr(unsigned long);
24extern unsigned long __phys_addr_symbol(unsigned long);
25#else
26#define __phys_addr(x) __phys_addr_nodebug(x)
27#define __phys_addr_symbol(x) \
28 ((unsigned long)(x) - __START_KERNEL_map + phys_base)
29#endif
30
31#define __phys_reloc_hide(x) (x)
32
33#ifdef CONFIG_FLATMEM
34#define pfn_valid(pfn) ((pfn) < max_pfn)
35#endif
36
37void clear_page(void *page);
38void copy_page(void *to, void *from);
39
40#endif /* !__ASSEMBLY__ */
41
6#endif /* _ASM_X86_PAGE_64_H */ 42#endif /* _ASM_X86_PAGE_64_H */
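The new __phys_addr_nodebug() above leans on unsigned wrap-around: subtracting __START_KERNEL_map leaves y small only for kernel-text addresses, so the x > y test adds phys_base for those and falls back to the direct-map offset otherwise. A user-space sketch of the same trick, with made-up values for __START_KERNEL_map, PAGE_OFFSET and phys_base purely to show both branches (the real constants live in the page_64_types.h headers):

	#include <stdio.h>

	/* Assumed constants for illustration only. */
	#define START_KERNEL_MAP 0xffffffff80000000UL
	#define PAGE_OFFSET_DEMO 0xffff880000000000UL
	static unsigned long phys_base = 0x1000000UL;	/* hypothetical relocation offset */

	static unsigned long phys_addr_nodebug(unsigned long x)
	{
		unsigned long y = x - START_KERNEL_MAP;

		/* Wrap-around makes y huge (y > x) for direct-map addresses. */
		x = y + ((x > y) ? phys_base : (START_KERNEL_MAP - PAGE_OFFSET_DEMO));
		return x;
	}

	int main(void)
	{
		/* Kernel-text address: offset from __START_KERNEL_map plus phys_base -> 0x1002000. */
		printf("%#lx\n", phys_addr_nodebug(START_KERNEL_MAP + 0x2000));
		/* Direct-map address: simply minus PAGE_OFFSET -> 0x5000. */
		printf("%#lx\n", phys_addr_nodebug(PAGE_OFFSET_DEMO + 0x5000));
		return 0;
	}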
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 320f7bb95f76..8b491e66eaa8 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -50,26 +50,4 @@
50#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) 50#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
51#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) 51#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
52 52
53#ifndef __ASSEMBLY__
54void clear_page(void *page);
55void copy_page(void *to, void *from);
56
57/* duplicated to the one in bootmem.h */
58extern unsigned long max_pfn;
59extern unsigned long phys_base;
60
61extern unsigned long __phys_addr(unsigned long);
62#define __phys_reloc_hide(x) (x)
63
64#define vmemmap ((struct page *)VMEMMAP_START)
65
66extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
67extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
68
69#endif /* !__ASSEMBLY__ */
70
71#ifdef CONFIG_FLATMEM
72#define pfn_valid(pfn) ((pfn) < max_pfn)
73#endif
74
75#endif /* _ASM_X86_PAGE_64_DEFS_H */ 53#endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index e21fdd10479f..54c97879195e 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ static inline phys_addr_t get_max_mapped(void)
51 return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; 51 return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
52} 52}
53 53
54bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
55
54extern unsigned long init_memory_mapping(unsigned long start, 56extern unsigned long init_memory_mapping(unsigned long start,
55 unsigned long end); 57 unsigned long end);
56 58
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index dba7805176bf..d9e9e6c7ed32 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -14,6 +14,9 @@
14struct pci_sysdata { 14struct pci_sysdata {
15 int domain; /* PCI domain */ 15 int domain; /* PCI domain */
16 int node; /* NUMA node */ 16 int node; /* NUMA node */
17#ifdef CONFIG_ACPI
18 void *acpi; /* ACPI-specific data */
19#endif
17#ifdef CONFIG_X86_64 20#ifdef CONFIG_X86_64
18 void *iommu; /* IOMMU private data */ 21 void *iommu; /* IOMMU private data */
19#endif 22#endif
@@ -121,9 +124,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
121#define arch_teardown_msi_irq x86_teardown_msi_irq 124#define arch_teardown_msi_irq x86_teardown_msi_irq
122#define arch_restore_msi_irqs x86_restore_msi_irqs 125#define arch_restore_msi_irqs x86_restore_msi_irqs
123/* implemented in arch/x86/kernel/apic/io_apic. */ 126/* implemented in arch/x86/kernel/apic/io_apic. */
127struct msi_desc;
124int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); 128int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
125void native_teardown_msi_irq(unsigned int irq); 129void native_teardown_msi_irq(unsigned int irq);
126void native_restore_msi_irqs(struct pci_dev *dev, int irq); 130void native_restore_msi_irqs(struct pci_dev *dev, int irq);
131int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
132 unsigned int irq_base, unsigned int irq_offset);
127/* default to the implementation in drivers/lib/msi.c */ 133/* default to the implementation in drivers/lib/msi.c */
128#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS 134#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
129#define HAVE_DEFAULT_MSI_RESTORE_IRQS 135#define HAVE_DEFAULT_MSI_RESTORE_IRQS
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 747e5a38b590..fa1195dae425 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -54,7 +54,6 @@ void pcibios_set_cache_line_size(void);
54/* pci-pc.c */ 54/* pci-pc.c */
55 55
56extern int pcibios_last_bus; 56extern int pcibios_last_bus;
57extern struct pci_bus *pci_root_bus;
58extern struct pci_ops pci_root_ops; 57extern struct pci_ops pci_root_ops;
59 58
60void pcibios_scan_specific_bus(int busn); 59void pcibios_scan_specific_bus(int busn);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf1cfa7..57cb63402213 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,8 +29,13 @@
29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) 29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL 30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
31 31
32#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) 32#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
33#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) 33#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
34#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
35
36#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
37#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
38 (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
34 39
35#define AMD64_EVENTSEL_EVENT \ 40#define AMD64_EVENTSEL_EVENT \
36 (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) 41 (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -46,8 +51,12 @@
46#define AMD64_RAW_EVENT_MASK \ 51#define AMD64_RAW_EVENT_MASK \
47 (X86_RAW_EVENT_MASK | \ 52 (X86_RAW_EVENT_MASK | \
48 AMD64_EVENTSEL_EVENT) 53 AMD64_EVENTSEL_EVENT)
54#define AMD64_RAW_EVENT_MASK_NB \
55 (AMD64_EVENTSEL_EVENT | \
56 ARCH_PERFMON_EVENTSEL_UMASK)
49#define AMD64_NUM_COUNTERS 4 57#define AMD64_NUM_COUNTERS 4
50#define AMD64_NUM_COUNTERS_CORE 6 58#define AMD64_NUM_COUNTERS_CORE 6
59#define AMD64_NUM_COUNTERS_NB 4
51 60
52#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c 61#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
53#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 62#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5199db2923d3..1e672234c4ff 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
142 return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; 142 return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
143} 143}
144 144
145static inline unsigned long pud_pfn(pud_t pud)
146{
147 return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
148}
149
145#define pte_page(pte) pfn_to_page(pte_pfn(pte)) 150#define pte_page(pte) pfn_to_page(pte_pfn(pte))
146 151
147static inline int pmd_large(pmd_t pte) 152static inline int pmd_large(pmd_t pte)
@@ -390,6 +395,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
390 395
391#ifndef __ASSEMBLY__ 396#ifndef __ASSEMBLY__
392#include <linux/mm_types.h> 397#include <linux/mm_types.h>
398#include <linux/log2.h>
393 399
394static inline int pte_none(pte_t pte) 400static inline int pte_none(pte_t pte)
395{ 401{
@@ -615,6 +621,8 @@ static inline int pgd_none(pgd_t pgd)
615#ifndef __ASSEMBLY__ 621#ifndef __ASSEMBLY__
616 622
617extern int direct_gbpages; 623extern int direct_gbpages;
624void init_mem_mapping(void);
625void early_alloc_pgt_buf(void);
618 626
619/* local pte updates need not use xchg for locking */ 627/* local pte updates need not use xchg for locking */
620static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) 628static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
@@ -781,6 +789,32 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
781 memcpy(dst, src, count * sizeof(pgd_t)); 789 memcpy(dst, src, count * sizeof(pgd_t));
782} 790}
783 791
792#define PTE_SHIFT ilog2(PTRS_PER_PTE)
793static inline int page_level_shift(enum pg_level level)
794{
795 return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
796}
797static inline unsigned long page_level_size(enum pg_level level)
798{
799 return 1UL << page_level_shift(level);
800}
801static inline unsigned long page_level_mask(enum pg_level level)
802{
803 return ~(page_level_size(level) - 1);
804}
805
806/*
807 * The x86 doesn't have any external MMU info: the kernel page
808 * tables contain all the necessary information.
809 */
810static inline void update_mmu_cache(struct vm_area_struct *vma,
811 unsigned long addr, pte_t *ptep)
812{
813}
814static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
815 unsigned long addr, pmd_t *pmd)
816{
817}
784 818
785#include <asm-generic/pgtable.h> 819#include <asm-generic/pgtable.h>
786#endif /* __ASSEMBLY__ */ 820#endif /* __ASSEMBLY__ */
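The new page_level_shift()/page_level_size()/page_level_mask() helpers above turn a pg_level enumerator into the usual shift/size/mask triple. A rough sketch of the arithmetic, assuming PTRS_PER_PTE is 512 (so PTE_SHIFT is 9) and the PG_LEVEL_4K/2M/1G ordering from pgtable_types.h:

	#include <stdio.h>

	enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G };

	#define PAGE_SHIFT	12
	#define PTE_SHIFT	9	/* ilog2(512); assumes PTRS_PER_PTE == 512 */

	static int page_level_shift(enum pg_level level)
	{
		return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
	}

	int main(void)
	{
		/* Prints "12 21 30": 4K, 2M and 1G page shifts. */
		printf("%d %d %d\n", page_level_shift(PG_LEVEL_4K),
		       page_level_shift(PG_LEVEL_2M), page_level_shift(PG_LEVEL_1G));
		/* size = 1UL << shift, mask = ~(size - 1), as in the hunk above. */
		return 0;
	}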
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8faa215a503e..9ee322103c6d 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,13 +66,6 @@ do { \
66 __flush_tlb_one((vaddr)); \ 66 __flush_tlb_one((vaddr)); \
67} while (0) 67} while (0)
68 68
69/*
70 * The i386 doesn't have any external MMU info: the kernel page
71 * tables contain all the necessary information.
72 */
73#define update_mmu_cache(vma, address, ptep) do { } while (0)
74#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
75
76#endif /* !__ASSEMBLY__ */ 69#endif /* !__ASSEMBLY__ */
77 70
78/* 71/*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9df82e..e22c1dbf7feb 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
142#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) 142#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
143#define pte_unmap(pte) ((void)(pte))/* NOP */ 143#define pte_unmap(pte) ((void)(pte))/* NOP */
144 144
145#define update_mmu_cache(vma, address, ptep) do { } while (0)
146#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
147
148/* Encode and de-code a swap entry */ 145/* Encode and de-code a swap entry */
149#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE 146#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
150#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) 147#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
@@ -183,6 +180,11 @@ extern void cleanup_highmap(void);
183 180
184#define __HAVE_ARCH_PTE_SAME 181#define __HAVE_ARCH_PTE_SAME
185 182
183#define vmemmap ((struct page *)VMEMMAP_START)
184
185extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
186extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
187
186#endif /* !__ASSEMBLY__ */ 188#endif /* !__ASSEMBLY__ */
187 189
188#endif /* _ASM_X86_PGTABLE_64_H */ 190#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 766ea16fbbbd..2d883440cb9a 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_PGTABLE_64_DEFS_H 1#ifndef _ASM_X86_PGTABLE_64_DEFS_H
2#define _ASM_X86_PGTABLE_64_DEFS_H 2#define _ASM_X86_PGTABLE_64_DEFS_H
3 3
4#include <asm/sparsemem.h>
5
4#ifndef __ASSEMBLY__ 6#ifndef __ASSEMBLY__
5#include <linux/types.h> 7#include <linux/types.h>
6 8
@@ -60,4 +62,6 @@ typedef struct { pteval_t pte; } pte_t;
60#define MODULES_END _AC(0xffffffffff000000, UL) 62#define MODULES_END _AC(0xffffffffff000000, UL)
61#define MODULES_LEN (MODULES_END - MODULES_VADDR) 63#define MODULES_LEN (MODULES_END - MODULES_VADDR)
62 64
65#define EARLY_DYNAMIC_PAGE_TABLES 64
66
63#endif /* _ASM_X86_PGTABLE_64_DEFS_H */ 67#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3c32db8c539d..567b5d0632b2 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -321,7 +321,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
321/* Install a pte for a particular vaddr in kernel space. */ 321/* Install a pte for a particular vaddr in kernel space. */
322void set_pte_vaddr(unsigned long vaddr, pte_t pte); 322void set_pte_vaddr(unsigned long vaddr, pte_t pte);
323 323
324extern void native_pagetable_reserve(u64 start, u64 end);
325#ifdef CONFIG_X86_32 324#ifdef CONFIG_X86_32
326extern void native_pagetable_init(void); 325extern void native_pagetable_init(void);
327#else 326#else
@@ -331,7 +330,7 @@ extern void native_pagetable_init(void);
331struct seq_file; 330struct seq_file;
332extern void arch_report_meminfo(struct seq_file *m); 331extern void arch_report_meminfo(struct seq_file *m);
333 332
334enum { 333enum pg_level {
335 PG_LEVEL_NONE, 334 PG_LEVEL_NONE,
336 PG_LEVEL_4K, 335 PG_LEVEL_4K,
337 PG_LEVEL_2M, 336 PG_LEVEL_2M,
@@ -352,6 +351,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
352 * as a pte too. 351 * as a pte too.
353 */ 352 */
354extern pte_t *lookup_address(unsigned long address, unsigned int *level); 353extern pte_t *lookup_address(unsigned long address, unsigned int *level);
354extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase);
355extern phys_addr_t slow_virt_to_phys(void *__address);
355 356
356#endif /* !__ASSEMBLY__ */ 357#endif /* !__ASSEMBLY__ */
357 358
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 888184b2fc85..3270116b1488 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -89,7 +89,6 @@ struct cpuinfo_x86 {
89 char wp_works_ok; /* It doesn't on 386's */ 89 char wp_works_ok; /* It doesn't on 386's */
90 90
91 /* Problems on some 486Dx4's and old 386's: */ 91 /* Problems on some 486Dx4's and old 386's: */
92 char hlt_works_ok;
93 char hard_math; 92 char hard_math;
94 char rfu; 93 char rfu;
95 char fdiv_bug; 94 char fdiv_bug;
@@ -165,15 +164,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
165 164
166extern const struct seq_operations cpuinfo_op; 165extern const struct seq_operations cpuinfo_op;
167 166
168static inline int hlt_works(int cpu)
169{
170#ifdef CONFIG_X86_32
171 return cpu_data(cpu).hlt_works_ok;
172#else
173 return 1;
174#endif
175}
176
177#define cache_line_size() (boot_cpu_data.x86_cache_alignment) 167#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
178 168
179extern void cpu_detect(struct cpuinfo_x86 *c); 169extern void cpu_detect(struct cpuinfo_x86 *c);
@@ -190,6 +180,14 @@ extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
190extern void detect_extended_topology(struct cpuinfo_x86 *c); 180extern void detect_extended_topology(struct cpuinfo_x86 *c);
191extern void detect_ht(struct cpuinfo_x86 *c); 181extern void detect_ht(struct cpuinfo_x86 *c);
192 182
183#ifdef CONFIG_X86_32
184extern int have_cpuid_p(void);
185#else
186static inline int have_cpuid_p(void)
187{
188 return 1;
189}
190#endif
193static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, 191static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
194 unsigned int *ecx, unsigned int *edx) 192 unsigned int *ecx, unsigned int *edx)
195{ 193{
@@ -725,12 +723,13 @@ extern unsigned long boot_option_idle_override;
725extern bool amd_e400_c1e_detected; 723extern bool amd_e400_c1e_detected;
726 724
727enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, 725enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
728 IDLE_POLL, IDLE_FORCE_MWAIT}; 726 IDLE_POLL};
729 727
730extern void enable_sep_cpu(void); 728extern void enable_sep_cpu(void);
731extern int sysenter_setup(void); 729extern int sysenter_setup(void);
732 730
733extern void early_trap_init(void); 731extern void early_trap_init(void);
732void early_trap_pf_init(void);
734 733
735/* Defined in head.S */ 734/* Defined in head.S */
736extern struct desc_ptr early_gdt_descr; 735extern struct desc_ptr early_gdt_descr;
@@ -943,7 +942,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
943extern int get_tsc_mode(unsigned long adr); 942extern int get_tsc_mode(unsigned long adr);
944extern int set_tsc_mode(unsigned int val); 943extern int set_tsc_mode(unsigned int val);
945 944
946extern int amd_get_nb_id(int cpu); 945extern u16 amd_get_nb_id(int cpu);
947 946
948struct aperfmperf { 947struct aperfmperf {
949 u64 aperf, mperf; 948 u64 aperf, mperf;
@@ -998,7 +997,11 @@ extern unsigned long arch_align_stack(unsigned long sp);
998extern void free_init_pages(char *what, unsigned long begin, unsigned long end); 997extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
999 998
1000void default_idle(void); 999void default_idle(void);
1001bool set_pm_idle_to_default(void); 1000#ifdef CONFIG_XEN
1001bool xen_set_default_idle(void);
1002#else
1003#define xen_set_default_idle 0
1004#endif
1002 1005
1003void stop_this_cpu(void *dummy); 1006void stop_this_cpu(void *dummy);
1004 1007
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6f414ed88620..6fd3fd769796 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -5,8 +5,6 @@
5 5
6/* misc architecture specific prototypes */ 6/* misc architecture specific prototypes */
7 7
8void early_idt_handler(void);
9
10void system_call(void); 8void system_call(void);
11void syscall_init(void); 9void syscall_init(void);
12 10
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index fe1ec5bcd846..9c6b890d5e7a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -58,6 +58,7 @@ extern unsigned char boot_gdt[];
58extern unsigned char secondary_startup_64[]; 58extern unsigned char secondary_startup_64[];
59#endif 59#endif
60 60
61extern void __init setup_real_mode(void); 61void reserve_real_mode(void);
62void setup_real_mode(void);
62 63
63#endif /* _ARCH_X86_REALMODE_H */ 64#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6c7fc25f2c34..5c6e4fb370f5 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -47,6 +47,12 @@
47# define NEED_NOPL 0 47# define NEED_NOPL 0
48#endif 48#endif
49 49
50#ifdef CONFIG_MATOM
51# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
52#else
53# define NEED_MOVBE 0
54#endif
55
50#ifdef CONFIG_X86_64 56#ifdef CONFIG_X86_64
51#ifdef CONFIG_PARAVIRT 57#ifdef CONFIG_PARAVIRT
52/* Paravirtualized systems may not have PSE or PGE available */ 58/* Paravirtualized systems may not have PSE or PGE available */
@@ -80,7 +86,7 @@
80 86
81#define REQUIRED_MASK2 0 87#define REQUIRED_MASK2 0
82#define REQUIRED_MASK3 (NEED_NOPL) 88#define REQUIRED_MASK3 (NEED_NOPL)
83#define REQUIRED_MASK4 0 89#define REQUIRED_MASK4 (NEED_MOVBE)
84#define REQUIRED_MASK5 0 90#define REQUIRED_MASK5 0
85#define REQUIRED_MASK6 0 91#define REQUIRED_MASK6 0
86#define REQUIRED_MASK7 0 92#define REQUIRED_MASK7 0
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 216bf364a7e7..35e67a457182 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,27 +31,9 @@ typedef sigset_t compat_sigset_t;
31#include <uapi/asm/signal.h> 31#include <uapi/asm/signal.h>
32#ifndef __ASSEMBLY__ 32#ifndef __ASSEMBLY__
33extern void do_notify_resume(struct pt_regs *, void *, __u32); 33extern void do_notify_resume(struct pt_regs *, void *, __u32);
34#ifdef __i386__
35struct old_sigaction {
36 __sighandler_t sa_handler;
37 old_sigset_t sa_mask;
38 unsigned long sa_flags;
39 __sigrestore_t sa_restorer;
40};
41
42struct sigaction {
43 __sighandler_t sa_handler;
44 unsigned long sa_flags;
45 __sigrestore_t sa_restorer;
46 sigset_t sa_mask; /* mask last for extensibility */
47};
48
49struct k_sigaction {
50 struct sigaction sa;
51};
52 34
53#else /* __i386__ */ 35#define __ARCH_HAS_SA_RESTORER
54#endif /* !__i386__ */ 36
55#include <asm/sigcontext.h> 37#include <asm/sigcontext.h>
56 38
57#ifdef __i386__ 39#ifdef __i386__
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 31f61f96e0fb..8459efc39686 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -32,29 +32,17 @@ struct mmap_arg_struct32;
32asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); 32asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
33asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); 33asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
34 34
35struct sigaction32;
36struct old_sigaction32;
37asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
38 struct sigaction32 __user *, unsigned int);
39asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
40 struct old_sigaction32 __user *);
41asmlinkage long sys32_alarm(unsigned int); 35asmlinkage long sys32_alarm(unsigned int);
42 36
43asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); 37asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
44asmlinkage long sys32_sysfs(int, u32, u32); 38asmlinkage long sys32_sysfs(int, u32, u32);
45 39
46asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
47 struct compat_timespec __user *);
48asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
49asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
50
51asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); 40asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
52asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); 41asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
53 42
54asmlinkage long sys32_personality(unsigned long); 43asmlinkage long sys32_personality(unsigned long);
55asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); 44asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
56 45
57long sys32_lseek(unsigned int, int, unsigned int);
58long sys32_kill(int, int); 46long sys32_kill(int, int);
59long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); 47long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
60long sys32_vm86_warning(void); 48long sys32_vm86_warning(void);
@@ -68,9 +56,8 @@ asmlinkage long sys32_fallocate(int, int, unsigned,
68 unsigned, unsigned, unsigned); 56 unsigned, unsigned, unsigned);
69 57
70/* ia32/ia32_signal.c */ 58/* ia32/ia32_signal.c */
71asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); 59asmlinkage long sys32_sigreturn(void);
72asmlinkage long sys32_sigreturn(struct pt_regs *); 60asmlinkage long sys32_rt_sigreturn(void);
73asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
74 61
75/* ia32/ipc32.c */ 62/* ia32/ipc32.c */
76asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); 63asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 58b7e3eac0ae..6cf0a9cc60cd 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -18,13 +18,13 @@
18/* Common in X86_32 and X86_64 */ 18/* Common in X86_32 and X86_64 */
19/* kernel/ioport.c */ 19/* kernel/ioport.c */
20asmlinkage long sys_ioperm(unsigned long, unsigned long, int); 20asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
21long sys_iopl(unsigned int, struct pt_regs *); 21asmlinkage long sys_iopl(unsigned int);
22 22
23/* kernel/ldt.c */ 23/* kernel/ldt.c */
24asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); 24asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
25 25
26/* kernel/signal.c */ 26/* kernel/signal.c */
27long sys_rt_sigreturn(struct pt_regs *); 27long sys_rt_sigreturn(void);
28 28
29/* kernel/tls.c */ 29/* kernel/tls.c */
30asmlinkage int sys_set_thread_area(struct user_desc __user *); 30asmlinkage int sys_set_thread_area(struct user_desc __user *);
@@ -34,14 +34,11 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
34#ifdef CONFIG_X86_32 34#ifdef CONFIG_X86_32
35 35
36/* kernel/signal.c */ 36/* kernel/signal.c */
37asmlinkage int sys_sigsuspend(int, int, old_sigset_t); 37unsigned long sys_sigreturn(void);
38asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
39 struct old_sigaction __user *);
40unsigned long sys_sigreturn(struct pt_regs *);
41 38
42/* kernel/vm86_32.c */ 39/* kernel/vm86_32.c */
43int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); 40int sys_vm86old(struct vm86_struct __user *);
44int sys_vm86(unsigned long, unsigned long, struct pt_regs *); 41int sys_vm86(unsigned long, unsigned long);
45 42
46#else /* CONFIG_X86_32 */ 43#else /* CONFIG_X86_32 */
47 44
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2d946e63ee82..2cd056e3ada3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,7 +20,6 @@
20struct task_struct; 20struct task_struct;
21struct exec_domain; 21struct exec_domain;
22#include <asm/processor.h> 22#include <asm/processor.h>
23#include <asm/ftrace.h>
24#include <linux/atomic.h> 23#include <linux/atomic.h>
25 24
26struct thread_info { 25struct thread_info {
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0fee48e279cc..50a7fc0f824a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -20,10 +20,20 @@ static inline void __native_flush_tlb(void)
20 native_write_cr3(native_read_cr3()); 20 native_write_cr3(native_read_cr3());
21} 21}
22 22
23static inline void __native_flush_tlb_global_irq_disabled(void)
24{
25 unsigned long cr4;
26
27 cr4 = native_read_cr4();
28 /* clear PGE */
29 native_write_cr4(cr4 & ~X86_CR4_PGE);
30 /* write old PGE again and flush TLBs */
31 native_write_cr4(cr4);
32}
33
23static inline void __native_flush_tlb_global(void) 34static inline void __native_flush_tlb_global(void)
24{ 35{
25 unsigned long flags; 36 unsigned long flags;
26 unsigned long cr4;
27 37
28 /* 38 /*
29 * Read-modify-write to CR4 - protect it from preemption and 39 * Read-modify-write to CR4 - protect it from preemption and
@@ -32,11 +42,7 @@ static inline void __native_flush_tlb_global(void)
32 */ 42 */
33 raw_local_irq_save(flags); 43 raw_local_irq_save(flags);
34 44
35 cr4 = native_read_cr4(); 45 __native_flush_tlb_global_irq_disabled();
36 /* clear PGE */
37 native_write_cr4(cr4 & ~X86_CR4_PGE);
38 /* write old PGE again and flush TLBs */
39 native_write_cr4(cr4);
40 46
41 raw_local_irq_restore(flags); 47 raw_local_irq_restore(flags);
42} 48}
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1709801d18ec..5ee26875baea 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -125,13 +125,12 @@ extern int __get_user_4(void);
125extern int __get_user_8(void); 125extern int __get_user_8(void);
126extern int __get_user_bad(void); 126extern int __get_user_bad(void);
127 127
128#define __get_user_x(size, ret, x, ptr) \ 128/*
129 asm volatile("call __get_user_" #size \ 129 * This is a type: either unsigned long, if the argument fits into
130 : "=a" (ret), "=d" (x) \ 130 * that type, or otherwise unsigned long long.
131 : "0" (ptr)) \ 131 */
132 132#define __inttype(x) \
133/* Careful: we have to cast the result to the type of the pointer 133__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
134 * for sign reasons */
135 134
136/** 135/**
137 * get_user: - Get a simple variable from user space. 136 * get_user: - Get a simple variable from user space.
@@ -150,38 +149,26 @@ extern int __get_user_bad(void);
150 * Returns zero on success, or -EFAULT on error. 149 * Returns zero on success, or -EFAULT on error.
151 * On error, the variable @x is set to zero. 150 * On error, the variable @x is set to zero.
152 */ 151 */
153#ifdef CONFIG_X86_32 152/*
154#define __get_user_8(__ret_gu, __val_gu, ptr) \ 153 * Careful: we have to cast the result to the type of the pointer
155 __get_user_x(X, __ret_gu, __val_gu, ptr) 154 * for sign reasons.
156#else 155 *
157#define __get_user_8(__ret_gu, __val_gu, ptr) \ 156 * The use of %edx as the register specifier is a bit of a
158 __get_user_x(8, __ret_gu, __val_gu, ptr) 157 * simplification, as gcc only cares about it as the starting point
159#endif 158 * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits
160 159 * (%ecx being the next register in gcc's x86 register sequence), and
160 * %rdx on 64 bits.
161 */
161#define get_user(x, ptr) \ 162#define get_user(x, ptr) \
162({ \ 163({ \
163 int __ret_gu; \ 164 int __ret_gu; \
164 unsigned long __val_gu; \ 165 register __inttype(*(ptr)) __val_gu asm("%edx"); \
165 __chk_user_ptr(ptr); \ 166 __chk_user_ptr(ptr); \
166 might_fault(); \ 167 might_fault(); \
167 switch (sizeof(*(ptr))) { \ 168 asm volatile("call __get_user_%P3" \
168 case 1: \ 169 : "=a" (__ret_gu), "=r" (__val_gu) \
169 __get_user_x(1, __ret_gu, __val_gu, ptr); \ 170 : "0" (ptr), "i" (sizeof(*(ptr)))); \
170 break; \ 171 (x) = (__typeof__(*(ptr))) __val_gu; \
171 case 2: \
172 __get_user_x(2, __ret_gu, __val_gu, ptr); \
173 break; \
174 case 4: \
175 __get_user_x(4, __ret_gu, __val_gu, ptr); \
176 break; \
177 case 8: \
178 __get_user_8(__ret_gu, __val_gu, ptr); \
179 break; \
180 default: \
181 __get_user_x(X, __ret_gu, __val_gu, ptr); \
182 break; \
183 } \
184 (x) = (__typeof__(*(ptr)))__val_gu; \
185 __ret_gu; \ 172 __ret_gu; \
186}) 173})
187 174
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index a0790e07ba65..3d5df1c4447f 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -38,8 +38,6 @@
38# define __ARCH_WANT_SYS_OLD_GETRLIMIT 38# define __ARCH_WANT_SYS_OLD_GETRLIMIT
39# define __ARCH_WANT_SYS_OLD_UNAME 39# define __ARCH_WANT_SYS_OLD_UNAME
40# define __ARCH_WANT_SYS_PAUSE 40# define __ARCH_WANT_SYS_PAUSE
41# define __ARCH_WANT_SYS_RT_SIGACTION
42# define __ARCH_WANT_SYS_RT_SIGSUSPEND
43# define __ARCH_WANT_SYS_SGETMASK 41# define __ARCH_WANT_SYS_SGETMASK
44# define __ARCH_WANT_SYS_SIGNAL 42# define __ARCH_WANT_SYS_SIGNAL
45# define __ARCH_WANT_SYS_SIGPENDING 43# define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 21f7385badb8..2c32df95bb78 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV architectural definitions 6 * SGI UV architectural definitions
7 * 7 *
8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_HUB_H 11#ifndef _ASM_X86_UV_UV_HUB_H
@@ -175,6 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
175 */ 175 */
176#define UV1_HUB_REVISION_BASE 1 176#define UV1_HUB_REVISION_BASE 1
177#define UV2_HUB_REVISION_BASE 3 177#define UV2_HUB_REVISION_BASE 3
178#define UV3_HUB_REVISION_BASE 5
178 179
179static inline int is_uv1_hub(void) 180static inline int is_uv1_hub(void)
180{ 181{
@@ -183,6 +184,23 @@ static inline int is_uv1_hub(void)
183 184
184static inline int is_uv2_hub(void) 185static inline int is_uv2_hub(void)
185{ 186{
187 return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
188 (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
189}
190
191static inline int is_uv3_hub(void)
192{
193 return uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE;
194}
195
196static inline int is_uv_hub(void)
197{
198 return uv_hub_info->hub_revision;
199}
200
201/* code common to uv2 and uv3 only */
202static inline int is_uvx_hub(void)
203{
186 return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE; 204 return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
187} 205}
188 206
@@ -230,14 +248,23 @@ union uvh_apicid {
230#define UV2_LOCAL_MMR_SIZE (32UL * 1024 * 1024) 248#define UV2_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
231#define UV2_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024) 249#define UV2_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
232 250
233#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE \ 251#define UV3_LOCAL_MMR_BASE 0xfa000000UL
234 : UV2_LOCAL_MMR_BASE) 252#define UV3_GLOBAL_MMR32_BASE 0xfc000000UL
235#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE \ 253#define UV3_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
236 : UV2_GLOBAL_MMR32_BASE) 254#define UV3_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
237#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \ 255
238 UV2_LOCAL_MMR_SIZE) 256#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
257 (is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
258 UV3_LOCAL_MMR_BASE))
259#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE :\
260 (is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE :\
261 UV3_GLOBAL_MMR32_BASE))
262#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
263 (is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
264 UV3_LOCAL_MMR_SIZE))
239#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\ 265#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\
240 UV2_GLOBAL_MMR32_SIZE) 266 (is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE :\
267 UV3_GLOBAL_MMR32_SIZE))
241#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) 268#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base)
242 269
243#define UV_GLOBAL_GRU_MMR_BASE 0x4000000 270#define UV_GLOBAL_GRU_MMR_BASE 0x4000000
@@ -599,6 +626,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
599 * 1 - UV1 rev 1.0 initial silicon 626 * 1 - UV1 rev 1.0 initial silicon
600 * 2 - UV1 rev 2.0 production silicon 627 * 2 - UV1 rev 2.0 production silicon
601 * 3 - UV2 rev 1.0 initial silicon 628 * 3 - UV2 rev 1.0 initial silicon
629 * 5 - UV3 rev 1.0 initial silicon
602 */ 630 */
603static inline int uv_get_min_hub_revision_id(void) 631static inline int uv_get_min_hub_revision_id(void)
604{ 632{
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index cf1d73643f60..bd5f80e58a23 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,16 +5,25 @@
5 * 5 *
6 * SGI UV MMR definitions 6 * SGI UV MMR definitions
7 * 7 *
8 * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_MMRS_H 11#ifndef _ASM_X86_UV_UV_MMRS_H
12#define _ASM_X86_UV_UV_MMRS_H 12#define _ASM_X86_UV_UV_MMRS_H
13 13
14/* 14/*
15 * This file contains MMR definitions for both UV1 & UV2 hubs.       15 * This file contains MMR definitions for all UV hub types.
16 * 16 *
17 * In general, MMR addresses and structures are identical on both hubs. 17 * To minimize coding differences between hub types, the symbols are
18 * grouped by architecture types.
19 *
20 * UVH - definitions common to all UV hub types.
21 * UVXH - definitions common to all UV eXtended hub types (currently 2 & 3).
22 * UV1H - definitions specific to UV type 1 hub.
23 * UV2H - definitions specific to UV type 2 hub.
24 * UV3H - definitions specific to UV type 3 hub.
25 *
26 * So in general, MMR addresses and structures are identical on all hubs types.
18 * These MMRs are identified as: 27 * These MMRs are identified as:
19 * #define UVH_xxx <address> 28 * #define UVH_xxx <address>
20 * union uvh_xxx { 29 * union uvh_xxx {
@@ -23,24 +32,36 @@
23 * } s; 32 * } s;
24 * }; 33 * };
25 * 34 *
26 * If the MMR exists on both hub type but has different addresses or 35 * If the MMR exists on all hub types but has different addresses:
27 * contents, the MMR definition is similar to: 36 * #define UV1Hxxx a
28 * #define UV1H_xxx <uv1 address> 37 * #define UV2Hxxx b
29 * #define UV2H_xxx <uv2address> 38 * #define UV3Hxxx c
30 * #define UVH_xxx (is_uv1_hub() ? UV1H_xxx : UV2H_xxx) 39 * #define UVHxxx (is_uv1_hub() ? UV1Hxxx :
40 * (is_uv2_hub() ? UV2Hxxx :
41 * UV3Hxxx))
42 *
43 * If the MMR exists on all hub types > 1 but has different addresses:
44 * #define UV2Hxxx b
45 * #define UV3Hxxx c
46 * #define UVXHxxx (is_uv2_hub() ? UV2Hxxx :
47 * UV3Hxxx))
48 *
31 * union uvh_xxx { 49 * union uvh_xxx {
32 * unsigned long v; 50 * unsigned long v;
33 * struct uv1h_int_cmpd_s { (Common fields only) 51 * struct uvh_xxx_s { # Common fields only
34 * } s; 52 * } s;
35 * struct uv1h_int_cmpd_s { (Full UV1 definition) 53 * struct uv1h_xxx_s { # Full UV1 definition (*)
36 * } s1; 54 * } s1;
37 * struct uv2h_int_cmpd_s { (Full UV2 definition) 55 * struct uv2h_xxx_s { # Full UV2 definition (*)
38 * } s2; 56 * } s2;
57 * struct uv3h_xxx_s { # Full UV3 definition (*)
58 * } s3;
39 * }; 59 * };
60 * (* - if present and different than the common struct)
40 * 61 *
41 * Only essential difference are enumerated. For example, if the address is 62 * Only essential differences are enumerated. For example, if the address is
42 * the same for both UV1 & UV2, only a single #define is generated. Likewise, 63 * the same for all UV's, only a single #define is generated. Likewise,
43 * if the contents is the same for both hubs, only the "s" structure is 64 * if the contents is the same for all hubs, only the "s" structure is
44 * generated. 65 * generated.
45 * 66 *
46 * If the MMR exists on ONLY 1 type of hub, no generic definition is 67 * If the MMR exists on ONLY 1 type of hub, no generic definition is
@@ -51,6 +72,8 @@
51 * struct uvh_int_cmpd_s { 72 * struct uvh_int_cmpd_s {
52 * } sn; 73 * } sn;
53 * }; 74 * };
75 *
76 * (GEN Flags: mflags_opt= undefs=0 UV23=UVXH)
54 */ 77 */
55 78
56#define UV_MMR_ENABLE (1UL << 63) 79#define UV_MMR_ENABLE (1UL << 63)
@@ -58,15 +81,18 @@
58#define UV1_HUB_PART_NUMBER 0x88a5 81#define UV1_HUB_PART_NUMBER 0x88a5
59#define UV2_HUB_PART_NUMBER 0x8eb8 82#define UV2_HUB_PART_NUMBER 0x8eb8
60#define UV2_HUB_PART_NUMBER_X 0x1111 83#define UV2_HUB_PART_NUMBER_X 0x1111
84#define UV3_HUB_PART_NUMBER 0x9578
85#define UV3_HUB_PART_NUMBER_X 0x4321
61 86
62/* Compat: if this #define is present, UV headers support UV2 */ 87/* Compat: Indicate which UV Hubs are supported. */
63#define UV2_HUB_IS_SUPPORTED 1 88#define UV2_HUB_IS_SUPPORTED 1
89#define UV3_HUB_IS_SUPPORTED 1
64 90
65/* ========================================================================= */ 91/* ========================================================================= */
66/* UVH_BAU_DATA_BROADCAST */ 92/* UVH_BAU_DATA_BROADCAST */
67/* ========================================================================= */ 93/* ========================================================================= */
68#define UVH_BAU_DATA_BROADCAST 0x61688UL 94#define UVH_BAU_DATA_BROADCAST 0x61688UL
69#define UVH_BAU_DATA_BROADCAST_32 0x440 95#define UVH_BAU_DATA_BROADCAST_32 0x440
70 96
71#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 97#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0
72#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL 98#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL
@@ -82,8 +108,8 @@ union uvh_bau_data_broadcast_u {
82/* ========================================================================= */ 108/* ========================================================================= */
83/* UVH_BAU_DATA_CONFIG */ 109/* UVH_BAU_DATA_CONFIG */
84/* ========================================================================= */ 110/* ========================================================================= */
85#define UVH_BAU_DATA_CONFIG 0x61680UL 111#define UVH_BAU_DATA_CONFIG 0x61680UL
86#define UVH_BAU_DATA_CONFIG_32 0x438 112#define UVH_BAU_DATA_CONFIG_32 0x438
87 113
88#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 114#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
89#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 115#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
@@ -121,10 +147,14 @@ union uvh_bau_data_config_u {
121/* ========================================================================= */ 147/* ========================================================================= */
122/* UVH_EVENT_OCCURRED0 */ 148/* UVH_EVENT_OCCURRED0 */
123/* ========================================================================= */ 149/* ========================================================================= */
124#define UVH_EVENT_OCCURRED0 0x70000UL 150#define UVH_EVENT_OCCURRED0 0x70000UL
125#define UVH_EVENT_OCCURRED0_32 0x5e8 151#define UVH_EVENT_OCCURRED0_32 0x5e8
152
153#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
154#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
155#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
156#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
126 157
127#define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0
128#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 158#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
129#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 159#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
130#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 160#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3
@@ -135,7 +165,6 @@ union uvh_bau_data_config_u {
135#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 165#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
136#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 166#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
137#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 167#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
138#define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
139#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 168#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
140#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 169#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
141#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 170#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
@@ -181,7 +210,6 @@ union uvh_bau_data_config_u {
181#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 210#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54
182#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 211#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55
183#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 212#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
184#define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
185#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL 213#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
186#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL 214#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
187#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL 215#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
@@ -192,7 +220,6 @@ union uvh_bau_data_config_u {
192#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL 220#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
193#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL 221#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
194#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL 222#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
195#define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
196#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL 223#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
197#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL 224#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
198#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL 225#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
@@ -239,188 +266,130 @@ union uvh_bau_data_config_u {
239#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL 266#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
240#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL 267#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
241 268
242#define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 269#define UVXH_EVENT_OCCURRED0_QP_HCERR_SHFT 1
243#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 270#define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2
244#define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 271#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
245#define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 272#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
246#define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 273#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
247#define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 274#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
248#define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 275#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
249#define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 276#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
250#define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 277#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
251#define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 278#define UVXH_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
252#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 279#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
253#define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 280#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
254#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 281#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
255#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 282#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
256#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 283#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
257#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 284#define UVXH_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
258#define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 285#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
259#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 286#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
260#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 287#define UVXH_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
261#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 288#define UVXH_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
262#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 289#define UVXH_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
263#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 290#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
264#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 291#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
265#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 292#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
266#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 293#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
267#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 294#define UVXH_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
268#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 295#define UVXH_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
269#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 296#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
270#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 297#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
271#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 298#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
272#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 299#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
273#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 300#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
274#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 301#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
275#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 302#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
276#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 303#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
277#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 304#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
278#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 305#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
279#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 306#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
280#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 307#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
281#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 308#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
282#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 309#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
283#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 310#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
284#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 311#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
285#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 312#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
286#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 313#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
287#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 314#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
288#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 315#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
289#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 316#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
290#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 317#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
291#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 318#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
292#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 319#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
293#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 320#define UVXH_EVENT_OCCURRED0_IPI_INT_SHFT 53
294#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 321#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
295#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 322#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
296#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 323#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
297#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 324#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
298#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 325#define UVXH_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
299#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 326#define UVXH_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
300#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 327#define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL
301#define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL 328#define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL
302#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL 329#define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL
303#define UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL 330#define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL
304#define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL 331#define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL
305#define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL 332#define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL
306#define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL 333#define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL
307#define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL 334#define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL
308#define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL 335#define UVXH_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
309#define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL 336#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL
310#define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL 337#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL
311#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL 338#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL
312#define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL 339#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL
313#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL 340#define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL
314#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL 341#define UVXH_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
315#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL 342#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
316#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL 343#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
317#define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL 344#define UVXH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
318#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL 345#define UVXH_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
319#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL 346#define UVXH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
320#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL 347#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
321#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL 348#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
322#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL 349#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
323#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL 350#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
324#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL 351#define UVXH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
325#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL 352#define UVXH_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
326#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL 353#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
327#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL 354#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
328#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL 355#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
329#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL 356#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
330#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL 357#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
331#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL 358#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
332#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL 359#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
333#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL 360#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
334#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL 361#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
335#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL 362#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
336#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL 363#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
337#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL 364#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
338#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL 365#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
339#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL 366#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
340#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL 367#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
341#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL 368#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
342#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL 369#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
343#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL 370#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
344#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL 371#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
345#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL 372#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
346#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL 373#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
347#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL 374#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
348#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL 375#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
349#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL 376#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
350#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL 377#define UVXH_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
351#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL 378#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
352#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL 379#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
353#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL 380#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
354#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL 381#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
355#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL 382#define UVXH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
356#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
357#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
358#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
359#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
360 383
361union uvh_event_occurred0_u { 384union uvh_event_occurred0_u {
362 unsigned long v; 385 unsigned long v;
363 struct uv1h_event_occurred0_s { 386 struct uvh_event_occurred0_s {
364 unsigned long lb_hcerr:1; /* RW, W1C */ 387 unsigned long lb_hcerr:1; /* RW, W1C */
365 unsigned long gr0_hcerr:1; /* RW, W1C */ 388 unsigned long rsvd_1_10:10;
366 unsigned long gr1_hcerr:1; /* RW, W1C */
367 unsigned long lh_hcerr:1; /* RW, W1C */
368 unsigned long rh_hcerr:1; /* RW, W1C */
369 unsigned long xn_hcerr:1; /* RW, W1C */
370 unsigned long si_hcerr:1; /* RW, W1C */
371 unsigned long lb_aoerr0:1; /* RW, W1C */
372 unsigned long gr0_aoerr0:1; /* RW, W1C */
373 unsigned long gr1_aoerr0:1; /* RW, W1C */
374 unsigned long lh_aoerr0:1; /* RW, W1C */
375 unsigned long rh_aoerr0:1; /* RW, W1C */ 389 unsigned long rh_aoerr0:1; /* RW, W1C */
376 unsigned long xn_aoerr0:1; /* RW, W1C */ 390 unsigned long rsvd_12_63:52;
377 unsigned long si_aoerr0:1; /* RW, W1C */ 391 } s;
378 unsigned long lb_aoerr1:1; /* RW, W1C */ 392 struct uvxh_event_occurred0_s {
379 unsigned long gr0_aoerr1:1; /* RW, W1C */
380 unsigned long gr1_aoerr1:1; /* RW, W1C */
381 unsigned long lh_aoerr1:1; /* RW, W1C */
382 unsigned long rh_aoerr1:1; /* RW, W1C */
383 unsigned long xn_aoerr1:1; /* RW, W1C */
384 unsigned long si_aoerr1:1; /* RW, W1C */
385 unsigned long rh_vpi_int:1; /* RW, W1C */
386 unsigned long system_shutdown_int:1; /* RW, W1C */
387 unsigned long lb_irq_int_0:1; /* RW, W1C */
388 unsigned long lb_irq_int_1:1; /* RW, W1C */
389 unsigned long lb_irq_int_2:1; /* RW, W1C */
390 unsigned long lb_irq_int_3:1; /* RW, W1C */
391 unsigned long lb_irq_int_4:1; /* RW, W1C */
392 unsigned long lb_irq_int_5:1; /* RW, W1C */
393 unsigned long lb_irq_int_6:1; /* RW, W1C */
394 unsigned long lb_irq_int_7:1; /* RW, W1C */
395 unsigned long lb_irq_int_8:1; /* RW, W1C */
396 unsigned long lb_irq_int_9:1; /* RW, W1C */
397 unsigned long lb_irq_int_10:1; /* RW, W1C */
398 unsigned long lb_irq_int_11:1; /* RW, W1C */
399 unsigned long lb_irq_int_12:1; /* RW, W1C */
400 unsigned long lb_irq_int_13:1; /* RW, W1C */
401 unsigned long lb_irq_int_14:1; /* RW, W1C */
402 unsigned long lb_irq_int_15:1; /* RW, W1C */
403 unsigned long l1_nmi_int:1; /* RW, W1C */
404 unsigned long stop_clock:1; /* RW, W1C */
405 unsigned long asic_to_l1:1; /* RW, W1C */
406 unsigned long l1_to_asic:1; /* RW, W1C */
407 unsigned long ltc_int:1; /* RW, W1C */
408 unsigned long la_seq_trigger:1; /* RW, W1C */
409 unsigned long ipi_int:1; /* RW, W1C */
410 unsigned long extio_int0:1; /* RW, W1C */
411 unsigned long extio_int1:1; /* RW, W1C */
412 unsigned long extio_int2:1; /* RW, W1C */
413 unsigned long extio_int3:1; /* RW, W1C */
414 unsigned long profile_int:1; /* RW, W1C */
415 unsigned long rtc0:1; /* RW, W1C */
416 unsigned long rtc1:1; /* RW, W1C */
417 unsigned long rtc2:1; /* RW, W1C */
418 unsigned long rtc3:1; /* RW, W1C */
419 unsigned long bau_data:1; /* RW, W1C */
420 unsigned long power_management_req:1; /* RW, W1C */
421 unsigned long rsvd_57_63:7;
422 } s1;
423 struct uv2h_event_occurred0_s {
424 unsigned long lb_hcerr:1; /* RW */ 393 unsigned long lb_hcerr:1; /* RW */
425 unsigned long qp_hcerr:1; /* RW */ 394 unsigned long qp_hcerr:1; /* RW */
426 unsigned long rh_hcerr:1; /* RW */ 395 unsigned long rh_hcerr:1; /* RW */
@@ -481,19 +450,20 @@ union uvh_event_occurred0_u {
481 unsigned long extio_int3:1; /* RW */ 450 unsigned long extio_int3:1; /* RW */
482 unsigned long profile_int:1; /* RW */ 451 unsigned long profile_int:1; /* RW */
483 unsigned long rsvd_59_63:5; 452 unsigned long rsvd_59_63:5;
484 } s2; 453 } sx;
485}; 454};
486 455
487/* ========================================================================= */ 456/* ========================================================================= */
488/* UVH_EVENT_OCCURRED0_ALIAS */ 457/* UVH_EVENT_OCCURRED0_ALIAS */
489/* ========================================================================= */ 458/* ========================================================================= */
490#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL 459#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL
491#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 460#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0
461
492 462
493/* ========================================================================= */ 463/* ========================================================================= */
494/* UVH_GR0_TLB_INT0_CONFIG */ 464/* UVH_GR0_TLB_INT0_CONFIG */
495/* ========================================================================= */ 465/* ========================================================================= */
496#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL 466#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
497 467
498#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 468#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
499#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 469#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
@@ -531,7 +501,7 @@ union uvh_gr0_tlb_int0_config_u {
531/* ========================================================================= */ 501/* ========================================================================= */
532/* UVH_GR0_TLB_INT1_CONFIG */ 502/* UVH_GR0_TLB_INT1_CONFIG */
533/* ========================================================================= */ 503/* ========================================================================= */
534#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL 504#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
535 505
536#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 506#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
537#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 507#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
@@ -571,9 +541,11 @@ union uvh_gr0_tlb_int1_config_u {
571/* ========================================================================= */ 541/* ========================================================================= */
572#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL 542#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL
573#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL 543#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL
574#define UVH_GR0_TLB_MMR_CONTROL (is_uv1_hub() ? \ 544#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL
575 UV1H_GR0_TLB_MMR_CONTROL : \ 545#define UVH_GR0_TLB_MMR_CONTROL \
576 UV2H_GR0_TLB_MMR_CONTROL) 546 (is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \
547 (is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \
548 UV3H_GR0_TLB_MMR_CONTROL))
577 549
578#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 550#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
579#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 551#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
@@ -611,6 +583,21 @@ union uvh_gr0_tlb_int1_config_u {
611#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL 583#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL
612#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL 584#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
613 585
586#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
587#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
588#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
589#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
590#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
591#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
592#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
593#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
594#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
595#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
596#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
597#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
598#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
599#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
600
614#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 601#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
615#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 602#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
616#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 603#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
@@ -630,6 +617,23 @@ union uvh_gr0_tlb_int1_config_u {
630#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL 617#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
631#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL 618#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
632 619
620#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
621#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
622#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
623#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
624#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
625#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
626#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
627#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
628#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
629#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
630#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
631#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
632#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
633#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
634#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
635#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
636
633union uvh_gr0_tlb_mmr_control_u { 637union uvh_gr0_tlb_mmr_control_u {
634 unsigned long v; 638 unsigned long v;
635 struct uvh_gr0_tlb_mmr_control_s { 639 struct uvh_gr0_tlb_mmr_control_s {
@@ -642,7 +646,9 @@ union uvh_gr0_tlb_mmr_control_u {
642 unsigned long rsvd_21_29:9; 646 unsigned long rsvd_21_29:9;
643 unsigned long mmr_write:1; /* WP */ 647 unsigned long mmr_write:1; /* WP */
644 unsigned long mmr_read:1; /* WP */ 648 unsigned long mmr_read:1; /* WP */
645 unsigned long rsvd_32_63:32; 649 unsigned long rsvd_32_48:17;
650 unsigned long rsvd_49_51:3;
651 unsigned long rsvd_52_63:12;
646 } s; 652 } s;
647 struct uv1h_gr0_tlb_mmr_control_s { 653 struct uv1h_gr0_tlb_mmr_control_s {
648 unsigned long index:12; /* RW */ 654 unsigned long index:12; /* RW */
@@ -666,6 +672,23 @@ union uvh_gr0_tlb_mmr_control_u {
666 unsigned long mmr_inj_tlblruv:1; /* RW */ 672 unsigned long mmr_inj_tlblruv:1; /* RW */
667 unsigned long rsvd_61_63:3; 673 unsigned long rsvd_61_63:3;
668 } s1; 674 } s1;
675 struct uvxh_gr0_tlb_mmr_control_s {
676 unsigned long index:12; /* RW */
677 unsigned long mem_sel:2; /* RW */
678 unsigned long rsvd_14_15:2;
679 unsigned long auto_valid_en:1; /* RW */
680 unsigned long rsvd_17_19:3;
681 unsigned long mmr_hash_index_en:1; /* RW */
682 unsigned long rsvd_21_29:9;
683 unsigned long mmr_write:1; /* WP */
684 unsigned long mmr_read:1; /* WP */
685 unsigned long mmr_op_done:1; /* RW */
686 unsigned long rsvd_33_47:15;
687 unsigned long rsvd_48:1;
688 unsigned long rsvd_49_51:3;
689 unsigned long rsvd_52:1;
690 unsigned long rsvd_53_63:11;
691 } sx;
669 struct uv2h_gr0_tlb_mmr_control_s { 692 struct uv2h_gr0_tlb_mmr_control_s {
670 unsigned long index:12; /* RW */ 693 unsigned long index:12; /* RW */
671 unsigned long mem_sel:2; /* RW */ 694 unsigned long mem_sel:2; /* RW */
@@ -683,6 +706,24 @@ union uvh_gr0_tlb_mmr_control_u {
683 unsigned long mmr_inj_tlbram:1; /* RW */ 706 unsigned long mmr_inj_tlbram:1; /* RW */
684 unsigned long rsvd_53_63:11; 707 unsigned long rsvd_53_63:11;
685 } s2; 708 } s2;
709 struct uv3h_gr0_tlb_mmr_control_s {
710 unsigned long index:12; /* RW */
711 unsigned long mem_sel:2; /* RW */
712 unsigned long rsvd_14_15:2;
713 unsigned long auto_valid_en:1; /* RW */
714 unsigned long rsvd_17_19:3;
715 unsigned long mmr_hash_index_en:1; /* RW */
716 unsigned long ecc_sel:1; /* RW */
717 unsigned long rsvd_22_29:8;
718 unsigned long mmr_write:1; /* WP */
719 unsigned long mmr_read:1; /* WP */
720 unsigned long mmr_op_done:1; /* RW */
721 unsigned long rsvd_33_47:15;
722 unsigned long undef_48:1; /* Undefined */
723 unsigned long rsvd_49_51:3;
724 unsigned long undef_52:1; /* Undefined */
725 unsigned long rsvd_53_63:11;
726 } s3;
686}; 727};
687 728
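For reference (not part of this diff): a minimal sketch of how the hub-selecting address macro and the per-revision bitfield views above are typically combined. It assumes the usual UV MMR accessors uv_read_local_mmr()/uv_write_local_mmr() from <asm/uv/uv_hub.h>, and uses the UVXH "sx" layout, so it reflects the UV2/UV3 flow where mmr_op_done reports completion (UV1 has no such bit).

	/* Sketch only: drive a TLB MMR read via the UVXH layout shown above.
	 * uv_read_local_mmr()/uv_write_local_mmr() are assumed helpers;
	 * mem_sel value 0 is a hypothetical bank selection for illustration. */
	#include <asm/uv/uv_hub.h>
	#include <asm/uv/uv_mmrs.h>

	static void uv_tlb_mmr_read_entry(int index)
	{
		union uvh_gr0_tlb_mmr_control_u ctl;

		ctl.v = 0;
		ctl.sx.index    = index;	/* TLB entry to read */
		ctl.sx.mem_sel  = 0;		/* hypothetical bank select */
		ctl.sx.mmr_read = 1;		/* trigger the read operation */

		/* UVH_GR0_TLB_MMR_CONTROL resolves to the UV1/UV2/UV3 offset */
		uv_write_local_mmr(UVH_GR0_TLB_MMR_CONTROL, ctl.v);

		/* Poll mmr_op_done (UVXH/UV3H layouts above); UV2/UV3 only */
		do {
			ctl.v = uv_read_local_mmr(UVH_GR0_TLB_MMR_CONTROL);
		} while (!ctl.sx.mmr_op_done);
	}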
688/* ========================================================================= */ 729/* ========================================================================= */
@@ -690,9 +731,11 @@ union uvh_gr0_tlb_mmr_control_u {
690/* ========================================================================= */ 731/* ========================================================================= */
691#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL 732#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL
692#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL 733#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
693#define UVH_GR0_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \ 734#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
694 UV1H_GR0_TLB_MMR_READ_DATA_HI : \ 735#define UVH_GR0_TLB_MMR_READ_DATA_HI \
695 UV2H_GR0_TLB_MMR_READ_DATA_HI) 736 (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \
737 (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \
738 UV3H_GR0_TLB_MMR_READ_DATA_HI))
696 739
697#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 740#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
698#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 741#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -703,6 +746,46 @@ union uvh_gr0_tlb_mmr_control_u {
703#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL 746#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
704#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL 747#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
705 748
749#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
750#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
751#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
752#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
753#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
754#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
755#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
756#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
757
758#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
759#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
760#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
761#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
762#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
763#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
764#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
765#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
766
767#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
768#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
769#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
770#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
771#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
772#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
773#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
774#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
775
776#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
777#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
778#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
779#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
780#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
781#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
782#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
783#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
784#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
785#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
786#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
787#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
788
706union uvh_gr0_tlb_mmr_read_data_hi_u { 789union uvh_gr0_tlb_mmr_read_data_hi_u {
707 unsigned long v; 790 unsigned long v;
708 struct uvh_gr0_tlb_mmr_read_data_hi_s { 791 struct uvh_gr0_tlb_mmr_read_data_hi_s {
@@ -712,6 +795,36 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
712 unsigned long larger:1; /* RO */ 795 unsigned long larger:1; /* RO */
713 unsigned long rsvd_45_63:19; 796 unsigned long rsvd_45_63:19;
714 } s; 797 } s;
798 struct uv1h_gr0_tlb_mmr_read_data_hi_s {
799 unsigned long pfn:41; /* RO */
800 unsigned long gaa:2; /* RO */
801 unsigned long dirty:1; /* RO */
802 unsigned long larger:1; /* RO */
803 unsigned long rsvd_45_63:19;
804 } s1;
805 struct uvxh_gr0_tlb_mmr_read_data_hi_s {
806 unsigned long pfn:41; /* RO */
807 unsigned long gaa:2; /* RO */
808 unsigned long dirty:1; /* RO */
809 unsigned long larger:1; /* RO */
810 unsigned long rsvd_45_63:19;
811 } sx;
812 struct uv2h_gr0_tlb_mmr_read_data_hi_s {
813 unsigned long pfn:41; /* RO */
814 unsigned long gaa:2; /* RO */
815 unsigned long dirty:1; /* RO */
816 unsigned long larger:1; /* RO */
817 unsigned long rsvd_45_63:19;
818 } s2;
819 struct uv3h_gr0_tlb_mmr_read_data_hi_s {
820 unsigned long pfn:41; /* RO */
821 unsigned long gaa:2; /* RO */
822 unsigned long dirty:1; /* RO */
823 unsigned long larger:1; /* RO */
824 unsigned long aa_ext:1; /* RO */
825 unsigned long undef_46_54:9; /* Undefined */
826 unsigned long way_ecc:9; /* RO */
827 } s3;
715}; 828};
716 829
717/* ========================================================================= */ 830/* ========================================================================= */
@@ -719,9 +832,11 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
719/* ========================================================================= */ 832/* ========================================================================= */
720#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL 833#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL
721#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL 834#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
722#define UVH_GR0_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \ 835#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
723 UV1H_GR0_TLB_MMR_READ_DATA_LO : \ 836#define UVH_GR0_TLB_MMR_READ_DATA_LO \
724 UV2H_GR0_TLB_MMR_READ_DATA_LO) 837 (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \
838 (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \
839 UV3H_GR0_TLB_MMR_READ_DATA_LO))
725 840
726#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 841#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
727#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 842#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
@@ -730,6 +845,34 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
730#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL 845#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
731#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL 846#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
732 847
848#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
849#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
850#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
851#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
852#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
853#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
854
855#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
856#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
857#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
858#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
859#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
860#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
861
862#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
863#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
864#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
865#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
866#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
867#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
868
869#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
870#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
871#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
872#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
873#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
874#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
875
733union uvh_gr0_tlb_mmr_read_data_lo_u { 876union uvh_gr0_tlb_mmr_read_data_lo_u {
734 unsigned long v; 877 unsigned long v;
735 struct uvh_gr0_tlb_mmr_read_data_lo_s { 878 struct uvh_gr0_tlb_mmr_read_data_lo_s {
@@ -737,12 +880,32 @@ union uvh_gr0_tlb_mmr_read_data_lo_u {
737 unsigned long asid:24; /* RO */ 880 unsigned long asid:24; /* RO */
738 unsigned long valid:1; /* RO */ 881 unsigned long valid:1; /* RO */
739 } s; 882 } s;
883 struct uv1h_gr0_tlb_mmr_read_data_lo_s {
884 unsigned long vpn:39; /* RO */
885 unsigned long asid:24; /* RO */
886 unsigned long valid:1; /* RO */
887 } s1;
888 struct uvxh_gr0_tlb_mmr_read_data_lo_s {
889 unsigned long vpn:39; /* RO */
890 unsigned long asid:24; /* RO */
891 unsigned long valid:1; /* RO */
892 } sx;
893 struct uv2h_gr0_tlb_mmr_read_data_lo_s {
894 unsigned long vpn:39; /* RO */
895 unsigned long asid:24; /* RO */
896 unsigned long valid:1; /* RO */
897 } s2;
898 struct uv3h_gr0_tlb_mmr_read_data_lo_s {
899 unsigned long vpn:39; /* RO */
900 unsigned long asid:24; /* RO */
901 unsigned long valid:1; /* RO */
902 } s3;
740}; 903};
741 904
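Likewise for the read-data registers just defined: once a read operation has completed, the HI/LO values are decoded through the unions above. A minimal sketch, again assuming uv_read_local_mmr() and using only the common "s" layout (pfn/gaa/dirty/larger in HI, vpn/asid/valid in LO), which the UV1/UVX variants share.

	/* Sketch only: decode one TLB entry via the read-data layouts above. */
	static void uv_tlb_mmr_decode(unsigned long *pfn, unsigned long *vpn,
				      int *valid)
	{
		union uvh_gr0_tlb_mmr_read_data_hi_u hi;
		union uvh_gr0_tlb_mmr_read_data_lo_u lo;

		/* The UVH_* address macros pick the per-hub offset at run time */
		hi.v = uv_read_local_mmr(UVH_GR0_TLB_MMR_READ_DATA_HI);
		lo.v = uv_read_local_mmr(UVH_GR0_TLB_MMR_READ_DATA_LO);

		*pfn   = hi.s.pfn;	/* bits 0..40 */
		*vpn   = lo.s.vpn;	/* bits 0..38 */
		*valid = lo.s.valid;	/* bit 63 */
	}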
742/* ========================================================================= */ 905/* ========================================================================= */
743/* UVH_GR1_TLB_INT0_CONFIG */ 906/* UVH_GR1_TLB_INT0_CONFIG */
744/* ========================================================================= */ 907/* ========================================================================= */
745#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL 908#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
746 909
747#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 910#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
748#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 911#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
@@ -780,7 +943,7 @@ union uvh_gr1_tlb_int0_config_u {
780/* ========================================================================= */ 943/* ========================================================================= */
781/* UVH_GR1_TLB_INT1_CONFIG */ 944/* UVH_GR1_TLB_INT1_CONFIG */
782/* ========================================================================= */ 945/* ========================================================================= */
783#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL 946#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
784 947
785#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 948#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
786#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 949#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
@@ -820,9 +983,11 @@ union uvh_gr1_tlb_int1_config_u {
820/* ========================================================================= */ 983/* ========================================================================= */
821#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL 984#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL
822#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL 985#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL
823#define UVH_GR1_TLB_MMR_CONTROL (is_uv1_hub() ? \ 986#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL
824 UV1H_GR1_TLB_MMR_CONTROL : \ 987#define UVH_GR1_TLB_MMR_CONTROL \
825 UV2H_GR1_TLB_MMR_CONTROL) 988 (is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \
989 (is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \
990 UV3H_GR1_TLB_MMR_CONTROL))
826 991
827#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 992#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
828#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 993#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
@@ -860,6 +1025,21 @@ union uvh_gr1_tlb_int1_config_u {
860#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL 1025#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL
861#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL 1026#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
862 1027
1028#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
1029#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
1030#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
1031#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
1032#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
1033#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
1034#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
1035#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
1036#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
1037#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
1038#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
1039#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
1040#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
1041#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
1042
863#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 1043#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
864#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 1044#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
865#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 1045#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
@@ -879,6 +1059,23 @@ union uvh_gr1_tlb_int1_config_u {
879#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL 1059#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
880#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL 1060#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
881 1061
1062#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
1063#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
1064#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
1065#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
1066#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
1067#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
1068#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
1069#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
1070#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
1071#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
1072#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
1073#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
1074#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
1075#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
1076#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
1077#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
1078
882union uvh_gr1_tlb_mmr_control_u { 1079union uvh_gr1_tlb_mmr_control_u {
883 unsigned long v; 1080 unsigned long v;
884 struct uvh_gr1_tlb_mmr_control_s { 1081 struct uvh_gr1_tlb_mmr_control_s {
@@ -891,7 +1088,9 @@ union uvh_gr1_tlb_mmr_control_u {
891 unsigned long rsvd_21_29:9; 1088 unsigned long rsvd_21_29:9;
892 unsigned long mmr_write:1; /* WP */ 1089 unsigned long mmr_write:1; /* WP */
893 unsigned long mmr_read:1; /* WP */ 1090 unsigned long mmr_read:1; /* WP */
894 unsigned long rsvd_32_63:32; 1091 unsigned long rsvd_32_48:17;
1092 unsigned long rsvd_49_51:3;
1093 unsigned long rsvd_52_63:12;
895 } s; 1094 } s;
896 struct uv1h_gr1_tlb_mmr_control_s { 1095 struct uv1h_gr1_tlb_mmr_control_s {
897 unsigned long index:12; /* RW */ 1096 unsigned long index:12; /* RW */
@@ -915,6 +1114,23 @@ union uvh_gr1_tlb_mmr_control_u {
915 unsigned long mmr_inj_tlblruv:1; /* RW */ 1114 unsigned long mmr_inj_tlblruv:1; /* RW */
916 unsigned long rsvd_61_63:3; 1115 unsigned long rsvd_61_63:3;
917 } s1; 1116 } s1;
1117 struct uvxh_gr1_tlb_mmr_control_s {
1118 unsigned long index:12; /* RW */
1119 unsigned long mem_sel:2; /* RW */
1120 unsigned long rsvd_14_15:2;
1121 unsigned long auto_valid_en:1; /* RW */
1122 unsigned long rsvd_17_19:3;
1123 unsigned long mmr_hash_index_en:1; /* RW */
1124 unsigned long rsvd_21_29:9;
1125 unsigned long mmr_write:1; /* WP */
1126 unsigned long mmr_read:1; /* WP */
1127 unsigned long mmr_op_done:1; /* RW */
1128 unsigned long rsvd_33_47:15;
1129 unsigned long rsvd_48:1;
1130 unsigned long rsvd_49_51:3;
1131 unsigned long rsvd_52:1;
1132 unsigned long rsvd_53_63:11;
1133 } sx;
918 struct uv2h_gr1_tlb_mmr_control_s { 1134 struct uv2h_gr1_tlb_mmr_control_s {
919 unsigned long index:12; /* RW */ 1135 unsigned long index:12; /* RW */
920 unsigned long mem_sel:2; /* RW */ 1136 unsigned long mem_sel:2; /* RW */
@@ -932,6 +1148,24 @@ union uvh_gr1_tlb_mmr_control_u {
932 unsigned long mmr_inj_tlbram:1; /* RW */ 1148 unsigned long mmr_inj_tlbram:1; /* RW */
933 unsigned long rsvd_53_63:11; 1149 unsigned long rsvd_53_63:11;
934 } s2; 1150 } s2;
1151 struct uv3h_gr1_tlb_mmr_control_s {
1152 unsigned long index:12; /* RW */
1153 unsigned long mem_sel:2; /* RW */
1154 unsigned long rsvd_14_15:2;
1155 unsigned long auto_valid_en:1; /* RW */
1156 unsigned long rsvd_17_19:3;
1157 unsigned long mmr_hash_index_en:1; /* RW */
1158 unsigned long ecc_sel:1; /* RW */
1159 unsigned long rsvd_22_29:8;
1160 unsigned long mmr_write:1; /* WP */
1161 unsigned long mmr_read:1; /* WP */
1162 unsigned long mmr_op_done:1; /* RW */
1163 unsigned long rsvd_33_47:15;
1164 unsigned long undef_48:1; /* Undefined */
1165 unsigned long rsvd_49_51:3;
1166 unsigned long undef_52:1; /* Undefined */
1167 unsigned long rsvd_53_63:11;
1168 } s3;
935}; 1169};
936 1170
937/* ========================================================================= */ 1171/* ========================================================================= */
@@ -939,9 +1173,11 @@ union uvh_gr1_tlb_mmr_control_u {
939/* ========================================================================= */ 1173/* ========================================================================= */
940#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL 1174#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL
941#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL 1175#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
942#define UVH_GR1_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \ 1176#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
943 UV1H_GR1_TLB_MMR_READ_DATA_HI : \ 1177#define UVH_GR1_TLB_MMR_READ_DATA_HI \
944 UV2H_GR1_TLB_MMR_READ_DATA_HI) 1178 (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \
1179 (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \
1180 UV3H_GR1_TLB_MMR_READ_DATA_HI))
945 1181
946#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 1182#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
947#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 1183#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -952,6 +1188,46 @@ union uvh_gr1_tlb_mmr_control_u {
952#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL 1188#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
953#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL 1189#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
954 1190
1191#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
1192#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
1193#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
1194#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
1195#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
1196#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
1197#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
1198#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
1199
1200#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
1201#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
1202#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
1203#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
1204#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
1205#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
1206#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
1207#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
1208
1209#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
1210#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
1211#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
1212#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
1213#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
1214#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
1215#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
1216#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
1217
1218#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
1219#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
1220#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
1221#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
1222#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
1223#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
1224#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
1225#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
1226#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
1227#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
1228#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
1229#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
1230
955union uvh_gr1_tlb_mmr_read_data_hi_u { 1231union uvh_gr1_tlb_mmr_read_data_hi_u {
956 unsigned long v; 1232 unsigned long v;
957 struct uvh_gr1_tlb_mmr_read_data_hi_s { 1233 struct uvh_gr1_tlb_mmr_read_data_hi_s {
@@ -961,6 +1237,36 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
961 unsigned long larger:1; /* RO */ 1237 unsigned long larger:1; /* RO */
962 unsigned long rsvd_45_63:19; 1238 unsigned long rsvd_45_63:19;
963 } s; 1239 } s;
1240 struct uv1h_gr1_tlb_mmr_read_data_hi_s {
1241 unsigned long pfn:41; /* RO */
1242 unsigned long gaa:2; /* RO */
1243 unsigned long dirty:1; /* RO */
1244 unsigned long larger:1; /* RO */
1245 unsigned long rsvd_45_63:19;
1246 } s1;
1247 struct uvxh_gr1_tlb_mmr_read_data_hi_s {
1248 unsigned long pfn:41; /* RO */
1249 unsigned long gaa:2; /* RO */
1250 unsigned long dirty:1; /* RO */
1251 unsigned long larger:1; /* RO */
1252 unsigned long rsvd_45_63:19;
1253 } sx;
1254 struct uv2h_gr1_tlb_mmr_read_data_hi_s {
1255 unsigned long pfn:41; /* RO */
1256 unsigned long gaa:2; /* RO */
1257 unsigned long dirty:1; /* RO */
1258 unsigned long larger:1; /* RO */
1259 unsigned long rsvd_45_63:19;
1260 } s2;
1261 struct uv3h_gr1_tlb_mmr_read_data_hi_s {
1262 unsigned long pfn:41; /* RO */
1263 unsigned long gaa:2; /* RO */
1264 unsigned long dirty:1; /* RO */
1265 unsigned long larger:1; /* RO */
1266 unsigned long aa_ext:1; /* RO */
1267 unsigned long undef_46_54:9; /* Undefined */
1268 unsigned long way_ecc:9; /* RO */
1269 } s3;
964}; 1270};
965 1271
966/* ========================================================================= */ 1272/* ========================================================================= */
@@ -968,9 +1274,11 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
968/* ========================================================================= */ 1274/* ========================================================================= */
969#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL 1275#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL
970#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL 1276#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
971#define UVH_GR1_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \ 1277#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
972 UV1H_GR1_TLB_MMR_READ_DATA_LO : \ 1278#define UVH_GR1_TLB_MMR_READ_DATA_LO \
973 UV2H_GR1_TLB_MMR_READ_DATA_LO) 1279 (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \
1280 (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \
1281 UV3H_GR1_TLB_MMR_READ_DATA_LO))
974 1282
975#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 1283#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
976#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 1284#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
@@ -979,6 +1287,34 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
979#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL 1287#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
980#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL 1288#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
981 1289
1290#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
1291#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
1292#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
1293#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
1294#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
1295#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
1296
1297#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
1298#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
1299#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
1300#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
1301#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
1302#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
1303
1304#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
1305#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
1306#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
1307#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
1308#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
1309#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
1310
1311#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
1312#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
1313#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
1314#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
1315#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
1316#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
1317
982union uvh_gr1_tlb_mmr_read_data_lo_u { 1318union uvh_gr1_tlb_mmr_read_data_lo_u {
983 unsigned long v; 1319 unsigned long v;
984 struct uvh_gr1_tlb_mmr_read_data_lo_s { 1320 struct uvh_gr1_tlb_mmr_read_data_lo_s {
@@ -986,12 +1322,32 @@ union uvh_gr1_tlb_mmr_read_data_lo_u {
986 unsigned long asid:24; /* RO */ 1322 unsigned long asid:24; /* RO */
987 unsigned long valid:1; /* RO */ 1323 unsigned long valid:1; /* RO */
988 } s; 1324 } s;
1325 struct uv1h_gr1_tlb_mmr_read_data_lo_s {
1326 unsigned long vpn:39; /* RO */
1327 unsigned long asid:24; /* RO */
1328 unsigned long valid:1; /* RO */
1329 } s1;
1330 struct uvxh_gr1_tlb_mmr_read_data_lo_s {
1331 unsigned long vpn:39; /* RO */
1332 unsigned long asid:24; /* RO */
1333 unsigned long valid:1; /* RO */
1334 } sx;
1335 struct uv2h_gr1_tlb_mmr_read_data_lo_s {
1336 unsigned long vpn:39; /* RO */
1337 unsigned long asid:24; /* RO */
1338 unsigned long valid:1; /* RO */
1339 } s2;
1340 struct uv3h_gr1_tlb_mmr_read_data_lo_s {
1341 unsigned long vpn:39; /* RO */
1342 unsigned long asid:24; /* RO */
1343 unsigned long valid:1; /* RO */
1344 } s3;
989}; 1345};
990 1346
991/* ========================================================================= */ 1347/* ========================================================================= */
992/* UVH_INT_CMPB */ 1348/* UVH_INT_CMPB */
993/* ========================================================================= */ 1349/* ========================================================================= */
994#define UVH_INT_CMPB 0x22080UL 1350#define UVH_INT_CMPB 0x22080UL
995 1351
996#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 1352#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
997#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL 1353#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
@@ -1007,10 +1363,13 @@ union uvh_int_cmpb_u {
1007/* ========================================================================= */ 1363/* ========================================================================= */
1008/* UVH_INT_CMPC */ 1364/* UVH_INT_CMPC */
1009/* ========================================================================= */ 1365/* ========================================================================= */
1010#define UVH_INT_CMPC 0x22100UL 1366#define UVH_INT_CMPC 0x22100UL
1367
1368#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0
1369#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
1011 1370
1012#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 1371#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0
1013#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL 1372#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL
1014 1373
1015union uvh_int_cmpc_u { 1374union uvh_int_cmpc_u {
1016 unsigned long v; 1375 unsigned long v;
@@ -1023,10 +1382,13 @@ union uvh_int_cmpc_u {
1023/* ========================================================================= */ 1382/* ========================================================================= */
1024/* UVH_INT_CMPD */ 1383/* UVH_INT_CMPD */
1025/* ========================================================================= */ 1384/* ========================================================================= */
1026#define UVH_INT_CMPD 0x22180UL 1385#define UVH_INT_CMPD 0x22180UL
1027 1386
1028#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 1387#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0
1029#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL 1388#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
1389
1390#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0
1391#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL
1030 1392
1031union uvh_int_cmpd_u { 1393union uvh_int_cmpd_u {
1032 unsigned long v; 1394 unsigned long v;
@@ -1039,8 +1401,8 @@ union uvh_int_cmpd_u {
1039/* ========================================================================= */ 1401/* ========================================================================= */
1040/* UVH_IPI_INT */ 1402/* UVH_IPI_INT */
1041/* ========================================================================= */ 1403/* ========================================================================= */
1042#define UVH_IPI_INT 0x60500UL 1404#define UVH_IPI_INT 0x60500UL
1043#define UVH_IPI_INT_32 0x348 1405#define UVH_IPI_INT_32 0x348
1044 1406
1045#define UVH_IPI_INT_VECTOR_SHFT 0 1407#define UVH_IPI_INT_VECTOR_SHFT 0
1046#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 1408#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8
@@ -1069,8 +1431,8 @@ union uvh_ipi_int_u {
1069/* ========================================================================= */ 1431/* ========================================================================= */
1070/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ 1432/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */
1071/* ========================================================================= */ 1433/* ========================================================================= */
1072#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL 1434#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
1073#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 1435#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
1074 1436
1075#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 1437#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
1076#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 1438#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
@@ -1091,8 +1453,8 @@ union uvh_lb_bau_intd_payload_queue_first_u {
1091/* ========================================================================= */ 1453/* ========================================================================= */
1092/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ 1454/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */
1093/* ========================================================================= */ 1455/* ========================================================================= */
1094#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL 1456#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
1095#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 1457#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
1096 1458
1097#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 1459#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
1098#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL 1460#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -1109,8 +1471,8 @@ union uvh_lb_bau_intd_payload_queue_last_u {
1109/* ========================================================================= */ 1471/* ========================================================================= */
1110/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ 1472/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */
1111/* ========================================================================= */ 1473/* ========================================================================= */
1112#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL 1474#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
1113#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 1475#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
1114 1476
1115#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 1477#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
1116#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL 1478#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
@@ -1127,8 +1489,8 @@ union uvh_lb_bau_intd_payload_queue_tail_u {
1127/* ========================================================================= */ 1489/* ========================================================================= */
1128/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ 1490/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */
1129/* ========================================================================= */ 1491/* ========================================================================= */
1130#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL 1492#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
1131#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 1493#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
1132 1494
1133#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 1495#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
1134#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 1496#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
@@ -1189,14 +1551,21 @@ union uvh_lb_bau_intd_software_acknowledge_u {
1189/* ========================================================================= */ 1551/* ========================================================================= */
1190/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ 1552/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */
1191/* ========================================================================= */ 1553/* ========================================================================= */
1192#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL 1554#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
1193#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 1555#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
1556
1194 1557
1195/* ========================================================================= */ 1558/* ========================================================================= */
1196/* UVH_LB_BAU_MISC_CONTROL */ 1559/* UVH_LB_BAU_MISC_CONTROL */
1197/* ========================================================================= */ 1560/* ========================================================================= */
1198#define UVH_LB_BAU_MISC_CONTROL 0x320170UL 1561#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
1199#define UVH_LB_BAU_MISC_CONTROL_32 0xa10 1562#define UV1H_LB_BAU_MISC_CONTROL 0x320170UL
1563#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL
1564#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL
1565#define UVH_LB_BAU_MISC_CONTROL_32 0xa10
1566#define UV1H_LB_BAU_MISC_CONTROL_32 0x320170UL
1567#define UV2H_LB_BAU_MISC_CONTROL_32 0x320170UL
1568#define UV3H_LB_BAU_MISC_CONTROL_32 0x320170UL
1200 1569
1201#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 1570#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
1202#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 1571#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
@@ -1213,6 +1582,7 @@ union uvh_lb_bau_intd_software_acknowledge_u {
1213#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 1582#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
1214#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 1583#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
1215#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 1584#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
1585#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
1216#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL 1586#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
1217#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL 1587#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
1218#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL 1588#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
@@ -1228,6 +1598,7 @@ union uvh_lb_bau_intd_software_acknowledge_u {
1228#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL 1598#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
1229#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL 1599#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
1230#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL 1600#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
1601#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
1231 1602
1232#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 1603#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
1233#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 1604#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
@@ -1262,6 +1633,53 @@ union uvh_lb_bau_intd_software_acknowledge_u {
1262#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL 1633#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
1263#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL 1634#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
1264 1635
1636#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
1637#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
1638#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
1639#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
1640#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
1641#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
1642#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
1643#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
1644#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
1645#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
1646#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
1647#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
1648#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
1649#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
1650#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
1651#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
1652#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
1653#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
1654#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
1655#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
1656#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
1657#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
1658#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
1659#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
1660#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
1661#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
1662#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
1663#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
1664#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
1665#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
1666#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
1667#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
1668#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
1669#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
1670#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
1671#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
1672#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
1673#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
1674#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
1675#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
1676#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
1677#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
1678#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
1679#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
1680#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
1681#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
1682
1265#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 1683#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
1266#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 1684#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
1267#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 1685#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
@@ -1309,6 +1727,59 @@ union uvh_lb_bau_intd_software_acknowledge_u {
1309#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL 1727#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
1310#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL 1728#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
1311 1729
1730#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
1731#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
1732#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
1733#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
1734#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
1735#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
1736#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
1737#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
1738#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
1739#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
1740#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
1741#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
1742#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
1743#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
1744#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
1745#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
1746#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
1747#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
1748#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
1749#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
1750#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
1751#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
1752#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
1753#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37
1754#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
1755#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
1756#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
1757#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
1758#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
1759#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
1760#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
1761#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
1762#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
1763#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
1764#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
1765#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
1766#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
1767#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
1768#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
1769#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
1770#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
1771#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
1772#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
1773#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
1774#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
1775#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
1776#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
1777#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
1778#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
1779#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL
1780#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
1781#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
1782
1312union uvh_lb_bau_misc_control_u { 1783union uvh_lb_bau_misc_control_u {
1313 unsigned long v; 1784 unsigned long v;
1314 struct uvh_lb_bau_misc_control_s { 1785 struct uvh_lb_bau_misc_control_s {
@@ -1327,7 +1798,8 @@ union uvh_lb_bau_misc_control_u {
1327 unsigned long programmed_initial_priority:3; /* RW */ 1798 unsigned long programmed_initial_priority:3; /* RW */
1328 unsigned long use_incoming_priority:1; /* RW */ 1799 unsigned long use_incoming_priority:1; /* RW */
1329 unsigned long enable_programmed_initial_priority:1;/* RW */ 1800 unsigned long enable_programmed_initial_priority:1;/* RW */
1330 unsigned long rsvd_29_63:35; 1801 unsigned long rsvd_29_47:19;
1802 unsigned long fun:16; /* RW */
1331 } s; 1803 } s;
1332 struct uv1h_lb_bau_misc_control_s { 1804 struct uv1h_lb_bau_misc_control_s {
1333 unsigned long rejection_delay:8; /* RW */ 1805 unsigned long rejection_delay:8; /* RW */
@@ -1348,6 +1820,32 @@ union uvh_lb_bau_misc_control_u {
1348 unsigned long rsvd_29_47:19; 1820 unsigned long rsvd_29_47:19;
1349 unsigned long fun:16; /* RW */ 1821 unsigned long fun:16; /* RW */
1350 } s1; 1822 } s1;
1823 struct uvxh_lb_bau_misc_control_s {
1824 unsigned long rejection_delay:8; /* RW */
1825 unsigned long apic_mode:1; /* RW */
1826 unsigned long force_broadcast:1; /* RW */
1827 unsigned long force_lock_nop:1; /* RW */
1828 unsigned long qpi_agent_presence_vector:3; /* RW */
1829 unsigned long descriptor_fetch_mode:1; /* RW */
1830 unsigned long enable_intd_soft_ack_mode:1; /* RW */
1831 unsigned long intd_soft_ack_timeout_period:4; /* RW */
1832 unsigned long enable_dual_mapping_mode:1; /* RW */
1833 unsigned long vga_io_port_decode_enable:1; /* RW */
1834 unsigned long vga_io_port_16_bit_decode:1; /* RW */
1835 unsigned long suppress_dest_registration:1; /* RW */
1836 unsigned long programmed_initial_priority:3; /* RW */
1837 unsigned long use_incoming_priority:1; /* RW */
1838 unsigned long enable_programmed_initial_priority:1;/* RW */
1839 unsigned long enable_automatic_apic_mode_selection:1;/* RW */
1840 unsigned long apic_mode_status:1; /* RO */
1841 unsigned long suppress_interrupts_to_self:1; /* RW */
1842 unsigned long enable_lock_based_system_flush:1;/* RW */
1843 unsigned long enable_extended_sb_status:1; /* RW */
1844 unsigned long suppress_int_prio_udt_to_self:1;/* RW */
1845 unsigned long use_legacy_descriptor_formats:1;/* RW */
1846 unsigned long rsvd_36_47:12;
1847 unsigned long fun:16; /* RW */
1848 } sx;
1351 struct uv2h_lb_bau_misc_control_s { 1849 struct uv2h_lb_bau_misc_control_s {
1352 unsigned long rejection_delay:8; /* RW */ 1850 unsigned long rejection_delay:8; /* RW */
1353 unsigned long apic_mode:1; /* RW */ 1851 unsigned long apic_mode:1; /* RW */
@@ -1374,13 +1872,42 @@ union uvh_lb_bau_misc_control_u {
1374 unsigned long rsvd_36_47:12; 1872 unsigned long rsvd_36_47:12;
1375 unsigned long fun:16; /* RW */ 1873 unsigned long fun:16; /* RW */
1376 } s2; 1874 } s2;
1875 struct uv3h_lb_bau_misc_control_s {
1876 unsigned long rejection_delay:8; /* RW */
1877 unsigned long apic_mode:1; /* RW */
1878 unsigned long force_broadcast:1; /* RW */
1879 unsigned long force_lock_nop:1; /* RW */
1880 unsigned long qpi_agent_presence_vector:3; /* RW */
1881 unsigned long descriptor_fetch_mode:1; /* RW */
1882 unsigned long enable_intd_soft_ack_mode:1; /* RW */
1883 unsigned long intd_soft_ack_timeout_period:4; /* RW */
1884 unsigned long enable_dual_mapping_mode:1; /* RW */
1885 unsigned long vga_io_port_decode_enable:1; /* RW */
1886 unsigned long vga_io_port_16_bit_decode:1; /* RW */
1887 unsigned long suppress_dest_registration:1; /* RW */
1888 unsigned long programmed_initial_priority:3; /* RW */
1889 unsigned long use_incoming_priority:1; /* RW */
1890 unsigned long enable_programmed_initial_priority:1;/* RW */
1891 unsigned long enable_automatic_apic_mode_selection:1;/* RW */
1892 unsigned long apic_mode_status:1; /* RO */
1893 unsigned long suppress_interrupts_to_self:1; /* RW */
1894 unsigned long enable_lock_based_system_flush:1;/* RW */
1895 unsigned long enable_extended_sb_status:1; /* RW */
1896 unsigned long suppress_int_prio_udt_to_self:1;/* RW */
1897 unsigned long use_legacy_descriptor_formats:1;/* RW */
1898 unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */
1899 unsigned long enable_intd_prefetch_hint:1; /* RW */
1900 unsigned long thread_kill_timebase:8; /* RW */
1901 unsigned long rsvd_46_47:2;
1902 unsigned long fun:16; /* RW */
1903 } s3;
1377}; 1904};
1378 1905
1379/* ========================================================================= */ 1906/* ========================================================================= */
1380/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ 1907/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */
1381/* ========================================================================= */ 1908/* ========================================================================= */
1382#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL 1909#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
1383#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 1910#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
1384 1911
1385#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 1912#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
1386#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 1913#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
@@ -1402,8 +1929,8 @@ union uvh_lb_bau_sb_activation_control_u {
1402/* ========================================================================= */ 1929/* ========================================================================= */
1403/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ 1930/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */
1404/* ========================================================================= */ 1931/* ========================================================================= */
1405#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL 1932#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
1406#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 1933#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
1407 1934
1408#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 1935#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
1409#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL 1936#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
@@ -1418,8 +1945,8 @@ union uvh_lb_bau_sb_activation_status_0_u {
1418/* ========================================================================= */ 1945/* ========================================================================= */
1419/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ 1946/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */
1420/* ========================================================================= */ 1947/* ========================================================================= */
1421#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL 1948#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
1422#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 1949#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
1423 1950
1424#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 1951#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
1425#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL 1952#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
@@ -1434,8 +1961,8 @@ union uvh_lb_bau_sb_activation_status_1_u {
1434/* ========================================================================= */ 1961/* ========================================================================= */
1435/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ 1962/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */
1436/* ========================================================================= */ 1963/* ========================================================================= */
1437#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL 1964#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
1438#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 1965#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
1439 1966
1440#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 1967#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
1441#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 1968#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
@@ -1456,7 +1983,10 @@ union uvh_lb_bau_sb_descriptor_base_u {
1456/* ========================================================================= */ 1983/* ========================================================================= */
1457/* UVH_NODE_ID */ 1984/* UVH_NODE_ID */
1458/* ========================================================================= */ 1985/* ========================================================================= */
1459#define UVH_NODE_ID 0x0UL 1986#define UVH_NODE_ID 0x0UL
1987#define UV1H_NODE_ID 0x0UL
1988#define UV2H_NODE_ID 0x0UL
1989#define UV3H_NODE_ID 0x0UL
1460 1990
1461#define UVH_NODE_ID_FORCE1_SHFT 0 1991#define UVH_NODE_ID_FORCE1_SHFT 0
1462#define UVH_NODE_ID_MANUFACTURER_SHFT 1 1992#define UVH_NODE_ID_MANUFACTURER_SHFT 1
@@ -1484,6 +2014,21 @@ union uvh_lb_bau_sb_descriptor_base_u {
1484#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL 2014#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL
1485#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL 2015#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL
1486 2016
2017#define UVXH_NODE_ID_FORCE1_SHFT 0
2018#define UVXH_NODE_ID_MANUFACTURER_SHFT 1
2019#define UVXH_NODE_ID_PART_NUMBER_SHFT 12
2020#define UVXH_NODE_ID_REVISION_SHFT 28
2021#define UVXH_NODE_ID_NODE_ID_SHFT 32
2022#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50
2023#define UVXH_NODE_ID_NI_PORT_SHFT 57
2024#define UVXH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
2025#define UVXH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
2026#define UVXH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
2027#define UVXH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
2028#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
2029#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
2030#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
2031
1487#define UV2H_NODE_ID_FORCE1_SHFT 0 2032#define UV2H_NODE_ID_FORCE1_SHFT 0
1488#define UV2H_NODE_ID_MANUFACTURER_SHFT 1 2033#define UV2H_NODE_ID_MANUFACTURER_SHFT 1
1489#define UV2H_NODE_ID_PART_NUMBER_SHFT 12 2034#define UV2H_NODE_ID_PART_NUMBER_SHFT 12
@@ -1499,6 +2044,25 @@ union uvh_lb_bau_sb_descriptor_base_u {
1499#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL 2044#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
1500#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL 2045#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
1501 2046
2047#define UV3H_NODE_ID_FORCE1_SHFT 0
2048#define UV3H_NODE_ID_MANUFACTURER_SHFT 1
2049#define UV3H_NODE_ID_PART_NUMBER_SHFT 12
2050#define UV3H_NODE_ID_REVISION_SHFT 28
2051#define UV3H_NODE_ID_NODE_ID_SHFT 32
2052#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48
2053#define UV3H_NODE_ID_RESERVED_2_SHFT 49
2054#define UV3H_NODE_ID_NODES_PER_BIT_SHFT 50
2055#define UV3H_NODE_ID_NI_PORT_SHFT 57
2056#define UV3H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
2057#define UV3H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
2058#define UV3H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
2059#define UV3H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
2060#define UV3H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
2061#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
2062#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
2063#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
2064#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
2065
1502union uvh_node_id_u { 2066union uvh_node_id_u {
1503 unsigned long v; 2067 unsigned long v;
1504 struct uvh_node_id_s { 2068 struct uvh_node_id_s {
@@ -1521,6 +2085,17 @@ union uvh_node_id_u {
1521 unsigned long ni_port:4; /* RO */ 2085 unsigned long ni_port:4; /* RO */
1522 unsigned long rsvd_60_63:4; 2086 unsigned long rsvd_60_63:4;
1523 } s1; 2087 } s1;
2088 struct uvxh_node_id_s {
2089 unsigned long force1:1; /* RO */
2090 unsigned long manufacturer:11; /* RO */
2091 unsigned long part_number:16; /* RO */
2092 unsigned long revision:4; /* RO */
2093 unsigned long node_id:15; /* RW */
2094 unsigned long rsvd_47_49:3;
2095 unsigned long nodes_per_bit:7; /* RO */
2096 unsigned long ni_port:5; /* RO */
2097 unsigned long rsvd_62_63:2;
2098 } sx;
1524 struct uv2h_node_id_s { 2099 struct uv2h_node_id_s {
1525 unsigned long force1:1; /* RO */ 2100 unsigned long force1:1; /* RO */
1526 unsigned long manufacturer:11; /* RO */ 2101 unsigned long manufacturer:11; /* RO */
@@ -1532,13 +2107,26 @@ union uvh_node_id_u {
1532 unsigned long ni_port:5; /* RO */ 2107 unsigned long ni_port:5; /* RO */
1533 unsigned long rsvd_62_63:2; 2108 unsigned long rsvd_62_63:2;
1534 } s2; 2109 } s2;
2110 struct uv3h_node_id_s {
2111 unsigned long force1:1; /* RO */
2112 unsigned long manufacturer:11; /* RO */
2113 unsigned long part_number:16; /* RO */
2114 unsigned long revision:4; /* RO */
2115 unsigned long node_id:15; /* RW */
2116 unsigned long rsvd_47:1;
2117 unsigned long router_select:1; /* RO */
2118 unsigned long rsvd_49:1;
2119 unsigned long nodes_per_bit:7; /* RO */
2120 unsigned long ni_port:5; /* RO */
2121 unsigned long rsvd_62_63:2;
2122 } s3;
1535}; 2123};
1536 2124
1537/* ========================================================================= */ 2125/* ========================================================================= */
1538/* UVH_NODE_PRESENT_TABLE */ 2126/* UVH_NODE_PRESENT_TABLE */
1539/* ========================================================================= */ 2127/* ========================================================================= */
1540#define UVH_NODE_PRESENT_TABLE 0x1400UL 2128#define UVH_NODE_PRESENT_TABLE 0x1400UL
1541#define UVH_NODE_PRESENT_TABLE_DEPTH 16 2129#define UVH_NODE_PRESENT_TABLE_DEPTH 16
1542 2130
1543#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 2131#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
1544#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL 2132#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
@@ -1553,7 +2141,7 @@ union uvh_node_present_table_u {
1553/* ========================================================================= */ 2141/* ========================================================================= */
1554/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */ 2142/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
1555/* ========================================================================= */ 2143/* ========================================================================= */
1556#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL 2144#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
1557 2145
1558#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 2146#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
1559#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 2147#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
@@ -1577,7 +2165,7 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
1577/* ========================================================================= */ 2165/* ========================================================================= */
1578/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ 2166/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */
1579/* ========================================================================= */ 2167/* ========================================================================= */
1580#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL 2168#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
1581 2169
1582#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 2170#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
1583#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 2171#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
@@ -1601,7 +2189,7 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
1601/* ========================================================================= */ 2189/* ========================================================================= */
1602/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ 2190/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */
1603/* ========================================================================= */ 2191/* ========================================================================= */
1604#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL 2192#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
1605 2193
1606#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 2194#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
1607#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 2195#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
@@ -1625,7 +2213,7 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
1625/* ========================================================================= */ 2213/* ========================================================================= */
1626/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ 2214/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
1627/* ========================================================================= */ 2215/* ========================================================================= */
1628#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL 2216#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
1629 2217
1630#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 2218#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
1631#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL 2219#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1642,7 +2230,7 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
1642/* ========================================================================= */ 2230/* ========================================================================= */
1643/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ 2231/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */
1644/* ========================================================================= */ 2232/* ========================================================================= */
1645#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL 2233#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
1646 2234
1647#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 2235#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
1648#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL 2236#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1659,7 +2247,7 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
1659/* ========================================================================= */ 2247/* ========================================================================= */
1660/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ 2248/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */
1661/* ========================================================================= */ 2249/* ========================================================================= */
1662#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL 2250#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
1663 2251
1664#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 2252#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
1665#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL 2253#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1676,7 +2264,10 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
1676/* ========================================================================= */ 2264/* ========================================================================= */
1677/* UVH_RH_GAM_CONFIG_MMR */ 2265/* UVH_RH_GAM_CONFIG_MMR */
1678/* ========================================================================= */ 2266/* ========================================================================= */
1679#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL 2267#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL
2268#define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL
2269#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL
2270#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL
1680 2271
1681#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 2272#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
1682#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 2273#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
@@ -1690,11 +2281,21 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
1690#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL 2281#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
1691#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL 2282#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL
1692 2283
2284#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
2285#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
2286#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
2287#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
2288
1693#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 2289#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
1694#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 2290#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
1695#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL 2291#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
1696#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL 2292#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
1697 2293
2294#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
2295#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
2296#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
2297#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
2298
1698union uvh_rh_gam_config_mmr_u { 2299union uvh_rh_gam_config_mmr_u {
1699 unsigned long v; 2300 unsigned long v;
1700 struct uvh_rh_gam_config_mmr_s { 2301 struct uvh_rh_gam_config_mmr_s {
@@ -1709,20 +2310,37 @@ union uvh_rh_gam_config_mmr_u {
1709 unsigned long mmiol_cfg:1; /* RW */ 2310 unsigned long mmiol_cfg:1; /* RW */
1710 unsigned long rsvd_13_63:51; 2311 unsigned long rsvd_13_63:51;
1711 } s1; 2312 } s1;
2313 struct uvxh_rh_gam_config_mmr_s {
2314 unsigned long m_skt:6; /* RW */
2315 unsigned long n_skt:4; /* RW */
2316 unsigned long rsvd_10_63:54;
2317 } sx;
1712 struct uv2h_rh_gam_config_mmr_s { 2318 struct uv2h_rh_gam_config_mmr_s {
1713 unsigned long m_skt:6; /* RW */ 2319 unsigned long m_skt:6; /* RW */
1714 unsigned long n_skt:4; /* RW */ 2320 unsigned long n_skt:4; /* RW */
1715 unsigned long rsvd_10_63:54; 2321 unsigned long rsvd_10_63:54;
1716 } s2; 2322 } s2;
2323 struct uv3h_rh_gam_config_mmr_s {
2324 unsigned long m_skt:6; /* RW */
2325 unsigned long n_skt:4; /* RW */
2326 unsigned long rsvd_10_63:54;
2327 } s3;
1717}; 2328};
1718 2329
1719/* ========================================================================= */ 2330/* ========================================================================= */
1720/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ 2331/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
1721/* ========================================================================= */ 2332/* ========================================================================= */
1722#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL 2333#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
2334#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
2335#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
2336#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
1723 2337
1724#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 2338#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
2339#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
2340#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
1725#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL 2341#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
2342#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
2343#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
1726 2344
1727#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 2345#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
1728#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 2346#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
@@ -1733,6 +2351,13 @@ union uvh_rh_gam_config_mmr_u {
1733#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL 2351#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
1734#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL 2352#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
1735 2353
2354#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
2355#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
2356#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
2357#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
2358#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
2359#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
2360
1736#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 2361#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
1737#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 2362#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
1738#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 2363#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
@@ -1740,12 +2365,23 @@ union uvh_rh_gam_config_mmr_u {
1740#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL 2365#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
1741#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL 2366#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
1742 2367
2368#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
2369#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
2370#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT 62
2371#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
2372#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
2373#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
2374#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL
2375#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
2376
1743union uvh_rh_gam_gru_overlay_config_mmr_u { 2377union uvh_rh_gam_gru_overlay_config_mmr_u {
1744 unsigned long v; 2378 unsigned long v;
1745 struct uvh_rh_gam_gru_overlay_config_mmr_s { 2379 struct uvh_rh_gam_gru_overlay_config_mmr_s {
1746 unsigned long rsvd_0_27:28; 2380 unsigned long rsvd_0_27:28;
1747 unsigned long base:18; /* RW */ 2381 unsigned long base:18; /* RW */
1748 unsigned long rsvd_46_62:17; 2382 unsigned long rsvd_46_51:6;
2383 unsigned long n_gru:4; /* RW */
2384 unsigned long rsvd_56_62:7;
1749 unsigned long enable:1; /* RW */ 2385 unsigned long enable:1; /* RW */
1750 } s; 2386 } s;
1751 struct uv1h_rh_gam_gru_overlay_config_mmr_s { 2387 struct uv1h_rh_gam_gru_overlay_config_mmr_s {
@@ -1758,6 +2394,14 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
1758 unsigned long rsvd_56_62:7; 2394 unsigned long rsvd_56_62:7;
1759 unsigned long enable:1; /* RW */ 2395 unsigned long enable:1; /* RW */
1760 } s1; 2396 } s1;
2397 struct uvxh_rh_gam_gru_overlay_config_mmr_s {
2398 unsigned long rsvd_0_27:28;
2399 unsigned long base:18; /* RW */
2400 unsigned long rsvd_46_51:6;
2401 unsigned long n_gru:4; /* RW */
2402 unsigned long rsvd_56_62:7;
2403 unsigned long enable:1; /* RW */
2404 } sx;
1761 struct uv2h_rh_gam_gru_overlay_config_mmr_s { 2405 struct uv2h_rh_gam_gru_overlay_config_mmr_s {
1762 unsigned long rsvd_0_27:28; 2406 unsigned long rsvd_0_27:28;
1763 unsigned long base:18; /* RW */ 2407 unsigned long base:18; /* RW */
@@ -1766,12 +2410,22 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
1766 unsigned long rsvd_56_62:7; 2410 unsigned long rsvd_56_62:7;
1767 unsigned long enable:1; /* RW */ 2411 unsigned long enable:1; /* RW */
1768 } s2; 2412 } s2;
2413 struct uv3h_rh_gam_gru_overlay_config_mmr_s {
2414 unsigned long rsvd_0_27:28;
2415 unsigned long base:18; /* RW */
2416 unsigned long rsvd_46_51:6;
2417 unsigned long n_gru:4; /* RW */
2418 unsigned long rsvd_56_61:6;
2419 unsigned long mode:1; /* RW */
2420 unsigned long enable:1; /* RW */
2421 } s3;
1769}; 2422};
1770 2423
1771/* ========================================================================= */ 2424/* ========================================================================= */
1772/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ 2425/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */
1773/* ========================================================================= */ 2426/* ========================================================================= */
1774#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL 2427#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
2428#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
1775 2429
1776#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 2430#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
1777#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 2431#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
@@ -1814,10 +2468,15 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
1814/* ========================================================================= */ 2468/* ========================================================================= */
1815/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ 2469/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */
1816/* ========================================================================= */ 2470/* ========================================================================= */
1817#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL 2471#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
2472#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
2473#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
2474#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
1818 2475
1819#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 2476#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
2477#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
1820#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL 2478#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
2479#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
1821 2480
1822#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 2481#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
1823#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 2482#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46
@@ -1826,11 +2485,21 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
1826#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL 2485#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL
1827#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL 2486#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
1828 2487
2488#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
2489#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
2490#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
2491#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
2492
1829#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 2493#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
1830#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 2494#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
1831#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL 2495#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
1832#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL 2496#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
1833 2497
2498#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
2499#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
2500#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
2501#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
2502
1834union uvh_rh_gam_mmr_overlay_config_mmr_u { 2503union uvh_rh_gam_mmr_overlay_config_mmr_u {
1835 unsigned long v; 2504 unsigned long v;
1836 struct uvh_rh_gam_mmr_overlay_config_mmr_s { 2505 struct uvh_rh_gam_mmr_overlay_config_mmr_s {
@@ -1846,18 +2515,30 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u {
1846 unsigned long rsvd_47_62:16; 2515 unsigned long rsvd_47_62:16;
1847 unsigned long enable:1; /* RW */ 2516 unsigned long enable:1; /* RW */
1848 } s1; 2517 } s1;
2518 struct uvxh_rh_gam_mmr_overlay_config_mmr_s {
2519 unsigned long rsvd_0_25:26;
2520 unsigned long base:20; /* RW */
2521 unsigned long rsvd_46_62:17;
2522 unsigned long enable:1; /* RW */
2523 } sx;
1849 struct uv2h_rh_gam_mmr_overlay_config_mmr_s { 2524 struct uv2h_rh_gam_mmr_overlay_config_mmr_s {
1850 unsigned long rsvd_0_25:26; 2525 unsigned long rsvd_0_25:26;
1851 unsigned long base:20; /* RW */ 2526 unsigned long base:20; /* RW */
1852 unsigned long rsvd_46_62:17; 2527 unsigned long rsvd_46_62:17;
1853 unsigned long enable:1; /* RW */ 2528 unsigned long enable:1; /* RW */
1854 } s2; 2529 } s2;
2530 struct uv3h_rh_gam_mmr_overlay_config_mmr_s {
2531 unsigned long rsvd_0_25:26;
2532 unsigned long base:20; /* RW */
2533 unsigned long rsvd_46_62:17;
2534 unsigned long enable:1; /* RW */
2535 } s3;
1855}; 2536};
1856 2537
1857/* ========================================================================= */ 2538/* ========================================================================= */
1858/* UVH_RTC */ 2539/* UVH_RTC */
1859/* ========================================================================= */ 2540/* ========================================================================= */
1860#define UVH_RTC 0x340000UL 2541#define UVH_RTC 0x340000UL
1861 2542
1862#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 2543#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
1863#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL 2544#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
@@ -1873,7 +2554,7 @@ union uvh_rtc_u {
1873/* ========================================================================= */ 2554/* ========================================================================= */
1874/* UVH_RTC1_INT_CONFIG */ 2555/* UVH_RTC1_INT_CONFIG */
1875/* ========================================================================= */ 2556/* ========================================================================= */
1876#define UVH_RTC1_INT_CONFIG 0x615c0UL 2557#define UVH_RTC1_INT_CONFIG 0x615c0UL
1877 2558
1878#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 2559#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
1879#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 2560#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
@@ -1911,8 +2592,8 @@ union uvh_rtc1_int_config_u {
1911/* ========================================================================= */ 2592/* ========================================================================= */
1912/* UVH_SCRATCH5 */ 2593/* UVH_SCRATCH5 */
1913/* ========================================================================= */ 2594/* ========================================================================= */
1914#define UVH_SCRATCH5 0x2d0200UL 2595#define UVH_SCRATCH5 0x2d0200UL
1915#define UVH_SCRATCH5_32 0x778 2596#define UVH_SCRATCH5_32 0x778
1916 2597
1917#define UVH_SCRATCH5_SCRATCH5_SHFT 0 2598#define UVH_SCRATCH5_SCRATCH5_SHFT 0
1918#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL 2599#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
@@ -1925,79 +2606,79 @@ union uvh_scratch5_u {
1925}; 2606};
1926 2607
1927/* ========================================================================= */ 2608/* ========================================================================= */
1928/* UV2H_EVENT_OCCURRED2 */ 2609/* UVXH_EVENT_OCCURRED2 */
1929/* ========================================================================= */ 2610/* ========================================================================= */
1930#define UV2H_EVENT_OCCURRED2 0x70100UL 2611#define UVXH_EVENT_OCCURRED2 0x70100UL
1931#define UV2H_EVENT_OCCURRED2_32 0xb68 2612#define UVXH_EVENT_OCCURRED2_32 0xb68
1932 2613
1933#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 2614#define UVXH_EVENT_OCCURRED2_RTC_0_SHFT 0
1934#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 2615#define UVXH_EVENT_OCCURRED2_RTC_1_SHFT 1
1935#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 2616#define UVXH_EVENT_OCCURRED2_RTC_2_SHFT 2
1936#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 2617#define UVXH_EVENT_OCCURRED2_RTC_3_SHFT 3
1937#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 2618#define UVXH_EVENT_OCCURRED2_RTC_4_SHFT 4
1938#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 2619#define UVXH_EVENT_OCCURRED2_RTC_5_SHFT 5
1939#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 2620#define UVXH_EVENT_OCCURRED2_RTC_6_SHFT 6
1940#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 2621#define UVXH_EVENT_OCCURRED2_RTC_7_SHFT 7
1941#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 2622#define UVXH_EVENT_OCCURRED2_RTC_8_SHFT 8
1942#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 2623#define UVXH_EVENT_OCCURRED2_RTC_9_SHFT 9
1943#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 2624#define UVXH_EVENT_OCCURRED2_RTC_10_SHFT 10
1944#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 2625#define UVXH_EVENT_OCCURRED2_RTC_11_SHFT 11
1945#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 2626#define UVXH_EVENT_OCCURRED2_RTC_12_SHFT 12
1946#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 2627#define UVXH_EVENT_OCCURRED2_RTC_13_SHFT 13
1947#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 2628#define UVXH_EVENT_OCCURRED2_RTC_14_SHFT 14
1948#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 2629#define UVXH_EVENT_OCCURRED2_RTC_15_SHFT 15
1949#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 2630#define UVXH_EVENT_OCCURRED2_RTC_16_SHFT 16
1950#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 2631#define UVXH_EVENT_OCCURRED2_RTC_17_SHFT 17
1951#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 2632#define UVXH_EVENT_OCCURRED2_RTC_18_SHFT 18
1952#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 2633#define UVXH_EVENT_OCCURRED2_RTC_19_SHFT 19
1953#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 2634#define UVXH_EVENT_OCCURRED2_RTC_20_SHFT 20
1954#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 2635#define UVXH_EVENT_OCCURRED2_RTC_21_SHFT 21
1955#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 2636#define UVXH_EVENT_OCCURRED2_RTC_22_SHFT 22
1956#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 2637#define UVXH_EVENT_OCCURRED2_RTC_23_SHFT 23
1957#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 2638#define UVXH_EVENT_OCCURRED2_RTC_24_SHFT 24
1958#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 2639#define UVXH_EVENT_OCCURRED2_RTC_25_SHFT 25
1959#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 2640#define UVXH_EVENT_OCCURRED2_RTC_26_SHFT 26
1960#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 2641#define UVXH_EVENT_OCCURRED2_RTC_27_SHFT 27
1961#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 2642#define UVXH_EVENT_OCCURRED2_RTC_28_SHFT 28
1962#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 2643#define UVXH_EVENT_OCCURRED2_RTC_29_SHFT 29
1963#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 2644#define UVXH_EVENT_OCCURRED2_RTC_30_SHFT 30
1964#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 2645#define UVXH_EVENT_OCCURRED2_RTC_31_SHFT 31
1965#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL 2646#define UVXH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
1966#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL 2647#define UVXH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
1967#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL 2648#define UVXH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
1968#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL 2649#define UVXH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
1969#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL 2650#define UVXH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
1970#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL 2651#define UVXH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
1971#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL 2652#define UVXH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
1972#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL 2653#define UVXH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
1973#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL 2654#define UVXH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
1974#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL 2655#define UVXH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
1975#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL 2656#define UVXH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
1976#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL 2657#define UVXH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
1977#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL 2658#define UVXH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
1978#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL 2659#define UVXH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
1979#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL 2660#define UVXH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
1980#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL 2661#define UVXH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
1981#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL 2662#define UVXH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
1982#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL 2663#define UVXH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
1983#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL 2664#define UVXH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
1984#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL 2665#define UVXH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
1985#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL 2666#define UVXH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
1986#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL 2667#define UVXH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
1987#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL 2668#define UVXH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
1988#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL 2669#define UVXH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
1989#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL 2670#define UVXH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
1990#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL 2671#define UVXH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
1991#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL 2672#define UVXH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
1992#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL 2673#define UVXH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
1993#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL 2674#define UVXH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
1994#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL 2675#define UVXH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
1995#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL 2676#define UVXH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
1996#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL 2677#define UVXH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
1997 2678
1998union uv2h_event_occurred2_u { 2679union uvxh_event_occurred2_u {
1999 unsigned long v; 2680 unsigned long v;
2000 struct uv2h_event_occurred2_s { 2681 struct uvxh_event_occurred2_s {
2001 unsigned long rtc_0:1; /* RW */ 2682 unsigned long rtc_0:1; /* RW */
2002 unsigned long rtc_1:1; /* RW */ 2683 unsigned long rtc_1:1; /* RW */
2003 unsigned long rtc_2:1; /* RW */ 2684 unsigned long rtc_2:1; /* RW */
@@ -2031,29 +2712,46 @@ union uv2h_event_occurred2_u {
2031 unsigned long rtc_30:1; /* RW */ 2712 unsigned long rtc_30:1; /* RW */
2032 unsigned long rtc_31:1; /* RW */ 2713 unsigned long rtc_31:1; /* RW */
2033 unsigned long rsvd_32_63:32; 2714 unsigned long rsvd_32_63:32;
2034 } s1; 2715 } sx;
2035}; 2716};
2036 2717
2037/* ========================================================================= */ 2718/* ========================================================================= */
2038/* UV2H_EVENT_OCCURRED2_ALIAS */ 2719/* UVXH_EVENT_OCCURRED2_ALIAS */
2039/* ========================================================================= */ 2720/* ========================================================================= */
2040#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL 2721#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL
2041#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 2722#define UVXH_EVENT_OCCURRED2_ALIAS_32 0xb70
2723
2042 2724
2043/* ========================================================================= */ 2725/* ========================================================================= */
2044/* UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 */ 2726/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */
2045/* ========================================================================= */ 2727/* ========================================================================= */
2046#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL 2728#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
2047#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 2729#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
2730#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
2731#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
2732#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
2733#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
2734
2735#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
2736#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
2048 2737
2049#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 2738#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
2050#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL 2739#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
2051 2740
2052union uv2h_lb_bau_sb_activation_status_2_u { 2741#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
2742#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
2743
2744union uvxh_lb_bau_sb_activation_status_2_u {
2053 unsigned long v; 2745 unsigned long v;
2746 struct uvxh_lb_bau_sb_activation_status_2_s {
2747 unsigned long aux_error:64; /* RW */
2748 } sx;
2054 struct uv2h_lb_bau_sb_activation_status_2_s { 2749 struct uv2h_lb_bau_sb_activation_status_2_s {
2055 unsigned long aux_error:64; /* RW */ 2750 unsigned long aux_error:64; /* RW */
2056 } s1; 2751 } s2;
2752 struct uv3h_lb_bau_sb_activation_status_2_s {
2753 unsigned long aux_error:64; /* RW */
2754 } s3;
2057}; 2755};
2058 2756
2059/* ========================================================================= */ 2757/* ========================================================================= */
@@ -2073,5 +2771,87 @@ union uv1h_lb_target_physical_apic_id_mask_u {
2073 } s1; 2771 } s1;
2074}; 2772};
2075 2773
2774/* ========================================================================= */
2775/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */
2776/* ========================================================================= */
2777#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
2778
2779#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
2780#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
2781#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
2782#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
2783#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
2784#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
2785
2786union uv3h_rh_gam_mmioh_overlay_config0_mmr_u {
2787 unsigned long v;
2788 struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s {
2789 unsigned long rsvd_0_25:26;
2790 unsigned long base:20; /* RW */
2791 unsigned long m_io:6; /* RW */
2792 unsigned long n_io:4;
2793 unsigned long rsvd_56_62:7;
2794 unsigned long enable:1; /* RW */
2795 } s3;
2796};
2797
2798/* ========================================================================= */
2799/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */
2800/* ========================================================================= */
2801#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1604000UL
2802
2803#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
2804#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
2805#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
2806#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
2807#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
2808#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
2809
2810union uv3h_rh_gam_mmioh_overlay_config1_mmr_u {
2811 unsigned long v;
2812 struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
2813 unsigned long rsvd_0_25:26;
2814 unsigned long base:20; /* RW */
2815 unsigned long m_io:6; /* RW */
2816 unsigned long n_io:4;
2817 unsigned long rsvd_56_62:7;
2818 unsigned long enable:1; /* RW */
2819 } s3;
2820};
2821
2822/* ========================================================================= */
2823/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */
2824/* ========================================================================= */
2825#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL
2826#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
2827
2828#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
2829#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
2830
2831union uv3h_rh_gam_mmioh_redirect_config0_mmr_u {
2832 unsigned long v;
2833 struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
2834 unsigned long nasid:15; /* RW */
2835 unsigned long rsvd_15_63:49;
2836 } s3;
2837};
2838
2839/* ========================================================================= */
2840/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */
2841/* ========================================================================= */
2842#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL
2843#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
2844
2845#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
2846#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
2847
2848union uv3h_rh_gam_mmioh_redirect_config1_mmr_u {
2849 unsigned long v;
2850 struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
2851 unsigned long nasid:15; /* RW */
2852 unsigned long rsvd_15_63:49;
2853 } s3;
2854};
2855
2076 2856
2077#endif /* _ASM_X86_UV_UV_MMRS_H */ 2857#endif /* _ASM_X86_UV_UV_MMRS_H */
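The uv_mmrs.h additions above are pure register descriptions: for each MMR the header now carries the register offset, per-revision *_SHFT/*_MASK constants, and a union whose bitfield structs (s, s1, sx, s2, s3) give the common, UV1, UVX, UV2 and UV3 views of the same 64-bit value. The fragment below is a minimal sketch, not part of this commit, of how such a register is typically consumed; it assumes the uv_read_local_mmr() accessor declared in <asm/uv/uv_hub.h>.

#include <linux/kernel.h>
#include <asm/uv/uv_hub.h>	/* assumed accessor; also pulls in uv_mmrs.h */

static unsigned long example_uv_node_id(void)
{
	union uvh_node_id_u node_id;

	/* Read the raw 64-bit MMR at the offset defined above. */
	node_id.v = uv_read_local_mmr(UVH_NODE_ID);

	/* Decode through the architecture-common bitfield view... */
	pr_info("UV node %u rev %u\n",
		(unsigned int)node_id.s.node_id,
		(unsigned int)node_id.s.revision);

	/* ...or with the matching shift/mask pair on the raw value. */
	return (node_id.v & UVXH_NODE_ID_NODE_ID_MASK)
			>> UVXH_NODE_ID_NODE_ID_SHFT;
}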
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 235b49fa554b..b6fbf860e398 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -57,9 +57,12 @@
57#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 57#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
58#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 58#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
59#define SECONDARY_EXEC_RDTSCP 0x00000008 59#define SECONDARY_EXEC_RDTSCP 0x00000008
60#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
60#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 61#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
61#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 62#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
62#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 63#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
64#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
65#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
63#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 66#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
64#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 67#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
65 68
@@ -97,6 +100,7 @@ enum vmcs_field {
97 GUEST_GS_SELECTOR = 0x0000080a, 100 GUEST_GS_SELECTOR = 0x0000080a,
98 GUEST_LDTR_SELECTOR = 0x0000080c, 101 GUEST_LDTR_SELECTOR = 0x0000080c,
99 GUEST_TR_SELECTOR = 0x0000080e, 102 GUEST_TR_SELECTOR = 0x0000080e,
103 GUEST_INTR_STATUS = 0x00000810,
100 HOST_ES_SELECTOR = 0x00000c00, 104 HOST_ES_SELECTOR = 0x00000c00,
101 HOST_CS_SELECTOR = 0x00000c02, 105 HOST_CS_SELECTOR = 0x00000c02,
102 HOST_SS_SELECTOR = 0x00000c04, 106 HOST_SS_SELECTOR = 0x00000c04,
@@ -124,6 +128,14 @@ enum vmcs_field {
124 APIC_ACCESS_ADDR_HIGH = 0x00002015, 128 APIC_ACCESS_ADDR_HIGH = 0x00002015,
125 EPT_POINTER = 0x0000201a, 129 EPT_POINTER = 0x0000201a,
126 EPT_POINTER_HIGH = 0x0000201b, 130 EPT_POINTER_HIGH = 0x0000201b,
131 EOI_EXIT_BITMAP0 = 0x0000201c,
132 EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
133 EOI_EXIT_BITMAP1 = 0x0000201e,
134 EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
135 EOI_EXIT_BITMAP2 = 0x00002020,
136 EOI_EXIT_BITMAP2_HIGH = 0x00002021,
137 EOI_EXIT_BITMAP3 = 0x00002022,
138 EOI_EXIT_BITMAP3_HIGH = 0x00002023,
127 GUEST_PHYSICAL_ADDRESS = 0x00002400, 139 GUEST_PHYSICAL_ADDRESS = 0x00002400,
128 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, 140 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
129 VMCS_LINK_POINTER = 0x00002800, 141 VMCS_LINK_POINTER = 0x00002800,
@@ -346,9 +358,9 @@ enum vmcs_field {
346 358
347#define AR_RESERVD_MASK 0xfffe0f00 359#define AR_RESERVD_MASK 0xfffe0f00
348 360
349#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) 361#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
350#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) 362#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1)
351#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) 363#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
352 364
353#define VMX_NR_VPIDS (1 << 16) 365#define VMX_NR_VPIDS (1 << 16)
354#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 366#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
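
Note: the new SECONDARY_EXEC_* definitions added above are single-bit flags in the 32-bit secondary processor-based VM-execution controls, so enabling or testing a feature is plain bitmask arithmetic. A small illustrative sketch (the three values are copied from the hunk; the rest is not kernel code):

#include <stdio.h>

#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE	0x00000010
#define SECONDARY_EXEC_APIC_REGISTER_VIRT	0x00000100
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY	0x00000200

int main(void)
{
	unsigned int ctl = 0;

	/* enable two of the new controls */
	ctl |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
	       SECONDARY_EXEC_APIC_REGISTER_VIRT;

	/* test a third */
	if (ctl & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
		printf("virtual interrupt delivery enabled\n");
	else
		printf("virtual interrupt delivery disabled\n");
	return 0;
}
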
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 57693498519c..d8d99222b36a 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -69,17 +69,6 @@ struct x86_init_oem {
69}; 69};
70 70
71/** 71/**
72 * struct x86_init_mapping - platform specific initial kernel pagetable setup
73 * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
74 *
75 * For more details on the purpose of this hook, look in
76 * init_memory_mapping and the commit that added it.
77 */
78struct x86_init_mapping {
79 void (*pagetable_reserve)(u64 start, u64 end);
80};
81
82/**
83 * struct x86_init_paging - platform specific paging functions 72 * struct x86_init_paging - platform specific paging functions
84 * @pagetable_init: platform specific paging initialization call to setup 73 * @pagetable_init: platform specific paging initialization call to setup
85 * the kernel pagetables and prepare accessors functions. 74 * the kernel pagetables and prepare accessors functions.
@@ -136,7 +125,6 @@ struct x86_init_ops {
136 struct x86_init_mpparse mpparse; 125 struct x86_init_mpparse mpparse;
137 struct x86_init_irqs irqs; 126 struct x86_init_irqs irqs;
138 struct x86_init_oem oem; 127 struct x86_init_oem oem;
139 struct x86_init_mapping mapping;
140 struct x86_init_paging paging; 128 struct x86_init_paging paging;
141 struct x86_init_timers timers; 129 struct x86_init_timers timers;
142 struct x86_init_iommu iommu; 130 struct x86_init_iommu iommu;
@@ -181,19 +169,38 @@ struct x86_platform_ops {
181}; 169};
182 170
183struct pci_dev; 171struct pci_dev;
172struct msi_msg;
184 173
185struct x86_msi_ops { 174struct x86_msi_ops {
186 int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); 175 int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
176 void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
177 unsigned int dest, struct msi_msg *msg,
178 u8 hpet_id);
187 void (*teardown_msi_irq)(unsigned int irq); 179 void (*teardown_msi_irq)(unsigned int irq);
188 void (*teardown_msi_irqs)(struct pci_dev *dev); 180 void (*teardown_msi_irqs)(struct pci_dev *dev);
189 void (*restore_msi_irqs)(struct pci_dev *dev, int irq); 181 void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
182 int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
190}; 183};
191 184
185struct IO_APIC_route_entry;
186struct io_apic_irq_attr;
187struct irq_data;
188struct cpumask;
189
192struct x86_io_apic_ops { 190struct x86_io_apic_ops {
193 void (*init) (void); 191 void (*init) (void);
194 unsigned int (*read) (unsigned int apic, unsigned int reg); 192 unsigned int (*read) (unsigned int apic, unsigned int reg);
195 void (*write) (unsigned int apic, unsigned int reg, unsigned int value); 193 void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
196 void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); 194 void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
195 void (*disable)(void);
196 void (*print_entries)(unsigned int apic, unsigned int nr_entries);
197 int (*set_affinity)(struct irq_data *data,
198 const struct cpumask *mask,
199 bool force);
200 int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
201 unsigned int destination, int vector,
202 struct io_apic_irq_attr *attr);
203 void (*eoi_ioapic_pin)(int apic, int pin, int vector);
197}; 204};
198 205
199extern struct x86_init_ops x86_init; 206extern struct x86_init_ops x86_init;
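
Note: the x86_msi_ops and x86_io_apic_ops changes above extend the usual x86 ops-table pattern: callers go through a struct of function pointers populated with native defaults, and interrupt remapping (or a platform) can override individual hooks. A minimal userspace sketch of that indirection, with illustrative names only (not the kernel's):

#include <stdio.h>

struct demo_io_apic_ops {
	unsigned int (*read)(unsigned int apic, unsigned int reg);
	void (*eoi_pin)(int apic, int pin, int vector);
};

static unsigned int native_read(unsigned int apic, unsigned int reg)
{
	printf("native read apic=%u reg=%u\n", apic, reg);
	return 0;
}

static void native_eoi_pin(int apic, int pin, int vector)
{
	printf("native EOI apic=%d pin=%d vector=%d\n", apic, pin, vector);
}

static struct demo_io_apic_ops io_apic_ops = {
	.read		= native_read,
	.eoi_pin	= native_eoi_pin,
};

int main(void)
{
	/* a platform could repoint io_apic_ops.eoi_pin before this runs */
	io_apic_ops.read(0, 1);
	io_apic_ops.eoi_pin(0, 3, 0x41);
	return 0;
}
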
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index cc146d51449e..ca842f2769ef 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -16,4 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
16 return raw_irqs_disabled_flags(regs->flags); 16 return raw_irqs_disabled_flags(regs->flags);
17} 17}
18 18
19/* No need for a barrier -- XCHG is a barrier on x86. */
20#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
21
19#endif /* _ASM_X86_XEN_EVENTS_H */ 22#endif /* _ASM_X86_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index c20d1ce62dc6..e709884d0ef9 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -382,14 +382,14 @@ HYPERVISOR_console_io(int cmd, int count, char *str)
382 return _hypercall3(int, console_io, cmd, count, str); 382 return _hypercall3(int, console_io, cmd, count, str);
383} 383}
384 384
385extern int __must_check HYPERVISOR_physdev_op_compat(int, void *); 385extern int __must_check xen_physdev_op_compat(int, void *);
386 386
387static inline int 387static inline int
388HYPERVISOR_physdev_op(int cmd, void *arg) 388HYPERVISOR_physdev_op(int cmd, void *arg)
389{ 389{
390 int rc = _hypercall2(int, physdev_op, cmd, arg); 390 int rc = _hypercall2(int, physdev_op, cmd, arg);
391 if (unlikely(rc == -ENOSYS)) 391 if (unlikely(rc == -ENOSYS))
392 rc = HYPERVISOR_physdev_op_compat(cmd, arg); 392 rc = xen_physdev_op_compat(cmd, arg);
393 return rc; 393 return rc;
394} 394}
395 395
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 472b9b783019..6aef9fbc09b7 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -212,4 +212,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr);
212void make_lowmem_page_readonly(void *vaddr); 212void make_lowmem_page_readonly(void *vaddr);
213void make_lowmem_page_readwrite(void *vaddr); 213void make_lowmem_page_readwrite(void *vaddr);
214 214
215#define xen_remap(cookie, size) ioremap((cookie), (size));
216
215#endif /* _ASM_X86_XEN_PAGE_H */ 217#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index f8fde90bc45e..d8829751b3f8 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,10 +1,499 @@
1#ifdef CONFIG_KMEMCHECK 1#ifdef CONFIG_KMEMCHECK
2/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ 2/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
3# include <asm-generic/xor.h> 3# include <asm-generic/xor.h>
4#elif !defined(_ASM_X86_XOR_H)
5#define _ASM_X86_XOR_H
6
7/*
8 * Optimized RAID-5 checksumming functions for SSE.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2, or (at your option)
13 * any later version.
14 *
15 * You should have received a copy of the GNU General Public License
16 * (for example /usr/src/linux/COPYING); if not, write to the Free
17 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20/*
21 * Cache avoiding checksumming functions utilizing KNI instructions
22 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
23 */
24
25/*
26 * Based on
27 * High-speed RAID5 checksumming functions utilizing SSE instructions.
28 * Copyright (C) 1998 Ingo Molnar.
29 */
30
31/*
32 * x86-64 changes / gcc fixes from Andi Kleen.
33 * Copyright 2002 Andi Kleen, SuSE Labs.
34 *
35 * This hasn't been optimized for the hammer yet, but there are likely
36 * no advantages to be gotten from x86-64 here anyways.
37 */
38
39#include <asm/i387.h>
40
41#ifdef CONFIG_X86_32
42/* reduce register pressure */
43# define XOR_CONSTANT_CONSTRAINT "i"
4#else 44#else
45# define XOR_CONSTANT_CONSTRAINT "re"
46#endif
47
48#define OFFS(x) "16*("#x")"
49#define PF_OFFS(x) "256+16*("#x")"
50#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
51#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
52#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
53#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
54#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
55#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
56#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
57#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
58#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
59#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
60#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
61#define NOP(x)
62
63#define BLK64(pf, op, i) \
64 pf(i) \
65 op(i, 0) \
66 op(i + 1, 1) \
67 op(i + 2, 2) \
68 op(i + 3, 3)
69
70static void
71xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
72{
73 unsigned long lines = bytes >> 8;
74
75 kernel_fpu_begin();
76
77 asm volatile(
78#undef BLOCK
79#define BLOCK(i) \
80 LD(i, 0) \
81 LD(i + 1, 1) \
82 PF1(i) \
83 PF1(i + 2) \
84 LD(i + 2, 2) \
85 LD(i + 3, 3) \
86 PF0(i + 4) \
87 PF0(i + 6) \
88 XO1(i, 0) \
89 XO1(i + 1, 1) \
90 XO1(i + 2, 2) \
91 XO1(i + 3, 3) \
92 ST(i, 0) \
93 ST(i + 1, 1) \
94 ST(i + 2, 2) \
95 ST(i + 3, 3) \
96
97
98 PF0(0)
99 PF0(2)
100
101 " .align 32 ;\n"
102 " 1: ;\n"
103
104 BLOCK(0)
105 BLOCK(4)
106 BLOCK(8)
107 BLOCK(12)
108
109 " add %[inc], %[p1] ;\n"
110 " add %[inc], %[p2] ;\n"
111 " dec %[cnt] ;\n"
112 " jnz 1b ;\n"
113 : [cnt] "+r" (lines),
114 [p1] "+r" (p1), [p2] "+r" (p2)
115 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
116 : "memory");
117
118 kernel_fpu_end();
119}
120
121static void
122xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
123{
124 unsigned long lines = bytes >> 8;
125
126 kernel_fpu_begin();
127
128 asm volatile(
129#undef BLOCK
130#define BLOCK(i) \
131 BLK64(PF0, LD, i) \
132 BLK64(PF1, XO1, i) \
133 BLK64(NOP, ST, i) \
134
135 " .align 32 ;\n"
136 " 1: ;\n"
137
138 BLOCK(0)
139 BLOCK(4)
140 BLOCK(8)
141 BLOCK(12)
142
143 " add %[inc], %[p1] ;\n"
144 " add %[inc], %[p2] ;\n"
145 " dec %[cnt] ;\n"
146 " jnz 1b ;\n"
147 : [cnt] "+r" (lines),
148 [p1] "+r" (p1), [p2] "+r" (p2)
149 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
150 : "memory");
151
152 kernel_fpu_end();
153}
154
155static void
156xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
157 unsigned long *p3)
158{
159 unsigned long lines = bytes >> 8;
160
161 kernel_fpu_begin();
162
163 asm volatile(
164#undef BLOCK
165#define BLOCK(i) \
166 PF1(i) \
167 PF1(i + 2) \
168 LD(i, 0) \
169 LD(i + 1, 1) \
170 LD(i + 2, 2) \
171 LD(i + 3, 3) \
172 PF2(i) \
173 PF2(i + 2) \
174 PF0(i + 4) \
175 PF0(i + 6) \
176 XO1(i, 0) \
177 XO1(i + 1, 1) \
178 XO1(i + 2, 2) \
179 XO1(i + 3, 3) \
180 XO2(i, 0) \
181 XO2(i + 1, 1) \
182 XO2(i + 2, 2) \
183 XO2(i + 3, 3) \
184 ST(i, 0) \
185 ST(i + 1, 1) \
186 ST(i + 2, 2) \
187 ST(i + 3, 3) \
188
189
190 PF0(0)
191 PF0(2)
192
193 " .align 32 ;\n"
194 " 1: ;\n"
195
196 BLOCK(0)
197 BLOCK(4)
198 BLOCK(8)
199 BLOCK(12)
200
201 " add %[inc], %[p1] ;\n"
202 " add %[inc], %[p2] ;\n"
203 " add %[inc], %[p3] ;\n"
204 " dec %[cnt] ;\n"
205 " jnz 1b ;\n"
206 : [cnt] "+r" (lines),
207 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
208 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
209 : "memory");
210
211 kernel_fpu_end();
212}
213
214static void
215xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
216 unsigned long *p3)
217{
218 unsigned long lines = bytes >> 8;
219
220 kernel_fpu_begin();
221
222 asm volatile(
223#undef BLOCK
224#define BLOCK(i) \
225 BLK64(PF0, LD, i) \
226 BLK64(PF1, XO1, i) \
227 BLK64(PF2, XO2, i) \
228 BLK64(NOP, ST, i) \
229
230 " .align 32 ;\n"
231 " 1: ;\n"
232
233 BLOCK(0)
234 BLOCK(4)
235 BLOCK(8)
236 BLOCK(12)
237
238 " add %[inc], %[p1] ;\n"
239 " add %[inc], %[p2] ;\n"
240 " add %[inc], %[p3] ;\n"
241 " dec %[cnt] ;\n"
242 " jnz 1b ;\n"
243 : [cnt] "+r" (lines),
244 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
245 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
246 : "memory");
247
248 kernel_fpu_end();
249}
250
251static void
252xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
253 unsigned long *p3, unsigned long *p4)
254{
255 unsigned long lines = bytes >> 8;
256
257 kernel_fpu_begin();
258
259 asm volatile(
260#undef BLOCK
261#define BLOCK(i) \
262 PF1(i) \
263 PF1(i + 2) \
264 LD(i, 0) \
265 LD(i + 1, 1) \
266 LD(i + 2, 2) \
267 LD(i + 3, 3) \
268 PF2(i) \
269 PF2(i + 2) \
270 XO1(i, 0) \
271 XO1(i + 1, 1) \
272 XO1(i + 2, 2) \
273 XO1(i + 3, 3) \
274 PF3(i) \
275 PF3(i + 2) \
276 PF0(i + 4) \
277 PF0(i + 6) \
278 XO2(i, 0) \
279 XO2(i + 1, 1) \
280 XO2(i + 2, 2) \
281 XO2(i + 3, 3) \
282 XO3(i, 0) \
283 XO3(i + 1, 1) \
284 XO3(i + 2, 2) \
285 XO3(i + 3, 3) \
286 ST(i, 0) \
287 ST(i + 1, 1) \
288 ST(i + 2, 2) \
289 ST(i + 3, 3) \
290
291
292 PF0(0)
293 PF0(2)
294
295 " .align 32 ;\n"
296 " 1: ;\n"
297
298 BLOCK(0)
299 BLOCK(4)
300 BLOCK(8)
301 BLOCK(12)
302
303 " add %[inc], %[p1] ;\n"
304 " add %[inc], %[p2] ;\n"
305 " add %[inc], %[p3] ;\n"
306 " add %[inc], %[p4] ;\n"
307 " dec %[cnt] ;\n"
308 " jnz 1b ;\n"
309 : [cnt] "+r" (lines), [p1] "+r" (p1),
310 [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
311 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
312 : "memory");
313
314 kernel_fpu_end();
315}
316
317static void
318xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
319 unsigned long *p3, unsigned long *p4)
320{
321 unsigned long lines = bytes >> 8;
322
323 kernel_fpu_begin();
324
325 asm volatile(
326#undef BLOCK
327#define BLOCK(i) \
328 BLK64(PF0, LD, i) \
329 BLK64(PF1, XO1, i) \
330 BLK64(PF2, XO2, i) \
331 BLK64(PF3, XO3, i) \
332 BLK64(NOP, ST, i) \
333
334 " .align 32 ;\n"
335 " 1: ;\n"
336
337 BLOCK(0)
338 BLOCK(4)
339 BLOCK(8)
340 BLOCK(12)
341
342 " add %[inc], %[p1] ;\n"
343 " add %[inc], %[p2] ;\n"
344 " add %[inc], %[p3] ;\n"
345 " add %[inc], %[p4] ;\n"
346 " dec %[cnt] ;\n"
347 " jnz 1b ;\n"
348 : [cnt] "+r" (lines), [p1] "+r" (p1),
349 [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
350 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
351 : "memory");
352
353 kernel_fpu_end();
354}
355
356static void
357xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
358 unsigned long *p3, unsigned long *p4, unsigned long *p5)
359{
360 unsigned long lines = bytes >> 8;
361
362 kernel_fpu_begin();
363
364 asm volatile(
365#undef BLOCK
366#define BLOCK(i) \
367 PF1(i) \
368 PF1(i + 2) \
369 LD(i, 0) \
370 LD(i + 1, 1) \
371 LD(i + 2, 2) \
372 LD(i + 3, 3) \
373 PF2(i) \
374 PF2(i + 2) \
375 XO1(i, 0) \
376 XO1(i + 1, 1) \
377 XO1(i + 2, 2) \
378 XO1(i + 3, 3) \
379 PF3(i) \
380 PF3(i + 2) \
381 XO2(i, 0) \
382 XO2(i + 1, 1) \
383 XO2(i + 2, 2) \
384 XO2(i + 3, 3) \
385 PF4(i) \
386 PF4(i + 2) \
387 PF0(i + 4) \
388 PF0(i + 6) \
389 XO3(i, 0) \
390 XO3(i + 1, 1) \
391 XO3(i + 2, 2) \
392 XO3(i + 3, 3) \
393 XO4(i, 0) \
394 XO4(i + 1, 1) \
395 XO4(i + 2, 2) \
396 XO4(i + 3, 3) \
397 ST(i, 0) \
398 ST(i + 1, 1) \
399 ST(i + 2, 2) \
400 ST(i + 3, 3) \
401
402
403 PF0(0)
404 PF0(2)
405
406 " .align 32 ;\n"
407 " 1: ;\n"
408
409 BLOCK(0)
410 BLOCK(4)
411 BLOCK(8)
412 BLOCK(12)
413
414 " add %[inc], %[p1] ;\n"
415 " add %[inc], %[p2] ;\n"
416 " add %[inc], %[p3] ;\n"
417 " add %[inc], %[p4] ;\n"
418 " add %[inc], %[p5] ;\n"
419 " dec %[cnt] ;\n"
420 " jnz 1b ;\n"
421 : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
422 [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
423 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
424 : "memory");
425
426 kernel_fpu_end();
427}
428
429static void
430xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
431 unsigned long *p3, unsigned long *p4, unsigned long *p5)
432{
433 unsigned long lines = bytes >> 8;
434
435 kernel_fpu_begin();
436
437 asm volatile(
438#undef BLOCK
439#define BLOCK(i) \
440 BLK64(PF0, LD, i) \
441 BLK64(PF1, XO1, i) \
442 BLK64(PF2, XO2, i) \
443 BLK64(PF3, XO3, i) \
444 BLK64(PF4, XO4, i) \
445 BLK64(NOP, ST, i) \
446
447 " .align 32 ;\n"
448 " 1: ;\n"
449
450 BLOCK(0)
451 BLOCK(4)
452 BLOCK(8)
453 BLOCK(12)
454
455 " add %[inc], %[p1] ;\n"
456 " add %[inc], %[p2] ;\n"
457 " add %[inc], %[p3] ;\n"
458 " add %[inc], %[p4] ;\n"
459 " add %[inc], %[p5] ;\n"
460 " dec %[cnt] ;\n"
461 " jnz 1b ;\n"
462 : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
463 [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
464 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
465 : "memory");
466
467 kernel_fpu_end();
468}
469
470static struct xor_block_template xor_block_sse_pf64 = {
471 .name = "prefetch64-sse",
472 .do_2 = xor_sse_2_pf64,
473 .do_3 = xor_sse_3_pf64,
474 .do_4 = xor_sse_4_pf64,
475 .do_5 = xor_sse_5_pf64,
476};
477
478#undef LD
479#undef XO1
480#undef XO2
481#undef XO3
482#undef XO4
483#undef ST
484#undef NOP
485#undef BLK64
486#undef BLOCK
487
488#undef XOR_CONSTANT_CONSTRAINT
489
5#ifdef CONFIG_X86_32 490#ifdef CONFIG_X86_32
6# include <asm/xor_32.h> 491# include <asm/xor_32.h>
7#else 492#else
8# include <asm/xor_64.h> 493# include <asm/xor_64.h>
9#endif 494#endif
10#endif 495
496#define XOR_SELECT_TEMPLATE(FASTEST) \
497 AVX_SELECT(FASTEST)
498
499#endif /* _ASM_X86_XOR_H */
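
Note: each xor_sse_N() above walks the buffers in 256-byte lines, XOR-ing p2..pN into p1 sixteen bytes at a time with non-temporal prefetches. A plain-C scalar reference (illustrative only, not the kernel routine) of what the two-source variant computes:

#include <stdio.h>

static void xor_scalar_2(unsigned long bytes, unsigned long *p1,
			 const unsigned long *p2)
{
	unsigned long lines = bytes >> 8;	/* 256-byte lines, as above */
	unsigned long words_per_line = 256 / sizeof(unsigned long);

	while (lines--) {
		for (unsigned long i = 0; i < words_per_line; i++)
			p1[i] ^= p2[i];
		p1 += words_per_line;
		p2 += words_per_line;
	}
}

int main(void)
{
	unsigned long a[64] = { 0 }, b[64];

	for (int i = 0; i < 64; i++)
		b[i] = i;
	xor_scalar_2(sizeof(a), a, b);	/* a becomes a ^ b, i.e. b here */
	printf("a[10]=%lu\n", a[10]);
	return 0;
}
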
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index f79cb7ec0e06..ce05722e3c68 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -2,7 +2,7 @@
2#define _ASM_X86_XOR_32_H 2#define _ASM_X86_XOR_32_H
3 3
4/* 4/*
5 * Optimized RAID-5 checksumming functions for MMX and SSE. 5 * Optimized RAID-5 checksumming functions for MMX.
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = {
529 .do_5 = xor_p5_mmx_5, 529 .do_5 = xor_p5_mmx_5,
530}; 530};
531 531
532/*
533 * Cache avoiding checksumming functions utilizing KNI instructions
534 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
535 */
536
537#define OFFS(x) "16*("#x")"
538#define PF_OFFS(x) "256+16*("#x")"
539#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
540#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
541#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
542#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
543#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
544#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
545#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
546#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
547#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
548#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
549#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
550#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
551#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
552
553
554static void
555xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
556{
557 unsigned long lines = bytes >> 8;
558
559 kernel_fpu_begin();
560
561 asm volatile(
562#undef BLOCK
563#define BLOCK(i) \
564 LD(i, 0) \
565 LD(i + 1, 1) \
566 PF1(i) \
567 PF1(i + 2) \
568 LD(i + 2, 2) \
569 LD(i + 3, 3) \
570 PF0(i + 4) \
571 PF0(i + 6) \
572 XO1(i, 0) \
573 XO1(i + 1, 1) \
574 XO1(i + 2, 2) \
575 XO1(i + 3, 3) \
576 ST(i, 0) \
577 ST(i + 1, 1) \
578 ST(i + 2, 2) \
579 ST(i + 3, 3) \
580
581
582 PF0(0)
583 PF0(2)
584
585 " .align 32 ;\n"
586 " 1: ;\n"
587
588 BLOCK(0)
589 BLOCK(4)
590 BLOCK(8)
591 BLOCK(12)
592
593 " addl $256, %1 ;\n"
594 " addl $256, %2 ;\n"
595 " decl %0 ;\n"
596 " jnz 1b ;\n"
597 : "+r" (lines),
598 "+r" (p1), "+r" (p2)
599 :
600 : "memory");
601
602 kernel_fpu_end();
603}
604
605static void
606xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
607 unsigned long *p3)
608{
609 unsigned long lines = bytes >> 8;
610
611 kernel_fpu_begin();
612
613 asm volatile(
614#undef BLOCK
615#define BLOCK(i) \
616 PF1(i) \
617 PF1(i + 2) \
618 LD(i,0) \
619 LD(i + 1, 1) \
620 LD(i + 2, 2) \
621 LD(i + 3, 3) \
622 PF2(i) \
623 PF2(i + 2) \
624 PF0(i + 4) \
625 PF0(i + 6) \
626 XO1(i,0) \
627 XO1(i + 1, 1) \
628 XO1(i + 2, 2) \
629 XO1(i + 3, 3) \
630 XO2(i,0) \
631 XO2(i + 1, 1) \
632 XO2(i + 2, 2) \
633 XO2(i + 3, 3) \
634 ST(i,0) \
635 ST(i + 1, 1) \
636 ST(i + 2, 2) \
637 ST(i + 3, 3) \
638
639
640 PF0(0)
641 PF0(2)
642
643 " .align 32 ;\n"
644 " 1: ;\n"
645
646 BLOCK(0)
647 BLOCK(4)
648 BLOCK(8)
649 BLOCK(12)
650
651 " addl $256, %1 ;\n"
652 " addl $256, %2 ;\n"
653 " addl $256, %3 ;\n"
654 " decl %0 ;\n"
655 " jnz 1b ;\n"
656 : "+r" (lines),
657 "+r" (p1), "+r"(p2), "+r"(p3)
658 :
659 : "memory" );
660
661 kernel_fpu_end();
662}
663
664static void
665xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
666 unsigned long *p3, unsigned long *p4)
667{
668 unsigned long lines = bytes >> 8;
669
670 kernel_fpu_begin();
671
672 asm volatile(
673#undef BLOCK
674#define BLOCK(i) \
675 PF1(i) \
676 PF1(i + 2) \
677 LD(i,0) \
678 LD(i + 1, 1) \
679 LD(i + 2, 2) \
680 LD(i + 3, 3) \
681 PF2(i) \
682 PF2(i + 2) \
683 XO1(i,0) \
684 XO1(i + 1, 1) \
685 XO1(i + 2, 2) \
686 XO1(i + 3, 3) \
687 PF3(i) \
688 PF3(i + 2) \
689 PF0(i + 4) \
690 PF0(i + 6) \
691 XO2(i,0) \
692 XO2(i + 1, 1) \
693 XO2(i + 2, 2) \
694 XO2(i + 3, 3) \
695 XO3(i,0) \
696 XO3(i + 1, 1) \
697 XO3(i + 2, 2) \
698 XO3(i + 3, 3) \
699 ST(i,0) \
700 ST(i + 1, 1) \
701 ST(i + 2, 2) \
702 ST(i + 3, 3) \
703
704
705 PF0(0)
706 PF0(2)
707
708 " .align 32 ;\n"
709 " 1: ;\n"
710
711 BLOCK(0)
712 BLOCK(4)
713 BLOCK(8)
714 BLOCK(12)
715
716 " addl $256, %1 ;\n"
717 " addl $256, %2 ;\n"
718 " addl $256, %3 ;\n"
719 " addl $256, %4 ;\n"
720 " decl %0 ;\n"
721 " jnz 1b ;\n"
722 : "+r" (lines),
723 "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
724 :
725 : "memory" );
726
727 kernel_fpu_end();
728}
729
730static void
731xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
732 unsigned long *p3, unsigned long *p4, unsigned long *p5)
733{
734 unsigned long lines = bytes >> 8;
735
736 kernel_fpu_begin();
737
738 /* Make sure GCC forgets anything it knows about p4 or p5,
739 such that it won't pass to the asm volatile below a
740 register that is shared with any other variable. That's
741 because we modify p4 and p5 there, but we can't mark them
742 as read/write, otherwise we'd overflow the 10-asm-operands
743 limit of GCC < 3.1. */
744 asm("" : "+r" (p4), "+r" (p5));
745
746 asm volatile(
747#undef BLOCK
748#define BLOCK(i) \
749 PF1(i) \
750 PF1(i + 2) \
751 LD(i,0) \
752 LD(i + 1, 1) \
753 LD(i + 2, 2) \
754 LD(i + 3, 3) \
755 PF2(i) \
756 PF2(i + 2) \
757 XO1(i,0) \
758 XO1(i + 1, 1) \
759 XO1(i + 2, 2) \
760 XO1(i + 3, 3) \
761 PF3(i) \
762 PF3(i + 2) \
763 XO2(i,0) \
764 XO2(i + 1, 1) \
765 XO2(i + 2, 2) \
766 XO2(i + 3, 3) \
767 PF4(i) \
768 PF4(i + 2) \
769 PF0(i + 4) \
770 PF0(i + 6) \
771 XO3(i,0) \
772 XO3(i + 1, 1) \
773 XO3(i + 2, 2) \
774 XO3(i + 3, 3) \
775 XO4(i,0) \
776 XO4(i + 1, 1) \
777 XO4(i + 2, 2) \
778 XO4(i + 3, 3) \
779 ST(i,0) \
780 ST(i + 1, 1) \
781 ST(i + 2, 2) \
782 ST(i + 3, 3) \
783
784
785 PF0(0)
786 PF0(2)
787
788 " .align 32 ;\n"
789 " 1: ;\n"
790
791 BLOCK(0)
792 BLOCK(4)
793 BLOCK(8)
794 BLOCK(12)
795
796 " addl $256, %1 ;\n"
797 " addl $256, %2 ;\n"
798 " addl $256, %3 ;\n"
799 " addl $256, %4 ;\n"
800 " addl $256, %5 ;\n"
801 " decl %0 ;\n"
802 " jnz 1b ;\n"
803 : "+r" (lines),
804 "+r" (p1), "+r" (p2), "+r" (p3)
805 : "r" (p4), "r" (p5)
806 : "memory");
807
808 /* p4 and p5 were modified, and now the variables are dead.
809 Clobber them just to be sure nobody does something stupid
810 like assuming they have some legal value. */
811 asm("" : "=r" (p4), "=r" (p5));
812
813 kernel_fpu_end();
814}
815
816static struct xor_block_template xor_block_pIII_sse = { 532static struct xor_block_template xor_block_pIII_sse = {
817 .name = "pIII_sse", 533 .name = "pIII_sse",
818 .do_2 = xor_sse_2, 534 .do_2 = xor_sse_2,
@@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = {
827/* Also try the generic routines. */ 543/* Also try the generic routines. */
828#include <asm-generic/xor.h> 544#include <asm-generic/xor.h>
829 545
546/* We force the use of the SSE xor block because it can write around L2.
547 We may also be able to load into the L1 only depending on how the cpu
548 deals with a load to a line that is being prefetched. */
830#undef XOR_TRY_TEMPLATES 549#undef XOR_TRY_TEMPLATES
831#define XOR_TRY_TEMPLATES \ 550#define XOR_TRY_TEMPLATES \
832do { \ 551do { \
833 xor_speed(&xor_block_8regs); \
834 xor_speed(&xor_block_8regs_p); \
835 xor_speed(&xor_block_32regs); \
836 xor_speed(&xor_block_32regs_p); \
837 AVX_XOR_SPEED; \ 552 AVX_XOR_SPEED; \
838 if (cpu_has_xmm) \ 553 if (cpu_has_xmm) { \
839 xor_speed(&xor_block_pIII_sse); \ 554 xor_speed(&xor_block_pIII_sse); \
840 if (cpu_has_mmx) { \ 555 xor_speed(&xor_block_sse_pf64); \
556 } else if (cpu_has_mmx) { \
841 xor_speed(&xor_block_pII_mmx); \ 557 xor_speed(&xor_block_pII_mmx); \
842 xor_speed(&xor_block_p5_mmx); \ 558 xor_speed(&xor_block_p5_mmx); \
559 } else { \
560 xor_speed(&xor_block_8regs); \
561 xor_speed(&xor_block_8regs_p); \
562 xor_speed(&xor_block_32regs); \
563 xor_speed(&xor_block_32regs_p); \
843 } \ 564 } \
844} while (0) 565} while (0)
845 566
846/* We force the use of the SSE xor block because it can write around L2.
847 We may also be able to load into the L1 only depending on how the cpu
848 deals with a load to a line that is being prefetched. */
849#define XOR_SELECT_TEMPLATE(FASTEST) \
850 AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
851
852#endif /* _ASM_X86_XOR_32_H */ 567#endif /* _ASM_X86_XOR_32_H */
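
Note: the reworked XOR_TRY_TEMPLATES above only benchmarks the candidates the CPU can actually run (SSE, else MMX, else the generic register templates), and the RAID code keeps whichever one xor_speed() measured as fastest. A rough sketch of that selection idea, with made-up speeds standing in for the real benchmark (the template names are the ones from the hunk; everything else is illustrative):

#include <stdio.h>

/* Illustrative stand-in for struct xor_block_template: a name plus a speed
 * that the real code fills in by timing each candidate. */
struct demo_template {
	const char *name;
	unsigned long speed;	/* MB/s, pretend benchmark result */
};

int main(void)
{
	struct demo_template candidates[] = {
		{ "prefetch64-sse",	9200 },
		{ "pIII_sse",		8800 },
		{ "8regs",		3100 },
	};
	struct demo_template *fastest = &candidates[0];

	for (unsigned int i = 1; i < sizeof(candidates) / sizeof(candidates[0]); i++)
		if (candidates[i].speed > fastest->speed)
			fastest = &candidates[i];

	printf("using %s (%lu MB/s)\n", fastest->name, fastest->speed);
	return 0;
}
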
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 87ac522c4af5..546f1e3b87cc 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -1,301 +1,6 @@
1#ifndef _ASM_X86_XOR_64_H 1#ifndef _ASM_X86_XOR_64_H
2#define _ASM_X86_XOR_64_H 2#define _ASM_X86_XOR_64_H
3 3
4/*
5 * Optimized RAID-5 checksumming functions for MMX and SSE.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * You should have received a copy of the GNU General Public License
13 * (for example /usr/src/linux/COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17
18/*
19 * Cache avoiding checksumming functions utilizing KNI instructions
20 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
21 */
22
23/*
24 * Based on
25 * High-speed RAID5 checksumming functions utilizing SSE instructions.
26 * Copyright (C) 1998 Ingo Molnar.
27 */
28
29/*
30 * x86-64 changes / gcc fixes from Andi Kleen.
31 * Copyright 2002 Andi Kleen, SuSE Labs.
32 *
33 * This hasn't been optimized for the hammer yet, but there are likely
34 * no advantages to be gotten from x86-64 here anyways.
35 */
36
37#include <asm/i387.h>
38
39#define OFFS(x) "16*("#x")"
40#define PF_OFFS(x) "256+16*("#x")"
41#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
42#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
43#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
44#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
45#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
46#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
47#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
48#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
49#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
50#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
51#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
52#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
53#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
54
55
56static void
57xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
58{
59 unsigned int lines = bytes >> 8;
60
61 kernel_fpu_begin();
62
63 asm volatile(
64#undef BLOCK
65#define BLOCK(i) \
66 LD(i, 0) \
67 LD(i + 1, 1) \
68 PF1(i) \
69 PF1(i + 2) \
70 LD(i + 2, 2) \
71 LD(i + 3, 3) \
72 PF0(i + 4) \
73 PF0(i + 6) \
74 XO1(i, 0) \
75 XO1(i + 1, 1) \
76 XO1(i + 2, 2) \
77 XO1(i + 3, 3) \
78 ST(i, 0) \
79 ST(i + 1, 1) \
80 ST(i + 2, 2) \
81 ST(i + 3, 3) \
82
83
84 PF0(0)
85 PF0(2)
86
87 " .align 32 ;\n"
88 " 1: ;\n"
89
90 BLOCK(0)
91 BLOCK(4)
92 BLOCK(8)
93 BLOCK(12)
94
95 " addq %[inc], %[p1] ;\n"
96 " addq %[inc], %[p2] ;\n"
97 " decl %[cnt] ; jnz 1b"
98 : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
99 : [inc] "r" (256UL)
100 : "memory");
101
102 kernel_fpu_end();
103}
104
105static void
106xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
107 unsigned long *p3)
108{
109 unsigned int lines = bytes >> 8;
110
111 kernel_fpu_begin();
112 asm volatile(
113#undef BLOCK
114#define BLOCK(i) \
115 PF1(i) \
116 PF1(i + 2) \
117 LD(i, 0) \
118 LD(i + 1, 1) \
119 LD(i + 2, 2) \
120 LD(i + 3, 3) \
121 PF2(i) \
122 PF2(i + 2) \
123 PF0(i + 4) \
124 PF0(i + 6) \
125 XO1(i, 0) \
126 XO1(i + 1, 1) \
127 XO1(i + 2, 2) \
128 XO1(i + 3, 3) \
129 XO2(i, 0) \
130 XO2(i + 1, 1) \
131 XO2(i + 2, 2) \
132 XO2(i + 3, 3) \
133 ST(i, 0) \
134 ST(i + 1, 1) \
135 ST(i + 2, 2) \
136 ST(i + 3, 3) \
137
138
139 PF0(0)
140 PF0(2)
141
142 " .align 32 ;\n"
143 " 1: ;\n"
144
145 BLOCK(0)
146 BLOCK(4)
147 BLOCK(8)
148 BLOCK(12)
149
150 " addq %[inc], %[p1] ;\n"
151 " addq %[inc], %[p2] ;\n"
152 " addq %[inc], %[p3] ;\n"
153 " decl %[cnt] ; jnz 1b"
154 : [cnt] "+r" (lines),
155 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
156 : [inc] "r" (256UL)
157 : "memory");
158 kernel_fpu_end();
159}
160
161static void
162xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
163 unsigned long *p3, unsigned long *p4)
164{
165 unsigned int lines = bytes >> 8;
166
167 kernel_fpu_begin();
168
169 asm volatile(
170#undef BLOCK
171#define BLOCK(i) \
172 PF1(i) \
173 PF1(i + 2) \
174 LD(i, 0) \
175 LD(i + 1, 1) \
176 LD(i + 2, 2) \
177 LD(i + 3, 3) \
178 PF2(i) \
179 PF2(i + 2) \
180 XO1(i, 0) \
181 XO1(i + 1, 1) \
182 XO1(i + 2, 2) \
183 XO1(i + 3, 3) \
184 PF3(i) \
185 PF3(i + 2) \
186 PF0(i + 4) \
187 PF0(i + 6) \
188 XO2(i, 0) \
189 XO2(i + 1, 1) \
190 XO2(i + 2, 2) \
191 XO2(i + 3, 3) \
192 XO3(i, 0) \
193 XO3(i + 1, 1) \
194 XO3(i + 2, 2) \
195 XO3(i + 3, 3) \
196 ST(i, 0) \
197 ST(i + 1, 1) \
198 ST(i + 2, 2) \
199 ST(i + 3, 3) \
200
201
202 PF0(0)
203 PF0(2)
204
205 " .align 32 ;\n"
206 " 1: ;\n"
207
208 BLOCK(0)
209 BLOCK(4)
210 BLOCK(8)
211 BLOCK(12)
212
213 " addq %[inc], %[p1] ;\n"
214 " addq %[inc], %[p2] ;\n"
215 " addq %[inc], %[p3] ;\n"
216 " addq %[inc], %[p4] ;\n"
217 " decl %[cnt] ; jnz 1b"
218 : [cnt] "+c" (lines),
219 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
220 : [inc] "r" (256UL)
221 : "memory" );
222
223 kernel_fpu_end();
224}
225
226static void
227xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
228 unsigned long *p3, unsigned long *p4, unsigned long *p5)
229{
230 unsigned int lines = bytes >> 8;
231
232 kernel_fpu_begin();
233
234 asm volatile(
235#undef BLOCK
236#define BLOCK(i) \
237 PF1(i) \
238 PF1(i + 2) \
239 LD(i, 0) \
240 LD(i + 1, 1) \
241 LD(i + 2, 2) \
242 LD(i + 3, 3) \
243 PF2(i) \
244 PF2(i + 2) \
245 XO1(i, 0) \
246 XO1(i + 1, 1) \
247 XO1(i + 2, 2) \
248 XO1(i + 3, 3) \
249 PF3(i) \
250 PF3(i + 2) \
251 XO2(i, 0) \
252 XO2(i + 1, 1) \
253 XO2(i + 2, 2) \
254 XO2(i + 3, 3) \
255 PF4(i) \
256 PF4(i + 2) \
257 PF0(i + 4) \
258 PF0(i + 6) \
259 XO3(i, 0) \
260 XO3(i + 1, 1) \
261 XO3(i + 2, 2) \
262 XO3(i + 3, 3) \
263 XO4(i, 0) \
264 XO4(i + 1, 1) \
265 XO4(i + 2, 2) \
266 XO4(i + 3, 3) \
267 ST(i, 0) \
268 ST(i + 1, 1) \
269 ST(i + 2, 2) \
270 ST(i + 3, 3) \
271
272
273 PF0(0)
274 PF0(2)
275
276 " .align 32 ;\n"
277 " 1: ;\n"
278
279 BLOCK(0)
280 BLOCK(4)
281 BLOCK(8)
282 BLOCK(12)
283
284 " addq %[inc], %[p1] ;\n"
285 " addq %[inc], %[p2] ;\n"
286 " addq %[inc], %[p3] ;\n"
287 " addq %[inc], %[p4] ;\n"
288 " addq %[inc], %[p5] ;\n"
289 " decl %[cnt] ; jnz 1b"
290 : [cnt] "+c" (lines),
291 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
292 [p5] "+r" (p5)
293 : [inc] "r" (256UL)
294 : "memory");
295
296 kernel_fpu_end();
297}
298
299static struct xor_block_template xor_block_sse = { 4static struct xor_block_template xor_block_sse = {
300 .name = "generic_sse", 5 .name = "generic_sse",
301 .do_2 = xor_sse_2, 6 .do_2 = xor_sse_2,
@@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
308/* Also try the AVX routines */ 13/* Also try the AVX routines */
309#include <asm/xor_avx.h> 14#include <asm/xor_avx.h>
310 15
16/* We force the use of the SSE xor block because it can write around L2.
17 We may also be able to load into the L1 only depending on how the cpu
18 deals with a load to a line that is being prefetched. */
311#undef XOR_TRY_TEMPLATES 19#undef XOR_TRY_TEMPLATES
312#define XOR_TRY_TEMPLATES \ 20#define XOR_TRY_TEMPLATES \
313do { \ 21do { \
314 AVX_XOR_SPEED; \ 22 AVX_XOR_SPEED; \
23 xor_speed(&xor_block_sse_pf64); \
315 xor_speed(&xor_block_sse); \ 24 xor_speed(&xor_block_sse); \
316} while (0) 25} while (0)
317 26
318/* We force the use of the SSE xor block because it can write around L2.
319 We may also be able to load into the L1 only depending on how the cpu
320 deals with a load to a line that is being prefetched. */
321#define XOR_SELECT_TEMPLATE(FASTEST) \
322 AVX_SELECT(&xor_block_sse)
323
324#endif /* _ASM_X86_XOR_64_H */ 27#endif /* _ASM_X86_XOR_64_H */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 58c829871c31..a0eab85ce7b8 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -4,66 +4,6 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <asm/ioctls.h> 5#include <asm/ioctls.h>
6 6
7/*
8 * Machine Check support for x86
9 */
10
11/* MCG_CAP register defines */
12#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
13#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
14#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
15#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
16#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
17#define MCG_EXT_CNT_SHIFT 16
18#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
19#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
20
21/* MCG_STATUS register defines */
22#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
23#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
24#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
25
26/* MCi_STATUS register defines */
27#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
28#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
29#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
30#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
31#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
32#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
33#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
34#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
35#define MCI_STATUS_AR (1ULL<<55) /* Action required */
36#define MCACOD 0xffff /* MCA Error Code */
37
38/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
39#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
40#define MCACOD_SCRUBMSK 0xfff0
41#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
42#define MCACOD_DATA 0x0134 /* Data Load */
43#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
44
45/* MCi_MISC register defines */
46#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
47#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
48#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
49#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
50#define MCI_MISC_ADDR_PHYS 2 /* physical address */
51#define MCI_MISC_ADDR_MEM 3 /* memory address */
52#define MCI_MISC_ADDR_GENERIC 7 /* generic */
53
54/* CTL2 register defines */
55#define MCI_CTL2_CMCI_EN (1ULL << 30)
56#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
57
58#define MCJ_CTX_MASK 3
59#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
60#define MCJ_CTX_RANDOM 0 /* inject context: random */
61#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
62#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
63#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
64#define MCJ_EXCEPTION 0x8 /* raise as exception */
65#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
66
67/* Fields are zero when not available */ 7/* Fields are zero when not available */
68struct mce { 8struct mce {
69 __u64 status; 9 __u64 status;
@@ -87,35 +27,8 @@ struct mce {
87 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ 27 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
88}; 28};
89 29
90/*
91 * This structure contains all data related to the MCE log. Also
92 * carries a signature to make it easier to find from external
93 * debugging tools. Each entry is only valid when its finished flag
94 * is set.
95 */
96
97#define MCE_LOG_LEN 32
98
99struct mce_log {
100 char signature[12]; /* "MACHINECHECK" */
101 unsigned len; /* = MCE_LOG_LEN */
102 unsigned next;
103 unsigned flags;
104 unsigned recordlen; /* length of struct mce */
105 struct mce entry[MCE_LOG_LEN];
106};
107
108#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
109
110#define MCE_LOG_SIGNATURE "MACHINECHECK"
111
112#define MCE_GET_RECORD_LEN _IOR('M', 1, int) 30#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
113#define MCE_GET_LOG_LEN _IOR('M', 2, int) 31#define MCE_GET_LOG_LEN _IOR('M', 2, int)
114#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) 32#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
115 33
116/* Software defined banks */
117#define MCE_EXTENDED_BANK 128
118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
119#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
120
121#endif /* _UAPI_ASM_X86_MCE_H */ 34#endif /* _UAPI_ASM_X86_MCE_H */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 433a59fb1a74..7a060f4b411f 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -44,6 +44,7 @@
44#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) 44#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
45#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) 45#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
46 46
47#define MSR_PLATFORM_INFO 0x000000ce
47#define MSR_MTRRcap 0x000000fe 48#define MSR_MTRRcap 0x000000fe
48#define MSR_IA32_BBL_CR_CTL 0x00000119 49#define MSR_IA32_BBL_CR_CTL 0x00000119
49#define MSR_IA32_BBL_CR_CTL3 0x0000011e 50#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@@ -103,6 +104,8 @@
103#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) 104#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
104#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) 105#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
105 106
107#define MSR_IA32_POWER_CTL 0x000001fc
108
106#define MSR_IA32_MC0_CTL 0x00000400 109#define MSR_IA32_MC0_CTL 0x00000400
107#define MSR_IA32_MC0_STATUS 0x00000401 110#define MSR_IA32_MC0_STATUS 0x00000401
108#define MSR_IA32_MC0_ADDR 0x00000402 111#define MSR_IA32_MC0_ADDR 0x00000402
@@ -173,6 +176,7 @@
173#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 176#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
174#define MSR_AMD64_OSVW_STATUS 0xc0010141 177#define MSR_AMD64_OSVW_STATUS 0xc0010141
175#define MSR_AMD64_DC_CFG 0xc0011022 178#define MSR_AMD64_DC_CFG 0xc0011022
179#define MSR_AMD64_BU_CFG2 0xc001102a
176#define MSR_AMD64_IBSFETCHCTL 0xc0011030 180#define MSR_AMD64_IBSFETCHCTL 0xc0011030
177#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 181#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
178#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 182#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
@@ -194,6 +198,8 @@
194/* Fam 15h MSRs */ 198/* Fam 15h MSRs */
195#define MSR_F15H_PERF_CTL 0xc0010200 199#define MSR_F15H_PERF_CTL 0xc0010200
196#define MSR_F15H_PERF_CTR 0xc0010201 200#define MSR_F15H_PERF_CTR 0xc0010201
201#define MSR_F15H_NB_PERF_CTL 0xc0010240
202#define MSR_F15H_NB_PERF_CTR 0xc0010241
197 203
198/* Fam 10h MSRs */ 204/* Fam 10h MSRs */
199#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 205#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
@@ -272,6 +278,7 @@
272#define MSR_IA32_PLATFORM_ID 0x00000017 278#define MSR_IA32_PLATFORM_ID 0x00000017
273#define MSR_IA32_EBL_CR_POWERON 0x0000002a 279#define MSR_IA32_EBL_CR_POWERON 0x0000002a
274#define MSR_EBC_FREQUENCY_ID 0x0000002c 280#define MSR_EBC_FREQUENCY_ID 0x0000002c
281#define MSR_SMI_COUNT 0x00000034
275#define MSR_IA32_FEATURE_CONTROL 0x0000003a 282#define MSR_IA32_FEATURE_CONTROL 0x0000003a
276#define MSR_IA32_TSC_ADJUST 0x0000003b 283#define MSR_IA32_TSC_ADJUST 0x0000003b
277 284
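
Note: MSR_SMI_COUNT added above is a plain read-only counter MSR. From userspace it can be inspected through the msr character device, where the MSR number is the file offset. A hedged sketch, not part of this patch, assuming the msr driver is loaded and the process has sufficient privileges:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_SMI_COUNT	0x00000034

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* the MSR address doubles as the read offset */
	if (pread(fd, &val, sizeof(val), MSR_SMI_COUNT) != (ssize_t)sizeof(val)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("SMI count: %llu\n", (unsigned long long)val);
	close(fd);
	return 0;
}
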
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index aa7d6ae39e0e..8264f47cf53e 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -95,9 +95,9 @@ typedef unsigned long sigset_t;
95#ifndef __ASSEMBLY__ 95#ifndef __ASSEMBLY__
96 96
97 97
98#ifdef __i386__
99# ifndef __KERNEL__ 98# ifndef __KERNEL__
100/* Here we must cater to libcs that poke about in kernel headers. */ 99/* Here we must cater to libcs that poke about in kernel headers. */
100#ifdef __i386__
101 101
102struct sigaction { 102struct sigaction {
103 union { 103 union {
@@ -112,7 +112,6 @@ struct sigaction {
112#define sa_handler _u._sa_handler 112#define sa_handler _u._sa_handler
113#define sa_sigaction _u._sa_sigaction 113#define sa_sigaction _u._sa_sigaction
114 114
115# endif /* ! __KERNEL__ */
116#else /* __i386__ */ 115#else /* __i386__ */
117 116
118struct sigaction { 117struct sigaction {
@@ -122,11 +121,8 @@ struct sigaction {
122 sigset_t sa_mask; /* mask last for extensibility */ 121 sigset_t sa_mask; /* mask last for extensibility */
123}; 122};
124 123
125struct k_sigaction {
126 struct sigaction sa;
127};
128
129#endif /* !__i386__ */ 124#endif /* !__i386__ */
125# endif /* ! __KERNEL__ */
130 126
131typedef struct sigaltstack { 127typedef struct sigaltstack {
132 void __user *ss_sp; 128 void __user *ss_sp;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 979d03bce135..2871fccfee68 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -62,10 +62,12 @@
62#define EXIT_REASON_MCE_DURING_VMENTRY 41 62#define EXIT_REASON_MCE_DURING_VMENTRY 41
63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
64#define EXIT_REASON_APIC_ACCESS 44 64#define EXIT_REASON_APIC_ACCESS 44
65#define EXIT_REASON_EOI_INDUCED 45
65#define EXIT_REASON_EPT_VIOLATION 48 66#define EXIT_REASON_EPT_VIOLATION 48
66#define EXIT_REASON_EPT_MISCONFIG 49 67#define EXIT_REASON_EPT_MISCONFIG 49
67#define EXIT_REASON_WBINVD 54 68#define EXIT_REASON_WBINVD 54
68#define EXIT_REASON_XSETBV 55 69#define EXIT_REASON_XSETBV 55
70#define EXIT_REASON_APIC_WRITE 56
69#define EXIT_REASON_INVPCID 58 71#define EXIT_REASON_INVPCID 58
70 72
71#define VMX_EXIT_REASONS \ 73#define VMX_EXIT_REASONS \
@@ -103,7 +105,12 @@
103 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ 105 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
104 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ 106 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
105 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ 107 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
106 { EXIT_REASON_WBINVD, "WBINVD" } 108 { EXIT_REASON_WBINVD, "WBINVD" }, \
109 { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
110 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
111 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
112 { EXIT_REASON_INVD, "INVD" }, \
113 { EXIT_REASON_INVPCID, "INVPCID" }
107 114
108 115
109#endif /* _UAPIVMX_H */ 116#endif /* _UAPIVMX_H */
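
Note: VMX_EXIT_REASONS above is a list of { code, "NAME" } pairs consumed by the tracing macros to print symbolic exit reasons. A self-contained sketch of that kind of lookup (only the three constants are taken from the hunk; the helper is illustrative, not the kernel's):

#include <stdio.h>
#include <stddef.h>

#define EXIT_REASON_EOI_INDUCED	45
#define EXIT_REASON_APIC_WRITE	56
#define EXIT_REASON_INVPCID	58

static const struct { int reason; const char *name; } vmx_exit_names[] = {
	{ EXIT_REASON_EOI_INDUCED,	"EOI_INDUCED" },
	{ EXIT_REASON_APIC_WRITE,	"APIC_WRITE" },
	{ EXIT_REASON_INVPCID,		"INVPCID" },
};

static const char *exit_name(int reason)
{
	for (size_t i = 0; i < sizeof(vmx_exit_names) / sizeof(vmx_exit_names[0]); i++)
		if (vmx_exit_names[i].reason == reason)
			return vmx_exit_names[i].name;
	return "UNKNOWN";
}

int main(void)
{
	printf("exit 56 -> %s\n", exit_name(56));
	return 0;
}
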
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a53762..7bd3bd310106 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o
65obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 65obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
66obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 66obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
68obj-$(CONFIG_KPROBES) += kprobes.o 68obj-y += kprobes/
69obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
70obj-$(CONFIG_MODULES) += module.o 69obj-$(CONFIG_MODULES) += module.o
71obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o 70obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
72obj-$(CONFIG_KGDB) += kgdb.o 71obj-$(CONFIG_KGDB) += kgdb.o
@@ -88,6 +87,9 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
88 87
89obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 88obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
90 89
90obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o
91obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o
92obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o
91microcode-y := microcode_core.o 93microcode-y := microcode_core.o
92microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o 94microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
93microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o 95microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bacf4b0d91f4..230c8ea878e5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled);
51 51
52#ifdef CONFIG_X86_64 52#ifdef CONFIG_X86_64
53# include <asm/proto.h> 53# include <asm/proto.h>
54# include <asm/numa_64.h>
55#endif /* X86 */ 54#endif /* X86 */
56 55
57#define BAD_MADT_ENTRY(entry, end) ( \ 56#define BAD_MADT_ENTRY(entry, end) ( \
@@ -697,6 +696,10 @@ EXPORT_SYMBOL(acpi_map_lsapic);
697 696
698int acpi_unmap_lsapic(int cpu) 697int acpi_unmap_lsapic(int cpu)
699{ 698{
699#ifdef CONFIG_ACPI_NUMA
700 set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE);
701#endif
702
700 per_cpu(x86_cpu_to_apicid, cpu) = -1; 703 per_cpu(x86_cpu_to_apicid, cpu) = -1;
701 set_cpu_present(cpu, false); 704 set_cpu_present(cpu, false);
702 num_processors--; 705 num_processors--;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index d5e0d717005a..0532f5d6e4ef 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void)
69 69
70#ifndef CONFIG_64BIT 70#ifndef CONFIG_64BIT
71 header->pmode_entry = (u32)&wakeup_pmode_return; 71 header->pmode_entry = (u32)&wakeup_pmode_return;
72 header->pmode_cr3 = (u32)__pa(&initial_page_table); 72 header->pmode_cr3 = (u32)__pa_symbol(initial_page_table);
73 saved_magic = 0x12345678; 73 saved_magic = 0x12345678;
74#else /* CONFIG_64BIT */ 74#else /* CONFIG_64BIT */
75#ifdef CONFIG_SMP 75#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index e66311200cbd..b574b295a2f9 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -768,10 +768,9 @@ int __init gart_iommu_init(void)
768 aper_base = info.aper_base; 768 aper_base = info.aper_base;
769 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); 769 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
770 770
771 if (end_pfn > max_low_pfn_mapped) { 771 start_pfn = PFN_DOWN(aper_base);
772 start_pfn = (aper_base>>PAGE_SHIFT); 772 if (!pfn_range_is_mapped(start_pfn, end_pfn))
773 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); 773 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
774 }
775 774
776 pr_info("PCI-DMA: using GART IOMMU.\n"); 775 pr_info("PCI-DMA: using GART IOMMU.\n");
777 iommu_size = check_iommu_size(info.aper_base, aper_size); 776 iommu_size = check_iommu_size(info.aper_base, aper_size);
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index afdc3f756dea..c9876efecafb 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -240,7 +240,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
240 dw_apb_clockevent_pause(adev->timer); 240 dw_apb_clockevent_pause(adev->timer);
241 if (system_state == SYSTEM_RUNNING) { 241 if (system_state == SYSTEM_RUNNING) {
242 pr_debug("skipping APBT CPU %lu offline\n", cpu); 242 pr_debug("skipping APBT CPU %lu offline\n", cpu);
243 } else if (adev) { 243 } else {
244 pr_debug("APBT clockevent for cpu %lu offline\n", cpu); 244 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
245 dw_apb_clockevent_stop(adev->timer); 245 dw_apb_clockevent_stop(adev->timer);
246 } 246 }
@@ -311,7 +311,6 @@ void __init apbt_time_init(void)
311#ifdef CONFIG_SMP 311#ifdef CONFIG_SMP
312 int i; 312 int i;
313 struct sfi_timer_table_entry *p_mtmr; 313 struct sfi_timer_table_entry *p_mtmr;
314 unsigned int percpu_timer;
315 struct apbt_dev *adev; 314 struct apbt_dev *adev;
316#endif 315#endif
317 316
@@ -346,13 +345,10 @@ void __init apbt_time_init(void)
346 return; 345 return;
347 } 346 }
348 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); 347 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
349 if (num_possible_cpus() <= sfi_mtimer_num) { 348 if (num_possible_cpus() <= sfi_mtimer_num)
350 percpu_timer = 1;
351 apbt_num_timers_used = num_possible_cpus(); 349 apbt_num_timers_used = num_possible_cpus();
352 } else { 350 else
353 percpu_timer = 0;
354 apbt_num_timers_used = 1; 351 apbt_num_timers_used = 1;
355 }
356 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); 352 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
357 353
358 /* here we set up per CPU timer data structure */ 354 /* here we set up per CPU timer data structure */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b994cc84aa7e..904611bf0e5a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -131,7 +131,7 @@ static int __init parse_lapic(char *arg)
131{ 131{
132 if (config_enabled(CONFIG_X86_32) && !arg) 132 if (config_enabled(CONFIG_X86_32) && !arg)
133 force_enable_local_apic = 1; 133 force_enable_local_apic = 1;
134 else if (!strncmp(arg, "notscdeadline", 13)) 134 else if (arg && !strncmp(arg, "notscdeadline", 13))
135 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 135 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
136 return 0; 136 return 0;
137} 137}
@@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void)
1477 * Now that local APIC setup is completed for BP, configure the fault 1477 * Now that local APIC setup is completed for BP, configure the fault
1478 * handling for interrupt remapping. 1478 * handling for interrupt remapping.
1479 */ 1479 */
1480 if (irq_remapping_enabled) 1480 irq_remap_enable_fault_handling();
1481 irq_remap_enable_fault_handling();
1482 1481
1483} 1482}
1484 1483
@@ -2251,8 +2250,7 @@ static int lapic_suspend(void)
2251 local_irq_save(flags); 2250 local_irq_save(flags);
2252 disable_local_APIC(); 2251 disable_local_APIC();
2253 2252
2254 if (irq_remapping_enabled) 2253 irq_remapping_disable();
2255 irq_remapping_disable();
2256 2254
2257 local_irq_restore(flags); 2255 local_irq_restore(flags);
2258 return 0; 2256 return 0;
@@ -2268,16 +2266,15 @@ static void lapic_resume(void)
2268 return; 2266 return;
2269 2267
2270 local_irq_save(flags); 2268 local_irq_save(flags);
2271 if (irq_remapping_enabled) { 2269
2272 /* 2270 /*
2273 * IO-APIC and PIC have their own resume routines. 2271 * IO-APIC and PIC have their own resume routines.
2274 * We just mask them here to make sure the interrupt 2272 * We just mask them here to make sure the interrupt
2275 * subsystem is completely quiet while we enable x2apic 2273 * subsystem is completely quiet while we enable x2apic
2276 * and interrupt-remapping. 2274 * and interrupt-remapping.
2277 */ 2275 */
2278 mask_ioapic_entries(); 2276 mask_ioapic_entries();
2279 legacy_pic->mask_all(); 2277 legacy_pic->mask_all();
2280 }
2281 2278
2282 if (x2apic_mode) 2279 if (x2apic_mode)
2283 enable_x2apic(); 2280 enable_x2apic();
@@ -2320,8 +2317,7 @@ static void lapic_resume(void)
2320 apic_write(APIC_ESR, 0); 2317 apic_write(APIC_ESR, 0);
2321 apic_read(APIC_ESR); 2318 apic_read(APIC_ESR);
2322 2319
2323 if (irq_remapping_enabled) 2320 irq_remapping_reenable(x2apic_mode);
2324 irq_remapping_reenable(x2apic_mode);
2325 2321
2326 local_irq_restore(flags); 2322 local_irq_restore(flags);
2327} 2323}
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 9c2aa89a11cb..9a9110918ca7 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -28,6 +28,7 @@
28#include <asm/apic.h> 28#include <asm/apic.h>
29#include <asm/ipi.h> 29#include <asm/ipi.h>
30#include <asm/apic_flat_64.h> 30#include <asm/apic_flat_64.h>
31#include <asm/pgtable.h>
31 32
32static int numachip_system __read_mostly; 33static int numachip_system __read_mostly;
33 34
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb29..9ed796ccc32c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,22 +68,6 @@
68#define for_each_irq_pin(entry, head) \ 68#define for_each_irq_pin(entry, head) \
69 for (entry = head; entry; entry = entry->next) 69 for (entry = head; entry; entry = entry->next)
70 70
71#ifdef CONFIG_IRQ_REMAP
72static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
73static inline bool irq_remapped(struct irq_cfg *cfg)
74{
75 return cfg->irq_2_iommu.iommu != NULL;
76}
77#else
78static inline bool irq_remapped(struct irq_cfg *cfg)
79{
80 return false;
81}
82static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
83{
84}
85#endif
86
87/* 71/*
88 * Is the SiS APIC rmw bug present ? 72 * Is the SiS APIC rmw bug present ?
89 * -1 = don't know, 0 = no, 1 = yes 73 * -1 = don't know, 0 = no, 1 = yes
@@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
300 return cfg; 284 return cfg;
301} 285}
302 286
303static int alloc_irq_from(unsigned int from, int node) 287static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
304{ 288{
305 return irq_alloc_desc_from(from, node); 289 return irq_alloc_descs_from(from, count, node);
306} 290}
307 291
308static void free_irq_at(unsigned int at, struct irq_cfg *cfg) 292static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
326 + (mpc_ioapic_addr(idx) & ~PAGE_MASK); 310 + (mpc_ioapic_addr(idx) & ~PAGE_MASK);
327} 311}
328 312
329static inline void io_apic_eoi(unsigned int apic, unsigned int vector) 313void io_apic_eoi(unsigned int apic, unsigned int vector)
330{ 314{
331 struct io_apic __iomem *io_apic = io_apic_base(apic); 315 struct io_apic __iomem *io_apic = io_apic_base(apic);
332 writel(vector, &io_apic->eoi); 316 writel(vector, &io_apic->eoi);
@@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data)
  * Otherwise, we simulate the EOI message manually by changing the trigger
  * mode to edge and then back to level, with RTE being masked during this.
  */
-static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+void native_eoi_ioapic_pin(int apic, int pin, int vector)
 {
 	if (mpc_ioapic_ver(apic) >= 0x20) {
-		/*
-		 * Intr-remapping uses pin number as the virtual vector
-		 * in the RTE. Actual vector is programmed in
-		 * intr-remapping table entry. Hence for the io-apic
-		 * EOI we use the pin number.
-		 */
-		if (cfg && irq_remapped(cfg))
-			io_apic_eoi(apic, pin);
-		else
-			io_apic_eoi(apic, vector);
+		io_apic_eoi(apic, vector);
 	} else {
 		struct IO_APIC_route_entry entry, entry1;
 
@@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
606 } 581 }
607} 582}
608 583
609static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 584void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
610{ 585{
611 struct irq_pin_list *entry; 586 struct irq_pin_list *entry;
612 unsigned long flags; 587 unsigned long flags;
613 588
614 raw_spin_lock_irqsave(&ioapic_lock, flags); 589 raw_spin_lock_irqsave(&ioapic_lock, flags);
615 for_each_irq_pin(entry, cfg->irq_2_pin) 590 for_each_irq_pin(entry, cfg->irq_2_pin)
616 __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); 591 x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
592 cfg->vector);
617 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 593 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
618} 594}
619 595
@@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
650 } 626 }
651 627
652 raw_spin_lock_irqsave(&ioapic_lock, flags); 628 raw_spin_lock_irqsave(&ioapic_lock, flags);
653 __eoi_ioapic_pin(apic, pin, entry.vector, NULL); 629 x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
654 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 630 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
655 } 631 }
656 632
@@ -1304,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
 		fasteoi = false;
 	}
 
-	if (irq_remapped(cfg)) {
-		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-		irq_remap_modify_chip_defaults(chip);
+	if (setup_remapped_irq(irq, cfg, chip))
 		fasteoi = trigger != 0;
-	}
 
 	hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
 	irq_set_chip_and_handler_name(irq, chip, hdl,
 				      fasteoi ? "fasteoi" : "edge");
 }
 
-static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
 			      unsigned int destination, int vector,
 			      struct io_apic_irq_attr *attr)
 {
-	if (irq_remapping_enabled)
-		return setup_ioapic_remapped_entry(irq, entry, destination,
-						   vector, attr);
-
 	memset(entry, 0, sizeof(*entry));
 
 	entry->delivery_mode = apic->irq_delivery_mode;
@@ -1370,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
1370 attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, 1339 attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
1371 cfg->vector, irq, attr->trigger, attr->polarity, dest); 1340 cfg->vector, irq, attr->trigger, attr->polarity, dest);
1372 1341
1373 if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { 1342 if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
1374 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1343 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1375 mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); 1344 mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
1376 __clear_irq_vector(irq, cfg); 1345 __clear_irq_vector(irq, cfg);
1377 1346
@@ -1479,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1479 struct IO_APIC_route_entry entry; 1448 struct IO_APIC_route_entry entry;
1480 unsigned int dest; 1449 unsigned int dest;
1481 1450
1482 if (irq_remapping_enabled)
1483 return;
1484
1485 memset(&entry, 0, sizeof(entry)); 1451 memset(&entry, 0, sizeof(entry));
1486 1452
1487 /* 1453 /*
@@ -1513,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1513 ioapic_write_entry(ioapic_idx, pin, entry); 1479 ioapic_write_entry(ioapic_idx, pin, entry);
1514} 1480}
1515 1481
1516__apicdebuginit(void) print_IO_APIC(int ioapic_idx) 1482void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
1517{ 1483{
1518 int i; 1484 int i;
1485
1486 pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
1487
1488 for (i = 0; i <= nr_entries; i++) {
1489 struct IO_APIC_route_entry entry;
1490
1491 entry = ioapic_read_entry(apic, i);
1492
1493 pr_debug(" %02x %02X ", i, entry.dest);
1494 pr_cont("%1d %1d %1d %1d %1d "
1495 "%1d %1d %02X\n",
1496 entry.mask,
1497 entry.trigger,
1498 entry.irr,
1499 entry.polarity,
1500 entry.delivery_status,
1501 entry.dest_mode,
1502 entry.delivery_mode,
1503 entry.vector);
1504 }
1505}
1506
1507void intel_ir_io_apic_print_entries(unsigned int apic,
1508 unsigned int nr_entries)
1509{
1510 int i;
1511
1512 pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
1513
1514 for (i = 0; i <= nr_entries; i++) {
1515 struct IR_IO_APIC_route_entry *ir_entry;
1516 struct IO_APIC_route_entry entry;
1517
1518 entry = ioapic_read_entry(apic, i);
1519
1520 ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
1521
1522 pr_debug(" %02x %04X ", i, ir_entry->index);
1523 pr_cont("%1d %1d %1d %1d %1d "
1524 "%1d %1d %X %02X\n",
1525 ir_entry->format,
1526 ir_entry->mask,
1527 ir_entry->trigger,
1528 ir_entry->irr,
1529 ir_entry->polarity,
1530 ir_entry->delivery_status,
1531 ir_entry->index2,
1532 ir_entry->zero,
1533 ir_entry->vector);
1534 }
1535}
1536
1537__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1538{
1519 union IO_APIC_reg_00 reg_00; 1539 union IO_APIC_reg_00 reg_00;
1520 union IO_APIC_reg_01 reg_01; 1540 union IO_APIC_reg_01 reg_01;
1521 union IO_APIC_reg_02 reg_02; 1541 union IO_APIC_reg_02 reg_02;
@@ -1568,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1568 1588
1569 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1589 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1570 1590
1571 if (irq_remapping_enabled) { 1591 x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
1572 printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
1573 " Pol Stat Indx2 Zero Vect:\n");
1574 } else {
1575 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1576 " Stat Dmod Deli Vect:\n");
1577 }
1578
1579 for (i = 0; i <= reg_01.bits.entries; i++) {
1580 if (irq_remapping_enabled) {
1581 struct IO_APIC_route_entry entry;
1582 struct IR_IO_APIC_route_entry *ir_entry;
1583
1584 entry = ioapic_read_entry(ioapic_idx, i);
1585 ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
1586 printk(KERN_DEBUG " %02x %04X ",
1587 i,
1588 ir_entry->index
1589 );
1590 pr_cont("%1d %1d %1d %1d %1d "
1591 "%1d %1d %X %02X\n",
1592 ir_entry->format,
1593 ir_entry->mask,
1594 ir_entry->trigger,
1595 ir_entry->irr,
1596 ir_entry->polarity,
1597 ir_entry->delivery_status,
1598 ir_entry->index2,
1599 ir_entry->zero,
1600 ir_entry->vector
1601 );
1602 } else {
1603 struct IO_APIC_route_entry entry;
1604
1605 entry = ioapic_read_entry(ioapic_idx, i);
1606 printk(KERN_DEBUG " %02x %02X ",
1607 i,
1608 entry.dest
1609 );
1610 pr_cont("%1d %1d %1d %1d %1d "
1611 "%1d %1d %02X\n",
1612 entry.mask,
1613 entry.trigger,
1614 entry.irr,
1615 entry.polarity,
1616 entry.delivery_status,
1617 entry.dest_mode,
1618 entry.delivery_mode,
1619 entry.vector
1620 );
1621 }
1622 }
1623} 1592}
1624 1593
1625__apicdebuginit(void) print_IO_APICs(void) 1594__apicdebuginit(void) print_IO_APICs(void)
@@ -1921,30 +1890,14 @@ void __init enable_IO_APIC(void)
 	clear_IO_APIC();
 }
 
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
+void native_disable_io_apic(void)
 {
 	/*
-	 * Clear the IO-APIC before rebooting:
-	 */
-	clear_IO_APIC();
-
-	if (!legacy_pic->nr_legacy_irqs)
-		return;
-
-	/*
 	 * If the i8259 is routed through an IOAPIC
 	 * Put that IOAPIC in virtual wire mode
 	 * so legacy interrupts can be delivered.
-	 *
-	 * With interrupt-remapping, for now we will use virtual wire A mode,
-	 * as virtual wire B is little complex (need to configure both
-	 * IOAPIC RTE as well as interrupt-remapping table entry).
-	 * As this gets called during crash dump, keep this simple for now.
 	 */
-	if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
+	if (ioapic_i8259.pin != -1) {
 		struct IO_APIC_route_entry entry;
 
 		memset(&entry, 0, sizeof(entry));
@@ -1964,12 +1917,25 @@ void disable_IO_APIC(void)
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
 
+	if (cpu_has_apic || apic_from_smp_config())
+		disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
 	/*
-	 * Use virtual wire A mode when interrupt remapping is enabled.
+	 * Clear the IO-APIC before rebooting:
 	 */
-	if (cpu_has_apic || apic_from_smp_config())
-		disconnect_bsp_APIC(!irq_remapping_enabled &&
-				ioapic_i8259.pin != -1);
+	clear_IO_APIC();
+
+	if (!legacy_pic->nr_legacy_irqs)
+		return;
+
+	x86_io_apic_ops.disable();
 }
 
 #ifdef CONFIG_X86_32
@@ -2322,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2322 2288
2323 apic = entry->apic; 2289 apic = entry->apic;
2324 pin = entry->pin; 2290 pin = entry->pin;
2325 /* 2291
2326 * With interrupt-remapping, destination information comes 2292 io_apic_write(apic, 0x11 + pin*2, dest);
2327 * from interrupt-remapping table entry.
2328 */
2329 if (!irq_remapped(cfg))
2330 io_apic_write(apic, 0x11 + pin*2, dest);
2331 reg = io_apic_read(apic, 0x10 + pin*2); 2293 reg = io_apic_read(apic, 0x10 + pin*2);
2332 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2294 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2333 reg |= vector; 2295 reg |= vector;
@@ -2369,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2369 return 0; 2331 return 0;
2370} 2332}
2371 2333
2372static int 2334
2373ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, 2335int native_ioapic_set_affinity(struct irq_data *data,
2374 bool force) 2336 const struct cpumask *mask,
2337 bool force)
2375{ 2338{
2376 unsigned int dest, irq = data->irq; 2339 unsigned int dest, irq = data->irq;
2377 unsigned long flags; 2340 unsigned long flags;
@@ -2548,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data)
2548 ioapic_irqd_unmask(data, cfg, masked); 2511 ioapic_irqd_unmask(data, cfg, masked);
2549} 2512}
2550 2513
2551#ifdef CONFIG_IRQ_REMAP
2552static void ir_ack_apic_edge(struct irq_data *data)
2553{
2554 ack_APIC_irq();
2555}
2556
2557static void ir_ack_apic_level(struct irq_data *data)
2558{
2559 ack_APIC_irq();
2560 eoi_ioapic_irq(data->irq, data->chip_data);
2561}
2562
2563static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
2564{
2565 seq_printf(p, " IR-%s", data->chip->name);
2566}
2567
2568static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
2569{
2570 chip->irq_print_chip = ir_print_prefix;
2571 chip->irq_ack = ir_ack_apic_edge;
2572 chip->irq_eoi = ir_ack_apic_level;
2573
2574 chip->irq_set_affinity = set_remapped_irq_affinity;
2575}
2576#endif /* CONFIG_IRQ_REMAP */
2577
2578static struct irq_chip ioapic_chip __read_mostly = { 2514static struct irq_chip ioapic_chip __read_mostly = {
2579 .name = "IO-APIC", 2515 .name = "IO-APIC",
2580 .irq_startup = startup_ioapic_irq, 2516 .irq_startup = startup_ioapic_irq,
@@ -2582,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
2582 .irq_unmask = unmask_ioapic_irq, 2518 .irq_unmask = unmask_ioapic_irq,
2583 .irq_ack = ack_apic_edge, 2519 .irq_ack = ack_apic_edge,
2584 .irq_eoi = ack_apic_level, 2520 .irq_eoi = ack_apic_level,
2585 .irq_set_affinity = ioapic_set_affinity, 2521 .irq_set_affinity = native_ioapic_set_affinity,
2586 .irq_retrigger = ioapic_retrigger_irq, 2522 .irq_retrigger = ioapic_retrigger_irq,
2587}; 2523};
2588 2524
@@ -2781,8 +2717,7 @@ static inline void __init check_timer(void)
2781 * 8259A. 2717 * 8259A.
2782 */ 2718 */
2783 if (pin1 == -1) { 2719 if (pin1 == -1) {
2784 if (irq_remapping_enabled) 2720 panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
2785 panic("BIOS bug: timer not connected to IO-APIC");
2786 pin1 = pin2; 2721 pin1 = pin2;
2787 apic1 = apic2; 2722 apic1 = apic2;
2788 no_pin1 = 1; 2723 no_pin1 = 1;
@@ -2814,8 +2749,7 @@ static inline void __init check_timer(void)
2814 clear_IO_APIC_pin(0, pin1); 2749 clear_IO_APIC_pin(0, pin1);
2815 goto out; 2750 goto out;
2816 } 2751 }
2817 if (irq_remapping_enabled) 2752 panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
2818 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2819 local_irq_disable(); 2753 local_irq_disable();
2820 clear_IO_APIC_pin(apic1, pin1); 2754 clear_IO_APIC_pin(apic1, pin1);
2821 if (!no_pin1) 2755 if (!no_pin1)
@@ -2982,37 +2916,58 @@ device_initcall(ioapic_init_ops);
2982/* 2916/*
2983 * Dynamic irq allocate and deallocation 2917 * Dynamic irq allocate and deallocation
2984 */ 2918 */
2985unsigned int create_irq_nr(unsigned int from, int node) 2919unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
2986{ 2920{
2987 struct irq_cfg *cfg; 2921 struct irq_cfg **cfg;
2988 unsigned long flags; 2922 unsigned long flags;
2989 unsigned int ret = 0; 2923 int irq, i;
2990 int irq;
2991 2924
2992 if (from < nr_irqs_gsi) 2925 if (from < nr_irqs_gsi)
2993 from = nr_irqs_gsi; 2926 from = nr_irqs_gsi;
2994 2927
2995 irq = alloc_irq_from(from, node); 2928 cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
2996 if (irq < 0) 2929 if (!cfg)
2997 return 0;
2998 cfg = alloc_irq_cfg(irq, node);
2999 if (!cfg) {
3000 free_irq_at(irq, NULL);
3001 return 0; 2930 return 0;
2931
2932 irq = alloc_irqs_from(from, count, node);
2933 if (irq < 0)
2934 goto out_cfgs;
2935
2936 for (i = 0; i < count; i++) {
2937 cfg[i] = alloc_irq_cfg(irq + i, node);
2938 if (!cfg[i])
2939 goto out_irqs;
3002 } 2940 }
3003 2941
3004 raw_spin_lock_irqsave(&vector_lock, flags); 2942 raw_spin_lock_irqsave(&vector_lock, flags);
3005 if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) 2943 for (i = 0; i < count; i++)
3006 ret = irq; 2944 if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
2945 goto out_vecs;
3007 raw_spin_unlock_irqrestore(&vector_lock, flags); 2946 raw_spin_unlock_irqrestore(&vector_lock, flags);
3008 2947
3009 if (ret) { 2948 for (i = 0; i < count; i++) {
3010 irq_set_chip_data(irq, cfg); 2949 irq_set_chip_data(irq + i, cfg[i]);
3011 irq_clear_status_flags(irq, IRQ_NOREQUEST); 2950 irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
3012 } else {
3013 free_irq_at(irq, cfg);
3014 } 2951 }
3015 return ret; 2952
2953 kfree(cfg);
2954 return irq;
2955
2956out_vecs:
2957 for (i--; i >= 0; i--)
2958 __clear_irq_vector(irq + i, cfg[i]);
2959 raw_spin_unlock_irqrestore(&vector_lock, flags);
2960out_irqs:
2961 for (i = 0; i < count; i++)
2962 free_irq_at(irq + i, cfg[i]);
2963out_cfgs:
2964 kfree(cfg);
2965 return 0;
2966}
2967
2968unsigned int create_irq_nr(unsigned int from, int node)
2969{
2970 return __create_irqs(from, 1, node);
3016} 2971}
3017 2972
3018int create_irq(void) 2973int create_irq(void)
@@ -3037,48 +2992,35 @@ void destroy_irq(unsigned int irq)
3037 2992
3038 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 2993 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3039 2994
3040 if (irq_remapped(cfg)) 2995 free_remapped_irq(irq);
3041 free_remapped_irq(irq); 2996
3042 raw_spin_lock_irqsave(&vector_lock, flags); 2997 raw_spin_lock_irqsave(&vector_lock, flags);
3043 __clear_irq_vector(irq, cfg); 2998 __clear_irq_vector(irq, cfg);
3044 raw_spin_unlock_irqrestore(&vector_lock, flags); 2999 raw_spin_unlock_irqrestore(&vector_lock, flags);
3045 free_irq_at(irq, cfg); 3000 free_irq_at(irq, cfg);
3046} 3001}
3047 3002
3003void destroy_irqs(unsigned int irq, unsigned int count)
3004{
3005 unsigned int i;
3006
3007 for (i = 0; i < count; i++)
3008 destroy_irq(irq + i);
3009}
3010
3048/* 3011/*
3049 * MSI message composition 3012 * MSI message composition
3050 */ 3013 */
3051#ifdef CONFIG_PCI_MSI 3014void native_compose_msi_msg(struct pci_dev *pdev,
3052static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, 3015 unsigned int irq, unsigned int dest,
3053 struct msi_msg *msg, u8 hpet_id) 3016 struct msi_msg *msg, u8 hpet_id)
3054{ 3017{
3055 struct irq_cfg *cfg; 3018 struct irq_cfg *cfg = irq_cfg(irq);
3056 int err;
3057 unsigned dest;
3058
3059 if (disable_apic)
3060 return -ENXIO;
3061
3062 cfg = irq_cfg(irq);
3063 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3064 if (err)
3065 return err;
3066 3019
3067 err = apic->cpu_mask_to_apicid_and(cfg->domain, 3020 msg->address_hi = MSI_ADDR_BASE_HI;
3068 apic->target_cpus(), &dest);
3069 if (err)
3070 return err;
3071
3072 if (irq_remapped(cfg)) {
3073 compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
3074 return err;
3075 }
3076 3021
3077 if (x2apic_enabled()) 3022 if (x2apic_enabled())
3078 msg->address_hi = MSI_ADDR_BASE_HI | 3023 msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
3079 MSI_ADDR_EXT_DEST_ID(dest);
3080 else
3081 msg->address_hi = MSI_ADDR_BASE_HI;
3082 3024
3083 msg->address_lo = 3025 msg->address_lo =
3084 MSI_ADDR_BASE_LO | 3026 MSI_ADDR_BASE_LO |
@@ -3097,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3097 MSI_DATA_DELIVERY_FIXED: 3039 MSI_DATA_DELIVERY_FIXED:
3098 MSI_DATA_DELIVERY_LOWPRI) | 3040 MSI_DATA_DELIVERY_LOWPRI) |
3099 MSI_DATA_VECTOR(cfg->vector); 3041 MSI_DATA_VECTOR(cfg->vector);
3042}
3100 3043
3101 return err; 3044#ifdef CONFIG_PCI_MSI
3045static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3046 struct msi_msg *msg, u8 hpet_id)
3047{
3048 struct irq_cfg *cfg;
3049 int err;
3050 unsigned dest;
3051
3052 if (disable_apic)
3053 return -ENXIO;
3054
3055 cfg = irq_cfg(irq);
3056 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3057 if (err)
3058 return err;
3059
3060 err = apic->cpu_mask_to_apicid_and(cfg->domain,
3061 apic->target_cpus(), &dest);
3062 if (err)
3063 return err;
3064
3065 x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
3066
3067 return 0;
3102} 3068}
3103 3069
3104static int 3070static int
@@ -3136,23 +3102,28 @@ static struct irq_chip msi_chip = {
3136 .irq_retrigger = ioapic_retrigger_irq, 3102 .irq_retrigger = ioapic_retrigger_irq,
3137}; 3103};
3138 3104
3139static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3105int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
3106 unsigned int irq_base, unsigned int irq_offset)
3140{ 3107{
3141 struct irq_chip *chip = &msi_chip; 3108 struct irq_chip *chip = &msi_chip;
3142 struct msi_msg msg; 3109 struct msi_msg msg;
3110 unsigned int irq = irq_base + irq_offset;
3143 int ret; 3111 int ret;
3144 3112
3145 ret = msi_compose_msg(dev, irq, &msg, -1); 3113 ret = msi_compose_msg(dev, irq, &msg, -1);
3146 if (ret < 0) 3114 if (ret < 0)
3147 return ret; 3115 return ret;
3148 3116
3149 irq_set_msi_desc(irq, msidesc); 3117 irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
3150 write_msi_msg(irq, &msg);
3151 3118
3152 if (irq_remapped(irq_get_chip_data(irq))) { 3119 /*
3153 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3120 * MSI-X message is written per-IRQ, the offset is always 0.
3154 irq_remap_modify_chip_defaults(chip); 3121 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
3155 } 3122 */
3123 if (!irq_offset)
3124 write_msi_msg(irq, &msg);
3125
3126 setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
3156 3127
3157 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3128 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3158 3129
@@ -3163,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3163 3134
3164int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3135int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3165{ 3136{
3166 int node, ret, sub_handle, index = 0;
3167 unsigned int irq, irq_want; 3137 unsigned int irq, irq_want;
3168 struct msi_desc *msidesc; 3138 struct msi_desc *msidesc;
3139 int node, ret;
3169 3140
3170 /* x86 doesn't support multiple MSI yet */ 3141 /* Multiple MSI vectors only supported with interrupt remapping */
3171 if (type == PCI_CAP_ID_MSI && nvec > 1) 3142 if (type == PCI_CAP_ID_MSI && nvec > 1)
3172 return 1; 3143 return 1;
3173 3144
3174 node = dev_to_node(&dev->dev); 3145 node = dev_to_node(&dev->dev);
3175 irq_want = nr_irqs_gsi; 3146 irq_want = nr_irqs_gsi;
3176 sub_handle = 0;
3177 list_for_each_entry(msidesc, &dev->msi_list, list) { 3147 list_for_each_entry(msidesc, &dev->msi_list, list) {
3178 irq = create_irq_nr(irq_want, node); 3148 irq = create_irq_nr(irq_want, node);
3179 if (irq == 0) 3149 if (irq == 0)
3180 return -1; 3150 return -ENOSPC;
3151
3181 irq_want = irq + 1; 3152 irq_want = irq + 1;
3182 if (!irq_remapping_enabled)
3183 goto no_ir;
3184 3153
3185 if (!sub_handle) { 3154 ret = setup_msi_irq(dev, msidesc, irq, 0);
3186 /*
3187 * allocate the consecutive block of IRTE's
3188 * for 'nvec'
3189 */
3190 index = msi_alloc_remapped_irq(dev, irq, nvec);
3191 if (index < 0) {
3192 ret = index;
3193 goto error;
3194 }
3195 } else {
3196 ret = msi_setup_remapped_irq(dev, irq, index,
3197 sub_handle);
3198 if (ret < 0)
3199 goto error;
3200 }
3201no_ir:
3202 ret = setup_msi_irq(dev, msidesc, irq);
3203 if (ret < 0) 3155 if (ret < 0)
3204 goto error; 3156 goto error;
3205 sub_handle++;
3206 } 3157 }
3207 return 0; 3158 return 0;
3208 3159
@@ -3298,26 +3249,19 @@ static struct irq_chip hpet_msi_type = {
3298 .irq_retrigger = ioapic_retrigger_irq, 3249 .irq_retrigger = ioapic_retrigger_irq,
3299}; 3250};
3300 3251
3301int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3252int default_setup_hpet_msi(unsigned int irq, unsigned int id)
3302{ 3253{
3303 struct irq_chip *chip = &hpet_msi_type; 3254 struct irq_chip *chip = &hpet_msi_type;
3304 struct msi_msg msg; 3255 struct msi_msg msg;
3305 int ret; 3256 int ret;
3306 3257
3307 if (irq_remapping_enabled) {
3308 ret = setup_hpet_msi_remapped(irq, id);
3309 if (ret)
3310 return ret;
3311 }
3312
3313 ret = msi_compose_msg(NULL, irq, &msg, id); 3258 ret = msi_compose_msg(NULL, irq, &msg, id);
3314 if (ret < 0) 3259 if (ret < 0)
3315 return ret; 3260 return ret;
3316 3261
3317 hpet_msi_write(irq_get_handler_data(irq), &msg); 3262 hpet_msi_write(irq_get_handler_data(irq), &msg);
3318 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3263 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3319 if (irq_remapped(irq_get_chip_data(irq))) 3264 setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
3320 irq_remap_modify_chip_defaults(chip);
3321 3265
3322 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3266 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3323 return 0; 3267 return 0;
@@ -3683,10 +3627,7 @@ void __init setup_ioapic_dest(void)
3683 else 3627 else
3684 mask = apic->target_cpus(); 3628 mask = apic->target_cpus();
3685 3629
3686 if (irq_remapping_enabled) 3630 x86_io_apic_ops.set_affinity(idata, mask, false);
3687 set_remapped_irq_affinity(idata, mask, false);
3688 else
3689 ioapic_set_affinity(idata, mask, false);
3690 } 3631 }
3691 3632
3692} 3633}
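
The io_apic.c hunks above remove the open-coded irq_remapping_enabled / irq_remapped(cfg) branches and route the affected operations through x86_io_apic_ops, whose members default to the native_* helpers added in this diff and are overridden once interrupt remapping is initialized. The following is only an illustrative sketch of that dispatch table, pieced together from the call sites shown here; the real structure definition and the override path live outside this file, and the *_sketch names are hypothetical.

struct x86_io_apic_ops_sketch {
	void (*disable)(void);
	void (*print_entries)(unsigned int apic, unsigned int nr_entries);
	int  (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
			    unsigned int destination, int vector,
			    struct io_apic_irq_attr *attr);
	void (*eoi_ioapic_pin)(int apic, int pin, int vector);
	int  (*set_affinity)(struct irq_data *data,
			     const struct cpumask *mask, bool force);
};

/* Defaults: the native_* helpers introduced in the hunks above. */
static struct x86_io_apic_ops_sketch io_apic_ops_sketch = {
	.disable	= native_disable_io_apic,
	.print_entries	= native_io_apic_print_entries,
	.setup_entry	= native_setup_ioapic_entry,
	.eoi_ioapic_pin	= native_eoi_ioapic_pin,
	.set_affinity	= native_ioapic_set_affinity,
};

/* Hypothetical override hook: when IRQ remapping comes up it replaces the
 * members with its own handlers (e.g. intel_ir_io_apic_print_entries for
 * print_entries), so io_apic.c no longer needs per-call-site checks. */
static void irq_remap_override_ops_sketch(void)
{
	io_apic_ops_sketch.print_entries = intel_ir_io_apic_print_entries;
	/* setup_entry, eoi_ioapic_pin and set_affinity are swapped the same way. */
}

The payoff is that a path such as disable_IO_APIC() can simply call x86_io_apic_ops.disable() and get either the native or the remapping-aware behaviour without knowing which one is active.
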
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf26676..7434d8556d09 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
 	unsigned long mask = cpumask_bits(cpumask)[0];
 	unsigned long flags;
 
-	if (WARN_ONCE(!mask, "empty IPI mask"))
+	if (!mask)
 		return;
 
 	local_irq_save(flags);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e03a1e180e81..562a76d433c8 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
 }
 early_param("x2apic_phys", set_x2apic_phys_mode);
 
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static bool x2apic_fadt_phys(void)
 {
-	if (x2apic_phys)
-		return x2apic_enabled();
-	else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
-		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
-		x2apic_enabled()) {
+	if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
 		printk(KERN_DEBUG "System requires x2apic physical mode\n");
-		return 1;
+		return true;
 	}
-	else
-		return 0;
+	return false;
+}
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
 }
 
 static void
@@ -82,7 +83,7 @@ static void init_x2apic_ldr(void)
82 83
83static int x2apic_phys_probe(void) 84static int x2apic_phys_probe(void)
84{ 85{
85 if (x2apic_mode && x2apic_phys) 86 if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
86 return 1; 87 return 1;
87 88
88 return apic == &apic_x2apic_phys; 89 return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8cfade9510a4..794f6eb54cd3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -91,10 +91,16 @@ static int __init early_get_pnodeid(void)
91 m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); 91 m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
92 uv_min_hub_revision_id = node_id.s.revision; 92 uv_min_hub_revision_id = node_id.s.revision;
93 93
94 if (node_id.s.part_number == UV2_HUB_PART_NUMBER) 94 switch (node_id.s.part_number) {
95 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; 95 case UV2_HUB_PART_NUMBER:
96 if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X) 96 case UV2_HUB_PART_NUMBER_X:
97 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; 97 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
98 break;
99 case UV3_HUB_PART_NUMBER:
100 case UV3_HUB_PART_NUMBER_X:
101 uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
102 break;
103 }
98 104
99 uv_hub_info->hub_revision = uv_min_hub_revision_id; 105 uv_hub_info->hub_revision = uv_min_hub_revision_id;
100 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); 106 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
@@ -130,13 +136,16 @@ static void __init uv_set_apicid_hibit(void)
130 136
131static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 137static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
132{ 138{
133 int pnodeid, is_uv1, is_uv2; 139 int pnodeid, is_uv1, is_uv2, is_uv3;
134 140
135 is_uv1 = !strcmp(oem_id, "SGI"); 141 is_uv1 = !strcmp(oem_id, "SGI");
136 is_uv2 = !strcmp(oem_id, "SGI2"); 142 is_uv2 = !strcmp(oem_id, "SGI2");
137 if (is_uv1 || is_uv2) { 143 is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */
144 if (is_uv1 || is_uv2 || is_uv3) {
138 uv_hub_info->hub_revision = 145 uv_hub_info->hub_revision =
139 is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE; 146 (is_uv1 ? UV1_HUB_REVISION_BASE :
147 (is_uv2 ? UV2_HUB_REVISION_BASE :
148 UV3_HUB_REVISION_BASE));
140 pnodeid = early_get_pnodeid(); 149 pnodeid = early_get_pnodeid();
141 early_get_apic_pnode_shift(); 150 early_get_apic_pnode_shift();
142 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 151 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
@@ -450,14 +459,17 @@ static __init void map_high(char *id, unsigned long base, int pshift,
450 459
451 paddr = base << pshift; 460 paddr = base << pshift;
452 bytes = (1UL << bshift) * (max_pnode + 1); 461 bytes = (1UL << bshift) * (max_pnode + 1);
453 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, 462 if (!paddr) {
454 paddr + bytes); 463 pr_info("UV: Map %s_HI base address NULL\n", id);
464 return;
465 }
466 pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
455 if (map_type == map_uc) 467 if (map_type == map_uc)
456 init_extra_mapping_uc(paddr, bytes); 468 init_extra_mapping_uc(paddr, bytes);
457 else 469 else
458 init_extra_mapping_wb(paddr, bytes); 470 init_extra_mapping_wb(paddr, bytes);
459
460} 471}
472
461static __init void map_gru_high(int max_pnode) 473static __init void map_gru_high(int max_pnode)
462{ 474{
463 union uvh_rh_gam_gru_overlay_config_mmr_u gru; 475 union uvh_rh_gam_gru_overlay_config_mmr_u gru;
@@ -468,7 +480,8 @@ static __init void map_gru_high(int max_pnode)
468 map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); 480 map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
469 gru_start_paddr = ((u64)gru.s.base << shift); 481 gru_start_paddr = ((u64)gru.s.base << shift);
470 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); 482 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
471 483 } else {
484 pr_info("UV: GRU disabled\n");
472 } 485 }
473} 486}
474 487
@@ -480,23 +493,146 @@ static __init void map_mmr_high(int max_pnode)
480 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); 493 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
481 if (mmr.s.enable) 494 if (mmr.s.enable)
482 map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); 495 map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
496 else
497 pr_info("UV: MMR disabled\n");
498}
499
500/*
501 * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY
502 * and REDIRECT MMR regs are exactly the same on UV3.
503 */
504struct mmioh_config {
505 unsigned long overlay;
506 unsigned long redirect;
507 char *id;
508};
509
510static __initdata struct mmioh_config mmiohs[] = {
511 {
512 UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR,
513 UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR,
514 "MMIOH0"
515 },
516 {
517 UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR,
518 UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR,
519 "MMIOH1"
520 },
521};
522
523static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
524{
525 union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
526 unsigned long mmr;
527 unsigned long base;
528 int i, n, shift, m_io, max_io;
529 int nasid, lnasid, fi, li;
530 char *id;
531
532 id = mmiohs[index].id;
533 overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
534 pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n",
535 id, overlay.v, overlay.s3.base, overlay.s3.m_io);
536 if (!overlay.s3.enable) {
537 pr_info("UV: %s disabled\n", id);
538 return;
539 }
540
541 shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
542 base = (unsigned long)overlay.s3.base;
543 m_io = overlay.s3.m_io;
544 mmr = mmiohs[index].redirect;
545 n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
546 min_pnode *= 2; /* convert to NASID */
547 max_pnode *= 2;
548 max_io = lnasid = fi = li = -1;
549
550 for (i = 0; i < n; i++) {
551 union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect;
552
553 redirect.v = uv_read_local_mmr(mmr + i * 8);
554 nasid = redirect.s3.nasid;
555 if (nasid < min_pnode || max_pnode < nasid)
556 nasid = -1; /* invalid NASID */
557
558 if (nasid == lnasid) {
559 li = i;
560 if (i != n-1) /* last entry check */
561 continue;
562 }
563
564 /* check if we have a cached (or last) redirect to print */
565 if (lnasid != -1 || (i == n-1 && nasid != -1)) {
566 unsigned long addr1, addr2;
567 int f, l;
568
569 if (lnasid == -1) {
570 f = l = i;
571 lnasid = nasid;
572 } else {
573 f = fi;
574 l = li;
575 }
576 addr1 = (base << shift) +
577 f * (unsigned long)(1 << m_io);
578 addr2 = (base << shift) +
579 (l + 1) * (unsigned long)(1 << m_io);
580 pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
581 id, fi, li, lnasid, addr1, addr2);
582 if (max_io < l)
583 max_io = l;
584 }
585 fi = li = i;
586 lnasid = nasid;
587 }
588
589 pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n",
590 id, base, shift, m_io, max_io);
591
592 if (max_io >= 0)
593 map_high(id, base, shift, m_io, max_io, map_uc);
483} 594}
484 595
485static __init void map_mmioh_high(int max_pnode) 596static __init void map_mmioh_high(int min_pnode, int max_pnode)
486{ 597{
487 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; 598 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
488 int shift; 599 unsigned long mmr, base;
600 int shift, enable, m_io, n_io;
489 601
490 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); 602 if (is_uv3_hub()) {
491 if (is_uv1_hub() && mmioh.s1.enable) { 603 /* Map both MMIOH Regions */
492 shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; 604 map_mmioh_high_uv3(0, min_pnode, max_pnode);
493 map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io, 605 map_mmioh_high_uv3(1, min_pnode, max_pnode);
494 max_pnode, map_uc); 606 return;
495 } 607 }
496 if (is_uv2_hub() && mmioh.s2.enable) { 608
609 if (is_uv1_hub()) {
610 mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
611 shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
612 mmioh.v = uv_read_local_mmr(mmr);
613 enable = !!mmioh.s1.enable;
614 base = mmioh.s1.base;
615 m_io = mmioh.s1.m_io;
616 n_io = mmioh.s1.n_io;
617 } else if (is_uv2_hub()) {
618 mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
497 shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; 619 shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
498 map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io, 620 mmioh.v = uv_read_local_mmr(mmr);
499 max_pnode, map_uc); 621 enable = !!mmioh.s2.enable;
622 base = mmioh.s2.base;
623 m_io = mmioh.s2.m_io;
624 n_io = mmioh.s2.n_io;
625 } else
626 return;
627
628 if (enable) {
629 max_pnode &= (1 << n_io) - 1;
630 pr_info(
631 "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
632 base, shift, m_io, n_io, max_pnode);
633 map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
634 } else {
635 pr_info("UV: MMIOH disabled\n");
500 } 636 }
501} 637}
502 638
@@ -724,42 +860,41 @@ void uv_nmi_init(void)
724void __init uv_system_init(void) 860void __init uv_system_init(void)
725{ 861{
726 union uvh_rh_gam_config_mmr_u m_n_config; 862 union uvh_rh_gam_config_mmr_u m_n_config;
727 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
728 union uvh_node_id_u node_id; 863 union uvh_node_id_u node_id;
729 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 864 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
730 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io; 865 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
731 int gnode_extra, max_pnode = 0; 866 int gnode_extra, min_pnode = 999999, max_pnode = -1;
732 unsigned long mmr_base, present, paddr; 867 unsigned long mmr_base, present, paddr;
733 unsigned short pnode_mask, pnode_io_mask; 868 unsigned short pnode_mask;
869 char *hub = (is_uv1_hub() ? "UV1" :
870 (is_uv2_hub() ? "UV2" :
871 "UV3"));
734 872
735 printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2"); 873 pr_info("UV: Found %s hub\n", hub);
736 map_low_mmrs(); 874 map_low_mmrs();
737 875
738 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); 876 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
739 m_val = m_n_config.s.m_skt; 877 m_val = m_n_config.s.m_skt;
740 n_val = m_n_config.s.n_skt; 878 n_val = m_n_config.s.n_skt;
741 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); 879 pnode_mask = (1 << n_val) - 1;
742 n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io;
743 mmr_base = 880 mmr_base =
744 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 881 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
745 ~UV_MMR_ENABLE; 882 ~UV_MMR_ENABLE;
746 pnode_mask = (1 << n_val) - 1;
747 pnode_io_mask = (1 << n_io) - 1;
748 883
749 node_id.v = uv_read_local_mmr(UVH_NODE_ID); 884 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
750 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; 885 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
751 gnode_upper = ((unsigned long)gnode_extra << m_val); 886 gnode_upper = ((unsigned long)gnode_extra << m_val);
752 printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", 887 pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x\n",
753 n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); 888 n_val, m_val, pnode_mask, gnode_upper, gnode_extra);
754 889
755 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); 890 pr_info("UV: global MMR base 0x%lx\n", mmr_base);
756 891
757 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) 892 for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
758 uv_possible_blades += 893 uv_possible_blades +=
759 hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); 894 hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
760 895
761 /* uv_num_possible_blades() is really the hub count */ 896 /* uv_num_possible_blades() is really the hub count */
762 printk(KERN_INFO "UV: Found %d blades, %d hubs\n", 897 pr_info("UV: Found %d blades, %d hubs\n",
763 is_uv1_hub() ? uv_num_possible_blades() : 898 is_uv1_hub() ? uv_num_possible_blades() :
764 (uv_num_possible_blades() + 1) / 2, 899 (uv_num_possible_blades() + 1) / 2,
765 uv_num_possible_blades()); 900 uv_num_possible_blades());
@@ -794,6 +929,7 @@ void __init uv_system_init(void)
794 uv_blade_info[blade].nr_possible_cpus = 0; 929 uv_blade_info[blade].nr_possible_cpus = 0;
795 uv_blade_info[blade].nr_online_cpus = 0; 930 uv_blade_info[blade].nr_online_cpus = 0;
796 spin_lock_init(&uv_blade_info[blade].nmi_lock); 931 spin_lock_init(&uv_blade_info[blade].nmi_lock);
932 min_pnode = min(pnode, min_pnode);
797 max_pnode = max(pnode, max_pnode); 933 max_pnode = max(pnode, max_pnode);
798 blade++; 934 blade++;
799 } 935 }
@@ -856,7 +992,7 @@ void __init uv_system_init(void)
856 992
857 map_gru_high(max_pnode); 993 map_gru_high(max_pnode);
858 map_mmr_high(max_pnode); 994 map_mmr_high(max_pnode);
859 map_mmioh_high(max_pnode & pnode_io_mask); 995 map_mmioh_high(min_pnode, max_pnode);
860 996
861 uv_cpu_init(); 997 uv_cpu_init();
862 uv_scir_register_cpu_notifier(); 998 uv_scir_register_cpu_notifier();
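
The UV3 MMIOH walk in map_mmioh_high_uv3() above computes each redirect window as (base << shift) + index * (1 << m_io) and converts pnodes to NASIDs by doubling before validating redirect targets. Below is a small standalone sketch of just that address arithmetic, with made-up example values (not real hardware settings), assuming a 64-bit build.

#include <stdio.h>

/* Address range covered by redirect entries [first, last] of one MMIOH
 * region, mirroring the addr1/addr2 computation in map_mmioh_high_uv3(). */
static void mmioh_window(unsigned long base, int shift, int m_io,
			 int first, int last)
{
	unsigned long addr1 = (base << shift) + first * (1UL << m_io);
	unsigned long addr2 = (base << shift) + (last + 1) * (1UL << m_io);

	printf("entries %d..%d -> 0x%016lx - 0x%016lx\n",
	       first, last, addr1, addr2);
}

int main(void)
{
	int min_pnode = 0, max_pnode = 3;
	int min_nasid = min_pnode * 2, max_nasid = max_pnode * 2;	/* pnode -> NASID */

	printf("valid NASID range: %d..%d\n", min_nasid, max_nasid);
	mmioh_window(0x3000UL, 26, 26, 0, 3);	/* illustrative values only */
	return 0;
}
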
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e43503..66b5faffe14a 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -232,6 +232,7 @@
232#include <linux/acpi.h> 232#include <linux/acpi.h>
233#include <linux/syscore_ops.h> 233#include <linux/syscore_ops.h>
234#include <linux/i8253.h> 234#include <linux/i8253.h>
235#include <linux/cpuidle.h>
235 236
236#include <asm/uaccess.h> 237#include <asm/uaccess.h>
237#include <asm/desc.h> 238#include <asm/desc.h>
@@ -360,13 +361,35 @@ struct apm_user {
360 * idle percentage above which bios idle calls are done 361 * idle percentage above which bios idle calls are done
361 */ 362 */
362#ifdef CONFIG_APM_CPU_IDLE 363#ifdef CONFIG_APM_CPU_IDLE
363#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
364#define DEFAULT_IDLE_THRESHOLD 95 364#define DEFAULT_IDLE_THRESHOLD 95
365#else 365#else
366#define DEFAULT_IDLE_THRESHOLD 100 366#define DEFAULT_IDLE_THRESHOLD 100
367#endif 367#endif
368#define DEFAULT_IDLE_PERIOD (100 / 3) 368#define DEFAULT_IDLE_PERIOD (100 / 3)
369 369
370static int apm_cpu_idle(struct cpuidle_device *dev,
371 struct cpuidle_driver *drv, int index);
372
373static struct cpuidle_driver apm_idle_driver = {
374 .name = "apm_idle",
375 .owner = THIS_MODULE,
376 .en_core_tk_irqen = 1,
377 .states = {
378 { /* entry 0 is for polling */ },
379 { /* entry 1 is for APM idle */
380 .name = "APM",
381 .desc = "APM idle",
382 .flags = CPUIDLE_FLAG_TIME_VALID,
383 .exit_latency = 250, /* WAG */
384 .target_residency = 500, /* WAG */
385 .enter = &apm_cpu_idle
386 },
387 },
388 .state_count = 2,
389};
390
391static struct cpuidle_device apm_cpuidle_device;
392
370/* 393/*
371 * Local variables 394 * Local variables
372 */ 395 */
@@ -377,7 +400,6 @@ static struct {
377static int clock_slowed; 400static int clock_slowed;
378static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; 401static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
379static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; 402static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
380static int set_pm_idle;
381static int suspends_pending; 403static int suspends_pending;
382static int standbys_pending; 404static int standbys_pending;
383static int ignore_sys_suspend; 405static int ignore_sys_suspend;
@@ -884,8 +906,6 @@ static void apm_do_busy(void)
884#define IDLE_CALC_LIMIT (HZ * 100) 906#define IDLE_CALC_LIMIT (HZ * 100)
885#define IDLE_LEAKY_MAX 16 907#define IDLE_LEAKY_MAX 16
886 908
887static void (*original_pm_idle)(void) __read_mostly;
888
889/** 909/**
890 * apm_cpu_idle - cpu idling for APM capable Linux 910 * apm_cpu_idle - cpu idling for APM capable Linux
891 * 911 *
@@ -894,35 +914,36 @@ static void (*original_pm_idle)(void) __read_mostly;
894 * Furthermore it calls the system default idle routine. 914 * Furthermore it calls the system default idle routine.
895 */ 915 */
896 916
897static void apm_cpu_idle(void) 917static int apm_cpu_idle(struct cpuidle_device *dev,
918 struct cpuidle_driver *drv, int index)
898{ 919{
899 static int use_apm_idle; /* = 0 */ 920 static int use_apm_idle; /* = 0 */
900 static unsigned int last_jiffies; /* = 0 */ 921 static unsigned int last_jiffies; /* = 0 */
901 static unsigned int last_stime; /* = 0 */ 922 static unsigned int last_stime; /* = 0 */
923 cputime_t stime;
902 924
903 int apm_idle_done = 0; 925 int apm_idle_done = 0;
904 unsigned int jiffies_since_last_check = jiffies - last_jiffies; 926 unsigned int jiffies_since_last_check = jiffies - last_jiffies;
905 unsigned int bucket; 927 unsigned int bucket;
906 928
907 WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
908recalc: 929recalc:
930 task_cputime(current, NULL, &stime);
909 if (jiffies_since_last_check > IDLE_CALC_LIMIT) { 931 if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
910 use_apm_idle = 0; 932 use_apm_idle = 0;
911 last_jiffies = jiffies;
912 last_stime = current->stime;
913 } else if (jiffies_since_last_check > idle_period) { 933 } else if (jiffies_since_last_check > idle_period) {
914 unsigned int idle_percentage; 934 unsigned int idle_percentage;
915 935
916 idle_percentage = current->stime - last_stime; 936 idle_percentage = stime - last_stime;
917 idle_percentage *= 100; 937 idle_percentage *= 100;
918 idle_percentage /= jiffies_since_last_check; 938 idle_percentage /= jiffies_since_last_check;
919 use_apm_idle = (idle_percentage > idle_threshold); 939 use_apm_idle = (idle_percentage > idle_threshold);
920 if (apm_info.forbid_idle) 940 if (apm_info.forbid_idle)
921 use_apm_idle = 0; 941 use_apm_idle = 0;
922 last_jiffies = jiffies;
923 last_stime = current->stime;
924 } 942 }
925 943
944 last_jiffies = jiffies;
945 last_stime = stime;
946
926 bucket = IDLE_LEAKY_MAX; 947 bucket = IDLE_LEAKY_MAX;
927 948
928 while (!need_resched()) { 949 while (!need_resched()) {
@@ -950,10 +971,7 @@ recalc:
950 break; 971 break;
951 } 972 }
952 } 973 }
953 if (original_pm_idle) 974 default_idle();
954 original_pm_idle();
955 else
956 default_idle();
957 local_irq_disable(); 975 local_irq_disable();
958 jiffies_since_last_check = jiffies - last_jiffies; 976 jiffies_since_last_check = jiffies - last_jiffies;
959 if (jiffies_since_last_check > idle_period) 977 if (jiffies_since_last_check > idle_period)
@@ -963,7 +981,7 @@ recalc:
963 if (apm_idle_done) 981 if (apm_idle_done)
964 apm_do_busy(); 982 apm_do_busy();
965 983
966 local_irq_enable(); 984 return index;
967} 985}
968 986
969/** 987/**
@@ -2381,9 +2399,9 @@ static int __init apm_init(void)
 	if (HZ != 100)
 		idle_period = (idle_period * HZ) / 100;
 	if (idle_threshold < 100) {
-		original_pm_idle = pm_idle;
-		pm_idle = apm_cpu_idle;
-		set_pm_idle = 1;
+		if (!cpuidle_register_driver(&apm_idle_driver))
+			if (cpuidle_register_device(&apm_cpuidle_device))
+				cpuidle_unregister_driver(&apm_idle_driver);
 	}
 
 	return 0;
@@ -2393,15 +2411,9 @@ static void __exit apm_exit(void)
2393{ 2411{
2394 int error; 2412 int error;
2395 2413
2396 if (set_pm_idle) { 2414 cpuidle_unregister_device(&apm_cpuidle_device);
2397 pm_idle = original_pm_idle; 2415 cpuidle_unregister_driver(&apm_idle_driver);
2398 /* 2416
2399 * We are about to unload the current idle thread pm callback
2400 * (pm_idle), Wait for all processors to update cached/local
2401 * copies of pm_idle before proceeding.
2402 */
2403 kick_all_cpus_sync();
2404 }
2405 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) 2417 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
2406 && (apm_info.connection_version > 0x0100)) { 2418 && (apm_info.connection_version > 0x0100)) {
2407 error = apm_engage_power_management(APM_DEVICE_ALL, 0); 2419 error = apm_engage_power_management(APM_DEVICE_ALL, 0);
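
The apm_32.c conversion above stops taking over pm_idle and instead registers a one-state cpuidle driver plus a device; in apm_init() the nested ifs mean a failed device registration rolls back the driver registration, and apm_exit() tears both down in reverse order. A hedged sketch of that register/rollback pairing (the *_sketch wrappers are hypothetical; only the cpuidle_* calls come from the diff):

/* Sketch of the pairing used in apm_init()/apm_exit() above. */
static int apm_idle_setup_sketch(void)
{
	int err;

	err = cpuidle_register_driver(&apm_idle_driver);
	if (err)
		return err;				/* nothing to undo */

	err = cpuidle_register_device(&apm_cpuidle_device);
	if (err)
		cpuidle_unregister_driver(&apm_idle_driver);	/* roll back */

	return err;
}

static void apm_idle_teardown_sketch(void)
{
	/* Reverse order of setup, as in apm_exit() above. */
	cpuidle_unregister_device(&apm_cpuidle_device);
	cpuidle_unregister_driver(&apm_idle_driver);
}
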
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15239fffd6fe..fa96eb0d02fb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -12,7 +12,6 @@
12#include <asm/pci-direct.h> 12#include <asm/pci-direct.h>
13 13
14#ifdef CONFIG_X86_64 14#ifdef CONFIG_X86_64
15# include <asm/numa_64.h>
16# include <asm/mmconfig.h> 15# include <asm/mmconfig.h>
17# include <asm/cacheflush.h> 16# include <asm/cacheflush.h>
18#endif 17#endif
@@ -220,8 +219,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 	 */
 	WARN_ONCE(1, "WARNING: This combination of AMD"
 		" processors is not suitable for SMP.\n");
-	if (!test_taint(TAINT_UNSAFE_SMP))
-		add_taint(TAINT_UNSAFE_SMP);
+	add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
 
 valid_k7:
 	;
@@ -364,9 +362,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
-int amd_get_nb_id(int cpu)
+u16 amd_get_nb_id(int cpu)
 {
-	int id = 0;
+	u16 id = 0;
 #ifdef CONFIG_SMP
 	id = per_cpu(cpu_llc_id, cpu);
 #endif
@@ -518,10 +516,9 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
518static void __cpuinit init_amd(struct cpuinfo_x86 *c) 516static void __cpuinit init_amd(struct cpuinfo_x86 *c)
519{ 517{
520 u32 dummy; 518 u32 dummy;
521
522#ifdef CONFIG_SMP
523 unsigned long long value; 519 unsigned long long value;
524 520
521#ifdef CONFIG_SMP
525 /* 522 /*
526 * Disable TLB flush filter by setting HWCR.FFDIS on K8 523 * Disable TLB flush filter by setting HWCR.FFDIS on K8
527 * bit 6 of msr C001_0015 524 * bit 6 of msr C001_0015
@@ -559,12 +556,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
559 * (AMD Erratum #110, docId: 25759). 556 * (AMD Erratum #110, docId: 25759).
560 */ 557 */
561 if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { 558 if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
562 u64 val;
563
564 clear_cpu_cap(c, X86_FEATURE_LAHF_LM); 559 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
565 if (!rdmsrl_amd_safe(0xc001100d, &val)) { 560 if (!rdmsrl_amd_safe(0xc001100d, &value)) {
566 val &= ~(1ULL << 32); 561 value &= ~(1ULL << 32);
567 wrmsrl_amd_safe(0xc001100d, val); 562 wrmsrl_amd_safe(0xc001100d, value);
568 } 563 }
569 } 564 }
570 565
@@ -617,13 +612,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
617 if ((c->x86 == 0x15) && 612 if ((c->x86 == 0x15) &&
618 (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && 613 (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
619 !cpu_has(c, X86_FEATURE_TOPOEXT)) { 614 !cpu_has(c, X86_FEATURE_TOPOEXT)) {
620 u64 val;
621 615
622 if (!rdmsrl_safe(0xc0011005, &val)) { 616 if (!rdmsrl_safe(0xc0011005, &value)) {
623 val |= 1ULL << 54; 617 value |= 1ULL << 54;
624 wrmsrl_safe(0xc0011005, val); 618 wrmsrl_safe(0xc0011005, value);
625 rdmsrl(0xc0011005, val); 619 rdmsrl(0xc0011005, value);
626 if (val & (1ULL << 54)) { 620 if (value & (1ULL << 54)) {
627 set_cpu_cap(c, X86_FEATURE_TOPOEXT); 621 set_cpu_cap(c, X86_FEATURE_TOPOEXT);
628 printk(KERN_INFO FW_INFO "CPU: Re-enabling " 622 printk(KERN_INFO FW_INFO "CPU: Re-enabling "
629 "disabled Topology Extensions Support\n"); 623 "disabled Topology Extensions Support\n");
@@ -637,11 +631,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
637 */ 631 */
638 if ((c->x86 == 0x15) && 632 if ((c->x86 == 0x15) &&
639 (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { 633 (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
640 u64 val;
641 634
642 if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { 635 if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
643 val |= 0x1E; 636 value |= 0x1E;
644 wrmsrl_safe(0xc0011021, val); 637 wrmsrl_safe(0xc0011021, value);
645 } 638 }
646 } 639 }
647 640
@@ -685,12 +678,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
685 * benefit in doing so. 678 * benefit in doing so.
686 */ 679 */
687 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { 680 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
681 unsigned long pfn = tseg >> PAGE_SHIFT;
682
688 printk(KERN_DEBUG "tseg: %010llx\n", tseg); 683 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
689 if ((tseg>>PMD_SHIFT) < 684 if (pfn_range_is_mapped(pfn, pfn + 1))
690 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
691 ((tseg>>PMD_SHIFT) <
692 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
693 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
694 set_memory_4k((unsigned long)__va(tseg), 1); 685 set_memory_4k((unsigned long)__va(tseg), 1);
695 } 686 }
696 } 687 }
@@ -703,13 +694,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
703 if (c->x86 > 0x11) 694 if (c->x86 > 0x11)
704 set_cpu_cap(c, X86_FEATURE_ARAT); 695 set_cpu_cap(c, X86_FEATURE_ARAT);
705 696
706 /*
707 * Disable GART TLB Walk Errors on Fam10h. We do this here
708 * because this is always needed when GART is enabled, even in a
709 * kernel which has no MCE support built in.
710 */
711 if (c->x86 == 0x10) { 697 if (c->x86 == 0x10) {
712 /* 698 /*
699 * Disable GART TLB Walk Errors on Fam10h. We do this here
700 * because this is always needed when GART is enabled, even in a
701 * kernel which has no MCE support built in.
713 * BIOS should disable GartTlbWlk Errors themself. If 702 * BIOS should disable GartTlbWlk Errors themself. If
714 * it doesn't do it here as suggested by the BKDG. 703 * it doesn't do it here as suggested by the BKDG.
715 * 704 *
@@ -723,6 +712,21 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
723 mask |= (1 << 10); 712 mask |= (1 << 10);
724 wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); 713 wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);
725 } 714 }
715
716 /*
717 * On family 10h BIOS may not have properly enabled WC+ support,
718 * causing it to be converted to CD memtype. This may result in
719 * performance degradation for certain nested-paging guests.
720 * Prevent this conversion by clearing bit 24 in
721 * MSR_AMD64_BU_CFG2.
722 *
723 * NOTE: we want to use the _safe accessors so as not to #GP kvm
724 * guests on older kvm hosts.
725 */
726
727 rdmsrl_safe(MSR_AMD64_BU_CFG2, &value);
728 value &= ~(1ULL << 24);
729 wrmsrl_safe(MSR_AMD64_BU_CFG2, value);
726 } 730 }
727 731
728 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); 732 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 92dfec986a48..af6455e3fcc9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,15 +17,6 @@
17#include <asm/paravirt.h> 17#include <asm/paravirt.h>
18#include <asm/alternative.h> 18#include <asm/alternative.h>
19 19
20static int __init no_halt(char *s)
21{
22 WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
23 boot_cpu_data.hlt_works_ok = 0;
24 return 1;
25}
26
27__setup("no-hlt", no_halt);
28
29static int __init no_387(char *s) 20static int __init no_387(char *s)
30{ 21{
31 boot_cpu_data.hard_math = 0; 22 boot_cpu_data.hard_math = 0;
@@ -89,23 +80,6 @@ static void __init check_fpu(void)
89 pr_warn("Hmm, FPU with FDIV bug\n"); 80 pr_warn("Hmm, FPU with FDIV bug\n");
90} 81}
91 82
92static void __init check_hlt(void)
93{
94 if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
95 return;
96
97 pr_info("Checking 'hlt' instruction... ");
98 if (!boot_cpu_data.hlt_works_ok) {
99 pr_cont("disabled\n");
100 return;
101 }
102 halt();
103 halt();
104 halt();
105 halt();
106 pr_cont("OK\n");
107}
108
109/* 83/*
110 * Check whether we are able to run this kernel safely on SMP. 84 * Check whether we are able to run this kernel safely on SMP.
111 * 85 *
@@ -129,7 +103,6 @@ void __init check_bugs(void)
129 print_cpu_info(&boot_cpu_data); 103 print_cpu_info(&boot_cpu_data);
130#endif 104#endif
131 check_config(); 105 check_config();
132 check_hlt();
133 init_utsname()->machine[1] = 106 init_utsname()->machine[1] =
134 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); 107 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
135 alternative_instructions(); 108 alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9c3ab43a6954..d814772c5bed 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,8 @@
37#include <asm/mce.h> 37#include <asm/mce.h>
38#include <asm/msr.h> 38#include <asm/msr.h>
39#include <asm/pat.h> 39#include <asm/pat.h>
40#include <asm/microcode.h>
41#include <asm/microcode_intel.h>
40 42
41#ifdef CONFIG_X86_LOCAL_APIC 43#ifdef CONFIG_X86_LOCAL_APIC
42#include <asm/uv/uv.h> 44#include <asm/uv/uv.h>
@@ -213,7 +215,7 @@ static inline int flag_is_changeable_p(u32 flag)
213} 215}
214 216
215/* Probe for the CPUID instruction */ 217/* Probe for the CPUID instruction */
216static int __cpuinit have_cpuid_p(void) 218int __cpuinit have_cpuid_p(void)
217{ 219{
218 return flag_is_changeable_p(X86_EFLAGS_ID); 220 return flag_is_changeable_p(X86_EFLAGS_ID);
219} 221}
@@ -249,11 +251,6 @@ static inline int flag_is_changeable_p(u32 flag)
249{ 251{
250 return 1; 252 return 1;
251} 253}
252/* Probe for the CPUID instruction */
253static inline int have_cpuid_p(void)
254{
255 return 1;
256}
257static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 254static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
258{ 255{
259} 256}
@@ -1223,6 +1220,12 @@ void __cpuinit cpu_init(void)
1223 int cpu; 1220 int cpu;
1224 int i; 1221 int i;
1225 1222
1223 /*
1224 * Load microcode on this cpu if a valid microcode is available.
1225 * This is early microcode loading procedure.
1226 */
1227 load_ucode_ap();
1228
1226 cpu = stack_smp_processor_id(); 1229 cpu = stack_smp_processor_id();
1227 t = &per_cpu(init_tss, cpu); 1230 t = &per_cpu(init_tss, cpu);
1228 oist = &per_cpu(orig_ist, cpu); 1231 oist = &per_cpu(orig_ist, cpu);
@@ -1314,6 +1317,8 @@ void __cpuinit cpu_init(void)
1314 struct tss_struct *t = &per_cpu(init_tss, cpu); 1317 struct tss_struct *t = &per_cpu(init_tss, cpu);
1315 struct thread_struct *thread = &curr->thread; 1318 struct thread_struct *thread = &curr->thread;
1316 1319
1320 show_ucode_info_early();
1321
1317 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { 1322 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
1318 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1323 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
1319 for (;;) 1324 for (;;)
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index a8f8fa9769d6..1e7e84a02eba 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void)
79 if (x86_hyper->init_platform) 79 if (x86_hyper->init_platform)
80 x86_hyper->init_platform(); 80 x86_hyper->init_platform();
81} 81}
82
83bool __init hypervisor_x2apic_available(void)
84{
85 return x86_hyper &&
86 x86_hyper->x2apic_available &&
87 x86_hyper->x2apic_available();
88}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcaabd0432c5..1905ce98bee0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -17,7 +17,6 @@
17 17
18#ifdef CONFIG_X86_64 18#ifdef CONFIG_X86_64
19#include <linux/topology.h> 19#include <linux/topology.h>
20#include <asm/numa_64.h>
21#endif 20#endif
22 21
23#include "cpu.h" 22#include "cpu.h"
@@ -168,7 +167,7 @@ int __cpuinit ppro_with_ram_bug(void)
168#ifdef CONFIG_X86_F00F_BUG 167#ifdef CONFIG_X86_F00F_BUG
169static void __cpuinit trap_init_f00f_bug(void) 168static void __cpuinit trap_init_f00f_bug(void)
170{ 169{
171 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); 170 __set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
172 171
173 /* 172 /*
174 * Update the IDT descriptor and reload the IDT so that 173 * Update the IDT descriptor and reload the IDT so that
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fe9edec6698a..7c6f7d548c0f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -298,8 +298,7 @@ struct _cache_attr {
298 unsigned int); 298 unsigned int);
299}; 299};
300 300
301#ifdef CONFIG_AMD_NB 301#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
302
303/* 302/*
304 * L3 cache descriptors 303 * L3 cache descriptors
305 */ 304 */
@@ -524,9 +523,9 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
524static struct _cache_attr subcaches = 523static struct _cache_attr subcaches =
525 __ATTR(subcaches, 0644, show_subcaches, store_subcaches); 524 __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
526 525
527#else /* CONFIG_AMD_NB */ 526#else
528#define amd_init_l3_cache(x, y) 527#define amd_init_l3_cache(x, y)
529#endif /* CONFIG_AMD_NB */ 528#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
530 529
531static int 530static int
532__cpuinit cpuid4_cache_lookup_regs(int index, 531__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -1227,7 +1226,7 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1227 .notifier_call = cacheinfo_cpu_callback, 1226 .notifier_call = cacheinfo_cpu_callback,
1228}; 1227};
1229 1228
1230static int __cpuinit cache_sysfs_init(void) 1229static int __init cache_sysfs_init(void)
1231{ 1230{
1232 int i; 1231 int i;
1233 1232
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 80dbda84f1c3..7bc126346ace 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -512,11 +512,8 @@ int mce_available(struct cpuinfo_x86 *c)
512 512
513static void mce_schedule_work(void) 513static void mce_schedule_work(void)
514{ 514{
515 if (!mce_ring_empty()) { 515 if (!mce_ring_empty())
516 struct work_struct *work = &__get_cpu_var(mce_work); 516 schedule_work(&__get_cpu_var(mce_work));
517 if (!work_pending(work))
518 schedule_work(work);
519 }
520} 517}
521 518
522DEFINE_PER_CPU(struct irq_work, mce_irq_work); 519DEFINE_PER_CPU(struct irq_work, mce_irq_work);
@@ -1085,7 +1082,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1085 /* 1082 /*
1086 * Set taint even when machine check was not enabled. 1083 * Set taint even when machine check was not enabled.
1087 */ 1084 */
1088 add_taint(TAINT_MACHINE_CHECK); 1085 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
1089 1086
1090 severity = mce_severity(&m, cfg->tolerant, NULL); 1087 severity = mce_severity(&m, cfg->tolerant, NULL);
1091 1088
@@ -1351,12 +1348,7 @@ int mce_notify_irq(void)
1351 /* wake processes polling /dev/mcelog */ 1348 /* wake processes polling /dev/mcelog */
1352 wake_up_interruptible(&mce_chrdev_wait); 1349 wake_up_interruptible(&mce_chrdev_wait);
1353 1350
1354 /* 1351 if (mce_helper[0])
1355 * There is no risk of missing notifications because
1356 * work_pending is always cleared before the function is
1357 * executed.
1358 */
1359 if (mce_helper[0] && !work_pending(&mce_trigger_work))
1360 schedule_work(&mce_trigger_work); 1352 schedule_work(&mce_trigger_work);
1361 1353
1362 if (__ratelimit(&ratelimit)) 1354 if (__ratelimit(&ratelimit))
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 2d5454cd2c4f..1c044b1ccc59 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -33,7 +33,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
33 smp_processor_id()); 33 smp_processor_id());
34 } 34 }
35 35
36 add_taint(TAINT_MACHINE_CHECK); 36 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
37} 37}
38 38
39/* Set up machine check reporting for processors with Intel style MCE: */ 39/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 2d7998fb628c..e9a701aecaa1 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -15,7 +15,7 @@
15static void winchip_machine_check(struct pt_regs *regs, long error_code) 15static void winchip_machine_check(struct pt_regs *regs, long error_code)
16{ 16{
17 printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); 17 printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
18 add_taint(TAINT_MACHINE_CHECK); 18 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
19} 19}
20 20
21/* Set up machine check reporting on the Winchip C6 series */ 21/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 0a630dd4b620..a7d26d83fb70 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -14,10 +14,15 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/clocksource.h> 15#include <linux/clocksource.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/hardirq.h>
18#include <linux/interrupt.h>
17#include <asm/processor.h> 19#include <asm/processor.h>
18#include <asm/hypervisor.h> 20#include <asm/hypervisor.h>
19#include <asm/hyperv.h> 21#include <asm/hyperv.h>
20#include <asm/mshyperv.h> 22#include <asm/mshyperv.h>
23#include <asm/desc.h>
24#include <asm/idle.h>
25#include <asm/irq_regs.h>
21 26
22struct ms_hyperv_info ms_hyperv; 27struct ms_hyperv_info ms_hyperv;
23EXPORT_SYMBOL_GPL(ms_hyperv); 28EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -30,6 +35,13 @@ static bool __init ms_hyperv_platform(void)
30 if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) 35 if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
31 return false; 36 return false;
32 37
38 /*
39 * Xen emulates Hyper-V to support enlightened Windows.
 40 * Check first whether we are running on a Xen hypervisor.
41 */
42 if (xen_cpuid_base())
43 return false;
44
33 cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, 45 cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
34 &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); 46 &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
35 47
@@ -68,7 +80,14 @@ static void __init ms_hyperv_init_platform(void)
68 printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", 80 printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
69 ms_hyperv.features, ms_hyperv.hints); 81 ms_hyperv.features, ms_hyperv.hints);
70 82
71 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); 83 if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
84 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
85#if IS_ENABLED(CONFIG_HYPERV)
86 /*
87 * Setup the IDT for hypervisor callback.
88 */
89 alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
90#endif
72} 91}
73 92
74const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { 93const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
@@ -77,3 +96,36 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
77 .init_platform = ms_hyperv_init_platform, 96 .init_platform = ms_hyperv_init_platform,
78}; 97};
79EXPORT_SYMBOL(x86_hyper_ms_hyperv); 98EXPORT_SYMBOL(x86_hyper_ms_hyperv);
99
100#if IS_ENABLED(CONFIG_HYPERV)
101static int vmbus_irq = -1;
102static irq_handler_t vmbus_isr;
103
104void hv_register_vmbus_handler(int irq, irq_handler_t handler)
105{
106 vmbus_irq = irq;
107 vmbus_isr = handler;
108}
109
110void hyperv_vector_handler(struct pt_regs *regs)
111{
112 struct pt_regs *old_regs = set_irq_regs(regs);
113 struct irq_desc *desc;
114
115 irq_enter();
116 exit_idle();
117
118 desc = irq_to_desc(vmbus_irq);
119
120 if (desc)
121 generic_handle_irq_desc(vmbus_irq, desc);
122
123 irq_exit();
124 set_irq_regs(old_regs);
125}
126#else
127void hv_register_vmbus_handler(int irq, irq_handler_t handler)
128{
129}
130#endif
131EXPORT_SYMBOL_GPL(hv_register_vmbus_handler);
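A rough sketch of how a VMBus driver could use the new hook; the handler, setup function and irq below are made up for illustration and are not part of this patch:

/* Illustrative only: tell the architecture code which irq carries VMBus
 * channel interrupts.  hyperv_vector_handler() above then routes
 * HYPERVISOR_CALLBACK_VECTOR arrivals into the normal IRQ path for it. */
static irqreturn_t example_vmbus_isr(int irq, void *dev_id)
{
	/* acknowledge / process the message page here */
	return IRQ_HANDLED;
}

static void example_vmbus_setup(int irq)
{
	hv_register_vmbus_handler(irq, example_vmbus_isr);
	/* the driver would also bind example_vmbus_isr to 'irq' via
	 * request_irq(); that wiring is outside this hunk. */
}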
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e9fe907cd249..fa72a39e5d46 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -542,7 +542,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
542 542
543 if (tmp != mask_lo) { 543 if (tmp != mask_lo) {
544 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); 544 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
545 add_taint(TAINT_FIRMWARE_WORKAROUND); 545 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
546 mask_lo = tmp; 546 mask_lo = tmp;
547 } 547 }
548 } 548 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6774c17a5576..bf0f01aea994 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
829 } else { 829 } else {
830 hwc->config_base = x86_pmu_config_addr(hwc->idx); 830 hwc->config_base = x86_pmu_config_addr(hwc->idx);
831 hwc->event_base = x86_pmu_event_addr(hwc->idx); 831 hwc->event_base = x86_pmu_event_addr(hwc->idx);
832 hwc->event_base_rdpmc = hwc->idx; 832 hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
833 } 833 }
834} 834}
835 835
@@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
1310 .attrs = NULL, 1310 .attrs = NULL,
1311}; 1311};
1312 1312
1313struct perf_pmu_events_attr {
1314 struct device_attribute attr;
1315 u64 id;
1316};
1317
1318/* 1313/*
1319 * Remove all undefined events (x86_pmu.event_map(id) == 0) 1314 * Remove all undefined events (x86_pmu.event_map(id) == 0)
1320 * out of events_attr attributes. 1315 * out of events_attr attributes.
@@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
1348#define EVENT_VAR(_id) event_attr_##_id 1343#define EVENT_VAR(_id) event_attr_##_id
1349#define EVENT_PTR(_id) &event_attr_##_id.attr.attr 1344#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
1350 1345
1351#define EVENT_ATTR(_name, _id) \ 1346#define EVENT_ATTR(_name, _id) \
1352static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ 1347 PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
1353 .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ 1348 events_sysfs_show)
1354 .id = PERF_COUNT_HW_##_id, \
1355};
1356 1349
1357EVENT_ATTR(cpu-cycles, CPU_CYCLES ); 1350EVENT_ATTR(cpu-cycles, CPU_CYCLES );
1358EVENT_ATTR(instructions, INSTRUCTIONS ); 1351EVENT_ATTR(instructions, INSTRUCTIONS );
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 115c1ea97746..7f5c75c2afdd 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,8 @@ struct x86_pmu {
325 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); 325 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
326 unsigned eventsel; 326 unsigned eventsel;
327 unsigned perfctr; 327 unsigned perfctr;
328 int (*addr_offset)(int index, bool eventsel);
329 int (*rdpmc_index)(int index);
328 u64 (*event_map)(int); 330 u64 (*event_map)(int);
329 int max_events; 331 int max_events;
330 int num_counters; 332 int num_counters;
@@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs
446 448
447u64 x86_perf_event_update(struct perf_event *event); 449u64 x86_perf_event_update(struct perf_event *event);
448 450
449static inline int x86_pmu_addr_offset(int index) 451static inline unsigned int x86_pmu_config_addr(int index)
450{ 452{
451 int offset; 453 return x86_pmu.eventsel + (x86_pmu.addr_offset ?
452 454 x86_pmu.addr_offset(index, true) : index);
453 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
454 alternative_io(ASM_NOP2,
455 "shll $1, %%eax",
456 X86_FEATURE_PERFCTR_CORE,
457 "=a" (offset),
458 "a" (index));
459
460 return offset;
461} 455}
462 456
463static inline unsigned int x86_pmu_config_addr(int index) 457static inline unsigned int x86_pmu_event_addr(int index)
464{ 458{
465 return x86_pmu.eventsel + x86_pmu_addr_offset(index); 459 return x86_pmu.perfctr + (x86_pmu.addr_offset ?
460 x86_pmu.addr_offset(index, false) : index);
466} 461}
467 462
468static inline unsigned int x86_pmu_event_addr(int index) 463static inline int x86_pmu_rdpmc_index(int index)
469{ 464{
470 return x86_pmu.perfctr + x86_pmu_addr_offset(index); 465 return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
471} 466}
472 467
473int x86_setup_perfctr(struct perf_event *event); 468int x86_setup_perfctr(struct perf_event *event);
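Worked example for the helpers above, using the counter bases quoted in the AMD comment block later in this diff: when no addr_offset callback is installed (or the callback returns the index unchanged), counter 3's event-select MSR is 0xc0010000 + 3 = 0xc0010003; with the AMD core-extension layout, where the callback doubles the offset against the 0xc0010200 base, it becomes 0xc0010200 + (3 << 1) = 0xc0010206. A minimal sketch of the two addressing modes, names illustrative:

/* Sketch: linear vs. double-spaced counter MSR addressing. */
static inline unsigned int example_config_addr(unsigned int base, int index,
					       bool double_spaced)
{
	return base + (double_spaced ? index << 1 : index);
}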
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c93bc4e813a0..dfdab42aed27 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event)
132 return amd_perfmon_event_map[hw_event]; 132 return amd_perfmon_event_map[hw_event];
133} 133}
134 134
135static int amd_pmu_hw_config(struct perf_event *event) 135static struct event_constraint *amd_nb_event_constraint;
136
137/*
138 * Previously calculated offsets
139 */
140static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
141static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
142static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
143
144/*
145 * Legacy CPUs:
146 * 4 counters starting at 0xc0010000 each offset by 1
147 *
148 * CPUs with core performance counter extensions:
149 * 6 counters starting at 0xc0010200 each offset by 2
150 *
151 * CPUs with north bridge performance counter extensions:
152 * 4 additional counters starting at 0xc0010240 each offset by 2
153 * (indexed right above either one of the above core counters)
154 */
155static inline int amd_pmu_addr_offset(int index, bool eventsel)
136{ 156{
137 int ret; 157 int offset, first, base;
138 158
139 /* pass precise event sampling to ibs: */ 159 if (!index)
140 if (event->attr.precise_ip && get_ibs_caps()) 160 return index;
141 return -ENOENT; 161
162 if (eventsel)
163 offset = event_offsets[index];
164 else
165 offset = count_offsets[index];
166
167 if (offset)
168 return offset;
169
170 if (amd_nb_event_constraint &&
171 test_bit(index, amd_nb_event_constraint->idxmsk)) {
172 /*
173 * calculate the offset of NB counters with respect to
174 * base eventsel or perfctr
175 */
176
177 first = find_first_bit(amd_nb_event_constraint->idxmsk,
178 X86_PMC_IDX_MAX);
179
180 if (eventsel)
181 base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
182 else
183 base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
184
185 offset = base + ((index - first) << 1);
186 } else if (!cpu_has_perfctr_core)
187 offset = index;
188 else
189 offset = index << 1;
190
191 if (eventsel)
192 event_offsets[index] = offset;
193 else
194 count_offsets[index] = offset;
195
196 return offset;
197}
198
199static inline int amd_pmu_rdpmc_index(int index)
200{
201 int ret, first;
202
203 if (!index)
204 return index;
205
206 ret = rdpmc_indexes[index];
142 207
143 ret = x86_pmu_hw_config(event);
144 if (ret) 208 if (ret)
145 return ret; 209 return ret;
146 210
147 if (has_branch_stack(event)) 211 if (amd_nb_event_constraint &&
148 return -EOPNOTSUPP; 212 test_bit(index, amd_nb_event_constraint->idxmsk)) {
213 /*
 214 * according to the manual, the ECX value of the NB counters is
215 * the index of the NB counter (0, 1, 2 or 3) plus 6
216 */
217
218 first = find_first_bit(amd_nb_event_constraint->idxmsk,
219 X86_PMC_IDX_MAX);
220 ret = index - first + 6;
221 } else
222 ret = index;
223
224 rdpmc_indexes[index] = ret;
225
226 return ret;
227}
149 228
229static int amd_core_hw_config(struct perf_event *event)
230{
150 if (event->attr.exclude_host && event->attr.exclude_guest) 231 if (event->attr.exclude_host && event->attr.exclude_guest)
151 /* 232 /*
152 * When HO == GO == 1 the hardware treats that as GO == HO == 0 233 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event)
156 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | 237 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
157 ARCH_PERFMON_EVENTSEL_OS); 238 ARCH_PERFMON_EVENTSEL_OS);
158 else if (event->attr.exclude_host) 239 else if (event->attr.exclude_host)
159 event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; 240 event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
160 else if (event->attr.exclude_guest) 241 else if (event->attr.exclude_guest)
161 event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; 242 event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
243
244 return 0;
245}
246
247/*
248 * NB counters do not support the following event select bits:
249 * Host/Guest only
250 * Counter mask
251 * Invert counter mask
252 * Edge detect
253 * OS/User mode
254 */
255static int amd_nb_hw_config(struct perf_event *event)
256{
257 /* for NB, we only allow system wide counting mode */
258 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
259 return -EINVAL;
260
261 if (event->attr.exclude_user || event->attr.exclude_kernel ||
262 event->attr.exclude_host || event->attr.exclude_guest)
263 return -EINVAL;
162 264
163 if (event->attr.type != PERF_TYPE_RAW) 265 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
164 return 0; 266 ARCH_PERFMON_EVENTSEL_OS);
165 267
166 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; 268 if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
269 ARCH_PERFMON_EVENTSEL_INT))
270 return -EINVAL;
167 271
168 return 0; 272 return 0;
169} 273}
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
181 return (hwc->config & 0xe0) == 0xe0; 285 return (hwc->config & 0xe0) == 0xe0;
182} 286}
183 287
288static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
289{
290 return amd_nb_event_constraint && amd_is_nb_event(hwc);
291}
292
184static inline int amd_has_nb(struct cpu_hw_events *cpuc) 293static inline int amd_has_nb(struct cpu_hw_events *cpuc)
185{ 294{
186 struct amd_nb *nb = cpuc->amd_nb; 295 struct amd_nb *nb = cpuc->amd_nb;
@@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
188 return nb && nb->nb_id != -1; 297 return nb && nb->nb_id != -1;
189} 298}
190 299
191static void amd_put_event_constraints(struct cpu_hw_events *cpuc, 300static int amd_pmu_hw_config(struct perf_event *event)
192 struct perf_event *event) 301{
302 int ret;
303
304 /* pass precise event sampling to ibs: */
305 if (event->attr.precise_ip && get_ibs_caps())
306 return -ENOENT;
307
308 if (has_branch_stack(event))
309 return -EOPNOTSUPP;
310
311 ret = x86_pmu_hw_config(event);
312 if (ret)
313 return ret;
314
315 if (event->attr.type == PERF_TYPE_RAW)
316 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
317
318 if (amd_is_perfctr_nb_event(&event->hw))
319 return amd_nb_hw_config(event);
320
321 return amd_core_hw_config(event);
322}
323
324static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
325 struct perf_event *event)
193{ 326{
194 struct hw_perf_event *hwc = &event->hw;
195 struct amd_nb *nb = cpuc->amd_nb; 327 struct amd_nb *nb = cpuc->amd_nb;
196 int i; 328 int i;
197 329
198 /* 330 /*
199 * only care about NB events
200 */
201 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
202 return;
203
204 /*
205 * need to scan whole list because event may not have 331 * need to scan whole list because event may not have
206 * been assigned during scheduling 332 * been assigned during scheduling
207 * 333 *
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
215 } 341 }
216} 342}
217 343
344static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
345{
346 int core_id = cpu_data(smp_processor_id()).cpu_core_id;
347
348 /* deliver interrupts only to this core */
349 if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
350 hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
351 hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
352 hwc->config |= (u64)(core_id) <<
353 AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
354 }
355}
356
218 /* 357 /*
219 * AMD64 NorthBridge events need special treatment because 358 * AMD64 NorthBridge events need special treatment because
220 * counter access needs to be synchronized across all cores 359 * counter access needs to be synchronized across all cores
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
247 * 386 *
248 * Given that resources are allocated (cmpxchg), they must be 387 * Given that resources are allocated (cmpxchg), they must be
249 * eventually freed for others to use. This is accomplished by 388 * eventually freed for others to use. This is accomplished by
250 * calling amd_put_event_constraints(). 389 * calling __amd_put_nb_event_constraints()
251 * 390 *
252 * Non NB events are not impacted by this restriction. 391 * Non NB events are not impacted by this restriction.
253 */ 392 */
254static struct event_constraint * 393static struct event_constraint *
255amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 394__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
395 struct event_constraint *c)
256{ 396{
257 struct hw_perf_event *hwc = &event->hw; 397 struct hw_perf_event *hwc = &event->hw;
258 struct amd_nb *nb = cpuc->amd_nb; 398 struct amd_nb *nb = cpuc->amd_nb;
259 struct perf_event *old = NULL; 399 struct perf_event *old;
260 int max = x86_pmu.num_counters; 400 int idx, new = -1;
261 int i, j, k = -1;
262 401
263 /* 402 if (!c)
264 * if not NB event or no NB, then no constraints 403 c = &unconstrained;
265 */ 404
266 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) 405 if (cpuc->is_fake)
267 return &unconstrained; 406 return c;
268 407
269 /* 408 /*
270 * detect if already present, if so reuse 409 * detect if already present, if so reuse
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
276 * because of successive calls to x86_schedule_events() from 415 * because of successive calls to x86_schedule_events() from
277 * hw_perf_group_sched_in() without hw_perf_enable() 416 * hw_perf_group_sched_in() without hw_perf_enable()
278 */ 417 */
279 for (i = 0; i < max; i++) { 418 for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
280 /* 419 if (new == -1 || hwc->idx == idx)
281 * keep track of first free slot 420 /* assign free slot, prefer hwc->idx */
282 */ 421 old = cmpxchg(nb->owners + idx, NULL, event);
283 if (k == -1 && !nb->owners[i]) 422 else if (nb->owners[idx] == event)
284 k = i; 423 /* event already present */
424 old = event;
425 else
426 continue;
427
428 if (old && old != event)
429 continue;
430
431 /* reassign to this slot */
432 if (new != -1)
433 cmpxchg(nb->owners + new, event, NULL);
434 new = idx;
285 435
286 /* already present, reuse */ 436 /* already present, reuse */
287 if (nb->owners[i] == event) 437 if (old == event)
288 goto done;
289 }
290 /*
291 * not present, so grab a new slot
292 * starting either at:
293 */
294 if (hwc->idx != -1) {
295 /* previous assignment */
296 i = hwc->idx;
297 } else if (k != -1) {
298 /* start from free slot found */
299 i = k;
300 } else {
301 /*
302 * event not found, no slot found in
303 * first pass, try again from the
304 * beginning
305 */
306 i = 0;
307 }
308 j = i;
309 do {
310 old = cmpxchg(nb->owners+i, NULL, event);
311 if (!old)
312 break; 438 break;
313 if (++i == max) 439 }
314 i = 0; 440
315 } while (i != j); 441 if (new == -1)
316done: 442 return &emptyconstraint;
317 if (!old) 443
318 return &nb->event_constraints[i]; 444 if (amd_is_perfctr_nb_event(hwc))
319 445 amd_nb_interrupt_hw_config(hwc);
320 return &emptyconstraint; 446
447 return &nb->event_constraints[new];
321} 448}
322 449
323static struct amd_nb *amd_alloc_nb(int cpu) 450static struct amd_nb *amd_alloc_nb(int cpu)
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu)
364 struct amd_nb *nb; 491 struct amd_nb *nb;
365 int i, nb_id; 492 int i, nb_id;
366 493
367 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; 494 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
368 495
369 if (boot_cpu_data.x86_max_cores < 2) 496 if (boot_cpu_data.x86_max_cores < 2)
370 return; 497 return;
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu)
407 } 534 }
408} 535}
409 536
537static struct event_constraint *
538amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
539{
540 /*
541 * if not NB event or no NB, then no constraints
542 */
543 if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
544 return &unconstrained;
545
546 return __amd_get_nb_event_constraints(cpuc, event,
547 amd_nb_event_constraint);
548}
549
550static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
551 struct perf_event *event)
552{
553 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
554 __amd_put_nb_event_constraints(cpuc, event);
555}
556
410PMU_FORMAT_ATTR(event, "config:0-7,32-35"); 557PMU_FORMAT_ATTR(event, "config:0-7,32-35");
411PMU_FORMAT_ATTR(umask, "config:8-15" ); 558PMU_FORMAT_ATTR(umask, "config:8-15" );
412PMU_FORMAT_ATTR(edge, "config:18" ); 559PMU_FORMAT_ATTR(edge, "config:18" );
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 643static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 644static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
498 645
646static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
647static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
648
499static struct event_constraint * 649static struct event_constraint *
500amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) 650amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
501{ 651{
@@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
561 return &amd_f15_PMC20; 711 return &amd_f15_PMC20;
562 } 712 }
563 case AMD_EVENT_NB: 713 case AMD_EVENT_NB:
564 /* not yet implemented */ 714 return __amd_get_nb_event_constraints(cpuc, event,
565 return &emptyconstraint; 715 amd_nb_event_constraint);
566 default: 716 default:
567 return &emptyconstraint; 717 return &emptyconstraint;
568 } 718 }
@@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = {
587 .schedule_events = x86_schedule_events, 737 .schedule_events = x86_schedule_events,
588 .eventsel = MSR_K7_EVNTSEL0, 738 .eventsel = MSR_K7_EVNTSEL0,
589 .perfctr = MSR_K7_PERFCTR0, 739 .perfctr = MSR_K7_PERFCTR0,
740 .addr_offset = amd_pmu_addr_offset,
741 .rdpmc_index = amd_pmu_rdpmc_index,
590 .event_map = amd_pmu_event_map, 742 .event_map = amd_pmu_event_map,
591 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 743 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
592 .num_counters = AMD64_NUM_COUNTERS, 744 .num_counters = AMD64_NUM_COUNTERS,
@@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = {
608 760
609static int setup_event_constraints(void) 761static int setup_event_constraints(void)
610{ 762{
611 if (boot_cpu_data.x86 >= 0x15) 763 if (boot_cpu_data.x86 == 0x15)
612 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; 764 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
613 return 0; 765 return 0;
614} 766}
@@ -638,6 +790,23 @@ static int setup_perfctr_core(void)
638 return 0; 790 return 0;
639} 791}
640 792
793static int setup_perfctr_nb(void)
794{
795 if (!cpu_has_perfctr_nb)
796 return -ENODEV;
797
798 x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
799
800 if (cpu_has_perfctr_core)
801 amd_nb_event_constraint = &amd_NBPMC96;
802 else
803 amd_nb_event_constraint = &amd_NBPMC74;
804
805 printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
806
807 return 0;
808}
809
641__init int amd_pmu_init(void) 810__init int amd_pmu_init(void)
642{ 811{
643 /* Performance-monitoring supported from K7 and later: */ 812 /* Performance-monitoring supported from K7 and later: */
@@ -648,6 +817,7 @@ __init int amd_pmu_init(void)
648 817
649 setup_event_constraints(); 818 setup_event_constraints();
650 setup_perfctr_core(); 819 setup_perfctr_core();
820 setup_perfctr_nb();
651 821
652 /* Events are common for all AMDs */ 822 /* Events are common for all AMDs */
653 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 823 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
@@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void)
678 * SVM is disabled the Guest-only bits still gets set and the counter 848 * SVM is disabled the Guest-only bits still gets set and the counter
679 * will not count anything. 849 * will not count anything.
680 */ 850 */
681 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; 851 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
682 852
683 /* Reload all events */ 853 /* Reload all events */
684 x86_pmu_disable_all(); 854 x86_pmu_disable_all();
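Worked example of the north-bridge offset math added above, assuming the perfctr_core layout where the NB counters occupy counter indexes 6..9 (constraint mask 0x3C0, amd_NBPMC96) and MSR_F15H_NB_PERF_CTL sits at 0xc0010240:

/*
 * index = 7, first = 6:
 *   offset         = (MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel) + ((7 - 6) << 1)
 *   config address = x86_pmu.eventsel + offset
 *                  = MSR_F15H_NB_PERF_CTL + 2          (0xc0010240 + 2)
 *   rdpmc index    = 7 - 6 + 6 = 7
 * The results are cached in event_offsets[] / rdpmc_indexes[] on first use.
 */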
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 6336bcbd0618..5f0581e713c2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -528,7 +528,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
528 if (!test_bit(IBS_STARTED, pcpu->state)) { 528 if (!test_bit(IBS_STARTED, pcpu->state)) {
529 /* 529 /*
530 * Catch spurious interrupts after stopping IBS: After 530 * Catch spurious interrupts after stopping IBS: After
531 * disabling IBS there could be still incomming NMIs 531 * disabling IBS there could be still incoming NMIs
532 * with samples that even have the valid bit cleared. 532 * with samples that even have the valid bit cleared.
 533 * Mark all these NMIs as handled. 533 * Mark all these NMIs as handled.
534 */ 534 */
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 93b9e1181f83..dab7580c47ae 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -101,12 +101,37 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
101 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 101 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
102 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 102 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
103 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 103 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
104 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
105 INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
106 INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
107 INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
104 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 108 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
105 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 109 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
106 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 110 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
107 EVENT_CONSTRAINT_END 111 EVENT_CONSTRAINT_END
108}; 112};
109 113
114static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
115{
116 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
117 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
118 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
119 INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
 120 INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
121 INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
122 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
123 INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
124 INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
125 INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
126 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
127 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
128 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
129 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
130 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
131 INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
132 EVENT_CONSTRAINT_END
133};
134
110static struct extra_reg intel_westmere_extra_regs[] __read_mostly = 135static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
111{ 136{
112 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), 137 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
@@ -2019,7 +2044,10 @@ __init int intel_pmu_init(void)
2019 break; 2044 break;
2020 2045
2021 case 28: /* Atom */ 2046 case 28: /* Atom */
2022 case 54: /* Cedariew */ 2047 case 38: /* Lincroft */
2048 case 39: /* Penwell */
2049 case 53: /* Cloverview */
2050 case 54: /* Cedarview */
2023 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 2051 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2024 sizeof(hw_cache_event_ids)); 2052 sizeof(hw_cache_event_ids));
2025 2053
@@ -2084,6 +2112,7 @@ __init int intel_pmu_init(void)
2084 pr_cont("SandyBridge events, "); 2112 pr_cont("SandyBridge events, ");
2085 break; 2113 break;
2086 case 58: /* IvyBridge */ 2114 case 58: /* IvyBridge */
2115 case 62: /* IvyBridge EP */
2087 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 2116 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
2088 sizeof(hw_cache_event_ids)); 2117 sizeof(hw_cache_event_ids));
2089 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 2118 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2091,7 +2120,7 @@ __init int intel_pmu_init(void)
2091 2120
2092 intel_pmu_lbr_init_snb(); 2121 intel_pmu_lbr_init_snb();
2093 2122
2094 x86_pmu.event_constraints = intel_snb_event_constraints; 2123 x86_pmu.event_constraints = intel_ivb_event_constraints;
2095 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; 2124 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
2096 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 2125 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2097 x86_pmu.extra_regs = intel_snb_extra_regs; 2126 x86_pmu.extra_regs = intel_snb_extra_regs;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 826054a4f2ee..b05a575d56f4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -729,3 +729,13 @@ void intel_ds_init(void)
729 } 729 }
730 } 730 }
731} 731}
732
733void perf_restore_debug_store(void)
734{
735 struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
736
737 if (!x86_pmu.bts && !x86_pmu.pebs)
738 return;
739
740 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
741}
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index f2af39f5dc3d..4820c232a0b9 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -19,7 +19,7 @@ static const u64 p6_perfmon_event_map[] =
19 19
20}; 20};
21 21
22static __initconst u64 p6_hw_cache_event_ids 22static u64 p6_hw_cache_event_ids
23 [PERF_COUNT_HW_CACHE_MAX] 23 [PERF_COUNT_HW_CACHE_MAX]
24 [PERF_COUNT_HW_CACHE_OP_MAX] 24 [PERF_COUNT_HW_CACHE_OP_MAX]
25 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 25 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 3286a92e662a..e280253f6f94 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -28,7 +28,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
28{ 28{
29 seq_printf(m, 29 seq_printf(m,
30 "fdiv_bug\t: %s\n" 30 "fdiv_bug\t: %s\n"
31 "hlt_bug\t\t: %s\n"
32 "f00f_bug\t: %s\n" 31 "f00f_bug\t: %s\n"
33 "coma_bug\t: %s\n" 32 "coma_bug\t: %s\n"
34 "fpu\t\t: %s\n" 33 "fpu\t\t: %s\n"
@@ -36,7 +35,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
36 "cpuid level\t: %d\n" 35 "cpuid level\t: %d\n"
37 "wp\t\t: %s\n", 36 "wp\t\t: %s\n",
38 c->fdiv_bug ? "yes" : "no", 37 c->fdiv_bug ? "yes" : "no",
39 c->hlt_works_ok ? "no" : "yes",
40 c->f00f_bug ? "yes" : "no", 38 c->f00f_bug ? "yes" : "no",
41 c->coma_bug ? "yes" : "no", 39 c->coma_bug ? "yes" : "no",
42 c->hard_math ? "yes" : "no", 40 c->hard_math ? "yes" : "no",
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d22d0c4edcfd..03a36321ec54 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -33,6 +33,9 @@
33 33
34#define VMWARE_PORT_CMD_GETVERSION 10 34#define VMWARE_PORT_CMD_GETVERSION 10
35#define VMWARE_PORT_CMD_GETHZ 45 35#define VMWARE_PORT_CMD_GETHZ 45
36#define VMWARE_PORT_CMD_GETVCPU_INFO 68
37#define VMWARE_PORT_CMD_LEGACY_X2APIC 3
38#define VMWARE_PORT_CMD_VCPU_RESERVED 31
36 39
37#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ 40#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
38 __asm__("inl (%%dx)" : \ 41 __asm__("inl (%%dx)" : \
@@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
125 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); 128 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
126} 129}
127 130
131/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
132static bool __init vmware_legacy_x2apic_available(void)
133{
134 uint32_t eax, ebx, ecx, edx;
135 VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
136 return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
137 (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
138}
139
128const __refconst struct hypervisor_x86 x86_hyper_vmware = { 140const __refconst struct hypervisor_x86 x86_hyper_vmware = {
129 .name = "VMware", 141 .name = "VMware",
130 .detect = vmware_platform, 142 .detect = vmware_platform,
131 .set_cpu_features = vmware_set_cpu_features, 143 .set_cpu_features = vmware_set_cpu_features,
132 .init_platform = vmware_platform_setup, 144 .init_platform = vmware_platform_setup,
145 .x2apic_available = vmware_legacy_x2apic_available,
133}; 146};
134EXPORT_SYMBOL(x86_hyper_vmware); 147EXPORT_SYMBOL(x86_hyper_vmware);
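A worked example of the two bit tests in vmware_legacy_x2apic_available(), using the command numbers defined earlier in this file (the EAX replies are illustrative):

/*
 * VMWARE_PORT_CMD_VCPU_RESERVED = 31, VMWARE_PORT_CMD_LEGACY_X2APIC = 3:
 *   EAX = 0x00000008  -> bit 31 clear, bit 3 set   -> returns true
 *   EAX = 0x80000000  -> bit 31 set (info invalid) -> returns false
 *   EAX = 0x00000000  -> bit 3 clear               -> returns false
 */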
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 60c78917190c..1e4dbcfe6d31 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -85,7 +85,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
85{ 85{
86 char __user *tmp = buf; 86 char __user *tmp = buf;
87 struct cpuid_regs cmd; 87 struct cpuid_regs cmd;
88 int cpu = iminor(file->f_path.dentry->d_inode); 88 int cpu = iminor(file_inode(file));
89 u64 pos = *ppos; 89 u64 pos = *ppos;
90 ssize_t bytes = 0; 90 ssize_t bytes = 0;
91 int err = 0; 91 int err = 0;
@@ -116,7 +116,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
116 unsigned int cpu; 116 unsigned int cpu;
117 struct cpuinfo_x86 *c; 117 struct cpuinfo_x86 *c;
118 118
119 cpu = iminor(file->f_path.dentry->d_inode); 119 cpu = iminor(file_inode(file));
120 if (cpu >= nr_cpu_ids || !cpu_online(cpu)) 120 if (cpu >= nr_cpu_ids || !cpu_online(cpu))
121 return -ENXIO; /* No such CPU */ 121 return -ENXIO; /* No such CPU */
122 122
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ae42418bc50f..c8797d55b245 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -232,7 +232,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
232 232
233 bust_spinlocks(0); 233 bust_spinlocks(0);
234 die_owner = -1; 234 die_owner = -1;
235 add_taint(TAINT_DIE); 235 add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
236 die_nest_count--; 236 die_nest_count--;
237 if (!die_nest_count) 237 if (!die_nest_count)
238 /* Nest count reaches zero, release the lock. */ 238 /* Nest count reaches zero, release the lock. */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index df06ade26bef..d32abeabbda5 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -835,7 +835,7 @@ static int __init parse_memopt(char *p)
835} 835}
836early_param("mem", parse_memopt); 836early_param("mem", parse_memopt);
837 837
838static int __init parse_memmap_opt(char *p) 838static int __init parse_memmap_one(char *p)
839{ 839{
840 char *oldp; 840 char *oldp;
841 u64 start_at, mem_size; 841 u64 start_at, mem_size;
@@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p)
877 877
878 return *p == '\0' ? 0 : -EINVAL; 878 return *p == '\0' ? 0 : -EINVAL;
879} 879}
880static int __init parse_memmap_opt(char *str)
881{
882 while (str) {
883 char *k = strchr(str, ',');
884
885 if (k)
886 *k++ = 0;
887
888 parse_memmap_one(str);
889 str = k;
890 }
891
892 return 0;
893}
880early_param("memmap", parse_memmap_opt); 894early_param("memmap", parse_memmap_opt);
881 895
882void __init finish_e820_parsing(void) 896void __init finish_e820_parsing(void)
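With the wrapper above, several regions can be passed in a single memmap= option instead of one option per region. An illustrative command line (addresses and sizes made up, using the existing '$' = reserve and '#' = ACPI-data suffixes):

    memmap=16M$0x1000000,64M#0x20000000

parse_memmap_opt() splits the string on the commas and hands each piece to parse_memmap_one().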
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 6ed91d9980e2..8f3e2dec1df3 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -699,51 +699,6 @@ END(syscall_badsys)
699 */ 699 */
700 .popsection 700 .popsection
701 701
702/*
703 * System calls that need a pt_regs pointer.
704 */
705#define PTREGSCALL0(name) \
706ENTRY(ptregs_##name) ; \
707 leal 4(%esp),%eax; \
708 jmp sys_##name; \
709ENDPROC(ptregs_##name)
710
711#define PTREGSCALL1(name) \
712ENTRY(ptregs_##name) ; \
713 leal 4(%esp),%edx; \
714 movl (PT_EBX+4)(%esp),%eax; \
715 jmp sys_##name; \
716ENDPROC(ptregs_##name)
717
718#define PTREGSCALL2(name) \
719ENTRY(ptregs_##name) ; \
720 leal 4(%esp),%ecx; \
721 movl (PT_ECX+4)(%esp),%edx; \
722 movl (PT_EBX+4)(%esp),%eax; \
723 jmp sys_##name; \
724ENDPROC(ptregs_##name)
725
726#define PTREGSCALL3(name) \
727ENTRY(ptregs_##name) ; \
728 CFI_STARTPROC; \
729 leal 4(%esp),%eax; \
730 pushl_cfi %eax; \
731 movl PT_EDX(%eax),%ecx; \
732 movl PT_ECX(%eax),%edx; \
733 movl PT_EBX(%eax),%eax; \
734 call sys_##name; \
735 addl $4,%esp; \
736 CFI_ADJUST_CFA_OFFSET -4; \
737 ret; \
738 CFI_ENDPROC; \
739ENDPROC(ptregs_##name)
740
741PTREGSCALL1(iopl)
742PTREGSCALL0(sigreturn)
743PTREGSCALL0(rt_sigreturn)
744PTREGSCALL2(vm86)
745PTREGSCALL1(vm86old)
746
747.macro FIXUP_ESPFIX_STACK 702.macro FIXUP_ESPFIX_STACK
748/* 703/*
749 * Switch back for ESPFIX stack to the normal zerobased stack 704 * Switch back for ESPFIX stack to the normal zerobased stack
@@ -1091,11 +1046,18 @@ ENTRY(xen_failsafe_callback)
1091 _ASM_EXTABLE(4b,9b) 1046 _ASM_EXTABLE(4b,9b)
1092ENDPROC(xen_failsafe_callback) 1047ENDPROC(xen_failsafe_callback)
1093 1048
1094BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, 1049BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1095 xen_evtchn_do_upcall) 1050 xen_evtchn_do_upcall)
1096 1051
1097#endif /* CONFIG_XEN */ 1052#endif /* CONFIG_XEN */
1098 1053
1054#if IS_ENABLED(CONFIG_HYPERV)
1055
1056BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1057 hyperv_vector_handler)
1058
1059#endif /* CONFIG_HYPERV */
1060
1099#ifdef CONFIG_FUNCTION_TRACER 1061#ifdef CONFIG_FUNCTION_TRACER
1100#ifdef CONFIG_DYNAMIC_FTRACE 1062#ifdef CONFIG_DYNAMIC_FTRACE
1101 1063
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cb3c591339aa..c1d01e6ca790 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,23 +828,6 @@ int_restore_rest:
828 CFI_ENDPROC 828 CFI_ENDPROC
829END(system_call) 829END(system_call)
830 830
831/*
832 * Certain special system calls that need to save a complete full stack frame.
833 */
834 .macro PTREGSCALL label,func,arg
835ENTRY(\label)
836 PARTIAL_FRAME 1 8 /* offset 8: return address */
837 subq $REST_SKIP, %rsp
838 CFI_ADJUST_CFA_OFFSET REST_SKIP
839 call save_rest
840 DEFAULT_FRAME 0 8 /* offset 8: return address */
841 leaq 8(%rsp), \arg /* pt_regs pointer */
842 call \func
843 jmp ptregscall_common
844 CFI_ENDPROC
845END(\label)
846 .endm
847
848 .macro FORK_LIKE func 831 .macro FORK_LIKE func
849ENTRY(stub_\func) 832ENTRY(stub_\func)
850 CFI_STARTPROC 833 CFI_STARTPROC
@@ -861,10 +844,22 @@ ENTRY(stub_\func)
861END(stub_\func) 844END(stub_\func)
862 .endm 845 .endm
863 846
847 .macro FIXED_FRAME label,func
848ENTRY(\label)
849 CFI_STARTPROC
850 PARTIAL_FRAME 0 8 /* offset 8: return address */
851 FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
852 call \func
853 RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
854 ret
855 CFI_ENDPROC
856END(\label)
857 .endm
858
864 FORK_LIKE clone 859 FORK_LIKE clone
865 FORK_LIKE fork 860 FORK_LIKE fork
866 FORK_LIKE vfork 861 FORK_LIKE vfork
867 PTREGSCALL stub_iopl, sys_iopl, %rsi 862 FIXED_FRAME stub_iopl, sys_iopl
868 863
869ENTRY(ptregscall_common) 864ENTRY(ptregscall_common)
870 DEFAULT_FRAME 1 8 /* offset 8: return address */ 865 DEFAULT_FRAME 1 8 /* offset 8: return address */
@@ -886,7 +881,6 @@ ENTRY(stub_execve)
886 SAVE_REST 881 SAVE_REST
887 FIXUP_TOP_OF_STACK %r11 882 FIXUP_TOP_OF_STACK %r11
888 call sys_execve 883 call sys_execve
889 RESTORE_TOP_OF_STACK %r11
890 movq %rax,RAX(%rsp) 884 movq %rax,RAX(%rsp)
891 RESTORE_REST 885 RESTORE_REST
892 jmp int_ret_from_sys_call 886 jmp int_ret_from_sys_call
@@ -902,7 +896,6 @@ ENTRY(stub_rt_sigreturn)
902 addq $8, %rsp 896 addq $8, %rsp
903 PARTIAL_FRAME 0 897 PARTIAL_FRAME 0
904 SAVE_REST 898 SAVE_REST
905 movq %rsp,%rdi
906 FIXUP_TOP_OF_STACK %r11 899 FIXUP_TOP_OF_STACK %r11
907 call sys_rt_sigreturn 900 call sys_rt_sigreturn
908 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 901 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
@@ -917,7 +910,6 @@ ENTRY(stub_x32_rt_sigreturn)
917 addq $8, %rsp 910 addq $8, %rsp
918 PARTIAL_FRAME 0 911 PARTIAL_FRAME 0
919 SAVE_REST 912 SAVE_REST
920 movq %rsp,%rdi
921 FIXUP_TOP_OF_STACK %r11 913 FIXUP_TOP_OF_STACK %r11
922 call sys32_x32_rt_sigreturn 914 call sys32_x32_rt_sigreturn
923 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 915 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
@@ -1454,11 +1446,16 @@ ENTRY(xen_failsafe_callback)
1454 CFI_ENDPROC 1446 CFI_ENDPROC
1455END(xen_failsafe_callback) 1447END(xen_failsafe_callback)
1456 1448
1457apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ 1449apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
1458 xen_hvm_callback_vector xen_evtchn_do_upcall 1450 xen_hvm_callback_vector xen_evtchn_do_upcall
1459 1451
1460#endif /* CONFIG_XEN */ 1452#endif /* CONFIG_XEN */
1461 1453
1454#if IS_ENABLED(CONFIG_HYPERV)
1455apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
1456 hyperv_callback_vector hyperv_vector_handler
1457#endif /* CONFIG_HYPERV */
1458
1462/* 1459/*
1463 * Some functions should be protected against kprobes 1460 * Some functions should be protected against kprobes
1464 */ 1461 */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d414029f1d8..42a392a9fd02 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
89 * kernel identity mapping to modify code. 89 * kernel identity mapping to modify code.
90 */ 90 */
91 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
92 ip = (unsigned long)__va(__pa(ip)); 92 ip = (unsigned long)__va(__pa_symbol(ip));
93 93
94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
95} 95}
@@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size)
279 * kernel identity mapping to modify code. 279 * kernel identity mapping to modify code.
280 */ 280 */
281 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 281 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
282 ip = (unsigned long)__va(__pa(ip)); 282 ip = (unsigned long)__va(__pa_symbol(ip));
283 283
284 return probe_kernel_write((void *)ip, val, size); 284 return probe_kernel_write((void *)ip, val, size);
285} 285}
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 48d9d4ea1020..992f442ca155 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -5,8 +5,6 @@
5#include <asm/setup.h> 5#include <asm/setup.h>
6#include <asm/bios_ebda.h> 6#include <asm/bios_ebda.h>
7 7
8#define BIOS_LOWMEM_KILOBYTES 0x413
9
10/* 8/*
11 * The BIOS places the EBDA/XBDA at the top of conventional 9 * The BIOS places the EBDA/XBDA at the top of conventional
12 * memory, and usually decreases the reported amount of 10 * memory, and usually decreases the reported amount of
@@ -16,17 +14,30 @@
16 * chipset: reserve a page before VGA to prevent PCI prefetch 14 * chipset: reserve a page before VGA to prevent PCI prefetch
17 * into it (errata #56). Usually the page is reserved anyways, 15 * into it (errata #56). Usually the page is reserved anyways,
18 * unless you have no PS/2 mouse plugged in. 16 * unless you have no PS/2 mouse plugged in.
17 *
 18 * This function is deliberately very conservative. Losing
19 * memory in the bottom megabyte is rarely a problem, as long
20 * as we have enough memory to install the trampoline. Using
21 * memory that is in use by the BIOS or by some DMA device
22 * the BIOS didn't shut down *is* a big problem.
19 */ 23 */
24
25#define BIOS_LOWMEM_KILOBYTES 0x413
26#define LOWMEM_CAP 0x9f000U /* Absolute maximum */
27#define INSANE_CUTOFF 0x20000U /* Less than this = insane */
28
20void __init reserve_ebda_region(void) 29void __init reserve_ebda_region(void)
21{ 30{
22 unsigned int lowmem, ebda_addr; 31 unsigned int lowmem, ebda_addr;
23 32
24 /* To determine the position of the EBDA and the */ 33 /*
25 /* end of conventional memory, we need to look at */ 34 * To determine the position of the EBDA and the
26 /* the BIOS data area. In a paravirtual environment */ 35 * end of conventional memory, we need to look at
27 /* that area is absent. We'll just have to assume */ 36 * the BIOS data area. In a paravirtual environment
28 /* that the paravirt case can handle memory setup */ 37 * that area is absent. We'll just have to assume
29 /* correctly, without our help. */ 38 * that the paravirt case can handle memory setup
39 * correctly, without our help.
40 */
30 if (paravirt_enabled()) 41 if (paravirt_enabled())
31 return; 42 return;
32 43
@@ -37,19 +48,23 @@ void __init reserve_ebda_region(void)
37 /* start of EBDA area */ 48 /* start of EBDA area */
38 ebda_addr = get_bios_ebda(); 49 ebda_addr = get_bios_ebda();
39 50
40 /* Fixup: bios puts an EBDA in the top 64K segment */ 51 /*
41 /* of conventional memory, but does not adjust lowmem. */ 52 * Note: some old Dells seem to need 4k EBDA without
42 if ((lowmem - ebda_addr) <= 0x10000) 53 * reporting so, so just consider the memory above 0x9f000
43 lowmem = ebda_addr; 54 * to be off limits (bugzilla 2990).
55 */
56
57 /* If the EBDA address is below 128K, assume it is bogus */
58 if (ebda_addr < INSANE_CUTOFF)
59 ebda_addr = LOWMEM_CAP;
44 60
45 /* Fixup: bios does not report an EBDA at all. */ 61 /* If lowmem is less than 128K, assume it is bogus */
46 /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ 62 if (lowmem < INSANE_CUTOFF)
47 if ((ebda_addr == 0) && (lowmem >= 0x9f000)) 63 lowmem = LOWMEM_CAP;
48 lowmem = 0x9f000;
49 64
50 /* Paranoia: should never happen, but... */ 65 /* Use the lower of the lowmem and EBDA markers as the cutoff */
51 if ((lowmem == 0) || (lowmem >= 0x100000)) 66 lowmem = min(lowmem, ebda_addr);
52 lowmem = 0x9f000; 67 lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */
53 68
54 /* reserve all memory between lowmem and the 1MB mark */ 69 /* reserve all memory between lowmem and the 1MB mark */
55 memblock_reserve(lowmem, 0x100000 - lowmem); 70 memblock_reserve(lowmem, 0x100000 - lowmem);
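A worked pass through the rewritten clamping logic, with illustrative BIOS values:

/*
 * BIOS word at 0x413 reports 639 KB   -> lowmem    = 0x9fc00
 * get_bios_ebda() returns 0x9fc00     -> ebda_addr = 0x9fc00
 * both are >= INSANE_CUTOFF (0x20000), so neither is replaced;
 * lowmem = min(0x9fc00, 0x9fc00)      = 0x9fc00
 * lowmem = min(0x9fc00, LOWMEM_CAP)   = 0x9f000
 * => memblock_reserve(0x9f000, 0x100000 - 0x9f000)
 * A bogus EBDA pointer of 0 would instead be bumped to LOWMEM_CAP first.
 */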
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c18f59d10101..138463a24877 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
18#include <asm/io_apic.h> 18#include <asm/io_apic.h>
19#include <asm/bios_ebda.h> 19#include <asm/bios_ebda.h>
20#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
21#include <asm/bootparam_utils.h>
21 22
22static void __init i386_default_early_setup(void) 23static void __init i386_default_early_setup(void)
23{ 24{
@@ -30,19 +31,7 @@ static void __init i386_default_early_setup(void)
30 31
31void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
32{ 33{
33 memblock_reserve(__pa_symbol(&_text), 34 sanitize_boot_params(&boot_params);
34 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
35
36#ifdef CONFIG_BLK_DEV_INITRD
37 /* Reserve INITRD */
38 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
39 /* Assume only end is not page aligned */
40 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
41 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
42 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
43 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
44 }
45#endif
46 35
47 /* Call the subarch specific early setup function */ 36 /* Call the subarch specific early setup function */
48 switch (boot_params.hdr.hardware_subarch) { 37 switch (boot_params.hdr.hardware_subarch) {
@@ -57,11 +46,5 @@ void __init i386_start_kernel(void)
57 break; 46 break;
58 } 47 }
59 48
60 /*
61 * At this point everything still needed from the boot loader
62 * or BIOS or kernel text should be early reserved or marked not
63 * RAM in e820. All other memory is free game.
64 */
65
66 start_kernel(); 49 start_kernel();
67} 50}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 037df57a99ac..c5e403f6d869 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,12 +25,84 @@
25#include <asm/kdebug.h> 25#include <asm/kdebug.h>
26#include <asm/e820.h> 26#include <asm/e820.h>
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/bootparam_utils.h>
29#include <asm/microcode.h>
28 30
29static void __init zap_identity_mappings(void) 31/*
32 * Manage page tables very early on.
33 */
34extern pgd_t early_level4_pgt[PTRS_PER_PGD];
35extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
36static unsigned int __initdata next_early_pgt = 2;
37
38/* Wipe all early page tables except for the kernel symbol map */
39static void __init reset_early_page_tables(void)
30{ 40{
31 pgd_t *pgd = pgd_offset_k(0UL); 41 unsigned long i;
32 pgd_clear(pgd); 42
33 __flush_tlb_all(); 43 for (i = 0; i < PTRS_PER_PGD-1; i++)
44 early_level4_pgt[i].pgd = 0;
45
46 next_early_pgt = 0;
47
48 write_cr3(__pa(early_level4_pgt));
49}
50
51/* Create a new PMD entry */
52int __init early_make_pgtable(unsigned long address)
53{
54 unsigned long physaddr = address - __PAGE_OFFSET;
55 unsigned long i;
56 pgdval_t pgd, *pgd_p;
57 pudval_t pud, *pud_p;
58 pmdval_t pmd, *pmd_p;
59
60 /* Invalid address or early pgt is done ? */
61 if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
62 return -1;
63
64again:
65 pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
66 pgd = *pgd_p;
67
68 /*
69 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
70 * critical -- __PAGE_OFFSET would point us back into the dynamic
71 * range and we might end up looping forever...
72 */
73 if (pgd)
74 pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
75 else {
76 if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
77 reset_early_page_tables();
78 goto again;
79 }
80
81 pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
82 for (i = 0; i < PTRS_PER_PUD; i++)
83 pud_p[i] = 0;
84 *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
85 }
86 pud_p += pud_index(address);
87 pud = *pud_p;
88
89 if (pud)
90 pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
91 else {
92 if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
93 reset_early_page_tables();
94 goto again;
95 }
96
97 pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
98 for (i = 0; i < PTRS_PER_PMD; i++)
99 pmd_p[i] = 0;
100 *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
101 }
102 pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL);
103 pmd_p[pmd_index(address)] = pmd;
104
105 return 0;
34} 106}
35 107
36/* Don't add a printk in there. printk relies on the PDA which is not initialized 108/* Don't add a printk in there. printk relies on the PDA which is not initialized
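early_make_pgtable() above walks down one table per paging level, deriving each index from the faulting address. For reference, a sketch of how a virtual address splits under the 4-level x86-64 layout this code assumes (9 bits per level, 2 MB pages at the PMD level):

    /* The kernel's pgd_index()/pud_index()/pmd_index() helpers reduce to
     * these shift-and-mask operations (sketch definitions, local names). */
    #define SK_PGDIR_SHIFT	39
    #define SK_PUD_SHIFT	30
    #define SK_PMD_SHIFT	21
    #define SK_PTRS		512

    static inline unsigned long sk_pgd_index(unsigned long addr)
    {
            return (addr >> SK_PGDIR_SHIFT) & (SK_PTRS - 1);
    }

    static inline unsigned long sk_pud_index(unsigned long addr)
    {
            return (addr >> SK_PUD_SHIFT) & (SK_PTRS - 1);
    }

    static inline unsigned long sk_pmd_index(unsigned long addr)
    {
            return (addr >> SK_PMD_SHIFT) & (SK_PTRS - 1);
    }

For example, the direct-map address 0xffff880000200000 (physical 0x200000) gives pgd_index 272, pud_index 0 and pmd_index 1, so the handler allocates the missing PUD/PMD tables on the way down and writes a single 2 MB PMD entry at index 1.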
@@ -41,13 +113,25 @@ static void __init clear_bss(void)
41 (unsigned long) __bss_stop - (unsigned long) __bss_start); 113 (unsigned long) __bss_stop - (unsigned long) __bss_start);
42} 114}
43 115
116static unsigned long get_cmd_line_ptr(void)
117{
118 unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
119
120 cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;
121
122 return cmd_line_ptr;
123}
124
44static void __init copy_bootdata(char *real_mode_data) 125static void __init copy_bootdata(char *real_mode_data)
45{ 126{
46 char * command_line; 127 char * command_line;
128 unsigned long cmd_line_ptr;
47 129
48 memcpy(&boot_params, real_mode_data, sizeof boot_params); 130 memcpy(&boot_params, real_mode_data, sizeof boot_params);
49 if (boot_params.hdr.cmd_line_ptr) { 131 sanitize_boot_params(&boot_params);
50 command_line = __va(boot_params.hdr.cmd_line_ptr); 132 cmd_line_ptr = get_cmd_line_ptr();
133 if (cmd_line_ptr) {
134 command_line = __va(cmd_line_ptr);
51 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); 135 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
52 } 136 }
53} 137}
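get_cmd_line_ptr() above widens the legacy 32-bit header field with the new ext_cmd_line_ptr extension, so a boot loader may now place the command line above 4 GB. A minimal sketch of the composition, using the field names visible in the hunk:

    /* Sketch: combine the low 32 bits from the setup header with the
     * high 32 bits from the extension field added to boot_params. */
    static unsigned long get_cmd_line_ptr_sketch(const struct boot_params *bp)
    {
            unsigned long cmd_line_ptr = bp->hdr.cmd_line_ptr;	/* low half */

            cmd_line_ptr |= (u64)bp->ext_cmd_line_ptr << 32;	/* high half */

            return cmd_line_ptr;
    }

For instance, ext_cmd_line_ptr = 0x1 with hdr.cmd_line_ptr = 0x20000 yields the 64-bit pointer 0x100020000, which __va() can then turn into a kernel virtual address.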
@@ -70,54 +154,40 @@ void __init x86_64_start_kernel(char * real_mode_data)
70 (__START_KERNEL & PGDIR_MASK))); 154 (__START_KERNEL & PGDIR_MASK)));
71 BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); 155 BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
72 156
157 /* Kill off the identity-map trampoline */
158 reset_early_page_tables();
159
73 /* clear bss before set_intr_gate with early_idt_handler */ 160 /* clear bss before set_intr_gate with early_idt_handler */
74 clear_bss(); 161 clear_bss();
75 162
76 /* Make NULL pointers segfault */ 163 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
77 zap_identity_mappings();
78
79 max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
80
81 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
82#ifdef CONFIG_EARLY_PRINTK
83 set_intr_gate(i, &early_idt_handlers[i]); 164 set_intr_gate(i, &early_idt_handlers[i]);
84#else
85 set_intr_gate(i, early_idt_handler);
86#endif
87 }
88 load_idt((const struct desc_ptr *)&idt_descr); 165 load_idt((const struct desc_ptr *)&idt_descr);
89 166
167 copy_bootdata(__va(real_mode_data));
168
169 /*
170 * Load microcode early on BSP.
171 */
172 load_ucode_bsp();
173
90 if (console_loglevel == 10) 174 if (console_loglevel == 10)
91 early_printk("Kernel alive\n"); 175 early_printk("Kernel alive\n");
92 176
177 clear_page(init_level4_pgt);
178 /* set init_level4_pgt kernel high mapping*/
179 init_level4_pgt[511] = early_level4_pgt[511];
180
93 x86_64_start_reservations(real_mode_data); 181 x86_64_start_reservations(real_mode_data);
94} 182}
95 183
96void __init x86_64_start_reservations(char *real_mode_data) 184void __init x86_64_start_reservations(char *real_mode_data)
97{ 185{
98 copy_bootdata(__va(real_mode_data)); 186 /* version is never zero once boot data has been copied */
99 187 if (!boot_params.hdr.version)
100 memblock_reserve(__pa_symbol(&_text), 188 copy_bootdata(__va(real_mode_data));
101 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
102
103#ifdef CONFIG_BLK_DEV_INITRD
104 /* Reserve INITRD */
105 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
106 /* Assume only end is not page aligned */
107 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
108 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
109 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
110 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
111 }
112#endif
113 189
114 reserve_ebda_region(); 190 reserve_ebda_region();
115 191
116 /*
117 * At this point everything still needed from the boot loader
118 * or BIOS or kernel text should be early reserved or marked not
119 * RAM in e820. All other memory is free game.
120 */
121
122 start_kernel(); 192 start_kernel();
123} 193}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c8932c79e78b..73afd11799ca 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -144,6 +144,11 @@ ENTRY(startup_32)
144 movl %eax, pa(olpc_ofw_pgd) 144 movl %eax, pa(olpc_ofw_pgd)
145#endif 145#endif
146 146
147#ifdef CONFIG_MICROCODE_EARLY
148 /* Early load ucode on BSP. */
149 call load_ucode_bsp
150#endif
151
147/* 152/*
148 * Initialize page tables. This creates a PDE and a set of page 153 * Initialize page tables. This creates a PDE and a set of page
149 * tables, which are located immediately beyond __brk_base. The variable 154 * tables, which are located immediately beyond __brk_base. The variable
@@ -299,6 +304,12 @@ ENTRY(startup_32_smp)
299 movl %eax,%ss 304 movl %eax,%ss
300 leal -__PAGE_OFFSET(%ecx),%esp 305 leal -__PAGE_OFFSET(%ecx),%esp
301 306
307#ifdef CONFIG_MICROCODE_EARLY
308 /* Early load ucode on AP. */
309 call load_ucode_ap
310#endif
311
312
302default_entry: 313default_entry:
303#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ 314#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
304 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ 315 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
@@ -307,36 +318,45 @@ default_entry:
307 movl %eax,%cr0 318 movl %eax,%cr0
308 319
309/* 320/*
310 * New page tables may be in 4Mbyte page mode and may 321 * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave
311 * be using the global pages. 322 * bits like NT set. This would confuse the debugger if this code is traced. So
323 * initialize them properly now before switching to protected mode. That means
324 * DF in particular (even though we have cleared it earlier after copying the
325 * command line) because GCC expects it.
326 */
327 pushl $0
328 popfl
329
330/*
331 * New page tables may be in 4Mbyte page mode and may be using the global pages.
312 * 332 *
313 * NOTE! If we are on a 486 we may have no cr4 at all! 333 * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists
314 * Specifically, cr4 exists if and only if CPUID exists 334 * if and only if CPUID exists and has flags other than the FPU flag set.
315 * and has flags other than the FPU flag set.
316 */ 335 */
336 movl $-1,pa(X86_CPUID) # preset CPUID level
317 movl $X86_EFLAGS_ID,%ecx 337 movl $X86_EFLAGS_ID,%ecx
318 pushl %ecx 338 pushl %ecx
319 popfl 339 popfl # set EFLAGS=ID
320 pushfl
321 popl %eax
322 pushl $0
323 popfl
324 pushfl 340 pushfl
325 popl %edx 341 popl %eax # get EFLAGS
326 xorl %edx,%eax 342 testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remain set?
327 testl %ecx,%eax 343 jz enable_paging # hw disallowed setting of ID bit
328 jz 6f # No ID flag = no CPUID = no CR4 344 # which means no CPUID and no CR4
345
346 xorl %eax,%eax
347 cpuid
348 movl %eax,pa(X86_CPUID) # save largest std CPUID function
329 349
330 movl $1,%eax 350 movl $1,%eax
331 cpuid 351 cpuid
332 andl $~1,%edx # Ignore CPUID.FPU 352 andl $~1,%edx # Ignore CPUID.FPU
333 jz 6f # No flags or only CPUID.FPU = no CR4 353 jz enable_paging # No flags or only CPUID.FPU = no CR4
334 354
335 movl pa(mmu_cr4_features),%eax 355 movl pa(mmu_cr4_features),%eax
336 movl %eax,%cr4 356 movl %eax,%cr4
337 357
338 testb $X86_CR4_PAE, %al # check if PAE is enabled 358 testb $X86_CR4_PAE, %al # check if PAE is enabled
339 jz 6f 359 jz enable_paging
340 360
341 /* Check if extended functions are implemented */ 361 /* Check if extended functions are implemented */
342 movl $0x80000000, %eax 362 movl $0x80000000, %eax
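The sequence above replaces the old AC/ID flag juggling: because EFLAGS has just been cleared, it is enough to try to set EFLAGS.ID and check whether it sticks; a CPU that lets software set bit 21 implements CPUID. A 32-bit C sketch of the same probe (the idea behind have_cpuid_p(), not the in-tree implementation):

    /* Sketch, 32-bit only: load EFLAGS with just the ID bit, read it back,
     * and report whether the bit stuck (ID always reads 0 without CPUID). */
    static int have_cpuid_sketch(void)
    {
            unsigned long flags;

            asm volatile("pushl %1\n\t"
                         "popfl\n\t"
                         "pushfl\n\t"
                         "popl %0"
                         : "=r" (flags)
                         : "ri" (1UL << 21)	/* X86_EFLAGS_ID */
                         : "cc");

            return !!(flags & (1UL << 21));
    }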
@@ -344,7 +364,7 @@ default_entry:
344 /* Value must be in the range 0x80000001 to 0x8000ffff */ 364 /* Value must be in the range 0x80000001 to 0x8000ffff */
345 subl $0x80000001, %eax 365 subl $0x80000001, %eax
346 cmpl $(0x8000ffff-0x80000001), %eax 366 cmpl $(0x8000ffff-0x80000001), %eax
347 ja 6f 367 ja enable_paging
348 368
349 /* Clear bogus XD_DISABLE bits */ 369 /* Clear bogus XD_DISABLE bits */
350 call verify_cpu 370 call verify_cpu
@@ -353,7 +373,7 @@ default_entry:
353 cpuid 373 cpuid
354 /* Execute Disable bit supported? */ 374 /* Execute Disable bit supported? */
355 btl $(X86_FEATURE_NX & 31), %edx 375 btl $(X86_FEATURE_NX & 31), %edx
356 jnc 6f 376 jnc enable_paging
357 377
358 /* Setup EFER (Extended Feature Enable Register) */ 378 /* Setup EFER (Extended Feature Enable Register) */
359 movl $MSR_EFER, %ecx 379 movl $MSR_EFER, %ecx
@@ -363,7 +383,7 @@ default_entry:
363 /* Make changes effective */ 383 /* Make changes effective */
364 wrmsr 384 wrmsr
365 385
3666: 386enable_paging:
367 387
368/* 388/*
369 * Enable paging 389 * Enable paging
@@ -378,14 +398,6 @@ default_entry:
378 addl $__PAGE_OFFSET, %esp 398 addl $__PAGE_OFFSET, %esp
379 399
380/* 400/*
381 * Initialize eflags. Some BIOS's leave bits like NT set. This would
382 * confuse the debugger if this code is traced.
383 * XXX - best to initialize before switching to protected mode.
384 */
385 pushl $0
386 popfl
387
388/*
389 * start system 32-bit setup. We need to re-do some of the things done 401 * start system 32-bit setup. We need to re-do some of the things done
390 * in 16-bit mode for the "real" operations. 402 * in 16-bit mode for the "real" operations.
391 */ 403 */
@@ -394,31 +406,11 @@ default_entry:
394 jz 1f # Did we do this already? 406 jz 1f # Did we do this already?
395 call *%eax 407 call *%eax
3961: 4081:
397 409
398/* check if it is 486 or 386. */
399/* 410/*
400 * XXX - this does a lot of unnecessary setup. Alignment checks don't 411 * Check if it is 486
401 * apply at our cpl of 0 and the stack ought to be aligned already, and
402 * we don't need to preserve eflags.
403 */ 412 */
404 movl $-1,X86_CPUID # -1 for no CPUID initially 413 cmpl $-1,X86_CPUID
405 movb $3,X86 # at least 386
406 pushfl # push EFLAGS
407 popl %eax # get EFLAGS
408 movl %eax,%ecx # save original EFLAGS
409 xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
410 pushl %eax # copy to EFLAGS
411 popfl # set EFLAGS
412 pushfl # get new EFLAGS
413 popl %eax # put it in eax
414 xorl %ecx,%eax # change in flags
415 pushl %ecx # restore original EFLAGS
416 popfl
417 testl $0x40000,%eax # check if AC bit changed
418 je is386
419
420 movb $4,X86 # at least 486
421 testl $0x200000,%eax # check if ID bit changed
422 je is486 414 je is486
423 415
424 /* get vendor info */ 416 /* get vendor info */
@@ -444,11 +436,10 @@ default_entry:
444 movb %cl,X86_MASK 436 movb %cl,X86_MASK
445 movl %edx,X86_CAPABILITY 437 movl %edx,X86_CAPABILITY
446 438
447is486: movl $0x50022,%ecx # set AM, WP, NE and MP 439is486:
448 jmp 2f 440 movb $4,X86
449 441 movl $0x50022,%ecx # set AM, WP, NE and MP
450is386: movl $2,%ecx # set MP 442 movl %cr0,%eax
4512: movl %cr0,%eax
452 andl $0x80000011,%eax # Save PG,PE,ET 443 andl $0x80000011,%eax # Save PG,PE,ET
453 orl %ecx,%eax 444 orl %ecx,%eax
454 movl %eax,%cr0 445 movl %eax,%cr0
@@ -473,7 +464,6 @@ is386: movl $2,%ecx # set MP
473 xorl %eax,%eax # Clear LDT 464 xorl %eax,%eax # Clear LDT
474 lldt %ax 465 lldt %ax
475 466
476 cld # gcc2 wants the direction flag cleared at all times
477 pushl $0 # fake return address for unwinder 467 pushl $0 # fake return address for unwinder
478 jmp *(initial_code) 468 jmp *(initial_code)
479 469
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 980053c4b9cc..6859e9626442 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
47 .code64 47 .code64
48 .globl startup_64 48 .globl startup_64
49startup_64: 49startup_64:
50
51 /* 50 /*
52 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, 51 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
53 * and someone has loaded an identity mapped page table 52 * and someone has loaded an identity mapped page table
54 * for us. These identity mapped page tables map all of the 53 * for us. These identity mapped page tables map all of the
55 * kernel pages and possibly all of memory. 54 * kernel pages and possibly all of memory.
56 * 55 *
57 * %esi holds a physical pointer to real_mode_data. 56 * %rsi holds a physical pointer to real_mode_data.
58 * 57 *
59 * We come here either directly from a 64bit bootloader, or from 58 * We come here either directly from a 64bit bootloader, or from
60 * arch/x86_64/boot/compressed/head.S. 59 * arch/x86_64/boot/compressed/head.S.
@@ -66,7 +65,8 @@ startup_64:
66 * tables and then reload them. 65 * tables and then reload them.
67 */ 66 */
68 67
69 /* Compute the delta between the address I am compiled to run at and the 68 /*
69 * Compute the delta between the address I am compiled to run at and the
70 * address I am actually running at. 70 * address I am actually running at.
71 */ 71 */
72 leaq _text(%rip), %rbp 72 leaq _text(%rip), %rbp
@@ -78,45 +78,62 @@ startup_64:
78 testl %eax, %eax 78 testl %eax, %eax
79 jnz bad_address 79 jnz bad_address
80 80
81 /* Is the address too large? */ 81 /*
82 leaq _text(%rip), %rdx 82 * Is the address too large?
83 movq $PGDIR_SIZE, %rax
84 cmpq %rax, %rdx
85 jae bad_address
86
87 /* Fixup the physical addresses in the page table
88 */ 83 */
89 addq %rbp, init_level4_pgt + 0(%rip) 84 leaq _text(%rip), %rax
90 addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) 85 shrq $MAX_PHYSMEM_BITS, %rax
91 addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) 86 jnz bad_address
92 87
93 addq %rbp, level3_ident_pgt + 0(%rip) 88 /*
89 * Fixup the physical addresses in the page table
90 */
91 addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
94 92
95 addq %rbp, level3_kernel_pgt + (510*8)(%rip) 93 addq %rbp, level3_kernel_pgt + (510*8)(%rip)
96 addq %rbp, level3_kernel_pgt + (511*8)(%rip) 94 addq %rbp, level3_kernel_pgt + (511*8)(%rip)
97 95
98 addq %rbp, level2_fixmap_pgt + (506*8)(%rip) 96 addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
99 97
100 /* Add an Identity mapping if I am above 1G */ 98 /*
99 * Set up the identity mapping for the switchover. These
100 * entries should *NOT* have the global bit set! This also
101 * creates a bunch of nonsense entries but that is fine --
102 * it avoids problems around wraparound.
103 */
101 leaq _text(%rip), %rdi 104 leaq _text(%rip), %rdi
102 andq $PMD_PAGE_MASK, %rdi 105 leaq early_level4_pgt(%rip), %rbx
103 106
104 movq %rdi, %rax 107 movq %rdi, %rax
105 shrq $PUD_SHIFT, %rax 108 shrq $PGDIR_SHIFT, %rax
106 andq $(PTRS_PER_PUD - 1), %rax
107 jz ident_complete
108 109
109 leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx 110 leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx
110 leaq level3_ident_pgt(%rip), %rbx 111 movq %rdx, 0(%rbx,%rax,8)
111 movq %rdx, 0(%rbx, %rax, 8) 112 movq %rdx, 8(%rbx,%rax,8)
112 113
114 addq $4096, %rdx
113 movq %rdi, %rax 115 movq %rdi, %rax
114 shrq $PMD_SHIFT, %rax 116 shrq $PUD_SHIFT, %rax
115 andq $(PTRS_PER_PMD - 1), %rax 117 andl $(PTRS_PER_PUD-1), %eax
116 leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx 118 movq %rdx, (4096+0)(%rbx,%rax,8)
117 leaq level2_spare_pgt(%rip), %rbx 119 movq %rdx, (4096+8)(%rbx,%rax,8)
118 movq %rdx, 0(%rbx, %rax, 8) 120
119ident_complete: 121 addq $8192, %rbx
122 movq %rdi, %rax
123 shrq $PMD_SHIFT, %rdi
124 addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
125 leaq (_end - 1)(%rip), %rcx
126 shrq $PMD_SHIFT, %rcx
127 subq %rdi, %rcx
128 incl %ecx
129
1301:
131 andq $(PTRS_PER_PMD - 1), %rdi
132 movq %rax, (%rbx,%rdi,8)
133 incq %rdi
134 addq $PMD_SIZE, %rax
135 decl %ecx
136 jnz 1b
120 137
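The loop at 1: above emits one 2 MB PMD entry per step; the count in %ecx is derived by shifting both ends of [_text, _end) down to 2 MB granularity and taking the inclusive difference. The same computation in C, with PMD_SHIFT = 21 for 2 MB pages:

    /* Number of 2MB PMD entries needed to identity-map [text, end). */
    #define SK_PMD_SHIFT	21

    static unsigned long pmds_needed(unsigned long text, unsigned long end)
    {
            return ((end - 1) >> SK_PMD_SHIFT) - (text >> SK_PMD_SHIFT) + 1;
    }

For example, a 9.5 MB kernel loaded at 16 MB occupies 0x1000000..0x1980000, i.e. PMD slots 8 through 12, and pmds_needed() returns 5 entries.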
121 /* 138 /*
122 * Fixup the kernel text+data virtual addresses. Note that 139 * Fixup the kernel text+data virtual addresses. Note that
@@ -124,7 +141,6 @@ ident_complete:
124 * cleanup_highmap() fixes this up along with the mappings 141 * cleanup_highmap() fixes this up along with the mappings
125 * beyond _end. 142 * beyond _end.
126 */ 143 */
127
128 leaq level2_kernel_pgt(%rip), %rdi 144 leaq level2_kernel_pgt(%rip), %rdi
129 leaq 4096(%rdi), %r8 145 leaq 4096(%rdi), %r8
130 /* See if it is a valid page table entry */ 146 /* See if it is a valid page table entry */
@@ -139,17 +155,14 @@ ident_complete:
139 /* Fixup phys_base */ 155 /* Fixup phys_base */
140 addq %rbp, phys_base(%rip) 156 addq %rbp, phys_base(%rip)
141 157
142 /* Due to ENTRY(), sometimes the empty space gets filled with 158 movq $(early_level4_pgt - __START_KERNEL_map), %rax
143 * zeros. Better take a jmp than relying on empty space being 159 jmp 1f
144 * filled with 0x90 (nop)
145 */
146 jmp secondary_startup_64
147ENTRY(secondary_startup_64) 160ENTRY(secondary_startup_64)
148 /* 161 /*
149 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, 162 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
150 * and someone has loaded a mapped page table. 163 * and someone has loaded a mapped page table.
151 * 164 *
152 * %esi holds a physical pointer to real_mode_data. 165 * %rsi holds a physical pointer to real_mode_data.
153 * 166 *
154 * We come here either from startup_64 (using physical addresses) 167 * We come here either from startup_64 (using physical addresses)
155 * or from trampoline.S (using virtual addresses). 168 * or from trampoline.S (using virtual addresses).
@@ -159,12 +172,14 @@ ENTRY(secondary_startup_64)
159 * after the boot processor executes this code. 172 * after the boot processor executes this code.
160 */ 173 */
161 174
175 movq $(init_level4_pgt - __START_KERNEL_map), %rax
1761:
177
162 /* Enable PAE mode and PGE */ 178 /* Enable PAE mode and PGE */
163 movl $(X86_CR4_PAE | X86_CR4_PGE), %eax 179 movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
164 movq %rax, %cr4 180 movq %rcx, %cr4
165 181
166 /* Setup early boot stage 4 level pagetables. */ 182 /* Setup early boot stage 4 level pagetables. */
167 movq $(init_level4_pgt - __START_KERNEL_map), %rax
168 addq phys_base(%rip), %rax 183 addq phys_base(%rip), %rax
169 movq %rax, %cr3 184 movq %rax, %cr3
170 185
@@ -196,7 +211,7 @@ ENTRY(secondary_startup_64)
196 movq %rax, %cr0 211 movq %rax, %cr0
197 212
198 /* Setup a boot time stack */ 213 /* Setup a boot time stack */
199 movq stack_start(%rip),%rsp 214 movq stack_start(%rip), %rsp
200 215
201 /* zero EFLAGS after setting rsp */ 216 /* zero EFLAGS after setting rsp */
202 pushq $0 217 pushq $0
@@ -236,15 +251,33 @@ ENTRY(secondary_startup_64)
236 movl initial_gs+4(%rip),%edx 251 movl initial_gs+4(%rip),%edx
237 wrmsr 252 wrmsr
238 253
239 /* esi is pointer to real mode structure with interesting info. 254 /* rsi is pointer to real mode structure with interesting info.
240 pass it to C */ 255 pass it to C */
241 movl %esi, %edi 256 movq %rsi, %rdi
242 257
243 /* Finally jump to run C code and to be on real kernel address 258 /* Finally jump to run C code and to be on real kernel address
244 * Since we are running on identity-mapped space we have to jump 259 * Since we are running on identity-mapped space we have to jump
245 * to the full 64bit address, this is only possible as indirect 260 * to the full 64bit address, this is only possible as indirect
246 * jump. In addition we need to ensure %cs is set so we make this 261 * jump. In addition we need to ensure %cs is set so we make this
247 * a far return. 262 * a far return.
263 *
264 * Note: do not change to far jump indirect with 64bit offset.
265 *
266 * AMD does not support far jump indirect with 64bit offset.
267 * AMD64 Architecture Programmer's Manual, Volume 3: states only
268 * JMP FAR mem16:16 FF /5 Far jump indirect,
269 * with the target specified by a far pointer in memory.
270 * JMP FAR mem16:32 FF /5 Far jump indirect,
271 * with the target specified by a far pointer in memory.
272 *
273 * Intel64 does support 64bit offset.
274 * Software Developer Manual Vol 2: states:
275 * FF /5 JMP m16:16 Jump far, absolute indirect,
276 * address given in m16:16
277 * FF /5 JMP m16:32 Jump far, absolute indirect,
278 * address given in m16:32.
279 * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
280 * address given in m16:64.
248 */ 281 */
249 movq initial_code(%rip),%rax 282 movq initial_code(%rip),%rax
250 pushq $0 # fake return address to stop unwinder 283 pushq $0 # fake return address to stop unwinder
@@ -270,13 +303,13 @@ ENDPROC(start_cpu0)
270 303
271 /* SMP bootup changes these two */ 304 /* SMP bootup changes these two */
272 __REFDATA 305 __REFDATA
273 .align 8 306 .balign 8
274 ENTRY(initial_code) 307 GLOBAL(initial_code)
275 .quad x86_64_start_kernel 308 .quad x86_64_start_kernel
276 ENTRY(initial_gs) 309 GLOBAL(initial_gs)
277 .quad INIT_PER_CPU_VAR(irq_stack_union) 310 .quad INIT_PER_CPU_VAR(irq_stack_union)
278 311
279 ENTRY(stack_start) 312 GLOBAL(stack_start)
280 .quad init_thread_union+THREAD_SIZE-8 313 .quad init_thread_union+THREAD_SIZE-8
281 .word 0 314 .word 0
282 __FINITDATA 315 __FINITDATA
@@ -284,7 +317,7 @@ ENDPROC(start_cpu0)
284bad_address: 317bad_address:
285 jmp bad_address 318 jmp bad_address
286 319
287 .section ".init.text","ax" 320 __INIT
288 .globl early_idt_handlers 321 .globl early_idt_handlers
289early_idt_handlers: 322early_idt_handlers:
290 # 104(%rsp) %rflags 323 # 104(%rsp) %rflags
@@ -303,6 +336,7 @@ early_idt_handlers:
303 i = i + 1 336 i = i + 1
304 .endr 337 .endr
305 338
339/* This is global to keep gas from relaxing the jumps */
306ENTRY(early_idt_handler) 340ENTRY(early_idt_handler)
307 cld 341 cld
308 342
@@ -321,14 +355,22 @@ ENTRY(early_idt_handler)
321 pushq %r11 # 0(%rsp) 355 pushq %r11 # 0(%rsp)
322 356
323 cmpl $__KERNEL_CS,96(%rsp) 357 cmpl $__KERNEL_CS,96(%rsp)
324 jne 10f 358 jne 11f
359
360 cmpl $14,72(%rsp) # Page fault?
361 jnz 10f
362 GET_CR2_INTO(%rdi) # can clobber any volatile register if pv
363 call early_make_pgtable
364 andl %eax,%eax
365 jz 20f # All good
325 366
36710:
326 leaq 88(%rsp),%rdi # Pointer to %rip 368 leaq 88(%rsp),%rdi # Pointer to %rip
327 call early_fixup_exception 369 call early_fixup_exception
328 andl %eax,%eax 370 andl %eax,%eax
329 jnz 20f # Found an exception entry 371 jnz 20f # Found an exception entry
330 372
33110: 37311:
332#ifdef CONFIG_EARLY_PRINTK 374#ifdef CONFIG_EARLY_PRINTK
333 GET_CR2_INTO(%r9) # can clobber any volatile register if pv 375 GET_CR2_INTO(%r9) # can clobber any volatile register if pv
334 movl 80(%rsp),%r8d # error code 376 movl 80(%rsp),%r8d # error code
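In C terms, the page-fault special case added to early_idt_handler amounts to the following dispatch (a sketch of the control flow; the real handler works on the saved stack frame in assembly):

    /* Returns nonzero if the early fault was handled and can be retried. */
    static int early_exception_sketch(unsigned long vector, unsigned long cs,
                                      unsigned long cr2, unsigned long *rip)
    {
            if (cs != __KERNEL_CS)
                    return 0;		/* not kernel code: report and hang */

            /* Vector 14 is #PF: try to build the missing page table entry. */
            if (vector == 14 && early_make_pgtable(cr2) == 0)
                    return 1;		/* mapping created, retry the access */

            /* Otherwise fall back to the exception table (may rewrite *rip). */
            return early_fixup_exception(rip);
    }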
@@ -350,7 +392,7 @@ ENTRY(early_idt_handler)
3501: hlt 3921: hlt
351 jmp 1b 393 jmp 1b
352 394
35320: # Exception table entry found 39520: # Exception table entry found or page table generated
354 popq %r11 396 popq %r11
355 popq %r10 397 popq %r10
356 popq %r9 398 popq %r9
@@ -363,6 +405,9 @@ ENTRY(early_idt_handler)
363 addq $16,%rsp # drop vector number and error code 405 addq $16,%rsp # drop vector number and error code
364 decl early_recursion_flag(%rip) 406 decl early_recursion_flag(%rip)
365 INTERRUPT_RETURN 407 INTERRUPT_RETURN
408ENDPROC(early_idt_handler)
409
410 __INITDATA
366 411
367 .balign 4 412 .balign 4
368early_recursion_flag: 413early_recursion_flag:
@@ -374,11 +419,10 @@ early_idt_msg:
374early_idt_ripmsg: 419early_idt_ripmsg:
375 .asciz "RIP %s\n" 420 .asciz "RIP %s\n"
376#endif /* CONFIG_EARLY_PRINTK */ 421#endif /* CONFIG_EARLY_PRINTK */
377 .previous
378 422
379#define NEXT_PAGE(name) \ 423#define NEXT_PAGE(name) \
380 .balign PAGE_SIZE; \ 424 .balign PAGE_SIZE; \
381ENTRY(name) 425GLOBAL(name)
382 426
383/* Automate the creation of 1 to 1 mapping pmd entries */ 427/* Automate the creation of 1 to 1 mapping pmd entries */
384#define PMDS(START, PERM, COUNT) \ 428#define PMDS(START, PERM, COUNT) \
@@ -388,24 +432,37 @@ ENTRY(name)
388 i = i + 1 ; \ 432 i = i + 1 ; \
389 .endr 433 .endr
390 434
435 __INITDATA
436NEXT_PAGE(early_level4_pgt)
437 .fill 511,8,0
438 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
439
440NEXT_PAGE(early_dynamic_pgts)
441 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
442
391 .data 443 .data
392 /* 444
393 * This default setting generates an ident mapping at address 0x100000 445#ifndef CONFIG_XEN
394 * and a mapping for the kernel that precisely maps virtual address
395 * 0xffffffff80000000 to physical address 0x000000. (always using
396 * 2Mbyte large pages provided by PAE mode)
397 */
398NEXT_PAGE(init_level4_pgt) 446NEXT_PAGE(init_level4_pgt)
399 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE 447 .fill 512,8,0
400 .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 448#else
401 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE 449NEXT_PAGE(init_level4_pgt)
402 .org init_level4_pgt + L4_START_KERNEL*8, 0 450 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
451 .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
452 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
453 .org init_level4_pgt + L4_START_KERNEL*8, 0
403 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 454 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
404 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE 455 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
405 456
406NEXT_PAGE(level3_ident_pgt) 457NEXT_PAGE(level3_ident_pgt)
407 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE 458 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
408 .fill 511,8,0 459 .fill 511, 8, 0
460NEXT_PAGE(level2_ident_pgt)
461 /* Since I easily can, map the first 1G.
462 * Don't set NX because code runs from these pages.
463 */
464 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
465#endif
409 466
410NEXT_PAGE(level3_kernel_pgt) 467NEXT_PAGE(level3_kernel_pgt)
411 .fill L3_START_KERNEL,8,0 468 .fill L3_START_KERNEL,8,0
@@ -413,21 +470,6 @@ NEXT_PAGE(level3_kernel_pgt)
413 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE 470 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
414 .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE 471 .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
415 472
416NEXT_PAGE(level2_fixmap_pgt)
417 .fill 506,8,0
418 .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
419 /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
420 .fill 5,8,0
421
422NEXT_PAGE(level1_fixmap_pgt)
423 .fill 512,8,0
424
425NEXT_PAGE(level2_ident_pgt)
426 /* Since I easily can, map the first 1G.
427 * Don't set NX because code runs from these pages.
428 */
429 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
430
431NEXT_PAGE(level2_kernel_pgt) 473NEXT_PAGE(level2_kernel_pgt)
432 /* 474 /*
433 * 512 MB kernel mapping. We spend a full page on this pagetable 475 * 512 MB kernel mapping. We spend a full page on this pagetable
@@ -442,11 +484,16 @@ NEXT_PAGE(level2_kernel_pgt)
442 PMDS(0, __PAGE_KERNEL_LARGE_EXEC, 484 PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
443 KERNEL_IMAGE_SIZE/PMD_SIZE) 485 KERNEL_IMAGE_SIZE/PMD_SIZE)
444 486
445NEXT_PAGE(level2_spare_pgt) 487NEXT_PAGE(level2_fixmap_pgt)
446 .fill 512, 8, 0 488 .fill 506,8,0
489 .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
490 /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
491 .fill 5,8,0
492
493NEXT_PAGE(level1_fixmap_pgt)
494 .fill 512,8,0
447 495
448#undef PMDS 496#undef PMDS
449#undef NEXT_PAGE
450 497
451 .data 498 .data
452 .align 16 499 .align 16
@@ -472,6 +519,5 @@ ENTRY(nmi_idt_table)
472 .skip IDT_ENTRIES * 16 519 .skip IDT_ENTRIES * 16
473 520
474 __PAGE_ALIGNED_BSS 521 __PAGE_ALIGNED_BSS
475 .align PAGE_SIZE 522NEXT_PAGE(empty_zero_page)
476ENTRY(empty_zero_page)
477 .skip PAGE_SIZE 523 .skip PAGE_SIZE
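The NEXT_PAGE()/PMDS() macros above build these tables at assembly time. The .quad body of PMDS() is not shown in this hunk, but its usage amounts to filling COUNT consecutive 2 MB entries starting at physical address START; as a runtime loop the same fill would look roughly like this (sketch, assuming the usual START + (i << PMD_SHIFT) | PERM entry format):

    /* Rough runtime equivalent of PMDS(START, PERM, COUNT). */
    static void fill_pmds_sketch(u64 *pmd_table, u64 start, u64 perm, int count)
    {
            int i;

            for (i = 0; i < count; i++)
                    pmd_table[i] = (start + ((u64)i << 21)) | perm;	/* 2MB steps */
    }

level2_kernel_pgt uses COUNT = KERNEL_IMAGE_SIZE/PMD_SIZE, i.e. enough 2 MB entries to cover the 512 MB kernel mapping described in the comment above it.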
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index e28670f9a589..da85a8e830a1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta,
478 478
479static int hpet_setup_msi_irq(unsigned int irq) 479static int hpet_setup_msi_irq(unsigned int irq)
480{ 480{
481 if (arch_setup_hpet_msi(irq, hpet_blockid)) { 481 if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
482 destroy_irq(irq); 482 destroy_irq(irq);
483 return -EINVAL; 483 return -EINVAL;
484 } 484 }
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 9c3bd4a2050e..0fa69127209a 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(csum_partial_copy_generic);
26EXPORT_SYMBOL(__get_user_1); 26EXPORT_SYMBOL(__get_user_1);
27EXPORT_SYMBOL(__get_user_2); 27EXPORT_SYMBOL(__get_user_2);
28EXPORT_SYMBOL(__get_user_4); 28EXPORT_SYMBOL(__get_user_4);
29EXPORT_SYMBOL(__get_user_8);
29 30
30EXPORT_SYMBOL(__put_user_1); 31EXPORT_SYMBOL(__put_user_1);
31EXPORT_SYMBOL(__put_user_2); 32EXPORT_SYMBOL(__put_user_2);
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8c968974253d..4ddaf66ea35f 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -93,8 +93,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
93 * on system-call entry - see also fork() and the signal handling 93 * on system-call entry - see also fork() and the signal handling
94 * code. 94 * code.
95 */ 95 */
96long sys_iopl(unsigned int level, struct pt_regs *regs) 96SYSCALL_DEFINE1(iopl, unsigned int, level)
97{ 97{
98 struct pt_regs *regs = current_pt_regs();
98 unsigned int old = (regs->flags >> 12) & 3; 99 unsigned int old = (regs->flags >> 12) & 3;
99 struct thread_struct *t = &current->thread; 100 struct thread_struct *t = &current->thread;
100 101
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile
new file mode 100644
index 000000000000..0d33169cc1a2
--- /dev/null
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for kernel probes
3#
4
5obj-$(CONFIG_KPROBES) += core.o
6obj-$(CONFIG_OPTPROBES) += opt.o
7obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h
index 3230b68ef29a..2e9d4b5af036 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
99 return addr; 99 return addr;
100} 100}
101#endif 101#endif
102
103#ifdef CONFIG_KPROBES_ON_FTRACE
104extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
105 struct kprobe_ctlblk *kcb);
106#else
107static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
108 struct kprobe_ctlblk *kcb)
109{
110 return 0;
111}
112#endif
102#endif 113#endif
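The #ifdef block added here follows a common kernel idiom: declare the real function when the option is built in, and supply a static inline stub with a constant return value otherwise, so that callers such as kprobe_handler() need no #ifdef and the compiler simply drops the dead branch. The same idiom in generic form (hypothetical option and names):

    /* CONFIG_MY_FEATURE, struct foo and my_feature_handle() are made up;
     * only the shape of the idiom matters. */
    struct foo;

    #ifdef CONFIG_MY_FEATURE
    extern int my_feature_handle(struct foo *f);
    #else
    static inline int my_feature_handle(struct foo *f)
    {
            return 0;	/* feature compiled out: caller takes the normal path */
    }
    #endif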
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c
index 57916c0d3cf6..7bfe318d3d8a 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
58#include <asm/insn.h> 58#include <asm/insn.h>
59#include <asm/debugreg.h> 59#include <asm/debugreg.h>
60 60
61#include "kprobes-common.h" 61#include "common.h"
62 62
63void jprobe_return_end(void); 63void jprobe_return_end(void);
64 64
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
78 * Groups, and some special opcodes can not boost. 78 * Groups, and some special opcodes can not boost.
79 * This is non-const and volatile to keep gcc from statically 79 * This is non-const and volatile to keep gcc from statically
80 * optimizing it out, as variable_test_bit makes gcc think only 80 * optimizing it out, as variable_test_bit makes gcc think only
81 * *(unsigned long*) is used. 81 * *(unsigned long*) is used.
82 */ 82 */
83static volatile u32 twobyte_is_boostable[256 / 32] = { 83static volatile u32 twobyte_is_boostable[256 / 32] = {
84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
117 struct __arch_relative_insn { 117 struct __arch_relative_insn {
118 u8 op; 118 u8 op;
119 s32 raddr; 119 s32 raddr;
120 } __attribute__((packed)) *insn; 120 } __packed *insn;
121 121
122 insn = (struct __arch_relative_insn *)from; 122 insn = (struct __arch_relative_insn *)from;
123 insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); 123 insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
@@ -375,6 +375,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
375 else 375 else
376 p->ainsn.boostable = -1; 376 p->ainsn.boostable = -1;
377 377
378 /* Check whether the instruction modifies Interrupt Flag or not */
379 p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
380
378 /* Also, displacement change doesn't affect the first byte */ 381 /* Also, displacement change doesn't affect the first byte */
379 p->opcode = p->ainsn.insn[0]; 382 p->opcode = p->ainsn.insn[0];
380} 383}
@@ -434,7 +437,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
434 __this_cpu_write(current_kprobe, p); 437 __this_cpu_write(current_kprobe, p);
435 kcb->kprobe_saved_flags = kcb->kprobe_old_flags 438 kcb->kprobe_saved_flags = kcb->kprobe_old_flags
436 = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); 439 = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
437 if (is_IF_modifier(p->ainsn.insn)) 440 if (p->ainsn.if_modifier)
438 kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; 441 kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
439} 442}
440 443
@@ -541,23 +544,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
541 return 1; 544 return 1;
542} 545}
543 546
544#ifdef KPROBES_CAN_USE_FTRACE
545static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
546 struct kprobe_ctlblk *kcb)
547{
548 /*
549 * Emulate singlestep (and also recover regs->ip)
550 * as if there is a 5byte nop
551 */
552 regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
553 if (unlikely(p->post_handler)) {
554 kcb->kprobe_status = KPROBE_HIT_SSDONE;
555 p->post_handler(p, regs, 0);
556 }
557 __this_cpu_write(current_kprobe, NULL);
558}
559#endif
560
561/* 547/*
562 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 548 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
563 * remain disabled throughout this function. 549 * remain disabled throughout this function.
@@ -616,13 +602,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
616 } else if (kprobe_running()) { 602 } else if (kprobe_running()) {
617 p = __this_cpu_read(current_kprobe); 603 p = __this_cpu_read(current_kprobe);
618 if (p->break_handler && p->break_handler(p, regs)) { 604 if (p->break_handler && p->break_handler(p, regs)) {
619#ifdef KPROBES_CAN_USE_FTRACE 605 if (!skip_singlestep(p, regs, kcb))
620 if (kprobe_ftrace(p)) { 606 setup_singlestep(p, regs, kcb, 0);
621 skip_singlestep(p, regs, kcb);
622 return 1;
623 }
624#endif
625 setup_singlestep(p, regs, kcb, 0);
626 return 1; 607 return 1;
627 } 608 }
628 } /* else: not a kprobe fault; let the kernel handle it */ 609 } /* else: not a kprobe fault; let the kernel handle it */
@@ -674,7 +655,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
674{ 655{
675 struct kretprobe_instance *ri = NULL; 656 struct kretprobe_instance *ri = NULL;
676 struct hlist_head *head, empty_rp; 657 struct hlist_head *head, empty_rp;
677 struct hlist_node *node, *tmp; 658 struct hlist_node *tmp;
678 unsigned long flags, orig_ret_address = 0; 659 unsigned long flags, orig_ret_address = 0;
679 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 660 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
680 kprobe_opcode_t *correct_ret_addr = NULL; 661 kprobe_opcode_t *correct_ret_addr = NULL;
@@ -704,7 +685,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
704 * will be the real return address, and all the rest will 685 * will be the real return address, and all the rest will
705 * point to kretprobe_trampoline. 686 * point to kretprobe_trampoline.
706 */ 687 */
707 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 688 hlist_for_each_entry_safe(ri, tmp, head, hlist) {
708 if (ri->task != current) 689 if (ri->task != current)
709 /* another task is sharing our hash bucket */ 690 /* another task is sharing our hash bucket */
710 continue; 691 continue;
@@ -723,7 +704,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
723 kretprobe_assert(ri, orig_ret_address, trampoline_address); 704 kretprobe_assert(ri, orig_ret_address, trampoline_address);
724 705
725 correct_ret_addr = ri->ret_addr; 706 correct_ret_addr = ri->ret_addr;
726 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 707 hlist_for_each_entry_safe(ri, tmp, head, hlist) {
727 if (ri->task != current) 708 if (ri->task != current)
728 /* another task is sharing our hash bucket */ 709 /* another task is sharing our hash bucket */
729 continue; 710 continue;
@@ -750,7 +731,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
750 731
751 kretprobe_hash_unlock(current, &flags); 732 kretprobe_hash_unlock(current, &flags);
752 733
753 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 734 hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
754 hlist_del(&ri->hlist); 735 hlist_del(&ri->hlist);
755 kfree(ri); 736 kfree(ri);
756 } 737 }
@@ -1075,50 +1056,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1075 return 0; 1056 return 0;
1076} 1057}
1077 1058
1078#ifdef KPROBES_CAN_USE_FTRACE
1079/* Ftrace callback handler for kprobes */
1080void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
1081 struct ftrace_ops *ops, struct pt_regs *regs)
1082{
1083 struct kprobe *p;
1084 struct kprobe_ctlblk *kcb;
1085 unsigned long flags;
1086
1087 /* Disable irq for emulating a breakpoint and avoiding preempt */
1088 local_irq_save(flags);
1089
1090 p = get_kprobe((kprobe_opcode_t *)ip);
1091 if (unlikely(!p) || kprobe_disabled(p))
1092 goto end;
1093
1094 kcb = get_kprobe_ctlblk();
1095 if (kprobe_running()) {
1096 kprobes_inc_nmissed_count(p);
1097 } else {
1098 /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
1099 regs->ip = ip + sizeof(kprobe_opcode_t);
1100
1101 __this_cpu_write(current_kprobe, p);
1102 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1103 if (!p->pre_handler || !p->pre_handler(p, regs))
1104 skip_singlestep(p, regs, kcb);
1105 /*
1106 * If pre_handler returns !0, it sets regs->ip and
1107 * resets current kprobe.
1108 */
1109 }
1110end:
1111 local_irq_restore(flags);
1112}
1113
1114int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
1115{
1116 p->ainsn.insn = NULL;
1117 p->ainsn.boostable = -1;
1118 return 0;
1119}
1120#endif
1121
1122int __init arch_init_kprobes(void) 1059int __init arch_init_kprobes(void)
1123{ 1060{
1124 return arch_init_optprobes(); 1061 return arch_init_optprobes();
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
new file mode 100644
index 000000000000..23ef5c556f06
--- /dev/null
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
1/*
2 * Dynamic Ftrace based Kprobes Optimization
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) Hitachi Ltd., 2012
19 */
20#include <linux/kprobes.h>
21#include <linux/ptrace.h>
22#include <linux/hardirq.h>
23#include <linux/preempt.h>
24#include <linux/ftrace.h>
25
26#include "common.h"
27
28static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
29 struct kprobe_ctlblk *kcb)
30{
31 /*
32 * Emulate singlestep (and also recover regs->ip)
33 * as if there is a 5byte nop
34 */
35 regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
36 if (unlikely(p->post_handler)) {
37 kcb->kprobe_status = KPROBE_HIT_SSDONE;
38 p->post_handler(p, regs, 0);
39 }
40 __this_cpu_write(current_kprobe, NULL);
41 return 1;
42}
43
44int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
45 struct kprobe_ctlblk *kcb)
46{
47 if (kprobe_ftrace(p))
48 return __skip_singlestep(p, regs, kcb);
49 else
50 return 0;
51}
52
53/* Ftrace callback handler for kprobes */
54void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
55 struct ftrace_ops *ops, struct pt_regs *regs)
56{
57 struct kprobe *p;
58 struct kprobe_ctlblk *kcb;
59 unsigned long flags;
60
61 /* Disable irq for emulating a breakpoint and avoiding preempt */
62 local_irq_save(flags);
63
64 p = get_kprobe((kprobe_opcode_t *)ip);
65 if (unlikely(!p) || kprobe_disabled(p))
66 goto end;
67
68 kcb = get_kprobe_ctlblk();
69 if (kprobe_running()) {
70 kprobes_inc_nmissed_count(p);
71 } else {
72 /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
73 regs->ip = ip + sizeof(kprobe_opcode_t);
74
75 __this_cpu_write(current_kprobe, p);
76 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
77 if (!p->pre_handler || !p->pre_handler(p, regs))
78 __skip_singlestep(p, regs, kcb);
79 /*
80 * If pre_handler returns !0, it sets regs->ip and
81 * resets current kprobe.
82 */
83 }
84end:
85 local_irq_restore(flags);
86}
87
88int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
89{
90 p->ainsn.insn = NULL;
91 p->ainsn.boostable = -1;
92 return 0;
93}
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c
index c5e410eed403..76dc6f095724 100644
--- a/arch/x86/kernel/kprobes-opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
37#include <asm/insn.h> 37#include <asm/insn.h>
38#include <asm/debugreg.h> 38#include <asm/debugreg.h>
39 39
40#include "kprobes-common.h" 40#include "common.h"
41 41
42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
43{ 43{
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9c2bd8bd4b4c..b686a904d7c3 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -297,9 +297,9 @@ static void kvm_register_steal_time(void)
297 297
298 memset(st, 0, sizeof(*st)); 298 memset(st, 0, sizeof(*st));
299 299
300 wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); 300 wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
301 printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", 301 pr_info("kvm-stealtime: cpu %d, msr %llx\n",
302 cpu, __pa(st)); 302 cpu, (unsigned long long) slow_virt_to_phys(st));
303} 303}
304 304
305static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; 305static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@@ -324,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void)
324 return; 324 return;
325 325
326 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { 326 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
327 u64 pa = __pa(&__get_cpu_var(apf_reason)); 327 u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason));
328 328
329#ifdef CONFIG_PREEMPT 329#ifdef CONFIG_PREEMPT
330 pa |= KVM_ASYNC_PF_SEND_ALWAYS; 330 pa |= KVM_ASYNC_PF_SEND_ALWAYS;
@@ -340,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void)
340 /* Size alignment is implied but just to make it explicit. */ 340 /* Size alignment is implied but just to make it explicit. */
341 BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); 341 BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
342 __get_cpu_var(kvm_apic_eoi) = 0; 342 __get_cpu_var(kvm_apic_eoi) = 0;
343 pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; 343 pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi))
344 | KVM_MSR_ENABLED;
344 wrmsrl(MSR_KVM_PV_EOI_EN, pa); 345 wrmsrl(MSR_KVM_PV_EOI_EN, pa);
345 } 346 }
346 347
@@ -505,6 +506,7 @@ static bool __init kvm_detect(void)
505const struct hypervisor_x86 x86_hyper_kvm __refconst = { 506const struct hypervisor_x86 x86_hyper_kvm __refconst = {
506 .name = "KVM", 507 .name = "KVM",
507 .detect = kvm_detect, 508 .detect = kvm_detect,
509 .x2apic_available = kvm_para_available,
508}; 510};
509EXPORT_SYMBOL_GPL(x86_hyper_kvm); 511EXPORT_SYMBOL_GPL(x86_hyper_kvm);
510 512
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 220a360010f8..0732f0089a3d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -162,8 +162,8 @@ int kvm_register_clock(char *txt)
162 int low, high, ret; 162 int low, high, ret;
163 struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; 163 struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
164 164
165 low = (int)__pa(src) | 1; 165 low = (int)slow_virt_to_phys(src) | 1;
166 high = ((u64)__pa(src) >> 32); 166 high = ((u64)slow_virt_to_phys(src) >> 32);
167 ret = native_write_msr_safe(msr_kvm_system_time, low, high); 167 ret = native_write_msr_safe(msr_kvm_system_time, low, high);
168 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 168 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
169 cpu, high, low, txt); 169 cpu, high, low, txt);
@@ -218,6 +218,9 @@ static void kvm_shutdown(void)
218void __init kvmclock_init(void) 218void __init kvmclock_init(void)
219{ 219{
220 unsigned long mem; 220 unsigned long mem;
221 int size;
222
223 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
221 224
222 if (!kvm_para_available()) 225 if (!kvm_para_available())
223 return; 226 return;
@@ -231,16 +234,14 @@ void __init kvmclock_init(void)
231 printk(KERN_INFO "kvm-clock: Using msrs %x and %x", 234 printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
232 msr_kvm_system_time, msr_kvm_wall_clock); 235 msr_kvm_system_time, msr_kvm_wall_clock);
233 236
234 mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS, 237 mem = memblock_alloc(size, PAGE_SIZE);
235 PAGE_SIZE);
236 if (!mem) 238 if (!mem)
237 return; 239 return;
238 hv_clock = __va(mem); 240 hv_clock = __va(mem);
239 241
240 if (kvm_register_clock("boot clock")) { 242 if (kvm_register_clock("boot clock")) {
241 hv_clock = NULL; 243 hv_clock = NULL;
242 memblock_free(mem, 244 memblock_free(mem, size);
243 sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
244 return; 245 return;
245 } 246 }
246 pv_time_ops.sched_clock = kvm_clock_read; 247 pv_time_ops.sched_clock = kvm_clock_read;
@@ -275,7 +276,7 @@ int __init kvm_setup_vsyscall_timeinfo(void)
275 struct pvclock_vcpu_time_info *vcpu_time; 276 struct pvclock_vcpu_time_info *vcpu_time;
276 unsigned int size; 277 unsigned int size;
277 278
278 size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS; 279 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
279 280
280 preempt_disable(); 281 preempt_disable();
281 cpu = smp_processor_id(); 282 cpu = smp_processor_id();
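The fix above computes the allocation size once with PAGE_ALIGN() and reuses it for memblock_alloc(), memblock_free() and the later vsyscall mapping setup, so all three agree on the number of whole pages involved. PAGE_ALIGN() itself is just a round-up to the next page boundary; a sketch with a fixed 4 KiB page size:

    /* Sketch of PAGE_ALIGN() behaviour for PAGE_SIZE = 4096. */
    #define SK_PAGE_SIZE	4096UL
    #define SK_PAGE_ALIGN(x)	(((x) + SK_PAGE_SIZE - 1) & ~(SK_PAGE_SIZE - 1))

    /* Example: a 10000-byte per-CPU array rounds up to 12288 bytes (3 pages);
     * freeing with the same rounded size keeps alloc and free symmetrical. */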
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b3ea9db39db6..4eabc160696f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -16,125 +16,12 @@
16#include <linux/io.h> 16#include <linux/io.h>
17#include <linux/suspend.h> 17#include <linux/suspend.h>
18 18
19#include <asm/init.h>
19#include <asm/pgtable.h> 20#include <asm/pgtable.h>
20#include <asm/tlbflush.h> 21#include <asm/tlbflush.h>
21#include <asm/mmu_context.h> 22#include <asm/mmu_context.h>
22#include <asm/debugreg.h> 23#include <asm/debugreg.h>
23 24
24static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
25 unsigned long addr)
26{
27 pud_t *pud;
28 pmd_t *pmd;
29 struct page *page;
30 int result = -ENOMEM;
31
32 addr &= PMD_MASK;
33 pgd += pgd_index(addr);
34 if (!pgd_present(*pgd)) {
35 page = kimage_alloc_control_pages(image, 0);
36 if (!page)
37 goto out;
38 pud = (pud_t *)page_address(page);
39 clear_page(pud);
40 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
41 }
42 pud = pud_offset(pgd, addr);
43 if (!pud_present(*pud)) {
44 page = kimage_alloc_control_pages(image, 0);
45 if (!page)
46 goto out;
47 pmd = (pmd_t *)page_address(page);
48 clear_page(pmd);
49 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
50 }
51 pmd = pmd_offset(pud, addr);
52 if (!pmd_present(*pmd))
53 set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
54 result = 0;
55out:
56 return result;
57}
58
59static void init_level2_page(pmd_t *level2p, unsigned long addr)
60{
61 unsigned long end_addr;
62
63 addr &= PAGE_MASK;
64 end_addr = addr + PUD_SIZE;
65 while (addr < end_addr) {
66 set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
67 addr += PMD_SIZE;
68 }
69}
70
71static int init_level3_page(struct kimage *image, pud_t *level3p,
72 unsigned long addr, unsigned long last_addr)
73{
74 unsigned long end_addr;
75 int result;
76
77 result = 0;
78 addr &= PAGE_MASK;
79 end_addr = addr + PGDIR_SIZE;
80 while ((addr < last_addr) && (addr < end_addr)) {
81 struct page *page;
82 pmd_t *level2p;
83
84 page = kimage_alloc_control_pages(image, 0);
85 if (!page) {
86 result = -ENOMEM;
87 goto out;
88 }
89 level2p = (pmd_t *)page_address(page);
90 init_level2_page(level2p, addr);
91 set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
92 addr += PUD_SIZE;
93 }
94 /* clear the unused entries */
95 while (addr < end_addr) {
96 pud_clear(level3p++);
97 addr += PUD_SIZE;
98 }
99out:
100 return result;
101}
102
103
104static int init_level4_page(struct kimage *image, pgd_t *level4p,
105 unsigned long addr, unsigned long last_addr)
106{
107 unsigned long end_addr;
108 int result;
109
110 result = 0;
111 addr &= PAGE_MASK;
112 end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
113 while ((addr < last_addr) && (addr < end_addr)) {
114 struct page *page;
115 pud_t *level3p;
116
117 page = kimage_alloc_control_pages(image, 0);
118 if (!page) {
119 result = -ENOMEM;
120 goto out;
121 }
122 level3p = (pud_t *)page_address(page);
123 result = init_level3_page(image, level3p, addr, last_addr);
124 if (result)
125 goto out;
126 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
127 addr += PGDIR_SIZE;
128 }
129 /* clear the unused entries */
130 while (addr < end_addr) {
131 pgd_clear(level4p++);
132 addr += PGDIR_SIZE;
133 }
134out:
135 return result;
136}
137
138static void free_transition_pgtable(struct kimage *image) 25static void free_transition_pgtable(struct kimage *image)
139{ 26{
140 free_page((unsigned long)image->arch.pud); 27 free_page((unsigned long)image->arch.pud);
@@ -184,22 +71,62 @@ err:
184 return result; 71 return result;
185} 72}
186 73
74static void *alloc_pgt_page(void *data)
75{
76 struct kimage *image = (struct kimage *)data;
77 struct page *page;
78 void *p = NULL;
79
80 page = kimage_alloc_control_pages(image, 0);
81 if (page) {
82 p = page_address(page);
83 clear_page(p);
84 }
85
86 return p;
87}
187 88
188static int init_pgtable(struct kimage *image, unsigned long start_pgtable) 89static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
189{ 90{
91 struct x86_mapping_info info = {
92 .alloc_pgt_page = alloc_pgt_page,
93 .context = image,
94 .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
95 };
96 unsigned long mstart, mend;
190 pgd_t *level4p; 97 pgd_t *level4p;
191 int result; 98 int result;
99 int i;
100
192 level4p = (pgd_t *)__va(start_pgtable); 101 level4p = (pgd_t *)__va(start_pgtable);
193 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); 102 clear_page(level4p);
194 if (result) 103 for (i = 0; i < nr_pfn_mapped; i++) {
195 return result; 104 mstart = pfn_mapped[i].start << PAGE_SHIFT;
105 mend = pfn_mapped[i].end << PAGE_SHIFT;
106
107 result = kernel_ident_mapping_init(&info,
108 level4p, mstart, mend);
109 if (result)
110 return result;
111 }
112
196 /* 113 /*
197 * image->start may be outside 0 ~ max_pfn, for example when 114 * The segments' memory ranges may lie outside 0 ~ max_pfn,
198 * jump back to original kernel from kexeced kernel 115 * for example when jumping back to the original kernel from a kexeced kernel,
 116 * or when the first kernel was booted with a user-supplied memory map and
 117 * the second kernel is loaded outside that range.
199 */ 118 */
200 result = init_one_level2_page(image, level4p, image->start); 119 for (i = 0; i < image->nr_segments; i++) {
201 if (result) 120 mstart = image->segment[i].mem;
202 return result; 121 mend = mstart + image->segment[i].memsz;
122
123 result = kernel_ident_mapping_init(&info,
124 level4p, mstart, mend);
125
126 if (result)
127 return result;
128 }
129
203 return init_transition_pgtable(image, level4p); 130 return init_transition_pgtable(image, level4p);
204} 131}
205 132
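The rewritten init_pgtable() above no longer builds the 1:1 tables by hand; it fills a struct x86_mapping_info and lets the shared kernel_ident_mapping_init() helper do the walking, with alloc_pgt_page() supplying control pages from the kexec image. A hedged sketch of the same pattern with a different backing allocator (the pool type and my_pool_get_page() are made up; the struct fields and helper call match the hunk):

    struct my_pgt_pool;					/* hypothetical allocator state */
    void *my_pool_get_page(struct my_pgt_pool *p);	/* hypothetical helper */

    static void *alloc_pgt_page_from_pool(void *data)
    {
            void *p = my_pool_get_page(data);

            if (p)
                    clear_page(p);		/* new tables must start zeroed */
            return p;
    }

    static int ident_map_range_sketch(pgd_t *pgd, struct my_pgt_pool *pool,
                                      unsigned long start, unsigned long end)
    {
            struct x86_mapping_info info = {
                    .alloc_pgt_page	= alloc_pgt_page_from_pool,
                    .context	= pool,
                    .pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
            };

            return kernel_ident_mapping_init(&info, pgd, start, end);
    }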
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 3a04b224d0c0..22db92bbdf1a 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -364,10 +364,7 @@ static struct attribute_group mc_attr_group = {
364 364
365static void microcode_fini_cpu(int cpu) 365static void microcode_fini_cpu(int cpu)
366{ 366{
367 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
368
369 microcode_ops->microcode_fini_cpu(cpu); 367 microcode_ops->microcode_fini_cpu(cpu);
370 uci->valid = 0;
371} 368}
372 369
373static enum ucode_state microcode_resume_cpu(int cpu) 370static enum ucode_state microcode_resume_cpu(int cpu)
@@ -383,6 +380,10 @@ static enum ucode_state microcode_resume_cpu(int cpu)
383static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) 380static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
384{ 381{
385 enum ucode_state ustate; 382 enum ucode_state ustate;
383 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
384
385 if (uci && uci->valid)
386 return UCODE_OK;
386 387
387 if (collect_cpu_info(cpu)) 388 if (collect_cpu_info(cpu))
388 return UCODE_ERROR; 389 return UCODE_ERROR;
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
new file mode 100644
index 000000000000..577db8417d15
--- /dev/null
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -0,0 +1,76 @@
1/*
2 * X86 CPU microcode early update for Linux
3 *
4 * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
5 * H Peter Anvin" <hpa@zytor.com>
6 *
7 * This driver allows early upgrading of microcode on Intel processors
8 * belonging to IA-32 family - PentiumPro, Pentium II,
9 * Pentium III, Xeon, Pentium 4, etc.
10 *
11 * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
12 * Software Developer's Manual.
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 */
19#include <linux/module.h>
20#include <asm/microcode_intel.h>
21#include <asm/processor.h>
22
23#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
24#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
25#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
26#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
27#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
28#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
29#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
30
31#define CPUID_IS(a, b, c, ebx, ecx, edx) \
32 (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
33
34/*
35 * In the early microcode loading phase on the BSP, boot_cpu_data is not set
36 * up yet. x86_vendor() gets the vendor id for the BSP.
37 *
38 * In the 32-bit AP case, accessing boot_cpu_data needs a linear address. To
39 * simplify the code, we still use x86_vendor() to get the vendor id for APs.
40 *
41 * x86_vendor() gets the vendor information directly through cpuid.
42 */
43static int __cpuinit x86_vendor(void)
44{
45 u32 eax = 0x00000000;
46 u32 ebx, ecx = 0, edx;
47
48 if (!have_cpuid_p())
49 return X86_VENDOR_UNKNOWN;
50
51 native_cpuid(&eax, &ebx, &ecx, &edx);
52
53 if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
54 return X86_VENDOR_INTEL;
55
56 if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
57 return X86_VENDOR_AMD;
58
59 return X86_VENDOR_UNKNOWN;
60}
61
62void __init load_ucode_bsp(void)
63{
64 int vendor = x86_vendor();
65
66 if (vendor == X86_VENDOR_INTEL)
67 load_ucode_intel_bsp();
68}
69
70void __cpuinit load_ucode_ap(void)
71{
72 int vendor = x86_vendor();
73
74 if (vendor == X86_VENDOR_INTEL)
75 load_ucode_intel_ap();
76}
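x86_vendor() above packs the CPUID leaf 0 vendor string into three 32-bit words and compares the registers directly, so it works before boot_cpu_data exists. A hedged userspace equivalent using the compiler's <cpuid.h> helper instead of native_cpuid() (GCC/Clang on x86 assumed):

#include <stdio.h>
#include <cpuid.h>

#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
                return 1;

        /* "GenuineIntel" arrives as ebx:edx:ecx */
        if (ebx == QCHAR('G', 'e', 'n', 'u') &&
            edx == QCHAR('i', 'n', 'e', 'I') &&
            ecx == QCHAR('n', 't', 'e', 'l'))
                puts("Intel CPU: the early loader would call load_ucode_intel_bsp()");
        else
                puts("not an Intel CPU");
        return 0;
}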
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 3544aed39338..5fb2cebf556b 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -79,7 +79,7 @@
79#include <linux/module.h> 79#include <linux/module.h>
80#include <linux/vmalloc.h> 80#include <linux/vmalloc.h>
81 81
82#include <asm/microcode.h> 82#include <asm/microcode_intel.h>
83#include <asm/processor.h> 83#include <asm/processor.h>
84#include <asm/msr.h> 84#include <asm/msr.h>
85 85
@@ -87,59 +87,6 @@ MODULE_DESCRIPTION("Microcode Update Driver");
87MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); 87MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
88MODULE_LICENSE("GPL"); 88MODULE_LICENSE("GPL");
89 89
90struct microcode_header_intel {
91 unsigned int hdrver;
92 unsigned int rev;
93 unsigned int date;
94 unsigned int sig;
95 unsigned int cksum;
96 unsigned int ldrver;
97 unsigned int pf;
98 unsigned int datasize;
99 unsigned int totalsize;
100 unsigned int reserved[3];
101};
102
103struct microcode_intel {
104 struct microcode_header_intel hdr;
105 unsigned int bits[0];
106};
107
108/* microcode format is extended from prescott processors */
109struct extended_signature {
110 unsigned int sig;
111 unsigned int pf;
112 unsigned int cksum;
113};
114
115struct extended_sigtable {
116 unsigned int count;
117 unsigned int cksum;
118 unsigned int reserved[3];
119 struct extended_signature sigs[0];
120};
121
122#define DEFAULT_UCODE_DATASIZE (2000)
123#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
124#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
125#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
126#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
127#define DWSIZE (sizeof(u32))
128
129#define get_totalsize(mc) \
130 (((struct microcode_intel *)mc)->hdr.totalsize ? \
131 ((struct microcode_intel *)mc)->hdr.totalsize : \
132 DEFAULT_UCODE_TOTALSIZE)
133
134#define get_datasize(mc) \
135 (((struct microcode_intel *)mc)->hdr.datasize ? \
136 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
137
138#define sigmatch(s1, s2, p1, p2) \
139 (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
140
141#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
142
143static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) 90static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
144{ 91{
145 struct cpuinfo_x86 *c = &cpu_data(cpu_num); 92 struct cpuinfo_x86 *c = &cpu_data(cpu_num);
@@ -162,128 +109,25 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
162 return 0; 109 return 0;
163} 110}
164 111
165static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
166{
167 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
168}
169
170static inline int
171update_match_revision(struct microcode_header_intel *mc_header, int rev)
172{
173 return (mc_header->rev <= rev) ? 0 : 1;
174}
175
176static int microcode_sanity_check(void *mc)
177{
178 unsigned long total_size, data_size, ext_table_size;
179 struct microcode_header_intel *mc_header = mc;
180 struct extended_sigtable *ext_header = NULL;
181 int sum, orig_sum, ext_sigcount = 0, i;
182 struct extended_signature *ext_sig;
183
184 total_size = get_totalsize(mc_header);
185 data_size = get_datasize(mc_header);
186
187 if (data_size + MC_HEADER_SIZE > total_size) {
188 pr_err("error! Bad data size in microcode data file\n");
189 return -EINVAL;
190 }
191
192 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
193 pr_err("error! Unknown microcode update format\n");
194 return -EINVAL;
195 }
196 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
197 if (ext_table_size) {
198 if ((ext_table_size < EXT_HEADER_SIZE)
199 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
200 pr_err("error! Small exttable size in microcode data file\n");
201 return -EINVAL;
202 }
203 ext_header = mc + MC_HEADER_SIZE + data_size;
204 if (ext_table_size != exttable_size(ext_header)) {
205 pr_err("error! Bad exttable size in microcode data file\n");
206 return -EFAULT;
207 }
208 ext_sigcount = ext_header->count;
209 }
210
211 /* check extended table checksum */
212 if (ext_table_size) {
213 int ext_table_sum = 0;
214 int *ext_tablep = (int *)ext_header;
215
216 i = ext_table_size / DWSIZE;
217 while (i--)
218 ext_table_sum += ext_tablep[i];
219 if (ext_table_sum) {
220 pr_warning("aborting, bad extended signature table checksum\n");
221 return -EINVAL;
222 }
223 }
224
225 /* calculate the checksum */
226 orig_sum = 0;
227 i = (MC_HEADER_SIZE + data_size) / DWSIZE;
228 while (i--)
229 orig_sum += ((int *)mc)[i];
230 if (orig_sum) {
231 pr_err("aborting, bad checksum\n");
232 return -EINVAL;
233 }
234 if (!ext_table_size)
235 return 0;
236 /* check extended signature checksum */
237 for (i = 0; i < ext_sigcount; i++) {
238 ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
239 EXT_SIGNATURE_SIZE * i;
240 sum = orig_sum
241 - (mc_header->sig + mc_header->pf + mc_header->cksum)
242 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
243 if (sum) {
244 pr_err("aborting, bad checksum\n");
245 return -EINVAL;
246 }
247 }
248 return 0;
249}
250
251/* 112/*
252 * return 0 - no update found 113 * return 0 - no update found
253 * return 1 - found update 114 * return 1 - found update
254 */ 115 */
255static int 116static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
256get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
257{ 117{
258 struct microcode_header_intel *mc_header = mc; 118 struct cpu_signature cpu_sig;
259 struct extended_sigtable *ext_header; 119 unsigned int csig, cpf, crev;
260 unsigned long total_size = get_totalsize(mc_header);
261 int ext_sigcount, i;
262 struct extended_signature *ext_sig;
263
264 if (!update_match_revision(mc_header, rev))
265 return 0;
266
267 if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
268 return 1;
269 120
270 /* Look for ext. headers: */ 121 collect_cpu_info(cpu, &cpu_sig);
271 if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
272 return 0;
273 122
274 ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; 123 csig = cpu_sig.sig;
275 ext_sigcount = ext_header->count; 124 cpf = cpu_sig.pf;
276 ext_sig = (void *)ext_header + EXT_HEADER_SIZE; 125 crev = cpu_sig.rev;
277 126
278 for (i = 0; i < ext_sigcount; i++) { 127 return get_matching_microcode(csig, cpf, mc_intel, crev);
279 if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
280 return 1;
281 ext_sig++;
282 }
283 return 0;
284} 128}
285 129
286static int apply_microcode(int cpu) 130int apply_microcode(int cpu)
287{ 131{
288 struct microcode_intel *mc_intel; 132 struct microcode_intel *mc_intel;
289 struct ucode_cpu_info *uci; 133 struct ucode_cpu_info *uci;
@@ -300,6 +144,14 @@ static int apply_microcode(int cpu)
300 if (mc_intel == NULL) 144 if (mc_intel == NULL)
301 return 0; 145 return 0;
302 146
147 /*
148 * Microcode on this CPU could have been updated earlier. Only apply the
149 * microcode patch in mc_intel when it is newer than the one on this
150 * CPU.
151 */
152 if (get_matching_mc(mc_intel, cpu) == 0)
153 return 0;
154
303 /* write microcode via MSR 0x79 */ 155 /* write microcode via MSR 0x79 */
304 wrmsr(MSR_IA32_UCODE_WRITE, 156 wrmsr(MSR_IA32_UCODE_WRITE,
305 (unsigned long) mc_intel->bits, 157 (unsigned long) mc_intel->bits,
@@ -338,6 +190,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
338 unsigned int leftover = size; 190 unsigned int leftover = size;
339 enum ucode_state state = UCODE_OK; 191 enum ucode_state state = UCODE_OK;
340 unsigned int curr_mc_size = 0; 192 unsigned int curr_mc_size = 0;
193 unsigned int csig, cpf;
341 194
342 while (leftover) { 195 while (leftover) {
343 struct microcode_header_intel mc_header; 196 struct microcode_header_intel mc_header;
@@ -362,11 +215,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
362 } 215 }
363 216
364 if (get_ucode_data(mc, ucode_ptr, mc_size) || 217 if (get_ucode_data(mc, ucode_ptr, mc_size) ||
365 microcode_sanity_check(mc) < 0) { 218 microcode_sanity_check(mc, 1) < 0) {
366 break; 219 break;
367 } 220 }
368 221
369 if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { 222 csig = uci->cpu_sig.sig;
223 cpf = uci->cpu_sig.pf;
224 if (get_matching_microcode(csig, cpf, mc, new_rev)) {
370 vfree(new_mc); 225 vfree(new_mc);
371 new_rev = mc_header.rev; 226 new_rev = mc_header.rev;
372 new_mc = mc; 227 new_mc = mc;
@@ -393,6 +248,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
393 vfree(uci->mc); 248 vfree(uci->mc);
394 uci->mc = (struct microcode_intel *)new_mc; 249 uci->mc = (struct microcode_intel *)new_mc;
395 250
251 /*
252 * If early microcode loading is supported, save this mc into
253 * permanent memory so it will be loaded early when a CPU is hot added
254 * or resumes.
255 */
256 save_mc_for_early(new_mc);
257
396 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", 258 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
397 cpu, new_rev, uci->cpu_sig.rev); 259 cpu, new_rev, uci->cpu_sig.rev);
398out: 260out:
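The reworked generic_load_microcode() keeps only the newest patch that matches the CPU's signature and platform flags, and now also stashes it via save_mc_for_early(). A compact sketch of that selection rule; the struct and the matches() helper are simplified stand-ins for the real headers and get_matching_microcode():

#include <stddef.h>

struct patch { unsigned int sig, pf, rev; };

/* simplified stand-in for get_matching_microcode(csig, cpf, mc, rev) */
static int matches(unsigned int csig, unsigned int cpf,
                   const struct patch *p, unsigned int best_rev)
{
        return p->sig == csig && (p->pf & cpf) && p->rev > best_rev;
}

static const struct patch *pick_newest(unsigned int csig, unsigned int cpf,
                                       const struct patch *p, size_t n,
                                       unsigned int cur_rev)
{
        const struct patch *best = NULL;
        unsigned int best_rev = cur_rev;
        size_t i;

        for (i = 0; i < n; i++) {
                if (matches(csig, cpf, &p[i], best_rev)) {
                        best = &p[i];
                        best_rev = p[i].rev;    /* like new_rev = mc_header.rev */
                }
        }
        return best;                            /* NULL: nothing newer than cur_rev */
}

int main(void)
{
        const struct patch patches[] = {
                { 0x306a9, 0x12, 0x17 },
                { 0x306a9, 0x12, 0x19 },        /* newer revision, same signature */
        };
        const struct patch *best = pick_newest(0x306a9, 0x02, patches, 2, 0x15);

        return best ? 0 : 1;                    /* picks revision 0x19 here */
}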
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
new file mode 100644
index 000000000000..d893e8ed8ac9
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -0,0 +1,796 @@
1/*
2 * Intel CPU microcode early update for Linux
3 *
4 * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
5 * H Peter Anvin" <hpa@zytor.com>
6 *
7 * This allows early microcode upgrades on Intel processors
8 * belonging to the IA-32 family - PentiumPro, Pentium II,
9 * Pentium III, Xeon, Pentium 4, etc.
10 *
11 * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
12 * Software Developer's Manual.
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 */
19#include <linux/module.h>
20#include <linux/mm.h>
21#include <linux/slab.h>
22#include <linux/earlycpio.h>
23#include <linux/initrd.h>
24#include <linux/cpu.h>
25#include <asm/msr.h>
26#include <asm/microcode_intel.h>
27#include <asm/processor.h>
28#include <asm/tlbflush.h>
29#include <asm/setup.h>
30
31unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
32struct mc_saved_data {
33 unsigned int mc_saved_count;
34 struct microcode_intel **mc_saved;
35} mc_saved_data;
36
37static enum ucode_state __cpuinit
38generic_load_microcode_early(struct microcode_intel **mc_saved_p,
39 unsigned int mc_saved_count,
40 struct ucode_cpu_info *uci)
41{
42 struct microcode_intel *ucode_ptr, *new_mc = NULL;
43 int new_rev = uci->cpu_sig.rev;
44 enum ucode_state state = UCODE_OK;
45 unsigned int mc_size;
46 struct microcode_header_intel *mc_header;
47 unsigned int csig = uci->cpu_sig.sig;
48 unsigned int cpf = uci->cpu_sig.pf;
49 int i;
50
51 for (i = 0; i < mc_saved_count; i++) {
52 ucode_ptr = mc_saved_p[i];
53
54 mc_header = (struct microcode_header_intel *)ucode_ptr;
55 mc_size = get_totalsize(mc_header);
56 if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
57 new_rev = mc_header->rev;
58 new_mc = ucode_ptr;
59 }
60 }
61
62 if (!new_mc) {
63 state = UCODE_NFOUND;
64 goto out;
65 }
66
67 uci->mc = (struct microcode_intel *)new_mc;
68out:
69 return state;
70}
71
72static void __cpuinit
73microcode_pointer(struct microcode_intel **mc_saved,
74 unsigned long *mc_saved_in_initrd,
75 unsigned long initrd_start, int mc_saved_count)
76{
77 int i;
78
79 for (i = 0; i < mc_saved_count; i++)
80 mc_saved[i] = (struct microcode_intel *)
81 (mc_saved_in_initrd[i] + initrd_start);
82}
83
84#ifdef CONFIG_X86_32
85static void __cpuinit
86microcode_phys(struct microcode_intel **mc_saved_tmp,
87 struct mc_saved_data *mc_saved_data)
88{
89 int i;
90 struct microcode_intel ***mc_saved;
91
92 mc_saved = (struct microcode_intel ***)
93 __pa_nodebug(&mc_saved_data->mc_saved);
94 for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
95 struct microcode_intel *p;
96
97 p = *(struct microcode_intel **)
98 __pa_nodebug(mc_saved_data->mc_saved + i);
99 mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
100 }
101}
102#endif
103
104static enum ucode_state __cpuinit
105load_microcode(struct mc_saved_data *mc_saved_data,
106 unsigned long *mc_saved_in_initrd,
107 unsigned long initrd_start,
108 struct ucode_cpu_info *uci)
109{
110 struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
111 unsigned int count = mc_saved_data->mc_saved_count;
112
113 if (!mc_saved_data->mc_saved) {
114 microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
115 initrd_start, count);
116
117 return generic_load_microcode_early(mc_saved_tmp, count, uci);
118 } else {
119#ifdef CONFIG_X86_32
120 microcode_phys(mc_saved_tmp, mc_saved_data);
121 return generic_load_microcode_early(mc_saved_tmp, count, uci);
122#else
123 return generic_load_microcode_early(mc_saved_data->mc_saved,
124 count, uci);
125#endif
126 }
127}
128
129static u8 get_x86_family(unsigned long sig)
130{
131 u8 x86;
132
133 x86 = (sig >> 8) & 0xf;
134
135 if (x86 == 0xf)
136 x86 += (sig >> 20) & 0xff;
137
138 return x86;
139}
140
141static u8 get_x86_model(unsigned long sig)
142{
143 u8 x86, x86_model;
144
145 x86 = get_x86_family(sig);
146 x86_model = (sig >> 4) & 0xf;
147
148 if (x86 == 0x6 || x86 == 0xf)
149 x86_model += ((sig >> 16) & 0xf) << 4;
150
151 return x86_model;
152}
153
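get_x86_family() and get_x86_model() above rebuild the displayed family and model from a raw CPUID(1) signature, folding in the extended fields. A quick standalone check; 0x306a9 is just a sample signature value:

#include <stdio.h>

static unsigned char x86_family(unsigned long sig)
{
        unsigned char x86 = (sig >> 8) & 0xf;

        if (x86 == 0xf)
                x86 += (sig >> 20) & 0xff;              /* extended family */
        return x86;
}

static unsigned char x86_model(unsigned long sig)
{
        unsigned char x86 = x86_family(sig);
        unsigned char model = (sig >> 4) & 0xf;

        if (x86 == 0x6 || x86 == 0xf)
                model += ((sig >> 16) & 0xf) << 4;      /* extended model */
        return model;
}

int main(void)
{
        unsigned long sig = 0x000306a9;                 /* sample signature */

        /* prints "family 0x6, model 0x3a" */
        printf("family 0x%x, model 0x%x\n", x86_family(sig), x86_model(sig));
        return 0;
}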
154/*
155 * Given a CPU signature and a microcode patch, this function checks whether
156 * the patch's family and model match the CPU's.
157 */
158static enum ucode_state
159matching_model_microcode(struct microcode_header_intel *mc_header,
160 unsigned long sig)
161{
162 u8 x86, x86_model;
163 u8 x86_ucode, x86_model_ucode;
164 struct extended_sigtable *ext_header;
165 unsigned long total_size = get_totalsize(mc_header);
166 unsigned long data_size = get_datasize(mc_header);
167 int ext_sigcount, i;
168 struct extended_signature *ext_sig;
169
170 x86 = get_x86_family(sig);
171 x86_model = get_x86_model(sig);
172
173 x86_ucode = get_x86_family(mc_header->sig);
174 x86_model_ucode = get_x86_model(mc_header->sig);
175
176 if (x86 == x86_ucode && x86_model == x86_model_ucode)
177 return UCODE_OK;
178
179 /* Look for ext. headers: */
180 if (total_size <= data_size + MC_HEADER_SIZE)
181 return UCODE_NFOUND;
182
183 ext_header = (void *)mc_header +
184 data_size + MC_HEADER_SIZE;
185 ext_sigcount = ext_header->count;
186 ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
187
188 for (i = 0; i < ext_sigcount; i++) {
189 x86_ucode = get_x86_family(ext_sig->sig);
190 x86_model_ucode = get_x86_model(ext_sig->sig);
191
192 if (x86 == x86_ucode && x86_model == x86_model_ucode)
193 return UCODE_OK;
194
195 ext_sig++;
196 }
197
198 return UCODE_NFOUND;
199}
200
201static int
202save_microcode(struct mc_saved_data *mc_saved_data,
203 struct microcode_intel **mc_saved_src,
204 unsigned int mc_saved_count)
205{
206 int i, j;
207 struct microcode_intel **mc_saved_p;
208 int ret;
209
210 if (!mc_saved_count)
211 return -EINVAL;
212
213 /*
214 * Copy new microcode data.
215 */
216 mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
217 GFP_KERNEL);
218 if (!mc_saved_p)
219 return -ENOMEM;
220
221 for (i = 0; i < mc_saved_count; i++) {
222 struct microcode_intel *mc = mc_saved_src[i];
223 struct microcode_header_intel *mc_header = &mc->hdr;
224 unsigned long mc_size = get_totalsize(mc_header);
225 mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
226 if (!mc_saved_p[i]) {
227 ret = -ENOMEM;
228 goto err;
229 }
230 if (!mc_saved_src[i]) {
231 ret = -EINVAL;
232 goto err;
233 }
234 memcpy(mc_saved_p[i], mc, mc_size);
235 }
236
237 /*
238 * Point to newly saved microcode.
239 */
240 mc_saved_data->mc_saved = mc_saved_p;
241 mc_saved_data->mc_saved_count = mc_saved_count;
242
243 return 0;
244
245err:
246 for (j = 0; j <= i; j++)
247 kfree(mc_saved_p[j]);
248 kfree(mc_saved_p);
249
250 return ret;
251}
252
253/*
254 * A microcode patch in ucode_ptr is saved into mc_saved
255 * - if it has a matching signature and a newer revision than an existing
256 * patch in mc_saved,
257 * - or if it is a newly discovered microcode patch.
258 *
259 * The microcode patch must have a model matching the CPU's.
260 */
261static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
262 unsigned int *mc_saved_count_p)
263{
264 int i;
265 int found = 0;
266 unsigned int mc_saved_count = *mc_saved_count_p;
267 struct microcode_header_intel *mc_header;
268
269 mc_header = (struct microcode_header_intel *)ucode_ptr;
270 for (i = 0; i < mc_saved_count; i++) {
271 unsigned int sig, pf;
272 unsigned int new_rev;
273 struct microcode_header_intel *mc_saved_header =
274 (struct microcode_header_intel *)mc_saved[i];
275 sig = mc_saved_header->sig;
276 pf = mc_saved_header->pf;
277 new_rev = mc_header->rev;
278
279 if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
280 found = 1;
281 if (update_match_revision(mc_header, new_rev)) {
282 /*
283 * Found an older ucode saved before.
284 * Replace the older one with this newer
285 * one.
286 */
287 mc_saved[i] =
288 (struct microcode_intel *)ucode_ptr;
289 break;
290 }
291 }
292 }
293 if (i >= mc_saved_count && !found)
294 /*
295 * This ucode is discovered in the ucode file for the first time.
296 * Save it to memory.
297 */
298 mc_saved[mc_saved_count++] =
299 (struct microcode_intel *)ucode_ptr;
300
301 *mc_saved_count_p = mc_saved_count;
302}
303
304/*
305 * Get microcode matching the BSP's model. Only CPUs with the same model as
306 * the BSP can be present in the platform.
307 */
308static enum ucode_state __init
309get_matching_model_microcode(int cpu, unsigned long start,
310 void *data, size_t size,
311 struct mc_saved_data *mc_saved_data,
312 unsigned long *mc_saved_in_initrd,
313 struct ucode_cpu_info *uci)
314{
315 u8 *ucode_ptr = data;
316 unsigned int leftover = size;
317 enum ucode_state state = UCODE_OK;
318 unsigned int mc_size;
319 struct microcode_header_intel *mc_header;
320 struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
321 unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
322 int i;
323
324 while (leftover) {
325 mc_header = (struct microcode_header_intel *)ucode_ptr;
326
327 mc_size = get_totalsize(mc_header);
328 if (!mc_size || mc_size > leftover ||
329 microcode_sanity_check(ucode_ptr, 0) < 0)
330 break;
331
332 leftover -= mc_size;
333
334 /*
335 * Since APs with the same family and model as the BSP may boot in
336 * the platform, we need to find and save microcode patches
337 * with the same family and model as the BSP.
338 */
339 if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
340 UCODE_OK) {
341 ucode_ptr += mc_size;
342 continue;
343 }
344
345 _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
346
347 ucode_ptr += mc_size;
348 }
349
350 if (leftover) {
351 state = UCODE_ERROR;
352 goto out;
353 }
354
355 if (mc_saved_count == 0) {
356 state = UCODE_NFOUND;
357 goto out;
358 }
359
360 for (i = 0; i < mc_saved_count; i++)
361 mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
362
363 mc_saved_data->mc_saved_count = mc_saved_count;
364out:
365 return state;
366}
367
368#define native_rdmsr(msr, val1, val2) \
369do { \
370 u64 __val = native_read_msr((msr)); \
371 (void)((val1) = (u32)__val); \
372 (void)((val2) = (u32)(__val >> 32)); \
373} while (0)
374
375#define native_wrmsr(msr, low, high) \
376 native_write_msr(msr, low, high);
377
378static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci)
379{
380 unsigned int val[2];
381 u8 x86, x86_model;
382 struct cpu_signature csig;
383 unsigned int eax, ebx, ecx, edx;
384
385 csig.sig = 0;
386 csig.pf = 0;
387 csig.rev = 0;
388
389 memset(uci, 0, sizeof(*uci));
390
391 eax = 0x00000001;
392 ecx = 0;
393 native_cpuid(&eax, &ebx, &ecx, &edx);
394 csig.sig = eax;
395
396 x86 = get_x86_family(csig.sig);
397 x86_model = get_x86_model(csig.sig);
398
399 if ((x86_model >= 5) || (x86 > 6)) {
400 /* get processor flags from MSR 0x17 */
401 native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
402 csig.pf = 1 << ((val[1] >> 18) & 7);
403 }
404 native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
405
406 /* As documented in the SDM: Do a CPUID 1 here */
407 sync_core();
408
409 /* get the current revision from MSR 0x8B */
410 native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
411
412 csig.rev = val[1];
413
414 uci->cpu_sig = csig;
415 uci->valid = 1;
416
417 return 0;
418}
419
420#ifdef DEBUG
421static void __ref show_saved_mc(void)
422{
423 int i, j;
424 unsigned int sig, pf, rev, total_size, data_size, date;
425 struct ucode_cpu_info uci;
426
427 if (mc_saved_data.mc_saved_count == 0) {
428 pr_debug("no micorcode data saved.\n");
429 return;
430 }
431 pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
432
433 collect_cpu_info_early(&uci);
434
435 sig = uci.cpu_sig.sig;
436 pf = uci.cpu_sig.pf;
437 rev = uci.cpu_sig.rev;
438 pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
439 smp_processor_id(), sig, pf, rev);
440
441 for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
442 struct microcode_header_intel *mc_saved_header;
443 struct extended_sigtable *ext_header;
444 int ext_sigcount;
445 struct extended_signature *ext_sig;
446
447 mc_saved_header = (struct microcode_header_intel *)
448 mc_saved_data.mc_saved[i];
449 sig = mc_saved_header->sig;
450 pf = mc_saved_header->pf;
451 rev = mc_saved_header->rev;
452 total_size = get_totalsize(mc_saved_header);
453 data_size = get_datasize(mc_saved_header);
454 date = mc_saved_header->date;
455
456 pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
457 i, sig, pf, rev, total_size,
458 date & 0xffff,
459 date >> 24,
460 (date >> 16) & 0xff);
461
462 /* Look for ext. headers: */
463 if (total_size <= data_size + MC_HEADER_SIZE)
464 continue;
465
466 ext_header = (void *)mc_saved_header +
467 data_size + MC_HEADER_SIZE;
468 ext_sigcount = ext_header->count;
469 ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
470
471 for (j = 0; j < ext_sigcount; j++) {
472 sig = ext_sig->sig;
473 pf = ext_sig->pf;
474
475 pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
476 j, sig, pf);
477
478 ext_sig++;
479 }
480
481 }
482}
483#else
484static inline void show_saved_mc(void)
485{
486}
487#endif
488
489#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
490/*
491 * Save this mc into mc_saved_data so it will be loaded early when a CPU is
492 * hot added or resumes.
493 *
494 * Please make sure this mc is a valid microcode patch before calling
495 * this function.
496 */
497int save_mc_for_early(u8 *mc)
498{
499 struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
500 unsigned int mc_saved_count_init;
501 unsigned int mc_saved_count;
502 struct microcode_intel **mc_saved;
503 int ret = 0;
504 int i;
505
506 /*
507 * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
508 * hotplug.
509 */
510 cpu_hotplug_driver_lock();
511
512 mc_saved_count_init = mc_saved_data.mc_saved_count;
513 mc_saved_count = mc_saved_data.mc_saved_count;
514 mc_saved = mc_saved_data.mc_saved;
515
516 if (mc_saved && mc_saved_count)
517 memcpy(mc_saved_tmp, mc_saved,
518 mc_saved_count * sizeof(struct microcode_intel *));
519 /*
520 * Save the microcode patch mc in the mc_saved_tmp structure if it's a newer
521 * version.
522 */
523
524 _save_mc(mc_saved_tmp, mc, &mc_saved_count);
525
526 /*
527 * Save mc_saved_tmp in the global mc_saved_data.
528 */
529 ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
530 if (ret) {
531 pr_err("Can not save microcode patch.\n");
532 goto out;
533 }
534
535 show_saved_mc();
536
537 /*
538 * Free old saved microcode data.
539 */
540 if (mc_saved) {
541 for (i = 0; i < mc_saved_count_init; i++)
542 kfree(mc_saved[i]);
543 kfree(mc_saved);
544 }
545
546out:
547 cpu_hotplug_driver_unlock();
548
549 return ret;
550}
551EXPORT_SYMBOL_GPL(save_mc_for_early);
552#endif
553
554static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
555static __init enum ucode_state
556scan_microcode(unsigned long start, unsigned long end,
557 struct mc_saved_data *mc_saved_data,
558 unsigned long *mc_saved_in_initrd,
559 struct ucode_cpu_info *uci)
560{
561 unsigned int size = end - start + 1;
562 struct cpio_data cd;
563 long offset = 0;
564#ifdef CONFIG_X86_32
565 char *p = (char *)__pa_nodebug(ucode_name);
566#else
567 char *p = ucode_name;
568#endif
569
570 cd.data = NULL;
571 cd.size = 0;
572
573 cd = find_cpio_data(p, (void *)start, size, &offset);
574 if (!cd.data)
575 return UCODE_ERROR;
576
577
578 return get_matching_model_microcode(0, start, cd.data, cd.size,
579 mc_saved_data, mc_saved_in_initrd,
580 uci);
581}
582
583/*
584 * Print ucode update info.
585 */
586static void __cpuinit
587print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
588{
589 int cpu = smp_processor_id();
590
591 pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
592 cpu,
593 uci->cpu_sig.rev,
594 date & 0xffff,
595 date >> 24,
596 (date >> 16) & 0xff);
597}
598
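print_ucode_info() (and show_saved_mc() earlier) print the header's date field with %04x-%02x-%02x: the year sits in the low 16 bits, the month in the top byte and the day in bits 23:16, stored so the hex digits read directly as the calendar date. A tiny decode of an illustrative value:

#include <stdio.h>

int main(void)
{
        unsigned int date = 0x04292012;         /* sample microcode header date */

        /* prints "2012-04-29" */
        printf("%04x-%02x-%02x\n",
               date & 0xffff,                   /* year  */
               date >> 24,                      /* month */
               (date >> 16) & 0xff);            /* day   */
        return 0;
}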
599#ifdef CONFIG_X86_32
600
601static int delay_ucode_info;
602static int current_mc_date;
603
604/*
605 * Print early updated ucode info after printk works. This is a delayed info dump.
606 */
607void __cpuinit show_ucode_info_early(void)
608{
609 struct ucode_cpu_info uci;
610
611 if (delay_ucode_info) {
612 collect_cpu_info_early(&uci);
613 print_ucode_info(&uci, current_mc_date);
614 delay_ucode_info = 0;
615 }
616}
617
618/*
619 * At this point, we cannot call printk() yet. Keep the microcode patch number in
620 * mc_saved_data.mc_saved and delay printing microcode info in
621 * show_ucode_info_early() until printk() works.
622 */
623static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
624{
625 struct microcode_intel *mc_intel;
626 int *delay_ucode_info_p;
627 int *current_mc_date_p;
628
629 mc_intel = uci->mc;
630 if (mc_intel == NULL)
631 return;
632
633 delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
634 current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
635
636 *delay_ucode_info_p = 1;
637 *current_mc_date_p = mc_intel->hdr.date;
638}
639#else
640
641/*
642 * Flush global tlb. We only do this in x86_64 where paging has been enabled
643 * already and PGE should be enabled as well.
644 */
645static inline void __cpuinit flush_tlb_early(void)
646{
647 __native_flush_tlb_global_irq_disabled();
648}
649
650static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
651{
652 struct microcode_intel *mc_intel;
653
654 mc_intel = uci->mc;
655 if (mc_intel == NULL)
656 return;
657
658 print_ucode_info(uci, mc_intel->hdr.date);
659}
660#endif
661
662static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data,
663 struct ucode_cpu_info *uci)
664{
665 struct microcode_intel *mc_intel;
666 unsigned int val[2];
667
668 mc_intel = uci->mc;
669 if (mc_intel == NULL)
670 return 0;
671
672 /* write microcode via MSR 0x79 */
673 native_wrmsr(MSR_IA32_UCODE_WRITE,
674 (unsigned long) mc_intel->bits,
675 (unsigned long) mc_intel->bits >> 16 >> 16);
676 native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
677
678 /* As documented in the SDM: Do a CPUID 1 here */
679 sync_core();
680
681 /* get the current revision from MSR 0x8B */
682 native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
683 if (val[1] != mc_intel->hdr.rev)
684 return -1;
685
686#ifdef CONFIG_X86_64
687 /* Flush global tlb. This is precaution. */
688 flush_tlb_early();
689#endif
690 uci->cpu_sig.rev = val[1];
691
692 print_ucode(uci);
693
694 return 0;
695}
696
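apply_microcode_early() above passes the address of the patch bits to MSR 0x79 split into two 32-bit halves; the ">> 16 >> 16" idiom yields the high half without the undefined ">> 32" when unsigned long is only 32 bits wide. The split in isolation, with an arbitrary sample address:

#include <stdio.h>

int main(void)
{
        unsigned long addr = 0x12345678UL;      /* e.g. the address of mc_intel->bits */
        unsigned int low  = (unsigned int)addr;
        unsigned int high = (unsigned int)(addr >> 16 >> 16);   /* 0 on 32-bit */

        printf("wrmsr(0x79, low=0x%x, high=0x%x)\n", low, high);
        return 0;
}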
697/*
698 * This function converts microcode patch offsets previously stored in
699 * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
700 */
701int __init save_microcode_in_initrd(void)
702{
703 unsigned int count = mc_saved_data.mc_saved_count;
704 struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
705 int ret = 0;
706
707 if (count == 0)
708 return ret;
709
710 microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
711 ret = save_microcode(&mc_saved_data, mc_saved, count);
712 if (ret)
713 pr_err("Can not save microcod patches from initrd");
714
715 show_saved_mc();
716
717 return ret;
718}
719
720static void __init
721_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
722 unsigned long *mc_saved_in_initrd,
723 unsigned long initrd_start_early,
724 unsigned long initrd_end_early,
725 struct ucode_cpu_info *uci)
726{
727 collect_cpu_info_early(uci);
728 scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
729 mc_saved_in_initrd, uci);
730 load_microcode(mc_saved_data, mc_saved_in_initrd,
731 initrd_start_early, uci);
732 apply_microcode_early(mc_saved_data, uci);
733}
734
735void __init
736load_ucode_intel_bsp(void)
737{
738 u64 ramdisk_image, ramdisk_size;
739 unsigned long initrd_start_early, initrd_end_early;
740 struct ucode_cpu_info uci;
741#ifdef CONFIG_X86_32
742 struct boot_params *boot_params_p;
743
744 boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
745 ramdisk_image = boot_params_p->hdr.ramdisk_image;
746 ramdisk_size = boot_params_p->hdr.ramdisk_size;
747 initrd_start_early = ramdisk_image;
748 initrd_end_early = initrd_start_early + ramdisk_size;
749
750 _load_ucode_intel_bsp(
751 (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
752 (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
753 initrd_start_early, initrd_end_early, &uci);
754#else
755 ramdisk_image = boot_params.hdr.ramdisk_image;
756 ramdisk_size = boot_params.hdr.ramdisk_size;
757 initrd_start_early = ramdisk_image + PAGE_OFFSET;
758 initrd_end_early = initrd_start_early + ramdisk_size;
759
760 _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
761 initrd_start_early, initrd_end_early, &uci);
762#endif
763}
764
765void __cpuinit load_ucode_intel_ap(void)
766{
767 struct mc_saved_data *mc_saved_data_p;
768 struct ucode_cpu_info uci;
769 unsigned long *mc_saved_in_initrd_p;
770 unsigned long initrd_start_addr;
771#ifdef CONFIG_X86_32
772 unsigned long *initrd_start_p;
773
774 mc_saved_in_initrd_p =
775 (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
776 mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
777 initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
778 initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
779#else
780 mc_saved_data_p = &mc_saved_data;
781 mc_saved_in_initrd_p = mc_saved_in_initrd;
782 initrd_start_addr = initrd_start;
783#endif
784
785 /*
786 * If there is no valid ucode previously saved in memory, there is no need
787 * to update the ucode on this AP.
788 */
789 if (mc_saved_data_p->mc_saved_count == 0)
790 return;
791
792 collect_cpu_info_early(&uci);
793 load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
794 initrd_start_addr, &uci);
795 apply_microcode_early(mc_saved_data_p, &uci);
796}
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c
new file mode 100644
index 000000000000..ce69320d0179
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_lib.c
@@ -0,0 +1,174 @@
1/*
2 * Intel CPU Microcode Update Driver for Linux
3 *
4 * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
5 * H Peter Anvin" <hpa@zytor.com>
6 *
7 * This driver allows microcode upgrades on Intel processors
8 * belonging to the IA-32 family - PentiumPro, Pentium II,
9 * Pentium III, Xeon, Pentium 4, etc.
10 *
11 * Reference: Section 8.11 of Volume 3a, IA-32 Intel Architecture
12 * Software Developer's Manual
13 * Order Number 253668 or free download from:
14 *
15 * http://developer.intel.com/Assets/PDF/manual/253668.pdf
16 *
17 * For more information, go to http://www.urbanmyth.org/microcode
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25#include <linux/firmware.h>
26#include <linux/uaccess.h>
27#include <linux/kernel.h>
28#include <linux/module.h>
29
30#include <asm/microcode_intel.h>
31#include <asm/processor.h>
32#include <asm/msr.h>
33
34static inline int
35update_match_cpu(unsigned int csig, unsigned int cpf,
36 unsigned int sig, unsigned int pf)
37{
38 return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
39}
40
41int
42update_match_revision(struct microcode_header_intel *mc_header, int rev)
43{
44 return (mc_header->rev <= rev) ? 0 : 1;
45}
46
47int microcode_sanity_check(void *mc, int print_err)
48{
49 unsigned long total_size, data_size, ext_table_size;
50 struct microcode_header_intel *mc_header = mc;
51 struct extended_sigtable *ext_header = NULL;
52 int sum, orig_sum, ext_sigcount = 0, i;
53 struct extended_signature *ext_sig;
54
55 total_size = get_totalsize(mc_header);
56 data_size = get_datasize(mc_header);
57
58 if (data_size + MC_HEADER_SIZE > total_size) {
59 if (print_err)
60 pr_err("error! Bad data size in microcode data file\n");
61 return -EINVAL;
62 }
63
64 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
65 if (print_err)
66 pr_err("error! Unknown microcode update format\n");
67 return -EINVAL;
68 }
69 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
70 if (ext_table_size) {
71 if ((ext_table_size < EXT_HEADER_SIZE)
72 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
73 if (print_err)
74 pr_err("error! Small exttable size in microcode data file\n");
75 return -EINVAL;
76 }
77 ext_header = mc + MC_HEADER_SIZE + data_size;
78 if (ext_table_size != exttable_size(ext_header)) {
79 if (print_err)
80 pr_err("error! Bad exttable size in microcode data file\n");
81 return -EFAULT;
82 }
83 ext_sigcount = ext_header->count;
84 }
85
86 /* check extended table checksum */
87 if (ext_table_size) {
88 int ext_table_sum = 0;
89 int *ext_tablep = (int *)ext_header;
90
91 i = ext_table_size / DWSIZE;
92 while (i--)
93 ext_table_sum += ext_tablep[i];
94 if (ext_table_sum) {
95 if (print_err)
96 pr_warn("aborting, bad extended signature table checksum\n");
97 return -EINVAL;
98 }
99 }
100
101 /* calculate the checksum */
102 orig_sum = 0;
103 i = (MC_HEADER_SIZE + data_size) / DWSIZE;
104 while (i--)
105 orig_sum += ((int *)mc)[i];
106 if (orig_sum) {
107 if (print_err)
108 pr_err("aborting, bad checksum\n");
109 return -EINVAL;
110 }
111 if (!ext_table_size)
112 return 0;
113 /* check extended signature checksum */
114 for (i = 0; i < ext_sigcount; i++) {
115 ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
116 EXT_SIGNATURE_SIZE * i;
117 sum = orig_sum
118 - (mc_header->sig + mc_header->pf + mc_header->cksum)
119 + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
120 if (sum) {
121 if (print_err)
122 pr_err("aborting, bad checksum\n");
123 return -EINVAL;
124 }
125 }
126 return 0;
127}
128EXPORT_SYMBOL_GPL(microcode_sanity_check);
129
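microcode_sanity_check() above depends on the convention that the 32-bit words of the header plus data area sum to zero (and likewise for the extended table). A standalone sketch of that word-sum check; the buffer layout is simplified and no extended table is handled:

#include <stdint.h>
#include <stddef.h>

/* Returns nonzero when the 32-bit words of buf[0..len) sum to zero, like the
 * header+data checksum in microcode_sanity_check(); len must be a multiple
 * of 4. */
static int dword_sum_ok(const void *buf, size_t len)
{
        const uint32_t *p = buf;
        uint32_t sum = 0;
        size_t i;

        for (i = 0; i < len / 4; i++)
                sum += p[i];

        return sum == 0;
}

int main(void)
{
        uint32_t buf[4] = { 0x11111111, 0x22222222, 0x33333333, 0 };

        buf[3] = -(buf[0] + buf[1] + buf[2]);   /* checksum word makes the sum 0 */
        return dword_sum_ok(buf, sizeof(buf)) ? 0 : 1;
}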
130/*
131 * return 0 - no update found
132 * return 1 - found update
133 */
134int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
135{
136 struct microcode_header_intel *mc_header = mc;
137 struct extended_sigtable *ext_header;
138 unsigned long total_size = get_totalsize(mc_header);
139 int ext_sigcount, i;
140 struct extended_signature *ext_sig;
141
142 if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf))
143 return 1;
144
145 /* Look for ext. headers: */
146 if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
147 return 0;
148
149 ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
150 ext_sigcount = ext_header->count;
151 ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
152
153 for (i = 0; i < ext_sigcount; i++) {
154 if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf))
155 return 1;
156 ext_sig++;
157 }
158 return 0;
159}
160
161/*
162 * return 0 - no update found
163 * return 1 - found update
164 */
165int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
166{
167 struct microcode_header_intel *mc_header = mc;
168
169 if (!update_match_revision(mc_header, rev))
170 return 0;
171
172 return get_matching_sig(csig, cpf, mc, rev);
173}
174EXPORT_SYMBOL_GPL(get_matching_microcode);
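update_match_cpu() above reduces to the sigmatch() rule quoted in the removed microcode_intel.c block: the signatures must be equal and the platform-flag masks must intersect, or both be zero for parts that report no platform flags. As a plain predicate:

/* Same rule as the sigmatch() macro used by update_match_cpu(). */
static int sig_matches(unsigned int mc_sig, unsigned int cpu_sig,
                       unsigned int mc_pf, unsigned int cpu_pf)
{
        if (mc_sig != cpu_sig)
                return 0;
        return (mc_pf & cpu_pf) || (mc_pf == 0 && cpu_pf == 0);
}

For example, sig_matches(0x306a9, 0x306a9, 0x12, 0x02) is true because the two platform masks share bit 1.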
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 4929502c1372..ce130493b802 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -71,7 +71,7 @@ static ssize_t msr_read(struct file *file, char __user *buf,
71 u32 __user *tmp = (u32 __user *) buf; 71 u32 __user *tmp = (u32 __user *) buf;
72 u32 data[2]; 72 u32 data[2];
73 u32 reg = *ppos; 73 u32 reg = *ppos;
74 int cpu = iminor(file->f_path.dentry->d_inode); 74 int cpu = iminor(file_inode(file));
75 int err = 0; 75 int err = 0;
76 ssize_t bytes = 0; 76 ssize_t bytes = 0;
77 77
@@ -99,7 +99,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
99 const u32 __user *tmp = (const u32 __user *)buf; 99 const u32 __user *tmp = (const u32 __user *)buf;
100 u32 data[2]; 100 u32 data[2];
101 u32 reg = *ppos; 101 u32 reg = *ppos;
102 int cpu = iminor(file->f_path.dentry->d_inode); 102 int cpu = iminor(file_inode(file));
103 int err = 0; 103 int err = 0;
104 ssize_t bytes = 0; 104 ssize_t bytes = 0;
105 105
@@ -125,7 +125,7 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
125{ 125{
126 u32 __user *uregs = (u32 __user *)arg; 126 u32 __user *uregs = (u32 __user *)arg;
127 u32 regs[8]; 127 u32 regs[8];
128 int cpu = iminor(file->f_path.dentry->d_inode); 128 int cpu = iminor(file_inode(file));
129 int err; 129 int err;
130 130
131 switch (ioc) { 131 switch (ioc) {
@@ -171,13 +171,12 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
171 171
172static int msr_open(struct inode *inode, struct file *file) 172static int msr_open(struct inode *inode, struct file *file)
173{ 173{
174 unsigned int cpu; 174 unsigned int cpu = iminor(file_inode(file));
175 struct cpuinfo_x86 *c; 175 struct cpuinfo_x86 *c;
176 176
177 if (!capable(CAP_SYS_RAWIO)) 177 if (!capable(CAP_SYS_RAWIO))
178 return -EPERM; 178 return -EPERM;
179 179
180 cpu = iminor(file->f_path.dentry->d_inode);
181 if (cpu >= nr_cpu_ids || !cpu_online(cpu)) 180 if (cpu >= nr_cpu_ids || !cpu_online(cpu))
182 return -ENXIO; /* No such CPU */ 181 return -ENXIO; /* No such CPU */
183 182
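The msr.c hunks only swap the deprecated file->f_path.dentry->d_inode chain for file_inode(); the device semantics are unchanged: the minor number picks the CPU and the file offset picks the MSR. A hedged userspace read of IA32_UCODE_REV (0x8B) through that interface; it needs the msr module and enough privilege, and it only reports the revision the kernel last refreshed, it does not perform the wrmsr+cpuid sequence the loader uses:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        uint64_t val;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0 || pread(fd, &val, sizeof(val), 0x8b) != sizeof(val)) {
                perror("msr");
                return 1;
        }
        /* the high 32 bits hold the microcode revision */
        printf("CPU0 microcode revision: 0x%x\n", (unsigned int)(val >> 32));
        close(fd);
        return 0;
}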
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index f84f5c57de35..60308053fdb2 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -509,3 +509,4 @@ void local_touch_nmi(void)
509{ 509{
510 __this_cpu_write(last_nmi_rip, 0); 510 __this_cpu_write(last_nmi_rip, 0);
511} 511}
512EXPORT_SYMBOL_GPL(local_touch_nmi);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 2ed787f15bf0..14ae10031ff0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
268unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; 268unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
269EXPORT_SYMBOL(boot_option_idle_override); 269EXPORT_SYMBOL(boot_option_idle_override);
270 270
271/* 271static void (*x86_idle)(void);
272 * Powermanagement idle function, if any..
273 */
274void (*pm_idle)(void);
275#ifdef CONFIG_APM_MODULE
276EXPORT_SYMBOL(pm_idle);
277#endif
278 272
279#ifndef CONFIG_SMP 273#ifndef CONFIG_SMP
280static inline void play_dead(void) 274static inline void play_dead(void)
@@ -351,7 +345,7 @@ void cpu_idle(void)
351 rcu_idle_enter(); 345 rcu_idle_enter();
352 346
353 if (cpuidle_idle_call()) 347 if (cpuidle_idle_call())
354 pm_idle(); 348 x86_idle();
355 349
356 rcu_idle_exit(); 350 rcu_idle_exit();
357 start_critical_timings(); 351 start_critical_timings();
@@ -375,7 +369,6 @@ void cpu_idle(void)
375 */ 369 */
376void default_idle(void) 370void default_idle(void)
377{ 371{
378 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
379 trace_cpu_idle_rcuidle(1, smp_processor_id()); 372 trace_cpu_idle_rcuidle(1, smp_processor_id());
380 current_thread_info()->status &= ~TS_POLLING; 373 current_thread_info()->status &= ~TS_POLLING;
381 /* 374 /*
@@ -389,21 +382,22 @@ void default_idle(void)
389 else 382 else
390 local_irq_enable(); 383 local_irq_enable();
391 current_thread_info()->status |= TS_POLLING; 384 current_thread_info()->status |= TS_POLLING;
392 trace_power_end_rcuidle(smp_processor_id());
393 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); 385 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
394} 386}
395#ifdef CONFIG_APM_MODULE 387#ifdef CONFIG_APM_MODULE
396EXPORT_SYMBOL(default_idle); 388EXPORT_SYMBOL(default_idle);
397#endif 389#endif
398 390
399bool set_pm_idle_to_default(void) 391#ifdef CONFIG_XEN
392bool xen_set_default_idle(void)
400{ 393{
401 bool ret = !!pm_idle; 394 bool ret = !!x86_idle;
402 395
403 pm_idle = default_idle; 396 x86_idle = default_idle;
404 397
405 return ret; 398 return ret;
406} 399}
400#endif
407void stop_this_cpu(void *dummy) 401void stop_this_cpu(void *dummy)
408{ 402{
409 local_irq_disable(); 403 local_irq_disable();
@@ -413,31 +407,8 @@ void stop_this_cpu(void *dummy)
413 set_cpu_online(smp_processor_id(), false); 407 set_cpu_online(smp_processor_id(), false);
414 disable_local_APIC(); 408 disable_local_APIC();
415 409
416 for (;;) { 410 for (;;)
417 if (hlt_works(smp_processor_id())) 411 halt();
418 halt();
419 }
420}
421
422/* Default MONITOR/MWAIT with no hints, used for default C1 state */
423static void mwait_idle(void)
424{
425 if (!need_resched()) {
426 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
427 trace_cpu_idle_rcuidle(1, smp_processor_id());
428 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
429 clflush((void *)&current_thread_info()->flags);
430
431 __monitor((void *)&current_thread_info()->flags, 0, 0);
432 smp_mb();
433 if (!need_resched())
434 __sti_mwait(0, 0);
435 else
436 local_irq_enable();
437 trace_power_end_rcuidle(smp_processor_id());
438 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
439 } else
440 local_irq_enable();
441} 412}
442 413
443/* 414/*
@@ -447,62 +418,13 @@ static void mwait_idle(void)
447 */ 418 */
448static void poll_idle(void) 419static void poll_idle(void)
449{ 420{
450 trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
451 trace_cpu_idle_rcuidle(0, smp_processor_id()); 421 trace_cpu_idle_rcuidle(0, smp_processor_id());
452 local_irq_enable(); 422 local_irq_enable();
453 while (!need_resched()) 423 while (!need_resched())
454 cpu_relax(); 424 cpu_relax();
455 trace_power_end_rcuidle(smp_processor_id());
456 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); 425 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
457} 426}
458 427
459/*
460 * mwait selection logic:
461 *
462 * It depends on the CPU. For AMD CPUs that support MWAIT this is
463 * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
464 * then depend on a clock divisor and current Pstate of the core. If
465 * all cores of a processor are in halt state (C1) the processor can
466 * enter the C1E (C1 enhanced) state. If mwait is used this will never
467 * happen.
468 *
469 * idle=mwait overrides this decision and forces the usage of mwait.
470 */
471
472#define MWAIT_INFO 0x05
473#define MWAIT_ECX_EXTENDED_INFO 0x01
474#define MWAIT_EDX_C1 0xf0
475
476int mwait_usable(const struct cpuinfo_x86 *c)
477{
478 u32 eax, ebx, ecx, edx;
479
480 /* Use mwait if idle=mwait boot option is given */
481 if (boot_option_idle_override == IDLE_FORCE_MWAIT)
482 return 1;
483
484 /*
485 * Any idle= boot option other than idle=mwait means that we must not
486 * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
487 */
488 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
489 return 0;
490
491 if (c->cpuid_level < MWAIT_INFO)
492 return 0;
493
494 cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
495 /* Check, whether EDX has extended info about MWAIT */
496 if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
497 return 1;
498
499 /*
500 * edx enumeratios MONITOR/MWAIT extensions. Check, whether
501 * C1 supports MWAIT
502 */
503 return (edx & MWAIT_EDX_C1);
504}
505
506bool amd_e400_c1e_detected; 428bool amd_e400_c1e_detected;
507EXPORT_SYMBOL(amd_e400_c1e_detected); 429EXPORT_SYMBOL(amd_e400_c1e_detected);
508 430
@@ -567,31 +489,24 @@ static void amd_e400_idle(void)
567void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 489void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
568{ 490{
569#ifdef CONFIG_SMP 491#ifdef CONFIG_SMP
570 if (pm_idle == poll_idle && smp_num_siblings > 1) { 492 if (x86_idle == poll_idle && smp_num_siblings > 1)
571 pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); 493 pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
572 }
573#endif 494#endif
574 if (pm_idle) 495 if (x86_idle)
575 return; 496 return;
576 497
577 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { 498 if (cpu_has_amd_erratum(amd_erratum_400)) {
578 /*
579 * One CPU supports mwait => All CPUs supports mwait
580 */
581 pr_info("using mwait in idle threads\n");
582 pm_idle = mwait_idle;
583 } else if (cpu_has_amd_erratum(amd_erratum_400)) {
584 /* E400: APIC timer interrupt does not wake up CPU from C1e */ 499 /* E400: APIC timer interrupt does not wake up CPU from C1e */
585 pr_info("using AMD E400 aware idle routine\n"); 500 pr_info("using AMD E400 aware idle routine\n");
586 pm_idle = amd_e400_idle; 501 x86_idle = amd_e400_idle;
587 } else 502 } else
588 pm_idle = default_idle; 503 x86_idle = default_idle;
589} 504}
590 505
591void __init init_amd_e400_c1e_mask(void) 506void __init init_amd_e400_c1e_mask(void)
592{ 507{
593 /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ 508 /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
594 if (pm_idle == amd_e400_idle) 509 if (x86_idle == amd_e400_idle)
595 zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); 510 zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
596} 511}
597 512
@@ -602,11 +517,8 @@ static int __init idle_setup(char *str)
602 517
603 if (!strcmp(str, "poll")) { 518 if (!strcmp(str, "poll")) {
604 pr_info("using polling idle threads\n"); 519 pr_info("using polling idle threads\n");
605 pm_idle = poll_idle; 520 x86_idle = poll_idle;
606 boot_option_idle_override = IDLE_POLL; 521 boot_option_idle_override = IDLE_POLL;
607 } else if (!strcmp(str, "mwait")) {
608 boot_option_idle_override = IDLE_FORCE_MWAIT;
609 WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
610 } else if (!strcmp(str, "halt")) { 522 } else if (!strcmp(str, "halt")) {
611 /* 523 /*
612 * When the boot option of idle=halt is added, halt is 524 * When the boot option of idle=halt is added, halt is
@@ -615,7 +527,7 @@ static int __init idle_setup(char *str)
615 * To continue to load the CPU idle driver, don't touch 527 * To continue to load the CPU idle driver, don't touch
616 * the boot_option_idle_override. 528 * the boot_option_idle_override.
617 */ 529 */
618 pm_idle = default_idle; 530 x86_idle = default_idle;
619 boot_option_idle_override = IDLE_HALT; 531 boot_option_idle_override = IDLE_HALT;
620 } else if (!strcmp(str, "nomwait")) { 532 } else if (!strcmp(str, "nomwait")) {
621 /* 533 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6e68a6194965..0f49677da51e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,7 +117,7 @@ void release_thread(struct task_struct *dead_task)
117{ 117{
118 if (dead_task->mm) { 118 if (dead_task->mm) {
119 if (dead_task->mm->context.size) { 119 if (dead_task->mm->context.size) {
120 pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n", 120 pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
121 dead_task->comm, 121 dead_task->comm,
122 dead_task->mm->context.ldt, 122 dead_task->mm->context.ldt,
123 dead_task->mm->context.size); 123 dead_task->mm->context.size);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b629bbe0d9bd..29a8120e6fe8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,7 +22,7 @@
22#include <linux/perf_event.h> 22#include <linux/perf_event.h>
23#include <linux/hw_breakpoint.h> 23#include <linux/hw_breakpoint.h>
24#include <linux/rcupdate.h> 24#include <linux/rcupdate.h>
25#include <linux/module.h> 25#include <linux/export.h>
26#include <linux/context_tracking.h> 26#include <linux/context_tracking.h>
27 27
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 85c39590c1a4..2cb9470ea85b 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -185,7 +185,7 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
185 185
186 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { 186 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
187 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, 187 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
188 __pa_symbol(i) + (idx*PAGE_SIZE), 188 __pa(i) + (idx*PAGE_SIZE),
189 PAGE_KERNEL_VVAR); 189 PAGE_KERNEL_VVAR);
190 } 190 }
191 191
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 801602b5d745..2e8f3d3b5641 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void)
149 if (century) { 149 if (century) {
150 century = bcd2bin(century); 150 century = bcd2bin(century);
151 year += century * 100; 151 year += century * 100;
152 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
153 } else 152 } else
154 year += CMOS_YEARS_OFFS; 153 year += CMOS_YEARS_OFFS;
155 154
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1abb7969173a..90d8cc930f5e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -108,17 +108,16 @@
108#include <asm/topology.h> 108#include <asm/topology.h>
109#include <asm/apicdef.h> 109#include <asm/apicdef.h>
110#include <asm/amd_nb.h> 110#include <asm/amd_nb.h>
111#ifdef CONFIG_X86_64
112#include <asm/numa_64.h>
113#endif
114#include <asm/mce.h> 111#include <asm/mce.h>
115#include <asm/alternative.h> 112#include <asm/alternative.h>
116#include <asm/prom.h> 113#include <asm/prom.h>
117 114
118/* 115/*
119 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 116 * max_low_pfn_mapped: highest direct mapped pfn under 4GB
120 * The direct mapping extends to max_pfn_mapped, so that we can directly access 117 * max_pfn_mapped: highest direct mapped pfn over 4GB
121 * apertures, ACPI and other tables without having to play with fixmaps. 118 *
119 * The direct mapping only covers E820_RAM regions, so the ranges and gaps are
120 * represented by pfn_mapped
122 */ 121 */
123unsigned long max_low_pfn_mapped; 122unsigned long max_low_pfn_mapped;
124unsigned long max_pfn_mapped; 123unsigned long max_pfn_mapped;
@@ -172,9 +171,15 @@ static struct resource bss_resource = {
172 171
173#ifdef CONFIG_X86_32 172#ifdef CONFIG_X86_32
174/* cpu data as detected by the assembly code in head.S */ 173/* cpu data as detected by the assembly code in head.S */
175struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; 174struct cpuinfo_x86 new_cpu_data __cpuinitdata = {
175 .wp_works_ok = -1,
176 .fdiv_bug = -1,
177};
176/* common cpu data for all cpus */ 178/* common cpu data for all cpus */
177struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; 179struct cpuinfo_x86 boot_cpu_data __read_mostly = {
180 .wp_works_ok = -1,
181 .fdiv_bug = -1,
182};
178EXPORT_SYMBOL(boot_cpu_data); 183EXPORT_SYMBOL(boot_cpu_data);
179 184
180unsigned int def_to_bigsmp; 185unsigned int def_to_bigsmp;
@@ -276,18 +281,7 @@ void * __init extend_brk(size_t size, size_t align)
276 return ret; 281 return ret;
277} 282}
278 283
279#ifdef CONFIG_X86_64 284#ifdef CONFIG_X86_32
280static void __init init_gbpages(void)
281{
282 if (direct_gbpages && cpu_has_gbpages)
283 printk(KERN_INFO "Using GB pages for direct mapping\n");
284 else
285 direct_gbpages = 0;
286}
287#else
288static inline void init_gbpages(void)
289{
290}
291static void __init cleanup_highmap(void) 285static void __init cleanup_highmap(void)
292{ 286{
293} 287}
@@ -296,8 +290,8 @@ static void __init cleanup_highmap(void)
296static void __init reserve_brk(void) 290static void __init reserve_brk(void)
297{ 291{
298 if (_brk_end > _brk_start) 292 if (_brk_end > _brk_start)
299 memblock_reserve(__pa(_brk_start), 293 memblock_reserve(__pa_symbol(_brk_start),
300 __pa(_brk_end) - __pa(_brk_start)); 294 _brk_end - _brk_start);
301 295
302 /* Mark brk area as locked down and no longer taking any 296 /* Mark brk area as locked down and no longer taking any
303 new allocations */ 297 new allocations */
@@ -306,27 +300,43 @@ static void __init reserve_brk(void)
306 300
307#ifdef CONFIG_BLK_DEV_INITRD 301#ifdef CONFIG_BLK_DEV_INITRD
308 302
303static u64 __init get_ramdisk_image(void)
304{
305 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
306
307 ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
308
309 return ramdisk_image;
310}
311static u64 __init get_ramdisk_size(void)
312{
313 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
314
315 ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
316
317 return ramdisk_size;
318}
319
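get_ramdisk_image()/get_ramdisk_size() above widen the ramdisk address and size to 64 bits by OR-ing the ext_ramdisk_* boot_params fields in as the upper word, which is what lets an initrd live above 4GB. The arithmetic in isolation, with made-up field values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t ramdisk_image = 0x37000000;    /* boot_params.hdr.ramdisk_image */
        uint32_t ext_ramdisk_image = 0x1;       /* boot_params.ext_ramdisk_image */
        uint64_t image = ramdisk_image | ((uint64_t)ext_ramdisk_image << 32);

        /* prints 0x137000000: an initrd placed above 4GB */
        printf("ramdisk at 0x%llx\n", (unsigned long long)image);
        return 0;
}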
309#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) 320#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
310static void __init relocate_initrd(void) 321static void __init relocate_initrd(void)
311{ 322{
312 /* Assume only end is not page aligned */ 323 /* Assume only end is not page aligned */
313 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 324 u64 ramdisk_image = get_ramdisk_image();
314 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 325 u64 ramdisk_size = get_ramdisk_size();
315 u64 area_size = PAGE_ALIGN(ramdisk_size); 326 u64 area_size = PAGE_ALIGN(ramdisk_size);
316 u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
317 u64 ramdisk_here; 327 u64 ramdisk_here;
318 unsigned long slop, clen, mapaddr; 328 unsigned long slop, clen, mapaddr;
319 char *p, *q; 329 char *p, *q;
320 330
321 /* We need to move the initrd down into lowmem */ 331 /* We need to move the initrd down into directly mapped mem */
322 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, 332 ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
323 PAGE_SIZE); 333 area_size, PAGE_SIZE);
324 334
325 if (!ramdisk_here) 335 if (!ramdisk_here)
326 panic("Cannot find place for new RAMDISK of size %lld\n", 336 panic("Cannot find place for new RAMDISK of size %lld\n",
327 ramdisk_size); 337 ramdisk_size);
328 338
329 /* Note: this includes all the lowmem currently occupied by 339 /* Note: this includes all the mem currently occupied by
330 the initrd, we rely on that fact to keep the data intact. */ 340 the initrd, we rely on that fact to keep the data intact. */
331 memblock_reserve(ramdisk_here, area_size); 341 memblock_reserve(ramdisk_here, area_size);
332 initrd_start = ramdisk_here + PAGE_OFFSET; 342 initrd_start = ramdisk_here + PAGE_OFFSET;
@@ -336,17 +346,7 @@ static void __init relocate_initrd(void)
336 346
337 q = (char *)initrd_start; 347 q = (char *)initrd_start;
338 348
339 /* Copy any lowmem portion of the initrd */ 349 /* Copy the initrd */
340 if (ramdisk_image < end_of_lowmem) {
341 clen = end_of_lowmem - ramdisk_image;
342 p = (char *)__va(ramdisk_image);
343 memcpy(q, p, clen);
344 q += clen;
345 ramdisk_image += clen;
346 ramdisk_size -= clen;
347 }
348
349 /* Copy the highmem portion of the initrd */
350 while (ramdisk_size) { 350 while (ramdisk_size) {
351 slop = ramdisk_image & ~PAGE_MASK; 351 slop = ramdisk_image & ~PAGE_MASK;
352 clen = ramdisk_size; 352 clen = ramdisk_size;
@@ -360,22 +360,35 @@ static void __init relocate_initrd(void)
360 ramdisk_image += clen; 360 ramdisk_image += clen;
361 ramdisk_size -= clen; 361 ramdisk_size -= clen;
362 } 362 }
363 /* high pages is not converted by early_res_to_bootmem */ 363
364 ramdisk_image = boot_params.hdr.ramdisk_image; 364 ramdisk_image = get_ramdisk_image();
365 ramdisk_size = boot_params.hdr.ramdisk_size; 365 ramdisk_size = get_ramdisk_size();
366 printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" 366 printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
367 " [mem %#010llx-%#010llx]\n", 367 " [mem %#010llx-%#010llx]\n",
368 ramdisk_image, ramdisk_image + ramdisk_size - 1, 368 ramdisk_image, ramdisk_image + ramdisk_size - 1,
369 ramdisk_here, ramdisk_here + ramdisk_size - 1); 369 ramdisk_here, ramdisk_here + ramdisk_size - 1);
370} 370}
371 371
372static void __init early_reserve_initrd(void)
373{
374 /* Assume only end is not page aligned */
375 u64 ramdisk_image = get_ramdisk_image();
376 u64 ramdisk_size = get_ramdisk_size();
377 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
378
379 if (!boot_params.hdr.type_of_loader ||
380 !ramdisk_image || !ramdisk_size)
381 return; /* No initrd provided by bootloader */
382
383 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
384}
372static void __init reserve_initrd(void) 385static void __init reserve_initrd(void)
373{ 386{
374 /* Assume only end is not page aligned */ 387 /* Assume only end is not page aligned */
375 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 388 u64 ramdisk_image = get_ramdisk_image();
376 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 389 u64 ramdisk_size = get_ramdisk_size();
377 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 390 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
378 u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; 391 u64 mapped_size;
379 392
380 if (!boot_params.hdr.type_of_loader || 393 if (!boot_params.hdr.type_of_loader ||
381 !ramdisk_image || !ramdisk_size) 394 !ramdisk_image || !ramdisk_size)
@@ -383,22 +396,18 @@ static void __init reserve_initrd(void)
383 396
384 initrd_start = 0; 397 initrd_start = 0;
385 398
386 if (ramdisk_size >= (end_of_lowmem>>1)) { 399 mapped_size = memblock_mem_size(max_pfn_mapped);
400 if (ramdisk_size >= (mapped_size>>1))
387 panic("initrd too large to handle, " 401 panic("initrd too large to handle, "
388 "disabling initrd (%lld needed, %lld available)\n", 402 "disabling initrd (%lld needed, %lld available)\n",
389 ramdisk_size, end_of_lowmem>>1); 403 ramdisk_size, mapped_size>>1);
390 }
391 404
392 printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, 405 printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
393 ramdisk_end - 1); 406 ramdisk_end - 1);
394 407
395 408 if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image),
396 if (ramdisk_end <= end_of_lowmem) { 409 PFN_DOWN(ramdisk_end))) {
397 /* All in lowmem, easy case */ 410 /* All are mapped, easy case */
398 /*
399 * don't need to reserve again, already reserved early
400 * in i386_start_kernel
401 */
402 initrd_start = ramdisk_image + PAGE_OFFSET; 411 initrd_start = ramdisk_image + PAGE_OFFSET;
403 initrd_end = initrd_start + ramdisk_size; 412 initrd_end = initrd_start + ramdisk_size;
404 return; 413 return;
@@ -409,6 +418,9 @@ static void __init reserve_initrd(void)
409 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); 418 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
410} 419}
411#else 420#else
421static void __init early_reserve_initrd(void)
422{
423}
412static void __init reserve_initrd(void) 424static void __init reserve_initrd(void)
413{ 425{
414} 426}
@@ -419,8 +431,6 @@ static void __init parse_setup_data(void)
419 struct setup_data *data; 431 struct setup_data *data;
420 u64 pa_data; 432 u64 pa_data;
421 433
422 if (boot_params.hdr.version < 0x0209)
423 return;
424 pa_data = boot_params.hdr.setup_data; 434 pa_data = boot_params.hdr.setup_data;
425 while (pa_data) { 435 while (pa_data) {
426 u32 data_len, map_len; 436 u32 data_len, map_len;
@@ -456,8 +466,6 @@ static void __init e820_reserve_setup_data(void)
456 u64 pa_data; 466 u64 pa_data;
457 int found = 0; 467 int found = 0;
458 468
459 if (boot_params.hdr.version < 0x0209)
460 return;
461 pa_data = boot_params.hdr.setup_data; 469 pa_data = boot_params.hdr.setup_data;
462 while (pa_data) { 470 while (pa_data) {
463 data = early_memremap(pa_data, sizeof(*data)); 471 data = early_memremap(pa_data, sizeof(*data));
@@ -481,8 +489,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
481 struct setup_data *data; 489 struct setup_data *data;
482 u64 pa_data; 490 u64 pa_data;
483 491
484 if (boot_params.hdr.version < 0x0209)
485 return;
486 pa_data = boot_params.hdr.setup_data; 492 pa_data = boot_params.hdr.setup_data;
487 while (pa_data) { 493 while (pa_data) {
488 data = early_memremap(pa_data, sizeof(*data)); 494 data = early_memremap(pa_data, sizeof(*data));
@@ -501,17 +507,51 @@ static void __init memblock_x86_reserve_range_setup_data(void)
501/* 507/*
502 * Keep the crash kernel below this limit. On 32 bits earlier kernels 508 * Keep the crash kernel below this limit. On 32 bits earlier kernels
503 * would limit the kernel to the low 512 MiB due to mapping restrictions. 509 * would limit the kernel to the low 512 MiB due to mapping restrictions.
504 * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
505 * limit once kexec-tools are fixed.
506 */ 510 */
507#ifdef CONFIG_X86_32 511#ifdef CONFIG_X86_32
508# define CRASH_KERNEL_ADDR_MAX (512 << 20) 512# define CRASH_KERNEL_ADDR_MAX (512 << 20)
509#else 513#else
510# define CRASH_KERNEL_ADDR_MAX (896 << 20) 514# define CRASH_KERNEL_ADDR_MAX MAXMEM
511#endif 515#endif
512 516
517static void __init reserve_crashkernel_low(void)
518{
519#ifdef CONFIG_X86_64
520 const unsigned long long alignment = 16<<20; /* 16M */
521 unsigned long long low_base = 0, low_size = 0;
522 unsigned long total_low_mem;
523 unsigned long long base;
524 int ret;
525
526 total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
527 ret = parse_crashkernel_low(boot_command_line, total_low_mem,
528 &low_size, &base);
529 if (ret != 0 || low_size <= 0)
530 return;
531
532 low_base = memblock_find_in_range(low_size, (1ULL<<32),
533 low_size, alignment);
534
535 if (!low_base) {
536 pr_info("crashkernel low reservation failed - No suitable area found.\n");
537
538 return;
539 }
540
541 memblock_reserve(low_base, low_size);
542 pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
543 (unsigned long)(low_size >> 20),
544 (unsigned long)(low_base >> 20),
545 (unsigned long)(total_low_mem >> 20));
546 crashk_low_res.start = low_base;
547 crashk_low_res.end = low_base + low_size - 1;
548 insert_resource(&iomem_resource, &crashk_low_res);
549#endif
550}
551
513static void __init reserve_crashkernel(void) 552static void __init reserve_crashkernel(void)
514{ 553{
554 const unsigned long long alignment = 16<<20; /* 16M */
515 unsigned long long total_mem; 555 unsigned long long total_mem;
516 unsigned long long crash_size, crash_base; 556 unsigned long long crash_size, crash_base;
517 int ret; 557 int ret;
@@ -525,8 +565,6 @@ static void __init reserve_crashkernel(void)
525 565
526 /* 0 means: find the address automatically */ 566 /* 0 means: find the address automatically */
527 if (crash_base <= 0) { 567 if (crash_base <= 0) {
528 const unsigned long long alignment = 16<<20; /* 16M */
529
530 /* 568 /*
531 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX 569 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
532 */ 570 */
@@ -537,6 +575,7 @@ static void __init reserve_crashkernel(void)
537 pr_info("crashkernel reservation failed - No suitable area found.\n"); 575 pr_info("crashkernel reservation failed - No suitable area found.\n");
538 return; 576 return;
539 } 577 }
578
540 } else { 579 } else {
541 unsigned long long start; 580 unsigned long long start;
542 581
@@ -558,6 +597,9 @@ static void __init reserve_crashkernel(void)
558 crashk_res.start = crash_base; 597 crashk_res.start = crash_base;
559 crashk_res.end = crash_base + crash_size - 1; 598 crashk_res.end = crash_base + crash_size - 1;
560 insert_resource(&iomem_resource, &crashk_res); 599 insert_resource(&iomem_resource, &crashk_res);
600
601 if (crash_base >= (1ULL<<32))
602 reserve_crashkernel_low();
561} 603}
562#else 604#else
563static void __init reserve_crashkernel(void) 605static void __init reserve_crashkernel(void)
@@ -608,8 +650,6 @@ static __init void reserve_ibft_region(void)
608 memblock_reserve(addr, size); 650 memblock_reserve(addr, size);
609} 651}
610 652
611static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
612
613static bool __init snb_gfx_workaround_needed(void) 653static bool __init snb_gfx_workaround_needed(void)
614{ 654{
615#ifdef CONFIG_PCI 655#ifdef CONFIG_PCI
@@ -698,8 +738,7 @@ static void __init trim_bios_range(void)
698 * since some BIOSes are known to corrupt low memory. See the 738 * since some BIOSes are known to corrupt low memory. See the
699 * Kconfig help text for X86_RESERVE_LOW. 739 * Kconfig help text for X86_RESERVE_LOW.
700 */ 740 */
701 e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), 741 e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
702 E820_RAM, E820_RESERVED);
703 742
704 /* 743 /*
705 * special case: Some BIOSen report the PC BIOS 744 * special case: Some BIOSen report the PC BIOS
@@ -711,6 +750,29 @@ static void __init trim_bios_range(void)
711 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 750 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
712} 751}
713 752
 753/* called before trim_bios_range() to spare an extra sanitize pass */
754static void __init e820_add_kernel_range(void)
755{
756 u64 start = __pa_symbol(_text);
757 u64 size = __pa_symbol(_end) - start;
758
759 /*
760 * Complain if .text .data and .bss are not marked as E820_RAM and
761 * attempt to fix it by adding the range. We may have a confused BIOS,
762 * or the user may have used memmap=exactmap or memmap=xxM$yyM to
 763 * exclude the kernel range. If we really are running on top of non-RAM,
 764 * we will crash later anyway.
765 */
766 if (e820_all_mapped(start, start + size, E820_RAM))
767 return;
768
769 pr_warn(".text .data .bss are not marked as E820_RAM!\n");
770 e820_remove_range(start, size, E820_RAM, 0);
771 e820_add_region(start, size, E820_RAM);
772}
773
774static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
775
714static int __init parse_reservelow(char *p) 776static int __init parse_reservelow(char *p)
715{ 777{
716 unsigned long long size; 778 unsigned long long size;
@@ -733,6 +795,11 @@ static int __init parse_reservelow(char *p)
733 795
734early_param("reservelow", parse_reservelow); 796early_param("reservelow", parse_reservelow);
735 797
798static void __init trim_low_memory_range(void)
799{
800 memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
801}
802
736/* 803/*
737 * Determine if we were loaded by an EFI loader. If so, then we have also been 804 * Determine if we were loaded by an EFI loader. If so, then we have also been
738 * passed the efi memmap, systab, etc., so we should use these data structures 805 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -748,6 +815,17 @@ early_param("reservelow", parse_reservelow);
748 815
749void __init setup_arch(char **cmdline_p) 816void __init setup_arch(char **cmdline_p)
750{ 817{
818 memblock_reserve(__pa_symbol(_text),
819 (unsigned long)__bss_stop - (unsigned long)_text);
820
821 early_reserve_initrd();
822
823 /*
824 * At this point everything still needed from the boot loader
825 * or BIOS or kernel text should be early reserved or marked not
826 * RAM in e820. All other memory is free game.
827 */
828
751#ifdef CONFIG_X86_32 829#ifdef CONFIG_X86_32
752 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 830 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
753 visws_early_detect(); 831 visws_early_detect();
@@ -835,12 +913,12 @@ void __init setup_arch(char **cmdline_p)
835 init_mm.end_data = (unsigned long) _edata; 913 init_mm.end_data = (unsigned long) _edata;
836 init_mm.brk = _brk_end; 914 init_mm.brk = _brk_end;
837 915
838 code_resource.start = virt_to_phys(_text); 916 code_resource.start = __pa_symbol(_text);
839 code_resource.end = virt_to_phys(_etext)-1; 917 code_resource.end = __pa_symbol(_etext)-1;
840 data_resource.start = virt_to_phys(_etext); 918 data_resource.start = __pa_symbol(_etext);
841 data_resource.end = virt_to_phys(_edata)-1; 919 data_resource.end = __pa_symbol(_edata)-1;
842 bss_resource.start = virt_to_phys(&__bss_start); 920 bss_resource.start = __pa_symbol(__bss_start);
843 bss_resource.end = virt_to_phys(&__bss_stop)-1; 921 bss_resource.end = __pa_symbol(__bss_stop)-1;
844 922
845#ifdef CONFIG_CMDLINE_BOOL 923#ifdef CONFIG_CMDLINE_BOOL
846#ifdef CONFIG_CMDLINE_OVERRIDE 924#ifdef CONFIG_CMDLINE_OVERRIDE
@@ -906,6 +984,7 @@ void __init setup_arch(char **cmdline_p)
906 insert_resource(&iomem_resource, &data_resource); 984 insert_resource(&iomem_resource, &data_resource);
907 insert_resource(&iomem_resource, &bss_resource); 985 insert_resource(&iomem_resource, &bss_resource);
908 986
987 e820_add_kernel_range();
909 trim_bios_range(); 988 trim_bios_range();
910#ifdef CONFIG_X86_32 989#ifdef CONFIG_X86_32
911 if (ppro_with_ram_bug()) { 990 if (ppro_with_ram_bug()) {
@@ -955,6 +1034,8 @@ void __init setup_arch(char **cmdline_p)
955 1034
956 reserve_ibft_region(); 1035 reserve_ibft_region();
957 1036
1037 early_alloc_pgt_buf();
1038
958 /* 1039 /*
959 * Need to conclude brk, before memblock_x86_fill() 1040 * Need to conclude brk, before memblock_x86_fill()
960 * it could use memblock_find_in_range, could overlap with 1041 * it could use memblock_find_in_range, could overlap with
@@ -964,7 +1045,7 @@ void __init setup_arch(char **cmdline_p)
964 1045
965 cleanup_highmap(); 1046 cleanup_highmap();
966 1047
967 memblock.current_limit = get_max_mapped(); 1048 memblock.current_limit = ISA_END_ADDRESS;
968 memblock_x86_fill(); 1049 memblock_x86_fill();
969 1050
970 /* 1051 /*
@@ -981,41 +1062,22 @@ void __init setup_arch(char **cmdline_p)
981 setup_bios_corruption_check(); 1062 setup_bios_corruption_check();
982#endif 1063#endif
983 1064
1065#ifdef CONFIG_X86_32
984 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", 1066 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
985 (max_pfn_mapped<<PAGE_SHIFT) - 1); 1067 (max_pfn_mapped<<PAGE_SHIFT) - 1);
1068#endif
986 1069
987 setup_real_mode(); 1070 reserve_real_mode();
988 1071
989 trim_platform_memory_ranges(); 1072 trim_platform_memory_ranges();
1073 trim_low_memory_range();
990 1074
991 init_gbpages(); 1075 init_mem_mapping();
992
993 /* max_pfn_mapped is updated here */
994 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
995 max_pfn_mapped = max_low_pfn_mapped;
996
997#ifdef CONFIG_X86_64
998 if (max_pfn > max_low_pfn) {
999 int i;
1000 unsigned long start, end;
1001 unsigned long start_pfn, end_pfn;
1002
1003 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
1004 NULL) {
1005 1076
1006 end = PFN_PHYS(end_pfn); 1077 early_trap_pf_init();
1007 if (end <= (1UL<<32))
1008 continue;
1009 1078
1010 start = PFN_PHYS(start_pfn); 1079 setup_real_mode();
1011 max_pfn_mapped = init_memory_mapping(
1012 max((1UL<<32), start), end);
1013 }
1014 1080
1015 /* can we preseve max_low_pfn ?*/
1016 max_low_pfn = max_pfn;
1017 }
1018#endif
1019 memblock.current_limit = get_max_mapped(); 1081 memblock.current_limit = get_max_mapped();
1020 dma_contiguous_reserve(0); 1082 dma_contiguous_reserve(0);
1021 1083
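A minimal standalone sketch of the address composition the new get_ramdisk_image()/get_ramdisk_size() helpers perform: the 32-bit ramdisk fields from the setup header are extended with the ext_ramdisk_* fields into a full 64-bit value, which is what lets the initrd sit above 4 GiB. The variable names and sample values below are illustrative, not the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for boot_params.hdr.ramdisk_image and boot_params.ext_ramdisk_image. */
    static uint32_t hdr_ramdisk_image = 0x37f00000; /* low 32 bits  */
    static uint32_t ext_ramdisk_image = 0x00000001; /* high 32 bits */

    static uint64_t get_ramdisk_image_sketch(void)
    {
            uint64_t ramdisk_image = hdr_ramdisk_image;

            /* Same composition as the patch: OR the extension into bits 63..32. */
            ramdisk_image |= (uint64_t)ext_ramdisk_image << 32;
            return ramdisk_image;
    }

    int main(void)
    {
            /* Prints 0x137f00000: an initrd loaded above the 4 GiB boundary. */
            printf("initrd at %#llx\n",
                   (unsigned long long)get_ramdisk_image_sketch());
            return 0;
    }

With the address widened this way, relocate_initrd() no longer needs the separate lowmem/highmem copy paths removed above.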
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d6bf1f34a6e9..69562992e457 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -278,7 +278,7 @@ static const struct {
278}; 278};
279 279
280static int 280static int
281__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, 281__setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
282 struct pt_regs *regs) 282 struct pt_regs *regs)
283{ 283{
284 struct sigframe __user *frame; 284 struct sigframe __user *frame;
@@ -286,7 +286,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
286 int err = 0; 286 int err = 0;
287 void __user *fpstate = NULL; 287 void __user *fpstate = NULL;
288 288
289 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 289 frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
290 290
291 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 291 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
292 return -EFAULT; 292 return -EFAULT;
@@ -307,8 +307,8 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
307 restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); 307 restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
308 else 308 else
309 restorer = &frame->retcode; 309 restorer = &frame->retcode;
310 if (ka->sa.sa_flags & SA_RESTORER) 310 if (ksig->ka.sa.sa_flags & SA_RESTORER)
311 restorer = ka->sa.sa_restorer; 311 restorer = ksig->ka.sa.sa_restorer;
312 312
313 /* Set up to return from userspace. */ 313 /* Set up to return from userspace. */
314 err |= __put_user(restorer, &frame->pretcode); 314 err |= __put_user(restorer, &frame->pretcode);
@@ -327,7 +327,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
327 327
328 /* Set up registers for signal handler */ 328 /* Set up registers for signal handler */
329 regs->sp = (unsigned long)frame; 329 regs->sp = (unsigned long)frame;
330 regs->ip = (unsigned long)ka->sa.sa_handler; 330 regs->ip = (unsigned long)ksig->ka.sa.sa_handler;
331 regs->ax = (unsigned long)sig; 331 regs->ax = (unsigned long)sig;
332 regs->dx = 0; 332 regs->dx = 0;
333 regs->cx = 0; 333 regs->cx = 0;
@@ -340,7 +340,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
340 return 0; 340 return 0;
341} 341}
342 342
343static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 343static int __setup_rt_frame(int sig, struct ksignal *ksig,
344 sigset_t *set, struct pt_regs *regs) 344 sigset_t *set, struct pt_regs *regs)
345{ 345{
346 struct rt_sigframe __user *frame; 346 struct rt_sigframe __user *frame;
@@ -348,7 +348,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
348 int err = 0; 348 int err = 0;
349 void __user *fpstate = NULL; 349 void __user *fpstate = NULL;
350 350
351 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 351 frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
352 352
353 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 353 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
354 return -EFAULT; 354 return -EFAULT;
@@ -368,8 +368,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
368 368
369 /* Set up to return from userspace. */ 369 /* Set up to return from userspace. */
370 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); 370 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
371 if (ka->sa.sa_flags & SA_RESTORER) 371 if (ksig->ka.sa.sa_flags & SA_RESTORER)
372 restorer = ka->sa.sa_restorer; 372 restorer = ksig->ka.sa.sa_restorer;
373 put_user_ex(restorer, &frame->pretcode); 373 put_user_ex(restorer, &frame->pretcode);
374 374
375 /* 375 /*
@@ -382,7 +382,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
382 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); 382 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
383 } put_user_catch(err); 383 } put_user_catch(err);
384 384
385 err |= copy_siginfo_to_user(&frame->info, info); 385 err |= copy_siginfo_to_user(&frame->info, &ksig->info);
386 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, 386 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
387 regs, set->sig[0]); 387 regs, set->sig[0]);
388 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 388 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -392,7 +392,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
392 392
393 /* Set up registers for signal handler */ 393 /* Set up registers for signal handler */
394 regs->sp = (unsigned long)frame; 394 regs->sp = (unsigned long)frame;
395 regs->ip = (unsigned long)ka->sa.sa_handler; 395 regs->ip = (unsigned long)ksig->ka.sa.sa_handler;
396 regs->ax = (unsigned long)sig; 396 regs->ax = (unsigned long)sig;
397 regs->dx = (unsigned long)&frame->info; 397 regs->dx = (unsigned long)&frame->info;
398 regs->cx = (unsigned long)&frame->uc; 398 regs->cx = (unsigned long)&frame->uc;
@@ -405,20 +405,20 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
405 return 0; 405 return 0;
406} 406}
407#else /* !CONFIG_X86_32 */ 407#else /* !CONFIG_X86_32 */
408static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 408static int __setup_rt_frame(int sig, struct ksignal *ksig,
409 sigset_t *set, struct pt_regs *regs) 409 sigset_t *set, struct pt_regs *regs)
410{ 410{
411 struct rt_sigframe __user *frame; 411 struct rt_sigframe __user *frame;
412 void __user *fp = NULL; 412 void __user *fp = NULL;
413 int err = 0; 413 int err = 0;
414 414
415 frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); 415 frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
416 416
417 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 417 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
418 return -EFAULT; 418 return -EFAULT;
419 419
420 if (ka->sa.sa_flags & SA_SIGINFO) { 420 if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
421 if (copy_siginfo_to_user(&frame->info, info)) 421 if (copy_siginfo_to_user(&frame->info, &ksig->info))
422 return -EFAULT; 422 return -EFAULT;
423 } 423 }
424 424
@@ -434,8 +434,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
434 /* Set up to return from userspace. If provided, use a stub 434 /* Set up to return from userspace. If provided, use a stub
435 already in userspace. */ 435 already in userspace. */
436 /* x86-64 should always use SA_RESTORER. */ 436 /* x86-64 should always use SA_RESTORER. */
437 if (ka->sa.sa_flags & SA_RESTORER) { 437 if (ksig->ka.sa.sa_flags & SA_RESTORER) {
438 put_user_ex(ka->sa.sa_restorer, &frame->pretcode); 438 put_user_ex(ksig->ka.sa.sa_restorer, &frame->pretcode);
439 } else { 439 } else {
440 /* could use a vstub here */ 440 /* could use a vstub here */
441 err |= -EFAULT; 441 err |= -EFAULT;
@@ -457,7 +457,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
457 next argument after the signal number on the stack. */ 457 next argument after the signal number on the stack. */
458 regs->si = (unsigned long)&frame->info; 458 regs->si = (unsigned long)&frame->info;
459 regs->dx = (unsigned long)&frame->uc; 459 regs->dx = (unsigned long)&frame->uc;
460 regs->ip = (unsigned long) ka->sa.sa_handler; 460 regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
461 461
462 regs->sp = (unsigned long)frame; 462 regs->sp = (unsigned long)frame;
463 463
@@ -469,8 +469,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
469} 469}
470#endif /* CONFIG_X86_32 */ 470#endif /* CONFIG_X86_32 */
471 471
472static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, 472static int x32_setup_rt_frame(struct ksignal *ksig,
473 siginfo_t *info, compat_sigset_t *set, 473 compat_sigset_t *set,
474 struct pt_regs *regs) 474 struct pt_regs *regs)
475{ 475{
476#ifdef CONFIG_X86_X32_ABI 476#ifdef CONFIG_X86_X32_ABI
@@ -479,13 +479,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
479 int err = 0; 479 int err = 0;
480 void __user *fpstate = NULL; 480 void __user *fpstate = NULL;
481 481
482 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 482 frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
483 483
484 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 484 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
485 return -EFAULT; 485 return -EFAULT;
486 486
487 if (ka->sa.sa_flags & SA_SIGINFO) { 487 if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
488 if (copy_siginfo_to_user32(&frame->info, info)) 488 if (copy_siginfo_to_user32(&frame->info, &ksig->info))
489 return -EFAULT; 489 return -EFAULT;
490 } 490 }
491 491
@@ -499,8 +499,8 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
499 err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); 499 err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
500 put_user_ex(0, &frame->uc.uc__pad0); 500 put_user_ex(0, &frame->uc.uc__pad0);
501 501
502 if (ka->sa.sa_flags & SA_RESTORER) { 502 if (ksig->ka.sa.sa_flags & SA_RESTORER) {
503 restorer = ka->sa.sa_restorer; 503 restorer = ksig->ka.sa.sa_restorer;
504 } else { 504 } else {
505 /* could use a vstub here */ 505 /* could use a vstub here */
506 restorer = NULL; 506 restorer = NULL;
@@ -518,10 +518,10 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
518 518
519 /* Set up registers for signal handler */ 519 /* Set up registers for signal handler */
520 regs->sp = (unsigned long) frame; 520 regs->sp = (unsigned long) frame;
521 regs->ip = (unsigned long) ka->sa.sa_handler; 521 regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
522 522
523 /* We use the x32 calling convention here... */ 523 /* We use the x32 calling convention here... */
524 regs->di = sig; 524 regs->di = ksig->sig;
525 regs->si = (unsigned long) &frame->info; 525 regs->si = (unsigned long) &frame->info;
526 regs->dx = (unsigned long) &frame->uc; 526 regs->dx = (unsigned long) &frame->uc;
527 527
@@ -535,70 +535,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
535 return 0; 535 return 0;
536} 536}
537 537
538#ifdef CONFIG_X86_32
539/*
540 * Atomically swap in the new signal mask, and wait for a signal.
541 */
542asmlinkage int
543sys_sigsuspend(int history0, int history1, old_sigset_t mask)
544{
545 sigset_t blocked;
546 siginitset(&blocked, mask);
547 return sigsuspend(&blocked);
548}
549
550asmlinkage int
551sys_sigaction(int sig, const struct old_sigaction __user *act,
552 struct old_sigaction __user *oact)
553{
554 struct k_sigaction new_ka, old_ka;
555 int ret = 0;
556
557 if (act) {
558 old_sigset_t mask;
559
560 if (!access_ok(VERIFY_READ, act, sizeof(*act)))
561 return -EFAULT;
562
563 get_user_try {
564 get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
565 get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
566 get_user_ex(mask, &act->sa_mask);
567 get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
568 } get_user_catch(ret);
569
570 if (ret)
571 return -EFAULT;
572 siginitset(&new_ka.sa.sa_mask, mask);
573 }
574
575 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
576
577 if (!ret && oact) {
578 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
579 return -EFAULT;
580
581 put_user_try {
582 put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
583 put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
584 put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
585 put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
586 } put_user_catch(ret);
587
588 if (ret)
589 return -EFAULT;
590 }
591
592 return ret;
593}
594#endif /* CONFIG_X86_32 */
595
596/* 538/*
597 * Do a signal return; undo the signal stack. 539 * Do a signal return; undo the signal stack.
598 */ 540 */
599#ifdef CONFIG_X86_32 541#ifdef CONFIG_X86_32
600unsigned long sys_sigreturn(struct pt_regs *regs) 542unsigned long sys_sigreturn(void)
601{ 543{
544 struct pt_regs *regs = current_pt_regs();
602 struct sigframe __user *frame; 545 struct sigframe __user *frame;
603 unsigned long ax; 546 unsigned long ax;
604 sigset_t set; 547 sigset_t set;
@@ -625,8 +568,9 @@ badframe:
625} 568}
626#endif /* CONFIG_X86_32 */ 569#endif /* CONFIG_X86_32 */
627 570
628long sys_rt_sigreturn(struct pt_regs *regs) 571long sys_rt_sigreturn(void)
629{ 572{
573 struct pt_regs *regs = current_pt_regs();
630 struct rt_sigframe __user *frame; 574 struct rt_sigframe __user *frame;
631 unsigned long ax; 575 unsigned long ax;
632 sigset_t set; 576 sigset_t set;
@@ -667,30 +611,29 @@ static int signr_convert(int sig)
667} 611}
668 612
669static int 613static int
670setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 614setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
671 struct pt_regs *regs)
672{ 615{
673 int usig = signr_convert(sig); 616 int usig = signr_convert(ksig->sig);
674 sigset_t *set = sigmask_to_save(); 617 sigset_t *set = sigmask_to_save();
675 compat_sigset_t *cset = (compat_sigset_t *) set; 618 compat_sigset_t *cset = (compat_sigset_t *) set;
676 619
677 /* Set up the stack frame */ 620 /* Set up the stack frame */
678 if (is_ia32_frame()) { 621 if (is_ia32_frame()) {
679 if (ka->sa.sa_flags & SA_SIGINFO) 622 if (ksig->ka.sa.sa_flags & SA_SIGINFO)
680 return ia32_setup_rt_frame(usig, ka, info, cset, regs); 623 return ia32_setup_rt_frame(usig, ksig, cset, regs);
681 else 624 else
682 return ia32_setup_frame(usig, ka, cset, regs); 625 return ia32_setup_frame(usig, ksig, cset, regs);
683 } else if (is_x32_frame()) { 626 } else if (is_x32_frame()) {
684 return x32_setup_rt_frame(usig, ka, info, cset, regs); 627 return x32_setup_rt_frame(ksig, cset, regs);
685 } else { 628 } else {
686 return __setup_rt_frame(sig, ka, info, set, regs); 629 return __setup_rt_frame(ksig->sig, ksig, set, regs);
687 } 630 }
688} 631}
689 632
690static void 633static void
691handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, 634handle_signal(struct ksignal *ksig, struct pt_regs *regs)
692 struct pt_regs *regs)
693{ 635{
636 bool failed;
694 /* Are we from a system call? */ 637 /* Are we from a system call? */
695 if (syscall_get_nr(current, regs) >= 0) { 638 if (syscall_get_nr(current, regs) >= 0) {
696 /* If so, check system call restarting.. */ 639 /* If so, check system call restarting.. */
@@ -701,7 +644,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
701 break; 644 break;
702 645
703 case -ERESTARTSYS: 646 case -ERESTARTSYS:
704 if (!(ka->sa.sa_flags & SA_RESTART)) { 647 if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
705 regs->ax = -EINTR; 648 regs->ax = -EINTR;
706 break; 649 break;
707 } 650 }
@@ -721,26 +664,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
721 likely(test_and_clear_thread_flag(TIF_FORCED_TF))) 664 likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
722 regs->flags &= ~X86_EFLAGS_TF; 665 regs->flags &= ~X86_EFLAGS_TF;
723 666
724 if (setup_rt_frame(sig, ka, info, regs) < 0) { 667 failed = (setup_rt_frame(ksig, regs) < 0);
725 force_sigsegv(sig, current); 668 if (!failed) {
726 return; 669 /*
670 * Clear the direction flag as per the ABI for function entry.
671 */
672 regs->flags &= ~X86_EFLAGS_DF;
673 /*
674 * Clear TF when entering the signal handler, but
675 * notify any tracer that was single-stepping it.
676 * The tracer may want to single-step inside the
677 * handler too.
678 */
679 regs->flags &= ~X86_EFLAGS_TF;
727 } 680 }
728 681 signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
729 /*
730 * Clear the direction flag as per the ABI for function entry.
731 */
732 regs->flags &= ~X86_EFLAGS_DF;
733
734 /*
735 * Clear TF when entering the signal handler, but
736 * notify any tracer that was single-stepping it.
737 * The tracer may want to single-step inside the
738 * handler too.
739 */
740 regs->flags &= ~X86_EFLAGS_TF;
741
742 signal_delivered(sig, info, ka, regs,
743 test_thread_flag(TIF_SINGLESTEP));
744} 682}
745 683
746#ifdef CONFIG_X86_32 684#ifdef CONFIG_X86_32
@@ -757,14 +695,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
757 */ 695 */
758static void do_signal(struct pt_regs *regs) 696static void do_signal(struct pt_regs *regs)
759{ 697{
760 struct k_sigaction ka; 698 struct ksignal ksig;
761 siginfo_t info;
762 int signr;
763 699
764 signr = get_signal_to_deliver(&info, &ka, regs, NULL); 700 if (get_signal(&ksig)) {
765 if (signr > 0) {
766 /* Whee! Actually deliver the signal. */ 701 /* Whee! Actually deliver the signal. */
767 handle_signal(signr, &info, &ka, regs); 702 handle_signal(&ksig, regs);
768 return; 703 return;
769 } 704 }
770 705
@@ -843,8 +778,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
843} 778}
844 779
845#ifdef CONFIG_X86_X32_ABI 780#ifdef CONFIG_X86_X32_ABI
846asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) 781asmlinkage long sys32_x32_rt_sigreturn(void)
847{ 782{
783 struct pt_regs *regs = current_pt_regs();
848 struct rt_sigframe_x32 __user *frame; 784 struct rt_sigframe_x32 __user *frame;
849 sigset_t set; 785 sigset_t set;
850 unsigned long ax; 786 unsigned long ax;
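A condensed userspace model of the delivery flow the signal.c changes converge on: do_signal() pulls a struct ksignal from get_signal(), handle_signal() builds the frame, and signal_setup_done() finishes up (forcing SIGSEGV on failure). The types and stubs below are mock stand-ins so the sketch compiles on its own; only the control flow mirrors the patch:

    #include <stdbool.h>
    #include <stdio.h>

    struct ksignal { int sig; };          /* mock of the kernel's struct ksignal */
    struct pt_regs { unsigned long ip; }; /* mock */

    static bool get_signal(struct ksignal *ksig) { ksig->sig = 2; return true; }

    static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
    {
            regs->ip = 0x1234;            /* pretend the frame was written */
            return 0;
    }

    static void signal_setup_done(bool failed, struct ksignal *ksig, int stepping)
    {
            /* In the kernel this forces SIGSEGV on failure or completes delivery. */
            printf("sig %d: %s (stepping=%d)\n",
                   ksig->sig, failed ? "failed" : "delivered", stepping);
    }

    static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
    {
            bool failed = setup_rt_frame(ksig, regs) < 0;
            /* DF/TF flag handling from the patch is omitted in this sketch. */
            signal_setup_done(failed, ksig, 0);
    }

    int main(void)
    {
            struct ksignal ksig;
            struct pt_regs regs = { 0 };

            if (get_signal(&ksig))
                    handle_signal(&ksig, &regs);
            return 0;
    }

The point of the conversion is that the signal number, siginfo and k_sigaction travel together in one struct ksignal instead of being passed as three separate parameters.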
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ed0fe385289d..9f190a2a00e9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1365,11 +1365,10 @@ static inline void mwait_play_dead(void)
1365 unsigned int eax, ebx, ecx, edx; 1365 unsigned int eax, ebx, ecx, edx;
1366 unsigned int highest_cstate = 0; 1366 unsigned int highest_cstate = 0;
1367 unsigned int highest_subcstate = 0; 1367 unsigned int highest_subcstate = 0;
1368 int i;
1369 void *mwait_ptr; 1368 void *mwait_ptr;
1370 struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); 1369 int i;
1371 1370
1372 if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) 1371 if (!this_cpu_has(X86_FEATURE_MWAIT))
1373 return; 1372 return;
1374 if (!this_cpu_has(X86_FEATURE_CLFLSH)) 1373 if (!this_cpu_has(X86_FEATURE_CLFLSH))
1375 return; 1374 return;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 97ef74b88e0f..dbded5aedb81 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
157 if (flags & MAP_FIXED) 157 if (flags & MAP_FIXED)
158 return addr; 158 return addr;
159 159
160 /* for MAP_32BIT mappings we force the legact mmap base */ 160 /* for MAP_32BIT mappings we force the legacy mmap base */
161 if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) 161 if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
162 goto bottomup; 162 goto bottomup;
163 163
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ecffca11f4e9..68bda7a84159 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -688,10 +688,19 @@ void __init early_trap_init(void)
688 set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); 688 set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
689 /* int3 can be called from all */ 689 /* int3 can be called from all */
690 set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); 690 set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
691#ifdef CONFIG_X86_32
691 set_intr_gate(X86_TRAP_PF, &page_fault); 692 set_intr_gate(X86_TRAP_PF, &page_fault);
693#endif
692 load_idt(&idt_descr); 694 load_idt(&idt_descr);
693} 695}
694 696
697void __init early_trap_pf_init(void)
698{
699#ifdef CONFIG_X86_64
700 set_intr_gate(X86_TRAP_PF, &page_fault);
701#endif
702}
703
695void __init trap_init(void) 704void __init trap_init(void)
696{ 705{
697 int i; 706 int i;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 06ccb5073a3f..4b9ea101fe3b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
623 ns_now = __cycles_2_ns(tsc_now); 623 ns_now = __cycles_2_ns(tsc_now);
624 624
625 if (cpu_khz) { 625 if (cpu_khz) {
626 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; 626 *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
627 cpu_khz / 2) / cpu_khz;
627 *offset = ns_now - mult_frac(tsc_now, *scale, 628 *offset = ns_now - mult_frac(tsc_now, *scale,
628 (1UL << CYC2NS_SCALE_FACTOR)); 629 (1UL << CYC2NS_SCALE_FACTOR));
629 } 630 }
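The tsc.c hunk changes the cycles-to-nanoseconds scale from truncating division to round-to-nearest. A self-contained arithmetic check of the difference (the cpu_khz value is chosen only so the two results differ; CYC2NS_SCALE_FACTOR is assumed to be 10 here):

    #include <stdio.h>

    #define NSEC_PER_MSEC       1000000UL
    #define CYC2NS_SCALE_FACTOR 10

    int main(void)
    {
            unsigned long cpu_khz = 2800000;   /* 2.8 GHz, illustrative */

            unsigned long truncated = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz;
            unsigned long rounded   = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
                                       cpu_khz / 2) / cpu_khz;

            /* truncated=365, rounded=366: rounding to the nearest scale shrinks the
               systematic error in each cycles->ns conversion instead of always
               erring low. */
            printf("truncated=%lu rounded=%lu\n", truncated, rounded);
            return 0;
    }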
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index c71025b67462..0ba4cfb4f412 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
680 if (auprobe->insn[i] == 0x66) 680 if (auprobe->insn[i] == 0x66)
681 continue; 681 continue;
682 682
683 if (auprobe->insn[i] == 0x90) 683 if (auprobe->insn[i] == 0x90) {
684 regs->ip += i + 1;
684 return true; 685 return true;
686 }
685 687
686 break; 688 break;
687 } 689 }
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 1dfe69cc78a8..1cf5766dde16 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -202,7 +202,7 @@ out:
202static int do_vm86_irq_handling(int subfunction, int irqnumber); 202static int do_vm86_irq_handling(int subfunction, int irqnumber);
203static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); 203static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
204 204
205int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) 205int sys_vm86old(struct vm86_struct __user *v86)
206{ 206{
207 struct kernel_vm86_struct info; /* declare this _on top_, 207 struct kernel_vm86_struct info; /* declare this _on top_,
208 * this avoids wasting of stack space. 208 * this avoids wasting of stack space.
@@ -222,7 +222,7 @@ int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs)
222 if (tmp) 222 if (tmp)
223 goto out; 223 goto out;
224 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); 224 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
225 info.regs32 = regs; 225 info.regs32 = current_pt_regs();
226 tsk->thread.vm86_info = v86; 226 tsk->thread.vm86_info = v86;
227 do_sys_vm86(&info, tsk); 227 do_sys_vm86(&info, tsk);
228 ret = 0; /* we never return here */ 228 ret = 0; /* we never return here */
@@ -231,7 +231,7 @@ out:
231} 231}
232 232
233 233
234int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) 234int sys_vm86(unsigned long cmd, unsigned long arg)
235{ 235{
236 struct kernel_vm86_struct info; /* declare this _on top_, 236 struct kernel_vm86_struct info; /* declare this _on top_,
237 * this avoids wasting of stack space. 237 * this avoids wasting of stack space.
@@ -272,7 +272,7 @@ int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs)
272 ret = -EFAULT; 272 ret = -EFAULT;
273 if (tmp) 273 if (tmp)
274 goto out; 274 goto out;
275 info.regs32 = regs; 275 info.regs32 = current_pt_regs();
276 info.vm86plus.is_vm86pus = 1; 276 info.vm86plus.is_vm86pus = 1;
277 tsk->thread.vm86_info = (struct vm86_struct __user *)v86; 277 tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
278 do_sys_vm86(&info, tsk); 278 do_sys_vm86(&info, tsk);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1330dd102950..b014d9414d08 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy);
59EXPORT_SYMBOL(__memcpy); 59EXPORT_SYMBOL(__memcpy);
60EXPORT_SYMBOL(memmove); 60EXPORT_SYMBOL(memmove);
61 61
62#ifndef CONFIG_DEBUG_VIRTUAL
63EXPORT_SYMBOL(phys_base);
64#endif
62EXPORT_SYMBOL(empty_zero_page); 65EXPORT_SYMBOL(empty_zero_page);
63#ifndef CONFIG_PARAVIRT 66#ifndef CONFIG_PARAVIRT
64EXPORT_SYMBOL(native_load_gs_index); 67EXPORT_SYMBOL(native_load_gs_index);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7a3d075a814a..45a14dbbddaf 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -19,6 +19,7 @@
19#include <asm/time.h> 19#include <asm/time.h>
20#include <asm/irq.h> 20#include <asm/irq.h>
21#include <asm/io_apic.h> 21#include <asm/io_apic.h>
22#include <asm/hpet.h>
22#include <asm/pat.h> 23#include <asm/pat.h>
23#include <asm/tsc.h> 24#include <asm/tsc.h>
24#include <asm/iommu.h> 25#include <asm/iommu.h>
@@ -62,10 +63,6 @@ struct x86_init_ops x86_init __initdata = {
62 .banner = default_banner, 63 .banner = default_banner,
63 }, 64 },
64 65
65 .mapping = {
66 .pagetable_reserve = native_pagetable_reserve,
67 },
68
69 .paging = { 66 .paging = {
70 .pagetable_init = native_pagetable_init, 67 .pagetable_init = native_pagetable_init,
71 }, 68 },
@@ -111,15 +108,22 @@ struct x86_platform_ops x86_platform = {
111 108
112EXPORT_SYMBOL_GPL(x86_platform); 109EXPORT_SYMBOL_GPL(x86_platform);
113struct x86_msi_ops x86_msi = { 110struct x86_msi_ops x86_msi = {
114 .setup_msi_irqs = native_setup_msi_irqs, 111 .setup_msi_irqs = native_setup_msi_irqs,
115 .teardown_msi_irq = native_teardown_msi_irq, 112 .compose_msi_msg = native_compose_msi_msg,
116 .teardown_msi_irqs = default_teardown_msi_irqs, 113 .teardown_msi_irq = native_teardown_msi_irq,
117 .restore_msi_irqs = default_restore_msi_irqs, 114 .teardown_msi_irqs = default_teardown_msi_irqs,
115 .restore_msi_irqs = default_restore_msi_irqs,
116 .setup_hpet_msi = default_setup_hpet_msi,
118}; 117};
119 118
120struct x86_io_apic_ops x86_io_apic_ops = { 119struct x86_io_apic_ops x86_io_apic_ops = {
121 .init = native_io_apic_init_mappings, 120 .init = native_io_apic_init_mappings,
122 .read = native_io_apic_read, 121 .read = native_io_apic_read,
123 .write = native_io_apic_write, 122 .write = native_io_apic_write,
124 .modify = native_io_apic_modify, 123 .modify = native_io_apic_modify,
124 .disable = native_disable_io_apic,
125 .print_entries = native_io_apic_print_entries,
126 .set_affinity = native_ioapic_set_affinity,
127 .setup_entry = native_setup_ioapic_entry,
128 .eoi_ioapic_pin = native_eoi_ioapic_pin,
125}; 129};
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a27e76371108..a335cc6cde72 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -24,6 +24,7 @@
24#include "kvm_cache_regs.h" 24#include "kvm_cache_regs.h"
25#include <linux/module.h> 25#include <linux/module.h>
26#include <asm/kvm_emulate.h> 26#include <asm/kvm_emulate.h>
27#include <linux/stringify.h>
27 28
28#include "x86.h" 29#include "x86.h"
29#include "tss.h" 30#include "tss.h"
@@ -43,7 +44,7 @@
43#define OpCL 9ull /* CL register (for shifts) */ 44#define OpCL 9ull /* CL register (for shifts) */
44#define OpImmByte 10ull /* 8-bit sign extended immediate */ 45#define OpImmByte 10ull /* 8-bit sign extended immediate */
45#define OpOne 11ull /* Implied 1 */ 46#define OpOne 11ull /* Implied 1 */
46#define OpImm 12ull /* Sign extended immediate */ 47#define OpImm 12ull /* Sign extended up to 32-bit immediate */
47#define OpMem16 13ull /* Memory operand (16-bit). */ 48#define OpMem16 13ull /* Memory operand (16-bit). */
48#define OpMem32 14ull /* Memory operand (32-bit). */ 49#define OpMem32 14ull /* Memory operand (32-bit). */
49#define OpImmU 15ull /* Immediate operand, zero extended */ 50#define OpImmU 15ull /* Immediate operand, zero extended */
@@ -58,6 +59,7 @@
58#define OpFS 24ull /* FS */ 59#define OpFS 24ull /* FS */
59#define OpGS 25ull /* GS */ 60#define OpGS 25ull /* GS */
60#define OpMem8 26ull /* 8-bit zero extended memory operand */ 61#define OpMem8 26ull /* 8-bit zero extended memory operand */
62#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
61 63
62#define OpBits 5 /* Width of operand field */ 64#define OpBits 5 /* Width of operand field */
63#define OpMask ((1ull << OpBits) - 1) 65#define OpMask ((1ull << OpBits) - 1)
@@ -101,6 +103,7 @@
101#define SrcMemFAddr (OpMemFAddr << SrcShift) 103#define SrcMemFAddr (OpMemFAddr << SrcShift)
102#define SrcAcc (OpAcc << SrcShift) 104#define SrcAcc (OpAcc << SrcShift)
103#define SrcImmU16 (OpImmU16 << SrcShift) 105#define SrcImmU16 (OpImmU16 << SrcShift)
106#define SrcImm64 (OpImm64 << SrcShift)
104#define SrcDX (OpDX << SrcShift) 107#define SrcDX (OpDX << SrcShift)
105#define SrcMem8 (OpMem8 << SrcShift) 108#define SrcMem8 (OpMem8 << SrcShift)
106#define SrcMask (OpMask << SrcShift) 109#define SrcMask (OpMask << SrcShift)
@@ -113,6 +116,7 @@
113#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ 116#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
114#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ 117#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
115#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ 118#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
119#define Escape (5<<15) /* Escape to coprocessor instruction */
116#define Sse (1<<18) /* SSE Vector instruction */ 120#define Sse (1<<18) /* SSE Vector instruction */
117/* Generic ModRM decode. */ 121/* Generic ModRM decode. */
118#define ModRM (1<<19) 122#define ModRM (1<<19)
@@ -146,6 +150,8 @@
146#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ 150#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
147#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ 151#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
148#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ 152#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
153#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
154#define NoWrite ((u64)1 << 45) /* No writeback */
149 155
150#define X2(x...) x, x 156#define X2(x...) x, x
151#define X3(x...) X2(x), x 157#define X3(x...) X2(x), x
@@ -156,6 +162,27 @@
156#define X8(x...) X4(x), X4(x) 162#define X8(x...) X4(x), X4(x)
157#define X16(x...) X8(x), X8(x) 163#define X16(x...) X8(x), X8(x)
158 164
165#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
166#define FASTOP_SIZE 8
167
168/*
169 * fastop functions have a special calling convention:
170 *
171 * dst: [rdx]:rax (in/out)
172 * src: rbx (in/out)
173 * src2: rcx (in)
174 * flags: rflags (in/out)
175 *
176 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
177 * different operand sizes can be reached by calculation, rather than a jump
178 * table (which would be bigger than the code).
179 *
180 * fastop functions are declared as taking a never-defined fastop parameter,
181 * so they can't be called from C directly.
182 */
183
184struct fastop;
185
159struct opcode { 186struct opcode {
160 u64 flags : 56; 187 u64 flags : 56;
161 u64 intercept : 8; 188 u64 intercept : 8;
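The new comment above describes the fastop layout: the byte, word, long and (on 64-bit) quad variants of each operation are emitted back to back, each exactly FASTOP_SIZE bytes long, so the variant matching the operand size can be found by pointer arithmetic instead of a jump table. A small userspace model of that lookup follows; the base address and helper names are made up for illustration and this is not the kernel's dispatch code:

    #include <stdint.h>
    #include <stdio.h>

    #define FASTOP_SIZE 8

    /* ilog2 of the operand size picks the slot: 1->0, 2->1, 4->2, 8->3. */
    static unsigned int slot_for_size(unsigned int bytes)
    {
            unsigned int slot = 0;

            while (bytes > 1) {
                    bytes >>= 1;
                    slot++;
            }
            return slot;
    }

    /* Address of the variant for a given operand size, starting from the
       address of the byte-sized variant -- no table, just an offset. */
    static uintptr_t variant_for(uintptr_t base, unsigned int op_bytes)
    {
            return base + (uintptr_t)slot_for_size(op_bytes) * FASTOP_SIZE;
    }

    int main(void)
    {
            uintptr_t em_add_base = 0x1000;  /* made-up address of em_add */

            /* 4-byte operands land two slots in: 0x1000 + 2*8 = 0x1010. */
            printf("32-bit add variant at %#lx\n",
                   (unsigned long)variant_for(em_add_base, 4));
            return 0;
    }

The FASTOP* macros added further down generate exactly this layout with .align directives; the fastop() helper declared here (its body is outside this excerpt) can then select a variant by the same offset calculation before invoking it with the register convention the comment lists.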
@@ -164,6 +191,8 @@ struct opcode {
164 const struct opcode *group; 191 const struct opcode *group;
165 const struct group_dual *gdual; 192 const struct group_dual *gdual;
166 const struct gprefix *gprefix; 193 const struct gprefix *gprefix;
194 const struct escape *esc;
195 void (*fastop)(struct fastop *fake);
167 } u; 196 } u;
168 int (*check_perm)(struct x86_emulate_ctxt *ctxt); 197 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
169}; 198};
@@ -180,6 +209,11 @@ struct gprefix {
180 struct opcode pfx_f3; 209 struct opcode pfx_f3;
181}; 210};
182 211
212struct escape {
213 struct opcode op[8];
214 struct opcode high[64];
215};
216
183/* EFLAGS bit definitions. */ 217/* EFLAGS bit definitions. */
184#define EFLG_ID (1<<21) 218#define EFLG_ID (1<<21)
185#define EFLG_VIP (1<<20) 219#define EFLG_VIP (1<<20)
@@ -407,6 +441,97 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
407 } \ 441 } \
408 } while (0) 442 } while (0)
409 443
444static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
445
446#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
447#define FOP_RET "ret \n\t"
448
449#define FOP_START(op) \
450 extern void em_##op(struct fastop *fake); \
451 asm(".pushsection .text, \"ax\" \n\t" \
452 ".global em_" #op " \n\t" \
453 FOP_ALIGN \
454 "em_" #op ": \n\t"
455
456#define FOP_END \
457 ".popsection")
458
459#define FOPNOP() FOP_ALIGN FOP_RET
460
461#define FOP1E(op, dst) \
462 FOP_ALIGN #op " %" #dst " \n\t" FOP_RET
463
464#define FASTOP1(op) \
465 FOP_START(op) \
466 FOP1E(op##b, al) \
467 FOP1E(op##w, ax) \
468 FOP1E(op##l, eax) \
469 ON64(FOP1E(op##q, rax)) \
470 FOP_END
471
472#define FOP2E(op, dst, src) \
473 FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET
474
475#define FASTOP2(op) \
476 FOP_START(op) \
477 FOP2E(op##b, al, bl) \
478 FOP2E(op##w, ax, bx) \
479 FOP2E(op##l, eax, ebx) \
480 ON64(FOP2E(op##q, rax, rbx)) \
481 FOP_END
482
483/* 2 operand, word only */
484#define FASTOP2W(op) \
485 FOP_START(op) \
486 FOPNOP() \
487 FOP2E(op##w, ax, bx) \
488 FOP2E(op##l, eax, ebx) \
489 ON64(FOP2E(op##q, rax, rbx)) \
490 FOP_END
491
492/* 2 operand, src is CL */
493#define FASTOP2CL(op) \
494 FOP_START(op) \
495 FOP2E(op##b, al, cl) \
496 FOP2E(op##w, ax, cl) \
497 FOP2E(op##l, eax, cl) \
498 ON64(FOP2E(op##q, rax, cl)) \
499 FOP_END
500
501#define FOP3E(op, dst, src, src2) \
502 FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
503
504/* 3-operand, word-only, src2=cl */
505#define FASTOP3WCL(op) \
506 FOP_START(op) \
507 FOPNOP() \
508 FOP3E(op##w, ax, bx, cl) \
509 FOP3E(op##l, eax, ebx, cl) \
510 ON64(FOP3E(op##q, rax, rbx, cl)) \
511 FOP_END
512
513/* Special case for SETcc - 1 instruction per cc */
514#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
515
516FOP_START(setcc)
517FOP_SETCC(seto)
518FOP_SETCC(setno)
519FOP_SETCC(setc)
520FOP_SETCC(setnc)
521FOP_SETCC(setz)
522FOP_SETCC(setnz)
523FOP_SETCC(setbe)
524FOP_SETCC(setnbe)
525FOP_SETCC(sets)
526FOP_SETCC(setns)
527FOP_SETCC(setp)
528FOP_SETCC(setnp)
529FOP_SETCC(setl)
530FOP_SETCC(setnl)
531FOP_SETCC(setle)
532FOP_SETCC(setnle)
533FOP_END;
534
410#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ 535#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \
411 do { \ 536 do { \
412 unsigned long _tmp; \ 537 unsigned long _tmp; \
@@ -663,7 +788,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
663 ulong la; 788 ulong la;
664 u32 lim; 789 u32 lim;
665 u16 sel; 790 u16 sel;
666 unsigned cpl, rpl; 791 unsigned cpl;
667 792
668 la = seg_base(ctxt, addr.seg) + addr.ea; 793 la = seg_base(ctxt, addr.seg) + addr.ea;
669 switch (ctxt->mode) { 794 switch (ctxt->mode) {
@@ -697,11 +822,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
697 goto bad; 822 goto bad;
698 } 823 }
699 cpl = ctxt->ops->cpl(ctxt); 824 cpl = ctxt->ops->cpl(ctxt);
700 if (ctxt->mode == X86EMUL_MODE_REAL)
701 rpl = 0;
702 else
703 rpl = sel & 3;
704 cpl = max(cpl, rpl);
705 if (!(desc.type & 8)) { 825 if (!(desc.type & 8)) {
706 /* data segment */ 826 /* data segment */
707 if (cpl > desc.dpl) 827 if (cpl > desc.dpl)
@@ -852,39 +972,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
852 return rc; 972 return rc;
853} 973}
854 974
855static int test_cc(unsigned int condition, unsigned int flags) 975FASTOP2(add);
856{ 976FASTOP2(or);
857 int rc = 0; 977FASTOP2(adc);
858 978FASTOP2(sbb);
859 switch ((condition & 15) >> 1) { 979FASTOP2(and);
860 case 0: /* o */ 980FASTOP2(sub);
861 rc |= (flags & EFLG_OF); 981FASTOP2(xor);
862 break; 982FASTOP2(cmp);
863 case 1: /* b/c/nae */ 983FASTOP2(test);
864 rc |= (flags & EFLG_CF); 984
865 break; 985FASTOP3WCL(shld);
866 case 2: /* z/e */ 986FASTOP3WCL(shrd);
867 rc |= (flags & EFLG_ZF); 987
868 break; 988FASTOP2W(imul);
869 case 3: /* be/na */ 989
870 rc |= (flags & (EFLG_CF|EFLG_ZF)); 990FASTOP1(not);
871 break; 991FASTOP1(neg);
872 case 4: /* s */ 992FASTOP1(inc);
873 rc |= (flags & EFLG_SF); 993FASTOP1(dec);
874 break; 994
875 case 5: /* p/pe */ 995FASTOP2CL(rol);
876 rc |= (flags & EFLG_PF); 996FASTOP2CL(ror);
877 break; 997FASTOP2CL(rcl);
878 case 7: /* le/ng */ 998FASTOP2CL(rcr);
879 rc |= (flags & EFLG_ZF); 999FASTOP2CL(shl);
880 /* fall through */ 1000FASTOP2CL(shr);
881 case 6: /* l/nge */ 1001FASTOP2CL(sar);
882 rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); 1002
883 break; 1003FASTOP2W(bsf);
884 } 1004FASTOP2W(bsr);
885 1005FASTOP2W(bt);
886 /* Odd condition identifiers (lsb == 1) have inverted sense. */ 1006FASTOP2W(bts);
887 return (!!rc ^ (condition & 1)); 1007FASTOP2W(btr);
1008FASTOP2W(btc);
1009
1010static u8 test_cc(unsigned int condition, unsigned long flags)
1011{
1012 u8 rc;
1013 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1014
1015 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1016 asm("push %[flags]; popf; call *%[fastop]"
1017 : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
1018 return rc;
888} 1019}
889 1020
890static void fetch_register_operand(struct operand *op) 1021static void fetch_register_operand(struct operand *op)
@@ -994,6 +1125,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
994 ctxt->ops->put_fpu(ctxt); 1125 ctxt->ops->put_fpu(ctxt);
995} 1126}
996 1127
1128static int em_fninit(struct x86_emulate_ctxt *ctxt)
1129{
1130 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1131 return emulate_nm(ctxt);
1132
1133 ctxt->ops->get_fpu(ctxt);
1134 asm volatile("fninit");
1135 ctxt->ops->put_fpu(ctxt);
1136 return X86EMUL_CONTINUE;
1137}
1138
1139static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1140{
1141 u16 fcw;
1142
1143 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1144 return emulate_nm(ctxt);
1145
1146 ctxt->ops->get_fpu(ctxt);
1147 asm volatile("fnstcw %0": "+m"(fcw));
1148 ctxt->ops->put_fpu(ctxt);
1149
1150 /* force 2 byte destination */
1151 ctxt->dst.bytes = 2;
1152 ctxt->dst.val = fcw;
1153
1154 return X86EMUL_CONTINUE;
1155}
1156
1157static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1158{
1159 u16 fsw;
1160
1161 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1162 return emulate_nm(ctxt);
1163
1164 ctxt->ops->get_fpu(ctxt);
1165 asm volatile("fnstsw %0": "+m"(fsw));
1166 ctxt->ops->put_fpu(ctxt);
1167
1168 /* force 2 byte destination */
1169 ctxt->dst.bytes = 2;
1170 ctxt->dst.val = fsw;
1171
1172 return X86EMUL_CONTINUE;
1173}
1174
997static void decode_register_operand(struct x86_emulate_ctxt *ctxt, 1175static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
998 struct operand *op) 1176 struct operand *op)
999{ 1177{
@@ -1534,6 +1712,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
1534{ 1712{
1535 int rc; 1713 int rc;
1536 1714
1715 if (ctxt->d & NoWrite)
1716 return X86EMUL_CONTINUE;
1717
1537 switch (ctxt->dst.type) { 1718 switch (ctxt->dst.type) {
1538 case OP_REG: 1719 case OP_REG:
1539 write_register_operand(&ctxt->dst); 1720 write_register_operand(&ctxt->dst);
@@ -1918,47 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
1918 return X86EMUL_CONTINUE; 2099 return X86EMUL_CONTINUE;
1919} 2100}
1920 2101
1921static int em_grp2(struct x86_emulate_ctxt *ctxt)
1922{
1923 switch (ctxt->modrm_reg) {
1924 case 0: /* rol */
1925 emulate_2op_SrcB(ctxt, "rol");
1926 break;
1927 case 1: /* ror */
1928 emulate_2op_SrcB(ctxt, "ror");
1929 break;
1930 case 2: /* rcl */
1931 emulate_2op_SrcB(ctxt, "rcl");
1932 break;
1933 case 3: /* rcr */
1934 emulate_2op_SrcB(ctxt, "rcr");
1935 break;
1936 case 4: /* sal/shl */
1937 case 6: /* sal/shl */
1938 emulate_2op_SrcB(ctxt, "sal");
1939 break;
1940 case 5: /* shr */
1941 emulate_2op_SrcB(ctxt, "shr");
1942 break;
1943 case 7: /* sar */
1944 emulate_2op_SrcB(ctxt, "sar");
1945 break;
1946 }
1947 return X86EMUL_CONTINUE;
1948}
1949
1950static int em_not(struct x86_emulate_ctxt *ctxt)
1951{
1952 ctxt->dst.val = ~ctxt->dst.val;
1953 return X86EMUL_CONTINUE;
1954}
1955
1956static int em_neg(struct x86_emulate_ctxt *ctxt)
1957{
1958 emulate_1op(ctxt, "neg");
1959 return X86EMUL_CONTINUE;
1960}
1961
1962static int em_mul_ex(struct x86_emulate_ctxt *ctxt) 2102static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
1963{ 2103{
1964 u8 ex = 0; 2104 u8 ex = 0;
@@ -2000,12 +2140,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
2000 int rc = X86EMUL_CONTINUE; 2140 int rc = X86EMUL_CONTINUE;
2001 2141
2002 switch (ctxt->modrm_reg) { 2142 switch (ctxt->modrm_reg) {
2003 case 0: /* inc */
2004 emulate_1op(ctxt, "inc");
2005 break;
2006 case 1: /* dec */
2007 emulate_1op(ctxt, "dec");
2008 break;
2009 case 2: /* call near abs */ { 2143 case 2: /* call near abs */ {
2010 long int old_eip; 2144 long int old_eip;
2011 old_eip = ctxt->_eip; 2145 old_eip = ctxt->_eip;
@@ -2075,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2075 /* Save real source value, then compare EAX against destination. */ 2209 /* Save real source value, then compare EAX against destination. */
2076 ctxt->src.orig_val = ctxt->src.val; 2210 ctxt->src.orig_val = ctxt->src.val;
2077 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); 2211 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
2078 emulate_2op_SrcV(ctxt, "cmp"); 2212 fastop(ctxt, em_cmp);
2079 2213
2080 if (ctxt->eflags & EFLG_ZF) { 2214 if (ctxt->eflags & EFLG_ZF) {
2081 /* Success: write back to memory. */ 2215 /* Success: write back to memory. */
@@ -2843,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
2843 ctxt->src.type = OP_IMM; 2977 ctxt->src.type = OP_IMM;
2844 ctxt->src.val = 0; 2978 ctxt->src.val = 0;
2845 ctxt->src.bytes = 1; 2979 ctxt->src.bytes = 1;
2846 emulate_2op_SrcV(ctxt, "or"); 2980 fastop(ctxt, em_or);
2847 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); 2981 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
2848 if (cf) 2982 if (cf)
2849 ctxt->eflags |= X86_EFLAGS_CF; 2983 ctxt->eflags |= X86_EFLAGS_CF;
@@ -2852,6 +2986,24 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
2852 return X86EMUL_CONTINUE; 2986 return X86EMUL_CONTINUE;
2853} 2987}
2854 2988
2989static int em_aad(struct x86_emulate_ctxt *ctxt)
2990{
2991 u8 al = ctxt->dst.val & 0xff;
2992 u8 ah = (ctxt->dst.val >> 8) & 0xff;
2993
2994 al = (al + (ah * ctxt->src.val)) & 0xff;
2995
2996 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
2997
2998 /* Set PF, ZF, SF */
2999 ctxt->src.type = OP_IMM;
3000 ctxt->src.val = 0;
3001 ctxt->src.bytes = 1;
3002 fastop(ctxt, em_or);
3003
3004 return X86EMUL_CONTINUE;
3005}
3006
2855static int em_call(struct x86_emulate_ctxt *ctxt) 3007static int em_call(struct x86_emulate_ctxt *ctxt)
2856{ 3008{
2857 long rel = ctxt->src.val; 3009 long rel = ctxt->src.val;
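Note: the new em_aad handler is a straight translation of the AAD semantics visible in the hunk above: AL is replaced by (AL + AH * imm8) & 0xff, AH is cleared, and SF/ZF/PF are derived from the new AL by OR-ing it with an immediate zero through fastop. A minimal stand-alone sketch of the arithmetic follows; aad_model is a made-up name for illustration and is not kernel code.

	#include <stdint.h>
	#include <stdio.h>

	/* Model of the arithmetic in em_aad(); "base" is the immediate byte
	 * (normally 10, i.e. opcode D5 0A). */
	static uint16_t aad_model(uint16_t ax, uint8_t base)
	{
		uint8_t al = ax & 0xff;
		uint8_t ah = (ax >> 8) & 0xff;

		al = (uint8_t)(al + ah * base);
		return al;		/* AH is cleared, AL holds the result */
	}

	int main(void)
	{
		/* AX = 0x0207 (AH = 2, AL = 7): 7 + 2 * 10 = 27, AX becomes 0x001b */
		printf("%#06x\n", aad_model(0x0207, 10));
		return 0;
	}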
@@ -2900,64 +3052,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2900 return X86EMUL_CONTINUE; 3052 return X86EMUL_CONTINUE;
2901} 3053}
2902 3054
2903static int em_add(struct x86_emulate_ctxt *ctxt)
2904{
2905 emulate_2op_SrcV(ctxt, "add");
2906 return X86EMUL_CONTINUE;
2907}
2908
2909static int em_or(struct x86_emulate_ctxt *ctxt)
2910{
2911 emulate_2op_SrcV(ctxt, "or");
2912 return X86EMUL_CONTINUE;
2913}
2914
2915static int em_adc(struct x86_emulate_ctxt *ctxt)
2916{
2917 emulate_2op_SrcV(ctxt, "adc");
2918 return X86EMUL_CONTINUE;
2919}
2920
2921static int em_sbb(struct x86_emulate_ctxt *ctxt)
2922{
2923 emulate_2op_SrcV(ctxt, "sbb");
2924 return X86EMUL_CONTINUE;
2925}
2926
2927static int em_and(struct x86_emulate_ctxt *ctxt)
2928{
2929 emulate_2op_SrcV(ctxt, "and");
2930 return X86EMUL_CONTINUE;
2931}
2932
2933static int em_sub(struct x86_emulate_ctxt *ctxt)
2934{
2935 emulate_2op_SrcV(ctxt, "sub");
2936 return X86EMUL_CONTINUE;
2937}
2938
2939static int em_xor(struct x86_emulate_ctxt *ctxt)
2940{
2941 emulate_2op_SrcV(ctxt, "xor");
2942 return X86EMUL_CONTINUE;
2943}
2944
2945static int em_cmp(struct x86_emulate_ctxt *ctxt)
2946{
2947 emulate_2op_SrcV(ctxt, "cmp");
2948 /* Disable writeback. */
2949 ctxt->dst.type = OP_NONE;
2950 return X86EMUL_CONTINUE;
2951}
2952
2953static int em_test(struct x86_emulate_ctxt *ctxt)
2954{
2955 emulate_2op_SrcV(ctxt, "test");
2956 /* Disable writeback. */
2957 ctxt->dst.type = OP_NONE;
2958 return X86EMUL_CONTINUE;
2959}
2960
2961static int em_xchg(struct x86_emulate_ctxt *ctxt) 3055static int em_xchg(struct x86_emulate_ctxt *ctxt)
2962{ 3056{
2963 /* Write back the register source. */ 3057 /* Write back the register source. */
@@ -2970,16 +3064,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt)
2970 return X86EMUL_CONTINUE; 3064 return X86EMUL_CONTINUE;
2971} 3065}
2972 3066
2973static int em_imul(struct x86_emulate_ctxt *ctxt)
2974{
2975 emulate_2op_SrcV_nobyte(ctxt, "imul");
2976 return X86EMUL_CONTINUE;
2977}
2978
2979static int em_imul_3op(struct x86_emulate_ctxt *ctxt) 3067static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
2980{ 3068{
2981 ctxt->dst.val = ctxt->src2.val; 3069 ctxt->dst.val = ctxt->src2.val;
2982 return em_imul(ctxt); 3070 return fastop(ctxt, em_imul);
2983} 3071}
2984 3072
2985static int em_cwd(struct x86_emulate_ctxt *ctxt) 3073static int em_cwd(struct x86_emulate_ctxt *ctxt)
@@ -3300,47 +3388,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
3300 return X86EMUL_CONTINUE; 3388 return X86EMUL_CONTINUE;
3301} 3389}
3302 3390
3303static int em_bt(struct x86_emulate_ctxt *ctxt)
3304{
3305 /* Disable writeback. */
3306 ctxt->dst.type = OP_NONE;
3307 /* only subword offset */
3308 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
3309
3310 emulate_2op_SrcV_nobyte(ctxt, "bt");
3311 return X86EMUL_CONTINUE;
3312}
3313
3314static int em_bts(struct x86_emulate_ctxt *ctxt)
3315{
3316 emulate_2op_SrcV_nobyte(ctxt, "bts");
3317 return X86EMUL_CONTINUE;
3318}
3319
3320static int em_btr(struct x86_emulate_ctxt *ctxt)
3321{
3322 emulate_2op_SrcV_nobyte(ctxt, "btr");
3323 return X86EMUL_CONTINUE;
3324}
3325
3326static int em_btc(struct x86_emulate_ctxt *ctxt)
3327{
3328 emulate_2op_SrcV_nobyte(ctxt, "btc");
3329 return X86EMUL_CONTINUE;
3330}
3331
3332static int em_bsf(struct x86_emulate_ctxt *ctxt)
3333{
3334 emulate_2op_SrcV_nobyte(ctxt, "bsf");
3335 return X86EMUL_CONTINUE;
3336}
3337
3338static int em_bsr(struct x86_emulate_ctxt *ctxt)
3339{
3340 emulate_2op_SrcV_nobyte(ctxt, "bsr");
3341 return X86EMUL_CONTINUE;
3342}
3343
3344static int em_cpuid(struct x86_emulate_ctxt *ctxt) 3391static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3345{ 3392{
3346 u32 eax, ebx, ecx, edx; 3393 u32 eax, ebx, ecx, edx;
@@ -3572,7 +3619,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3572#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } 3619#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3573#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } 3620#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3574#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } 3621#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3622#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3575#define I(_f, _e) { .flags = (_f), .u.execute = (_e) } 3623#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3624#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
3576#define II(_f, _e, _i) \ 3625#define II(_f, _e, _i) \
3577 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } 3626 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
3578#define IIP(_f, _e, _i, _p) \ 3627#define IIP(_f, _e, _i, _p) \
@@ -3583,12 +3632,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3583#define D2bv(_f) D((_f) | ByteOp), D(_f) 3632#define D2bv(_f) D((_f) | ByteOp), D(_f)
3584#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) 3633#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
3585#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) 3634#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
3635#define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
3586#define I2bvIP(_f, _e, _i, _p) \ 3636#define I2bvIP(_f, _e, _i, _p) \
3587 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) 3637 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
3588 3638
3589#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ 3639#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
3590 I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ 3640 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
3591 I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 3641 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
3592 3642
3593static const struct opcode group7_rm1[] = { 3643static const struct opcode group7_rm1[] = {
3594 DI(SrcNone | Priv, monitor), 3644 DI(SrcNone | Priv, monitor),
@@ -3614,25 +3664,36 @@ static const struct opcode group7_rm7[] = {
3614}; 3664};
3615 3665
3616static const struct opcode group1[] = { 3666static const struct opcode group1[] = {
3617 I(Lock, em_add), 3667 F(Lock, em_add),
3618 I(Lock | PageTable, em_or), 3668 F(Lock | PageTable, em_or),
3619 I(Lock, em_adc), 3669 F(Lock, em_adc),
3620 I(Lock, em_sbb), 3670 F(Lock, em_sbb),
3621 I(Lock | PageTable, em_and), 3671 F(Lock | PageTable, em_and),
3622 I(Lock, em_sub), 3672 F(Lock, em_sub),
3623 I(Lock, em_xor), 3673 F(Lock, em_xor),
3624 I(0, em_cmp), 3674 F(NoWrite, em_cmp),
3625}; 3675};
3626 3676
3627static const struct opcode group1A[] = { 3677static const struct opcode group1A[] = {
3628 I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, 3678 I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
3629}; 3679};
3630 3680
3681static const struct opcode group2[] = {
3682 F(DstMem | ModRM, em_rol),
3683 F(DstMem | ModRM, em_ror),
3684 F(DstMem | ModRM, em_rcl),
3685 F(DstMem | ModRM, em_rcr),
3686 F(DstMem | ModRM, em_shl),
3687 F(DstMem | ModRM, em_shr),
3688 F(DstMem | ModRM, em_shl),
3689 F(DstMem | ModRM, em_sar),
3690};
3691
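Note: the group2 table above replaces the removed em_grp2() switch; entries are selected by the ModRM reg field, and the shift count now arrives through the Src2One/Src2CL/Src2ImmByte decode flags used at opcodes C0/C1 and D0-D3 further down. For reference, the mapping is (the /6 encoding is the undocumented alias of shl/sal):

	ModRM.reg:  /0   /1   /2   /3   /4   /5   /6           /7
	handler:    rol  ror  rcl  rcr  shl  shr  shl (alias)  sar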
3631static const struct opcode group3[] = { 3692static const struct opcode group3[] = {
3632 I(DstMem | SrcImm, em_test), 3693 F(DstMem | SrcImm | NoWrite, em_test),
3633 I(DstMem | SrcImm, em_test), 3694 F(DstMem | SrcImm | NoWrite, em_test),
3634 I(DstMem | SrcNone | Lock, em_not), 3695 F(DstMem | SrcNone | Lock, em_not),
3635 I(DstMem | SrcNone | Lock, em_neg), 3696 F(DstMem | SrcNone | Lock, em_neg),
3636 I(SrcMem, em_mul_ex), 3697 I(SrcMem, em_mul_ex),
3637 I(SrcMem, em_imul_ex), 3698 I(SrcMem, em_imul_ex),
3638 I(SrcMem, em_div_ex), 3699 I(SrcMem, em_div_ex),
@@ -3640,14 +3701,14 @@ static const struct opcode group3[] = {
3640}; 3701};
3641 3702
3642static const struct opcode group4[] = { 3703static const struct opcode group4[] = {
3643 I(ByteOp | DstMem | SrcNone | Lock, em_grp45), 3704 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
3644 I(ByteOp | DstMem | SrcNone | Lock, em_grp45), 3705 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
3645 N, N, N, N, N, N, 3706 N, N, N, N, N, N,
3646}; 3707};
3647 3708
3648static const struct opcode group5[] = { 3709static const struct opcode group5[] = {
3649 I(DstMem | SrcNone | Lock, em_grp45), 3710 F(DstMem | SrcNone | Lock, em_inc),
3650 I(DstMem | SrcNone | Lock, em_grp45), 3711 F(DstMem | SrcNone | Lock, em_dec),
3651 I(SrcMem | Stack, em_grp45), 3712 I(SrcMem | Stack, em_grp45),
3652 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), 3713 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
3653 I(SrcMem | Stack, em_grp45), 3714 I(SrcMem | Stack, em_grp45),
@@ -3682,10 +3743,10 @@ static const struct group_dual group7 = { {
3682 3743
3683static const struct opcode group8[] = { 3744static const struct opcode group8[] = {
3684 N, N, N, N, 3745 N, N, N, N,
3685 I(DstMem | SrcImmByte, em_bt), 3746 F(DstMem | SrcImmByte | NoWrite, em_bt),
3686 I(DstMem | SrcImmByte | Lock | PageTable, em_bts), 3747 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
3687 I(DstMem | SrcImmByte | Lock, em_btr), 3748 F(DstMem | SrcImmByte | Lock, em_btr),
3688 I(DstMem | SrcImmByte | Lock | PageTable, em_btc), 3749 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
3689}; 3750};
3690 3751
3691static const struct group_dual group9 = { { 3752static const struct group_dual group9 = { {
@@ -3707,33 +3768,96 @@ static const struct gprefix pfx_vmovntpx = {
3707 I(0, em_mov), N, N, N, 3768 I(0, em_mov), N, N, N,
3708}; 3769};
3709 3770
3771static const struct escape escape_d9 = { {
3772 N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
3773}, {
3774 /* 0xC0 - 0xC7 */
3775 N, N, N, N, N, N, N, N,
3776 /* 0xC8 - 0xCF */
3777 N, N, N, N, N, N, N, N,
3778	/* 0xD0 - 0xD7 */
3779 N, N, N, N, N, N, N, N,
3780 /* 0xD8 - 0xDF */
3781 N, N, N, N, N, N, N, N,
3782 /* 0xE0 - 0xE7 */
3783 N, N, N, N, N, N, N, N,
3784 /* 0xE8 - 0xEF */
3785 N, N, N, N, N, N, N, N,
3786 /* 0xF0 - 0xF7 */
3787 N, N, N, N, N, N, N, N,
3788 /* 0xF8 - 0xFF */
3789 N, N, N, N, N, N, N, N,
3790} };
3791
3792static const struct escape escape_db = { {
3793 N, N, N, N, N, N, N, N,
3794}, {
3795 /* 0xC0 - 0xC7 */
3796 N, N, N, N, N, N, N, N,
3797 /* 0xC8 - 0xCF */
3798 N, N, N, N, N, N, N, N,
3799	/* 0xD0 - 0xD7 */
3800 N, N, N, N, N, N, N, N,
3801 /* 0xD8 - 0xDF */
3802 N, N, N, N, N, N, N, N,
3803 /* 0xE0 - 0xE7 */
3804 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
3805 /* 0xE8 - 0xEF */
3806 N, N, N, N, N, N, N, N,
3807 /* 0xF0 - 0xF7 */
3808 N, N, N, N, N, N, N, N,
3809 /* 0xF8 - 0xFF */
3810 N, N, N, N, N, N, N, N,
3811} };
3812
3813static const struct escape escape_dd = { {
3814 N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
3815}, {
3816 /* 0xC0 - 0xC7 */
3817 N, N, N, N, N, N, N, N,
3818 /* 0xC8 - 0xCF */
3819 N, N, N, N, N, N, N, N,
3820	/* 0xD0 - 0xD7 */
3821 N, N, N, N, N, N, N, N,
3822 /* 0xD8 - 0xDF */
3823 N, N, N, N, N, N, N, N,
3824 /* 0xE0 - 0xE7 */
3825 N, N, N, N, N, N, N, N,
3826 /* 0xE8 - 0xEF */
3827 N, N, N, N, N, N, N, N,
3828 /* 0xF0 - 0xF7 */
3829 N, N, N, N, N, N, N, N,
3830 /* 0xF8 - 0xFF */
3831 N, N, N, N, N, N, N, N,
3832} };
3833
3710static const struct opcode opcode_table[256] = { 3834static const struct opcode opcode_table[256] = {
3711 /* 0x00 - 0x07 */ 3835 /* 0x00 - 0x07 */
3712 I6ALU(Lock, em_add), 3836 F6ALU(Lock, em_add),
3713 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), 3837 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
3714 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), 3838 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
3715 /* 0x08 - 0x0F */ 3839 /* 0x08 - 0x0F */
3716 I6ALU(Lock | PageTable, em_or), 3840 F6ALU(Lock | PageTable, em_or),
3717 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), 3841 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
3718 N, 3842 N,
3719 /* 0x10 - 0x17 */ 3843 /* 0x10 - 0x17 */
3720 I6ALU(Lock, em_adc), 3844 F6ALU(Lock, em_adc),
3721 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), 3845 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
3722 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), 3846 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
3723 /* 0x18 - 0x1F */ 3847 /* 0x18 - 0x1F */
3724 I6ALU(Lock, em_sbb), 3848 F6ALU(Lock, em_sbb),
3725 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), 3849 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
3726 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), 3850 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
3727 /* 0x20 - 0x27 */ 3851 /* 0x20 - 0x27 */
3728 I6ALU(Lock | PageTable, em_and), N, N, 3852 F6ALU(Lock | PageTable, em_and), N, N,
3729 /* 0x28 - 0x2F */ 3853 /* 0x28 - 0x2F */
3730 I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), 3854 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
3731 /* 0x30 - 0x37 */ 3855 /* 0x30 - 0x37 */
3732 I6ALU(Lock, em_xor), N, N, 3856 F6ALU(Lock, em_xor), N, N,
3733 /* 0x38 - 0x3F */ 3857 /* 0x38 - 0x3F */
3734 I6ALU(0, em_cmp), N, N, 3858 F6ALU(NoWrite, em_cmp), N, N,
3735 /* 0x40 - 0x4F */ 3859 /* 0x40 - 0x4F */
3736 X16(D(DstReg)), 3860 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
3737 /* 0x50 - 0x57 */ 3861 /* 0x50 - 0x57 */
3738 X8(I(SrcReg | Stack, em_push)), 3862 X8(I(SrcReg | Stack, em_push)),
3739 /* 0x58 - 0x5F */ 3863 /* 0x58 - 0x5F */
@@ -3757,7 +3881,7 @@ static const struct opcode opcode_table[256] = {
3757 G(DstMem | SrcImm, group1), 3881 G(DstMem | SrcImm, group1),
3758 G(ByteOp | DstMem | SrcImm | No64, group1), 3882 G(ByteOp | DstMem | SrcImm | No64, group1),
3759 G(DstMem | SrcImmByte, group1), 3883 G(DstMem | SrcImmByte, group1),
3760 I2bv(DstMem | SrcReg | ModRM, em_test), 3884 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
3761 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), 3885 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
3762 /* 0x88 - 0x8F */ 3886 /* 0x88 - 0x8F */
3763 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), 3887 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
@@ -3777,18 +3901,18 @@ static const struct opcode opcode_table[256] = {
3777 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3901 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
3778 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), 3902 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
3779 I2bv(SrcSI | DstDI | Mov | String, em_mov), 3903 I2bv(SrcSI | DstDI | Mov | String, em_mov),
3780 I2bv(SrcSI | DstDI | String, em_cmp), 3904 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
3781 /* 0xA8 - 0xAF */ 3905 /* 0xA8 - 0xAF */
3782 I2bv(DstAcc | SrcImm, em_test), 3906 F2bv(DstAcc | SrcImm | NoWrite, em_test),
3783 I2bv(SrcAcc | DstDI | Mov | String, em_mov), 3907 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
3784 I2bv(SrcSI | DstAcc | Mov | String, em_mov), 3908 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
3785 I2bv(SrcAcc | DstDI | String, em_cmp), 3909 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
3786 /* 0xB0 - 0xB7 */ 3910 /* 0xB0 - 0xB7 */
3787 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), 3911 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
3788 /* 0xB8 - 0xBF */ 3912 /* 0xB8 - 0xBF */
3789 X8(I(DstReg | SrcImm | Mov, em_mov)), 3913 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
3790 /* 0xC0 - 0xC7 */ 3914 /* 0xC0 - 0xC7 */
3791 D2bv(DstMem | SrcImmByte | ModRM), 3915 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
3792 I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), 3916 I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
3793 I(ImplicitOps | Stack, em_ret), 3917 I(ImplicitOps | Stack, em_ret),
3794 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), 3918 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
@@ -3800,10 +3924,11 @@ static const struct opcode opcode_table[256] = {
3800 D(ImplicitOps), DI(SrcImmByte, intn), 3924 D(ImplicitOps), DI(SrcImmByte, intn),
3801 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), 3925 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
3802 /* 0xD0 - 0xD7 */ 3926 /* 0xD0 - 0xD7 */
3803 D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), 3927 G(Src2One | ByteOp, group2), G(Src2One, group2),
3804 N, N, N, N, 3928 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
3929 N, I(DstAcc | SrcImmByte | No64, em_aad), N, N,
3805 /* 0xD8 - 0xDF */ 3930 /* 0xD8 - 0xDF */
3806 N, N, N, N, N, N, N, N, 3931 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
3807 /* 0xE0 - 0xE7 */ 3932 /* 0xE0 - 0xE7 */
3808 X3(I(SrcImmByte, em_loop)), 3933 X3(I(SrcImmByte, em_loop)),
3809 I(SrcImmByte, em_jcxz), 3934 I(SrcImmByte, em_jcxz),
@@ -3870,28 +3995,29 @@ static const struct opcode twobyte_table[256] = {
3870 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), 3995 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
3871 /* 0xA0 - 0xA7 */ 3996 /* 0xA0 - 0xA7 */
3872 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), 3997 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
3873 II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), 3998 II(ImplicitOps, em_cpuid, cpuid),
3874 D(DstMem | SrcReg | Src2ImmByte | ModRM), 3999 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
3875 D(DstMem | SrcReg | Src2CL | ModRM), N, N, 4000 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4001 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
3876 /* 0xA8 - 0xAF */ 4002 /* 0xA8 - 0xAF */
3877 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), 4003 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
3878 DI(ImplicitOps, rsm), 4004 DI(ImplicitOps, rsm),
3879 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), 4005 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
3880 D(DstMem | SrcReg | Src2ImmByte | ModRM), 4006 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
3881 D(DstMem | SrcReg | Src2CL | ModRM), 4007 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
3882 D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), 4008 D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
3883 /* 0xB0 - 0xB7 */ 4009 /* 0xB0 - 0xB7 */
3884 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), 4010 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
3885 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), 4011 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
3886 I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), 4012 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
3887 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), 4013 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
3888 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), 4014 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
3889 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4015 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3890 /* 0xB8 - 0xBF */ 4016 /* 0xB8 - 0xBF */
3891 N, N, 4017 N, N,
3892 G(BitOp, group8), 4018 G(BitOp, group8),
3893 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), 4019 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
3894 I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), 4020 F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
3895 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4021 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3896 /* 0xC0 - 0xC7 */ 4022 /* 0xC0 - 0xC7 */
3897 D2bv(DstMem | SrcReg | ModRM | Lock), 4023 D2bv(DstMem | SrcReg | ModRM | Lock),
@@ -3950,6 +4076,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
3950 case 4: 4076 case 4:
3951 op->val = insn_fetch(s32, ctxt); 4077 op->val = insn_fetch(s32, ctxt);
3952 break; 4078 break;
4079 case 8:
4080 op->val = insn_fetch(s64, ctxt);
4081 break;
3953 } 4082 }
3954 if (!sign_extension) { 4083 if (!sign_extension) {
3955 switch (op->bytes) { 4084 switch (op->bytes) {
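Note: together with the new OpImm64 case below and the switch of opcodes 0xB8-0xBF from SrcImm to SrcImm64 in the opcode table, this case 8 lets decode_imm() fetch a full 8-byte immediate, which is what a 64-bit movabs needs; with REX.W set, ctxt->op_bytes is 8, so the immediate is no longer truncated to four bytes. A worked encoding example (illustrative, not taken from the patch):

	48 b8 88 77 66 55 44 33 22 11    movabs $0x1122334455667788, %rax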
@@ -4028,6 +4157,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4028 case OpImm: 4157 case OpImm:
4029 rc = decode_imm(ctxt, op, imm_size(ctxt), true); 4158 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4030 break; 4159 break;
4160 case OpImm64:
4161 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4162 break;
4031 case OpMem8: 4163 case OpMem8:
4032 ctxt->memop.bytes = 1; 4164 ctxt->memop.bytes = 1;
4033 goto mem_common; 4165 goto mem_common;
@@ -4222,6 +4354,12 @@ done_prefixes:
4222 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; 4354 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
4223 } 4355 }
4224 break; 4356 break;
4357 case Escape:
4358 if (ctxt->modrm > 0xbf)
4359 opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
4360 else
4361 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
4362 break;
4225 default: 4363 default:
4226 return EMULATION_FAILED; 4364 return EMULATION_FAILED;
4227 } 4365 }
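Note: the new Escape case wires the x87 escape opcodes (0xD8-0xDF) to the two-part escape tables defined earlier: a ModRM byte below 0xC0 is a memory form and selects one of eight entries by its reg field, while 0xC0-0xFF indexes the 64-entry register-form array directly. A minimal sketch of that selection, with made-up type and function names (the kernel's struct escape holds struct opcode entries rather than void pointers):

	#include <stdint.h>

	struct esc_tables {
		const void *op[8];	/* memory forms, indexed by ModRM.reg */
		const void *high[64];	/* register forms, ModRM 0xc0 .. 0xff */
	};

	static const void *escape_lookup(const struct esc_tables *esc, uint8_t modrm)
	{
		if (modrm > 0xbf)
			return esc->high[modrm - 0xc0];

		return esc->op[(modrm >> 3) & 7];
	}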
@@ -4354,6 +4492,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
4354 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); 4492 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
4355} 4493}
4356 4494
4495static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
4496{
4497 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
4498 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
4499 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
4500 : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
4501 : "c"(ctxt->src2.val), [fastop]"S"(fop));
4502 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
4503 return X86EMUL_CONTINUE;
4504}
4357 4505
4358int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) 4506int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4359{ 4507{
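Note: fastop() is the shared trampoline behind the new F()/F2bv()/F6ALU() table entries: it loads the guest's arithmetic flags into the host EFLAGS, calls a size-specific stub, and folds the resulting flags back into ctxt->eflags. The stubs for the 1-, 2-, 4- and 8-byte variants are laid out FASTOP_SIZE bytes apart, so the right entry point is found by pointer arithmetic instead of a switch. The offset computation in isolation, as a sketch (the stride of 8 is an assumption made for this illustration only):

	#include <stddef.h>

	#define FASTOP_STRIDE 8		/* assumed stub spacing for this sketch */

	/* bytes is 1, 2, 4 or 8; the stub index is log2(bytes), as __ffs() yields. */
	static size_t fastop_offset(unsigned int bytes)
	{
		size_t idx = 0;

		while (bytes > 1) {
			bytes >>= 1;
			idx++;
		}
		return idx * FASTOP_STRIDE;	/* 1 -> 0, 2 -> 8, 4 -> 16, 8 -> 24 */
	}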
@@ -4483,6 +4631,13 @@ special_insn:
4483 } 4631 }
4484 4632
4485 if (ctxt->execute) { 4633 if (ctxt->execute) {
4634 if (ctxt->d & Fastop) {
4635 void (*fop)(struct fastop *) = (void *)ctxt->execute;
4636 rc = fastop(ctxt, fop);
4637 if (rc != X86EMUL_CONTINUE)
4638 goto done;
4639 goto writeback;
4640 }
4486 rc = ctxt->execute(ctxt); 4641 rc = ctxt->execute(ctxt);
4487 if (rc != X86EMUL_CONTINUE) 4642 if (rc != X86EMUL_CONTINUE)
4488 goto done; 4643 goto done;
@@ -4493,12 +4648,6 @@ special_insn:
4493 goto twobyte_insn; 4648 goto twobyte_insn;
4494 4649
4495 switch (ctxt->b) { 4650 switch (ctxt->b) {
4496 case 0x40 ... 0x47: /* inc r16/r32 */
4497 emulate_1op(ctxt, "inc");
4498 break;
4499 case 0x48 ... 0x4f: /* dec r16/r32 */
4500 emulate_1op(ctxt, "dec");
4501 break;
4502 case 0x63: /* movsxd */ 4651 case 0x63: /* movsxd */
4503 if (ctxt->mode != X86EMUL_MODE_PROT64) 4652 if (ctxt->mode != X86EMUL_MODE_PROT64)
4504 goto cannot_emulate; 4653 goto cannot_emulate;
@@ -4523,9 +4672,6 @@ special_insn:
4523 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; 4672 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
4524 } 4673 }
4525 break; 4674 break;
4526 case 0xc0 ... 0xc1:
4527 rc = em_grp2(ctxt);
4528 break;
4529 case 0xcc: /* int3 */ 4675 case 0xcc: /* int3 */
4530 rc = emulate_int(ctxt, 3); 4676 rc = emulate_int(ctxt, 3);
4531 break; 4677 break;
@@ -4536,13 +4682,6 @@ special_insn:
4536 if (ctxt->eflags & EFLG_OF) 4682 if (ctxt->eflags & EFLG_OF)
4537 rc = emulate_int(ctxt, 4); 4683 rc = emulate_int(ctxt, 4);
4538 break; 4684 break;
4539 case 0xd0 ... 0xd1: /* Grp2 */
4540 rc = em_grp2(ctxt);
4541 break;
4542 case 0xd2 ... 0xd3: /* Grp2 */
4543 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
4544 rc = em_grp2(ctxt);
4545 break;
4546 case 0xe9: /* jmp rel */ 4685 case 0xe9: /* jmp rel */
4547 case 0xeb: /* jmp rel short */ 4686 case 0xeb: /* jmp rel short */
4548 jmp_rel(ctxt, ctxt->src.val); 4687 jmp_rel(ctxt, ctxt->src.val);
@@ -4661,14 +4800,6 @@ twobyte_insn:
4661 case 0x90 ... 0x9f: /* setcc r/m8 */ 4800 case 0x90 ... 0x9f: /* setcc r/m8 */
4662 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); 4801 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
4663 break; 4802 break;
4664 case 0xa4: /* shld imm8, r, r/m */
4665 case 0xa5: /* shld cl, r, r/m */
4666 emulate_2op_cl(ctxt, "shld");
4667 break;
4668 case 0xac: /* shrd imm8, r, r/m */
4669 case 0xad: /* shrd cl, r, r/m */
4670 emulate_2op_cl(ctxt, "shrd");
4671 break;
4672 case 0xae: /* clflush */ 4803 case 0xae: /* clflush */
4673 break; 4804 break;
4674 case 0xb6 ... 0xb7: /* movzx */ 4805 case 0xb6 ... 0xb7: /* movzx */
@@ -4682,7 +4813,7 @@ twobyte_insn:
4682 (s16) ctxt->src.val; 4813 (s16) ctxt->src.val;
4683 break; 4814 break;
4684 case 0xc0 ... 0xc1: /* xadd */ 4815 case 0xc0 ... 0xc1: /* xadd */
4685 emulate_2op_SrcV(ctxt, "add"); 4816 fastop(ctxt, em_add);
4686 /* Write back the register source. */ 4817 /* Write back the register source. */
4687 ctxt->src.val = ctxt->dst.orig_val; 4818 ctxt->src.val = ctxt->dst.orig_val;
4688 write_register_operand(&ctxt->src); 4819 write_register_operand(&ctxt->src);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 11300d2fa714..c1d30b2fc9bb 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm)
122 */ 122 */
123 remaining = hrtimer_get_remaining(&ps->timer); 123 remaining = hrtimer_get_remaining(&ps->timer);
124 elapsed = ps->period - ktime_to_ns(remaining); 124 elapsed = ps->period - ktime_to_ns(remaining);
125 elapsed = mod_64(elapsed, ps->period);
126 125
127 return elapsed; 126 return elapsed;
128} 127}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 848206df0967..cc31f7c06d3d 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm)
241 int irq, irq2, intno; 241 int irq, irq2, intno;
242 struct kvm_pic *s = pic_irqchip(kvm); 242 struct kvm_pic *s = pic_irqchip(kvm);
243 243
244 s->output = 0;
245
244 pic_lock(s); 246 pic_lock(s);
245 irq = pic_get_irq(&s->pics[0]); 247 irq = pic_get_irq(&s->pics[0]);
246 if (irq >= 0) { 248 if (irq >= 0) {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 7e06ba1618bd..484bc874688b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,49 +38,81 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
38EXPORT_SYMBOL(kvm_cpu_has_pending_timer); 38EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
39 39
40/* 40/*
41 * check if there is a pending interrupt from a
42 * non-APIC source, without intack.
43 */
44static int kvm_cpu_has_extint(struct kvm_vcpu *v)
45{
46 if (kvm_apic_accept_pic_intr(v))
47 return pic_irqchip(v->kvm)->output; /* PIC */
48 else
49 return 0;
50}
51
52/*
53 * check if there is an injectable interrupt:
54 * when virtual interrupt delivery is enabled,
55 * an interrupt from the APIC will be handled by hardware,
56 * so we don't need to check it here.
57 */
58int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
59{
60 if (!irqchip_in_kernel(v->kvm))
61 return v->arch.interrupt.pending;
62
63 if (kvm_cpu_has_extint(v))
64 return 1;
65
66 if (kvm_apic_vid_enabled(v->kvm))
67 return 0;
68
69 return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
70}
71
72/*
41 * check if there is pending interrupt without 73 * check if there is pending interrupt without
42 * intack. 74 * intack.
43 */ 75 */
44int kvm_cpu_has_interrupt(struct kvm_vcpu *v) 76int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
45{ 77{
46 struct kvm_pic *s;
47
48 if (!irqchip_in_kernel(v->kvm)) 78 if (!irqchip_in_kernel(v->kvm))
49 return v->arch.interrupt.pending; 79 return v->arch.interrupt.pending;
50 80
51 if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ 81 if (kvm_cpu_has_extint(v))
52 if (kvm_apic_accept_pic_intr(v)) { 82 return 1;
53 s = pic_irqchip(v->kvm); /* PIC */ 83
54 return s->output; 84 return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
55 } else
56 return 0;
57 }
58 return 1;
59} 85}
60EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); 86EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
61 87
62/* 88/*
89 * Read the pending interrupt (from a non-APIC source)
90 * vector and intack.
91 */
92static int kvm_cpu_get_extint(struct kvm_vcpu *v)
93{
94 if (kvm_cpu_has_extint(v))
95 return kvm_pic_read_irq(v->kvm); /* PIC */
96 return -1;
97}
98
99/*
63 * Read pending interrupt vector and intack. 100 * Read pending interrupt vector and intack.
64 */ 101 */
65int kvm_cpu_get_interrupt(struct kvm_vcpu *v) 102int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
66{ 103{
67 struct kvm_pic *s;
68 int vector; 104 int vector;
69 105
70 if (!irqchip_in_kernel(v->kvm)) 106 if (!irqchip_in_kernel(v->kvm))
71 return v->arch.interrupt.nr; 107 return v->arch.interrupt.nr;
72 108
73 vector = kvm_get_apic_interrupt(v); /* APIC */ 109 vector = kvm_cpu_get_extint(v);
74 if (vector == -1) { 110
75 if (kvm_apic_accept_pic_intr(v)) { 111 if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
76 s = pic_irqchip(v->kvm); 112 return vector; /* PIC */
77 s->output = 0; /* PIC */ 113
78 vector = kvm_pic_read_irq(v->kvm); 114 return kvm_get_apic_interrupt(v); /* APIC */
79 }
80 }
81 return vector;
82} 115}
83EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
84 116
85void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 117void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
86{ 118{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9392f527f107..02b51dd4e4ad 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic)
140 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ 140 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
141 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) 141 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
142 142
143static inline int apic_x2apic_mode(struct kvm_lapic *apic)
144{
145 return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
146}
147
148static inline int kvm_apic_id(struct kvm_lapic *apic) 143static inline int kvm_apic_id(struct kvm_lapic *apic)
149{ 144{
150 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 145 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
151} 146}
152 147
153static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) 148void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
149 struct kvm_lapic_irq *irq,
150 u64 *eoi_exit_bitmap)
154{ 151{
155 u16 cid; 152 struct kvm_lapic **dst;
156 ldr >>= 32 - map->ldr_bits; 153 struct kvm_apic_map *map;
157 cid = (ldr >> map->cid_shift) & map->cid_mask; 154 unsigned long bitmap = 1;
155 int i;
158 156
159 BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); 157 rcu_read_lock();
158 map = rcu_dereference(vcpu->kvm->arch.apic_map);
160 159
161 return cid; 160 if (unlikely(!map)) {
162} 161 __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
162 goto out;
163 }
163 164
164static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) 165 if (irq->dest_mode == 0) { /* physical mode */
165{ 166 if (irq->delivery_mode == APIC_DM_LOWEST ||
166 ldr >>= (32 - map->ldr_bits); 167 irq->dest_id == 0xff) {
167 return ldr & map->lid_mask; 168 __set_bit(irq->vector,
169 (unsigned long *)eoi_exit_bitmap);
170 goto out;
171 }
172 dst = &map->phys_map[irq->dest_id & 0xff];
173 } else {
174 u32 mda = irq->dest_id << (32 - map->ldr_bits);
175
176 dst = map->logical_map[apic_cluster_id(map, mda)];
177
178 bitmap = apic_logical_id(map, mda);
179 }
180
181 for_each_set_bit(i, &bitmap, 16) {
182 if (!dst[i])
183 continue;
184 if (dst[i]->vcpu == vcpu) {
185 __set_bit(irq->vector,
186 (unsigned long *)eoi_exit_bitmap);
187 break;
188 }
189 }
190
191out:
192 rcu_read_unlock();
168} 193}
169 194
170static void recalculate_apic_map(struct kvm *kvm) 195static void recalculate_apic_map(struct kvm *kvm)
@@ -230,6 +255,8 @@ out:
230 255
231 if (old) 256 if (old)
232 kfree_rcu(old, rcu); 257 kfree_rcu(old, rcu);
258
259 kvm_ioapic_make_eoibitmap_request(kvm);
233} 260}
234 261
235static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) 262static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
345{ 372{
346 int result; 373 int result;
347 374
375 /*
376 * Note that irr_pending is just a hint. It will always be
377 * true with virtual interrupt delivery enabled.
378 */
348 if (!apic->irr_pending) 379 if (!apic->irr_pending)
349 return -1; 380 return -1;
350 381
@@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
461static inline int apic_find_highest_isr(struct kvm_lapic *apic) 492static inline int apic_find_highest_isr(struct kvm_lapic *apic)
462{ 493{
463 int result; 494 int result;
495
496 /* Note that isr_count is always 1 with vid enabled */
464 if (!apic->isr_count) 497 if (!apic->isr_count)
465 return -1; 498 return -1;
466 if (likely(apic->highest_isr_cache != -1)) 499 if (likely(apic->highest_isr_cache != -1))
@@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
740 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 773 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
741} 774}
742 775
776static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
777{
778 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
779 kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
780 int trigger_mode;
781 if (apic_test_vector(vector, apic->regs + APIC_TMR))
782 trigger_mode = IOAPIC_LEVEL_TRIG;
783 else
784 trigger_mode = IOAPIC_EDGE_TRIG;
785 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
786 }
787}
788
743static int apic_set_eoi(struct kvm_lapic *apic) 789static int apic_set_eoi(struct kvm_lapic *apic)
744{ 790{
745 int vector = apic_find_highest_isr(apic); 791 int vector = apic_find_highest_isr(apic);
@@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)
756 apic_clear_isr(vector, apic); 802 apic_clear_isr(vector, apic);
757 apic_update_ppr(apic); 803 apic_update_ppr(apic);
758 804
759 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && 805 kvm_ioapic_send_eoi(apic, vector);
760 kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
761 int trigger_mode;
762 if (apic_test_vector(vector, apic->regs + APIC_TMR))
763 trigger_mode = IOAPIC_LEVEL_TRIG;
764 else
765 trigger_mode = IOAPIC_EDGE_TRIG;
766 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
767 }
768 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 806 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
769 return vector; 807 return vector;
770} 808}
771 809
810/*
811 * This interface assumes a trap-like exit, which has already finished the
812 * desired side effects, including the vISR and vPPR updates.
813 */
814void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
815{
816 struct kvm_lapic *apic = vcpu->arch.apic;
817
818 trace_kvm_eoi(apic, vector);
819
820 kvm_ioapic_send_eoi(apic, vector);
821 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
822}
823EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
824
772static void apic_send_ipi(struct kvm_lapic *apic) 825static void apic_send_ipi(struct kvm_lapic *apic)
773{ 826{
774 u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); 827 u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
@@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1212} 1265}
1213EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 1266EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
1214 1267
1268/* emulate APIC access in a trap manner */
1269void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
1270{
1271 u32 val = 0;
1272
1273 /* hw has done the conditional check and inst decode */
1274 offset &= 0xff0;
1275
1276 apic_reg_read(vcpu->arch.apic, offset, 4, &val);
1277
1278 /* TODO: optimize to just emulate side effect w/o one more write */
1279 apic_reg_write(vcpu->arch.apic, offset, val);
1280}
1281EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
1282
1215void kvm_free_lapic(struct kvm_vcpu *vcpu) 1283void kvm_free_lapic(struct kvm_vcpu *vcpu)
1216{ 1284{
1217 struct kvm_lapic *apic = vcpu->arch.apic; 1285 struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
1288 1356
1289void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 1357void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1290{ 1358{
1359 u64 old_value = vcpu->arch.apic_base;
1291 struct kvm_lapic *apic = vcpu->arch.apic; 1360 struct kvm_lapic *apic = vcpu->arch.apic;
1292 1361
1293 if (!apic) { 1362 if (!apic) {
@@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1309 value &= ~MSR_IA32_APICBASE_BSP; 1378 value &= ~MSR_IA32_APICBASE_BSP;
1310 1379
1311 vcpu->arch.apic_base = value; 1380 vcpu->arch.apic_base = value;
1312 if (apic_x2apic_mode(apic)) { 1381 if ((old_value ^ value) & X2APIC_ENABLE) {
1313 u32 id = kvm_apic_id(apic); 1382 if (value & X2APIC_ENABLE) {
1314 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); 1383 u32 id = kvm_apic_id(apic);
1315 kvm_apic_set_ldr(apic, ldr); 1384 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
1385 kvm_apic_set_ldr(apic, ldr);
1386 kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
1387 } else
1388 kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
1316 } 1389 }
1390
1317 apic->base_address = apic->vcpu->arch.apic_base & 1391 apic->base_address = apic->vcpu->arch.apic_base &
1318 MSR_IA32_APICBASE_BASE; 1392 MSR_IA32_APICBASE_BASE;
1319 1393
@@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
1359 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 1433 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
1360 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 1434 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
1361 } 1435 }
1362 apic->irr_pending = false; 1436 apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
1363 apic->isr_count = 0; 1437 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
1364 apic->highest_isr_cache = -1; 1438 apic->highest_isr_cache = -1;
1365 update_divide_count(apic); 1439 update_divide_count(apic);
1366 atomic_set(&apic->lapic_timer.pending, 0); 1440 atomic_set(&apic->lapic_timer.pending, 0);
@@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1575 update_divide_count(apic); 1649 update_divide_count(apic);
1576 start_apic_timer(apic); 1650 start_apic_timer(apic);
1577 apic->irr_pending = true; 1651 apic->irr_pending = true;
1578 apic->isr_count = count_vectors(apic->regs + APIC_ISR); 1652 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
1653 1 : count_vectors(apic->regs + APIC_ISR);
1579 apic->highest_isr_cache = -1; 1654 apic->highest_isr_cache = -1;
1655 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
1580 kvm_make_request(KVM_REQ_EVENT, vcpu); 1656 kvm_make_request(KVM_REQ_EVENT, vcpu);
1581} 1657}
1582 1658
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e5ebf9f3571f..1676d34ddb4e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
64u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); 64u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
65void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); 65void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
66 66
67void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
68void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
69
67void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); 70void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
68void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); 71void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
69void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); 72void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
@@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
124 return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); 127 return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
125} 128}
126 129
130static inline int apic_x2apic_mode(struct kvm_lapic *apic)
131{
132 return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
133}
134
135static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
136{
137 return kvm_x86_ops->vm_has_apicv(kvm);
138}
139
140static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
141{
142 u16 cid;
143 ldr >>= 32 - map->ldr_bits;
144 cid = (ldr >> map->cid_shift) & map->cid_mask;
145
146 BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
147
148 return cid;
149}
150
151static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
152{
153 ldr >>= (32 - map->ldr_bits);
154 return ldr & map->lid_mask;
155}
156
157void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
158 struct kvm_lapic_irq *irq,
159 u64 *eoi_bitmap);
160
127#endif 161#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01d7c2ad05f5..956ca358108a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
448 448
449static bool spte_is_locklessly_modifiable(u64 spte) 449static bool spte_is_locklessly_modifiable(u64 spte)
450{ 450{
451 return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); 451 return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
452 (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
452} 453}
453 454
454static bool spte_has_volatile_bits(u64 spte) 455static bool spte_has_volatile_bits(u64 spte)
@@ -831,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
831 if (host_level == PT_PAGE_TABLE_LEVEL) 832 if (host_level == PT_PAGE_TABLE_LEVEL)
832 return host_level; 833 return host_level;
833 834
834 max_level = kvm_x86_ops->get_lpage_level() < host_level ? 835 max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
835 kvm_x86_ops->get_lpage_level() : host_level;
836 836
837 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) 837 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
838 if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) 838 if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
@@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
1142} 1142}
1143 1143
1144static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, 1144static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
1145 int level, bool pt_protect) 1145 bool pt_protect)
1146{ 1146{
1147 u64 *sptep; 1147 u64 *sptep;
1148 struct rmap_iterator iter; 1148 struct rmap_iterator iter;
@@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
1180 while (mask) { 1180 while (mask) {
1181 rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), 1181 rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
1182 PT_PAGE_TABLE_LEVEL, slot); 1182 PT_PAGE_TABLE_LEVEL, slot);
1183 __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); 1183 __rmap_write_protect(kvm, rmapp, false);
1184 1184
1185 /* clear the first set bit */ 1185 /* clear the first set bit */
1186 mask &= mask - 1; 1186 mask &= mask - 1;
@@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
1199 for (i = PT_PAGE_TABLE_LEVEL; 1199 for (i = PT_PAGE_TABLE_LEVEL;
1200 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 1200 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
1201 rmapp = __gfn_to_rmap(gfn, i, slot); 1201 rmapp = __gfn_to_rmap(gfn, i, slot);
1202 write_protected |= __rmap_write_protect(kvm, rmapp, i, true); 1202 write_protected |= __rmap_write_protect(kvm, rmapp, true);
1203 } 1203 }
1204 1204
1205 return write_protected; 1205 return write_protected;
@@ -1460,28 +1460,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
1460 percpu_counter_add(&kvm_total_used_mmu_pages, nr); 1460 percpu_counter_add(&kvm_total_used_mmu_pages, nr);
1461} 1461}
1462 1462
1463/* 1463static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
1464 * Remove the sp from shadow page cache, after call it,
1465 * we can not find this sp from the cache, and the shadow
1466 * page table is still valid.
1467 * It should be under the protection of mmu lock.
1468 */
1469static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp)
1470{ 1464{
1471 ASSERT(is_empty_shadow_page(sp->spt)); 1465 ASSERT(is_empty_shadow_page(sp->spt));
1472 hlist_del(&sp->hash_link); 1466 hlist_del(&sp->hash_link);
1473 if (!sp->role.direct)
1474 free_page((unsigned long)sp->gfns);
1475}
1476
1477/*
1478 * Free the shadow page table and the sp, we can do it
1479 * out of the protection of mmu lock.
1480 */
1481static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
1482{
1483 list_del(&sp->link); 1467 list_del(&sp->link);
1484 free_page((unsigned long)sp->spt); 1468 free_page((unsigned long)sp->spt);
1469 if (!sp->role.direct)
1470 free_page((unsigned long)sp->gfns);
1485 kmem_cache_free(mmu_page_header_cache, sp); 1471 kmem_cache_free(mmu_page_header_cache, sp);
1486} 1472}
1487 1473
@@ -1522,7 +1508,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
1522 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); 1508 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
1523 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 1509 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
1524 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 1510 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
1525 bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
1526 sp->parent_ptes = 0; 1511 sp->parent_ptes = 0;
1527 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1512 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
1528 kvm_mod_used_mmu_pages(vcpu->kvm, +1); 1513 kvm_mod_used_mmu_pages(vcpu->kvm, +1);
@@ -1659,13 +1644,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
1659static void kvm_mmu_commit_zap_page(struct kvm *kvm, 1644static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1660 struct list_head *invalid_list); 1645 struct list_head *invalid_list);
1661 1646
1662#define for_each_gfn_sp(kvm, sp, gfn, pos) \ 1647#define for_each_gfn_sp(kvm, sp, gfn) \
1663 hlist_for_each_entry(sp, pos, \ 1648 hlist_for_each_entry(sp, \
1664 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ 1649 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
1665 if ((sp)->gfn != (gfn)) {} else 1650 if ((sp)->gfn != (gfn)) {} else
1666 1651
1667#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ 1652#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \
1668 hlist_for_each_entry(sp, pos, \ 1653 hlist_for_each_entry(sp, \
1669 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ 1654 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
1670 if ((sp)->gfn != (gfn) || (sp)->role.direct || \ 1655 if ((sp)->gfn != (gfn) || (sp)->role.direct || \
1671 (sp)->role.invalid) {} else 1656 (sp)->role.invalid) {} else
@@ -1721,11 +1706,10 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1721static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) 1706static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
1722{ 1707{
1723 struct kvm_mmu_page *s; 1708 struct kvm_mmu_page *s;
1724 struct hlist_node *node;
1725 LIST_HEAD(invalid_list); 1709 LIST_HEAD(invalid_list);
1726 bool flush = false; 1710 bool flush = false;
1727 1711
1728 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 1712 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
1729 if (!s->unsync) 1713 if (!s->unsync)
1730 continue; 1714 continue;
1731 1715
@@ -1863,7 +1847,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1863 union kvm_mmu_page_role role; 1847 union kvm_mmu_page_role role;
1864 unsigned quadrant; 1848 unsigned quadrant;
1865 struct kvm_mmu_page *sp; 1849 struct kvm_mmu_page *sp;
1866 struct hlist_node *node;
1867 bool need_sync = false; 1850 bool need_sync = false;
1868 1851
1869 role = vcpu->arch.mmu.base_role; 1852 role = vcpu->arch.mmu.base_role;
@@ -1878,7 +1861,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1878 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; 1861 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
1879 role.quadrant = quadrant; 1862 role.quadrant = quadrant;
1880 } 1863 }
1881 for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { 1864 for_each_gfn_sp(vcpu->kvm, sp, gfn) {
1882 if (!need_sync && sp->unsync) 1865 if (!need_sync && sp->unsync)
1883 need_sync = true; 1866 need_sync = true;
1884 1867
@@ -1973,9 +1956,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
1973{ 1956{
1974 u64 spte; 1957 u64 spte;
1975 1958
1976 spte = __pa(sp->spt) 1959 spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
1977 | PT_PRESENT_MASK | PT_ACCESSED_MASK 1960 shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
1978 | PT_WRITABLE_MASK | PT_USER_MASK; 1961
1979 mmu_spte_set(sptep, spte); 1962 mmu_spte_set(sptep, spte);
1980} 1963}
1981 1964
@@ -2126,7 +2109,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
2126 do { 2109 do {
2127 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); 2110 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
2128 WARN_ON(!sp->role.invalid || sp->root_count); 2111 WARN_ON(!sp->role.invalid || sp->root_count);
2129 kvm_mmu_isolate_page(sp);
2130 kvm_mmu_free_page(sp); 2112 kvm_mmu_free_page(sp);
2131 } while (!list_empty(invalid_list)); 2113 } while (!list_empty(invalid_list));
2132} 2114}
@@ -2144,6 +2126,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
2144 * change the value 2126 * change the value
2145 */ 2127 */
2146 2128
2129 spin_lock(&kvm->mmu_lock);
2130
2147 if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { 2131 if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
2148 while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && 2132 while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
2149 !list_empty(&kvm->arch.active_mmu_pages)) { 2133 !list_empty(&kvm->arch.active_mmu_pages)) {
@@ -2158,19 +2142,20 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
2158 } 2142 }
2159 2143
2160 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; 2144 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
2145
2146 spin_unlock(&kvm->mmu_lock);
2161} 2147}
2162 2148
2163int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) 2149int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2164{ 2150{
2165 struct kvm_mmu_page *sp; 2151 struct kvm_mmu_page *sp;
2166 struct hlist_node *node;
2167 LIST_HEAD(invalid_list); 2152 LIST_HEAD(invalid_list);
2168 int r; 2153 int r;
2169 2154
2170 pgprintk("%s: looking for gfn %llx\n", __func__, gfn); 2155 pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
2171 r = 0; 2156 r = 0;
2172 spin_lock(&kvm->mmu_lock); 2157 spin_lock(&kvm->mmu_lock);
2173 for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { 2158 for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
2174 pgprintk("%s: gfn %llx role %x\n", __func__, gfn, 2159 pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
2175 sp->role.word); 2160 sp->role.word);
2176 r = 1; 2161 r = 1;
@@ -2183,14 +2168,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2183} 2168}
2184EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); 2169EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
2185 2170
2186static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
2187{
2188 int slot = memslot_id(kvm, gfn);
2189 struct kvm_mmu_page *sp = page_header(__pa(pte));
2190
2191 __set_bit(slot, sp->slot_bitmap);
2192}
2193
2194/* 2171/*
2195 * The function is based on mtrr_type_lookup() in 2172 * The function is based on mtrr_type_lookup() in
2196 * arch/x86/kernel/cpu/mtrr/generic.c 2173 * arch/x86/kernel/cpu/mtrr/generic.c
@@ -2308,9 +2285,8 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
2308static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) 2285static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
2309{ 2286{
2310 struct kvm_mmu_page *s; 2287 struct kvm_mmu_page *s;
2311 struct hlist_node *node;
2312 2288
2313 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 2289 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
2314 if (s->unsync) 2290 if (s->unsync)
2315 continue; 2291 continue;
2316 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); 2292 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
@@ -2322,19 +2298,17 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
2322 bool can_unsync) 2298 bool can_unsync)
2323{ 2299{
2324 struct kvm_mmu_page *s; 2300 struct kvm_mmu_page *s;
2325 struct hlist_node *node;
2326 bool need_unsync = false; 2301 bool need_unsync = false;
2327 2302
2328 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 2303 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
2329 if (!can_unsync) 2304 if (!can_unsync)
2330 return 1; 2305 return 1;
2331 2306
2332 if (s->role.level != PT_PAGE_TABLE_LEVEL) 2307 if (s->role.level != PT_PAGE_TABLE_LEVEL)
2333 return 1; 2308 return 1;
2334 2309
2335 if (!need_unsync && !s->unsync) { 2310 if (!s->unsync)
2336 need_unsync = true; 2311 need_unsync = true;
2337 }
2338 } 2312 }
2339 if (need_unsync) 2313 if (need_unsync)
2340 kvm_unsync_pages(vcpu, gfn); 2314 kvm_unsync_pages(vcpu, gfn);
@@ -2342,8 +2316,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
2342} 2316}
2343 2317
2344static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, 2318static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2345 unsigned pte_access, int user_fault, 2319 unsigned pte_access, int level,
2346 int write_fault, int level,
2347 gfn_t gfn, pfn_t pfn, bool speculative, 2320 gfn_t gfn, pfn_t pfn, bool speculative,
2348 bool can_unsync, bool host_writable) 2321 bool can_unsync, bool host_writable)
2349{ 2322{
@@ -2378,20 +2351,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2378 2351
2379 spte |= (u64)pfn << PAGE_SHIFT; 2352 spte |= (u64)pfn << PAGE_SHIFT;
2380 2353
2381 if ((pte_access & ACC_WRITE_MASK) 2354 if (pte_access & ACC_WRITE_MASK) {
2382 || (!vcpu->arch.mmu.direct_map && write_fault
2383 && !is_write_protection(vcpu) && !user_fault)) {
2384 2355
2385 /* 2356 /*
2386 * There are two cases: 2357 * Other vcpu creates new sp in the window between
2387 * - the one is other vcpu creates new sp in the window 2358 * mapping_level() and acquiring mmu-lock. We can
2388 * between mapping_level() and acquiring mmu-lock. 2359 * allow guest to retry the access, the mapping can
2389 * - the another case is the new sp is created by itself 2360 * be fixed if guest refault.
2390 * (page-fault path) when guest uses the target gfn as
2391 * its page table.
2392 * Both of these cases can be fixed by allowing guest to
2393 * retry the access, it will refault, then we can establish
2394 * the mapping by using small page.
2395 */ 2361 */
2396 if (level > PT_PAGE_TABLE_LEVEL && 2362 if (level > PT_PAGE_TABLE_LEVEL &&
2397 has_wrprotected_page(vcpu->kvm, gfn, level)) 2363 has_wrprotected_page(vcpu->kvm, gfn, level))
@@ -2399,19 +2365,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2399 2365
2400 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; 2366 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
2401 2367
2402 if (!vcpu->arch.mmu.direct_map
2403 && !(pte_access & ACC_WRITE_MASK)) {
2404 spte &= ~PT_USER_MASK;
2405 /*
2406 * If we converted a user page to a kernel page,
2407 * so that the kernel can write to it when cr0.wp=0,
2408 * then we should prevent the kernel from executing it
2409 * if SMEP is enabled.
2410 */
2411 if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
2412 spte |= PT64_NX_MASK;
2413 }
2414
2415 /* 2368 /*
2416 * Optimization: for pte sync, if spte was writable the hash 2369 * Optimization: for pte sync, if spte was writable the hash
2417 * lookup is unnecessary (and expensive). Write protection 2370 * lookup is unnecessary (and expensive). Write protection
@@ -2441,19 +2394,15 @@ done:
2441} 2394}
2442 2395
2443static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, 2396static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2444 unsigned pt_access, unsigned pte_access, 2397 unsigned pte_access, int write_fault, int *emulate,
2445 int user_fault, int write_fault, 2398 int level, gfn_t gfn, pfn_t pfn, bool speculative,
2446 int *emulate, int level, gfn_t gfn,
2447 pfn_t pfn, bool speculative,
2448 bool host_writable) 2399 bool host_writable)
2449{ 2400{
2450 int was_rmapped = 0; 2401 int was_rmapped = 0;
2451 int rmap_count; 2402 int rmap_count;
2452 2403
2453 pgprintk("%s: spte %llx access %x write_fault %d" 2404 pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
2454 " user_fault %d gfn %llx\n", 2405 *sptep, write_fault, gfn);
2455 __func__, *sptep, pt_access,
2456 write_fault, user_fault, gfn);
2457 2406
2458 if (is_rmap_spte(*sptep)) { 2407 if (is_rmap_spte(*sptep)) {
2459 /* 2408 /*
@@ -2477,9 +2426,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2477 was_rmapped = 1; 2426 was_rmapped = 1;
2478 } 2427 }
2479 2428
2480 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, 2429 if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
2481 level, gfn, pfn, speculative, true, 2430 true, host_writable)) {
2482 host_writable)) {
2483 if (write_fault) 2431 if (write_fault)
2484 *emulate = 1; 2432 *emulate = 1;
2485 kvm_mmu_flush_tlb(vcpu); 2433 kvm_mmu_flush_tlb(vcpu);
@@ -2497,7 +2445,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2497 ++vcpu->kvm->stat.lpages; 2445 ++vcpu->kvm->stat.lpages;
2498 2446
2499 if (is_shadow_present_pte(*sptep)) { 2447 if (is_shadow_present_pte(*sptep)) {
2500 page_header_update_slot(vcpu->kvm, sptep, gfn);
2501 if (!was_rmapped) { 2448 if (!was_rmapped) {
2502 rmap_count = rmap_add(vcpu, sptep, gfn); 2449 rmap_count = rmap_add(vcpu, sptep, gfn);
2503 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 2450 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
@@ -2571,10 +2518,9 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
2571 return -1; 2518 return -1;
2572 2519
2573 for (i = 0; i < ret; i++, gfn++, start++) 2520 for (i = 0; i < ret; i++, gfn++, start++)
2574 mmu_set_spte(vcpu, start, ACC_ALL, 2521 mmu_set_spte(vcpu, start, access, 0, NULL,
2575 access, 0, 0, NULL, 2522 sp->role.level, gfn, page_to_pfn(pages[i]),
2576 sp->role.level, gfn, 2523 true, true);
2577 page_to_pfn(pages[i]), true, true);
2578 2524
2579 return 0; 2525 return 0;
2580} 2526}
@@ -2633,11 +2579,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2633 2579
2634 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2580 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
2635 if (iterator.level == level) { 2581 if (iterator.level == level) {
2636 unsigned pte_access = ACC_ALL; 2582 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
2637 2583 write, &emulate, level, gfn, pfn,
2638 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, 2584 prefault, map_writable);
2639 0, write, &emulate,
2640 level, gfn, pfn, prefault, map_writable);
2641 direct_pte_prefetch(vcpu, iterator.sptep); 2585 direct_pte_prefetch(vcpu, iterator.sptep);
2642 ++vcpu->stat.pf_fixed; 2586 ++vcpu->stat.pf_fixed;
2643 break; 2587 break;
@@ -2652,11 +2596,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2652 iterator.level - 1, 2596 iterator.level - 1,
2653 1, ACC_ALL, iterator.sptep); 2597 1, ACC_ALL, iterator.sptep);
2654 2598
2655 mmu_spte_set(iterator.sptep, 2599 link_shadow_page(iterator.sptep, sp);
2656 __pa(sp->spt)
2657 | PT_PRESENT_MASK | PT_WRITABLE_MASK
2658 | shadow_user_mask | shadow_x_mask
2659 | shadow_accessed_mask);
2660 } 2600 }
2661 } 2601 }
2662 return emulate; 2602 return emulate;
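
The hunk above replaces the open-coded mmu_spte_set() with the new link_shadow_page() helper. As a rough, standalone sketch of what such a link entry amounts to (the shadow_*_mask values below are placeholders chosen for legacy shadow paging, not the kernel's actual globals), a non-leaf spte is just the child table's physical address ORed with the always-set permission bits:

	#include <stdint.h>
	#include <stdio.h>

	/* Architectural x86 page-table bits used by a non-leaf entry. */
	#define PT_PRESENT_MASK   (1ULL << 0)
	#define PT_WRITABLE_MASK  (1ULL << 1)
	#define PT_USER_MASK      (1ULL << 2)
	#define PT_ACCESSED_MASK  (1ULL << 5)

	/* Build an spte that points a shadow table at its child table page. */
	static uint64_t make_link_spte(uint64_t child_table_pa,
				       uint64_t user_mask, uint64_t x_mask,
				       uint64_t accessed_mask)
	{
		return child_table_pa | PT_PRESENT_MASK | PT_WRITABLE_MASK |
		       user_mask | x_mask | accessed_mask;
	}

	int main(void)
	{
		/* Assume legacy shadow paging: the user/accessed bits are the
		 * architectural ones and there is no separate execute bit. */
		uint64_t spte = make_link_spte(0x12345000ULL, PT_USER_MASK,
					       0, PT_ACCESSED_MASK);

		printf("link spte = 0x%llx\n", (unsigned long long)spte);
		return 0;
	}
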
@@ -3719,6 +3659,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
3719 else 3659 else
3720 r = paging32_init_context(vcpu, context); 3660 r = paging32_init_context(vcpu, context);
3721 3661
3662 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
3722 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); 3663 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
3723 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); 3664 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
3724 vcpu->arch.mmu.base_role.smep_andnot_wp 3665 vcpu->arch.mmu.base_role.smep_andnot_wp
@@ -3885,7 +3826,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
3885 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ 3826 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
3886 *gpa &= ~(gpa_t)7; 3827 *gpa &= ~(gpa_t)7;
3887 *bytes = 8; 3828 *bytes = 8;
3888 r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); 3829 r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
3889 if (r) 3830 if (r)
3890 gentry = 0; 3831 gentry = 0;
3891 new = (const u8 *)&gentry; 3832 new = (const u8 *)&gentry;
@@ -3987,7 +3928,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
3987 gfn_t gfn = gpa >> PAGE_SHIFT; 3928 gfn_t gfn = gpa >> PAGE_SHIFT;
3988 union kvm_mmu_page_role mask = { .word = 0 }; 3929 union kvm_mmu_page_role mask = { .word = 0 };
3989 struct kvm_mmu_page *sp; 3930 struct kvm_mmu_page *sp;
3990 struct hlist_node *node;
3991 LIST_HEAD(invalid_list); 3931 LIST_HEAD(invalid_list);
3992 u64 entry, gentry, *spte; 3932 u64 entry, gentry, *spte;
3993 int npte; 3933 int npte;
@@ -4018,7 +3958,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4018 kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); 3958 kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
4019 3959
4020 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; 3960 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
4021 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { 3961 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
4022 if (detect_write_misaligned(sp, gpa, bytes) || 3962 if (detect_write_misaligned(sp, gpa, bytes) ||
4023 detect_write_flooding(sp)) { 3963 detect_write_flooding(sp)) {
4024 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, 3964 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
@@ -4039,7 +3979,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4039 !((sp->role.word ^ vcpu->arch.mmu.base_role.word) 3979 !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
4040 & mask.word) && rmap_can_add(vcpu)) 3980 & mask.word) && rmap_can_add(vcpu))
4041 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); 3981 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
4042 if (!remote_flush && need_remote_flush(entry, *spte)) 3982 if (need_remote_flush(entry, *spte))
4043 remote_flush = true; 3983 remote_flush = true;
4044 ++spte; 3984 ++spte;
4045 } 3985 }
@@ -4198,26 +4138,36 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
4198 4138
4199void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) 4139void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
4200{ 4140{
4201 struct kvm_mmu_page *sp; 4141 struct kvm_memory_slot *memslot;
4202 bool flush = false; 4142 gfn_t last_gfn;
4143 int i;
4203 4144
4204 list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { 4145 memslot = id_to_memslot(kvm->memslots, slot);
4205 int i; 4146 last_gfn = memslot->base_gfn + memslot->npages - 1;
4206 u64 *pt;
4207 4147
4208 if (!test_bit(slot, sp->slot_bitmap)) 4148 spin_lock(&kvm->mmu_lock);
4209 continue;
4210 4149
4211 pt = sp->spt; 4150 for (i = PT_PAGE_TABLE_LEVEL;
4212 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { 4151 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
4213 if (!is_shadow_present_pte(pt[i]) || 4152 unsigned long *rmapp;
4214 !is_last_spte(pt[i], sp->role.level)) 4153 unsigned long last_index, index;
4215 continue;
4216 4154
4217 spte_write_protect(kvm, &pt[i], &flush, false); 4155 rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
4156 last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
4157
4158 for (index = 0; index <= last_index; ++index, ++rmapp) {
4159 if (*rmapp)
4160 __rmap_write_protect(kvm, rmapp, false);
4161
4162 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
4163 kvm_flush_remote_tlbs(kvm);
4164 cond_resched_lock(&kvm->mmu_lock);
4165 }
4218 } 4166 }
4219 } 4167 }
4168
4220 kvm_flush_remote_tlbs(kvm); 4169 kvm_flush_remote_tlbs(kvm);
4170 spin_unlock(&kvm->mmu_lock);
4221} 4171}
4222 4172
4223void kvm_mmu_zap_all(struct kvm *kvm) 4173void kvm_mmu_zap_all(struct kvm *kvm)
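
The rewritten kvm_mmu_slot_remove_write_access() above walks one rmap array per supported page size and indexes it with gfn_to_index(). A small self-contained sketch of that indexing (assuming the usual x86-64 layout of 9 gfn bits per level; the helper mirrors the kernel's name but is not the kernel code) shows how many rmap entries each level of a 1 GiB slot contributes:

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t gfn_t;
	#define HPAGE_GFN_SHIFT(level) (((level) - 1) * 9)

	/* Offset of a gfn from the slot base, at the given level's granularity. */
	static gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
	{
		return (gfn >> HPAGE_GFN_SHIFT(level)) -
		       (base_gfn >> HPAGE_GFN_SHIFT(level));
	}

	int main(void)
	{
		gfn_t base = 0x100000, last = base + 0x40000 - 1;	/* 1 GiB slot */

		for (int level = 1; level <= 3; level++)	/* 4K, 2M, 1G */
			printf("level %d: %llu rmap entries to scan\n", level,
			       (unsigned long long)(gfn_to_index(last, base, level) + 1));
		return 0;
	}

At the 4 KiB level every page has its own rmap head; each larger page size shrinks the scan by a factor of 512, which is why the loop above can afford to reschedule between buckets.
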
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index cd6e98333ba3..b8f6172f4174 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
195 TP_ARGS(sp) 195 TP_ARGS(sp)
196); 196);
197 197
198DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages,
199 TP_PROTO(struct kvm_mmu_page *sp),
200
201 TP_ARGS(sp)
202);
203
204TRACE_EVENT( 198TRACE_EVENT(
205 mark_mmio_spte, 199 mark_mmio_spte,
206 TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), 200 TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access),
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 891eb6d93b8b..105dd5bd550e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
151 pt_element_t pte; 151 pt_element_t pte;
152 pt_element_t __user *uninitialized_var(ptep_user); 152 pt_element_t __user *uninitialized_var(ptep_user);
153 gfn_t table_gfn; 153 gfn_t table_gfn;
154 unsigned index, pt_access, pte_access, accessed_dirty, shift; 154 unsigned index, pt_access, pte_access, accessed_dirty;
155 gpa_t pte_gpa; 155 gpa_t pte_gpa;
156 int offset; 156 int offset;
157 const int write_fault = access & PFERR_WRITE_MASK; 157 const int write_fault = access & PFERR_WRITE_MASK;
@@ -249,16 +249,12 @@ retry_walk:
249 249
250 if (!write_fault) 250 if (!write_fault)
251 protect_clean_gpte(&pte_access, pte); 251 protect_clean_gpte(&pte_access, pte);
252 252 else
253 /* 253 /*
254 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one 254 * On a write fault, fold the dirty bit into accessed_dirty by
255 * place right. 255 * shifting it one place right.
256 * 256 */
257 * On a read fault, do nothing. 257 accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);
258 */
259 shift = write_fault >> ilog2(PFERR_WRITE_MASK);
260 shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
261 accessed_dirty &= pte >> shift;
262 258
263 if (unlikely(!accessed_dirty)) { 259 if (unlikely(!accessed_dirty)) {
264 ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); 260 ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
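
The simplification above folds the leaf gpte's Dirty bit into the accessed_dirty accumulator only on write faults, replacing the branch-free shift trick. A minimal sketch of the remaining arithmetic (bit positions are the architectural x86 Accessed = 5 and Dirty = 6):

	#include <stdint.h>
	#include <stdio.h>

	#define PT_ACCESSED_SHIFT 5
	#define PT_DIRTY_SHIFT    6
	#define PT_ACCESSED_MASK  (1ULL << PT_ACCESSED_SHIFT)

	int main(void)
	{
		uint64_t pte = (1ULL << PT_DIRTY_SHIFT);	/* leaf gpte: Dirty set */
		uint64_t accessed_dirty = PT_ACCESSED_MASK;	/* all walked gptes had A set */

		/* Shift the leaf's Dirty bit one place right (6 - 5 = 1) so it lands
		 * in the Accessed position, then AND it into the accumulator. */
		accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);

		printf("A/D update needed: %s\n", accessed_dirty ? "no" : "yes");
		return 0;
	}

If the result is zero, the walker falls back to FNAME(update_accessed_dirty_bits)(), exactly as in the unlikely(!accessed_dirty) path above.
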
@@ -330,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
330 * we call mmu_set_spte() with host_writable = true because 326 * we call mmu_set_spte() with host_writable = true because
331 * pte_prefetch_gfn_to_pfn always gets a writable pfn. 327 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
332 */ 328 */
333 mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, 329 mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL,
334 NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); 330 gfn, pfn, true, true);
335 331
336 return true; 332 return true;
337} 333}
@@ -405,7 +401,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
405 */ 401 */
406static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 402static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
407 struct guest_walker *gw, 403 struct guest_walker *gw,
408 int user_fault, int write_fault, int hlevel, 404 int write_fault, int hlevel,
409 pfn_t pfn, bool map_writable, bool prefault) 405 pfn_t pfn, bool map_writable, bool prefault)
410{ 406{
411 struct kvm_mmu_page *sp = NULL; 407 struct kvm_mmu_page *sp = NULL;
@@ -413,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
413 unsigned direct_access, access = gw->pt_access; 409 unsigned direct_access, access = gw->pt_access;
414 int top_level, emulate = 0; 410 int top_level, emulate = 0;
415 411
416 if (!is_present_gpte(gw->ptes[gw->level - 1]))
417 return 0;
418
419 direct_access = gw->pte_access; 412 direct_access = gw->pte_access;
420 413
421 top_level = vcpu->arch.mmu.root_level; 414 top_level = vcpu->arch.mmu.root_level;
@@ -477,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
477 } 470 }
478 471
479 clear_sp_write_flooding_count(it.sptep); 472 clear_sp_write_flooding_count(it.sptep);
480 mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, 473 mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate,
481 user_fault, write_fault, &emulate, it.level, 474 it.level, gw->gfn, pfn, prefault, map_writable);
482 gw->gfn, pfn, prefault, map_writable);
483 FNAME(pte_prefetch)(vcpu, gw, it.sptep); 475 FNAME(pte_prefetch)(vcpu, gw, it.sptep);
484 476
485 return emulate; 477 return emulate;
@@ -491,6 +483,46 @@ out_gpte_changed:
491 return 0; 483 return 0;
492} 484}
493 485
486 /*
487 * To see whether the mapped gfn can write its page table in the current
488 * mapping.
489 *
 490 * It is the helper function of FNAME(page_fault). When the guest uses a large page
 491 * size to map a writable gfn that is used as its current page table, we should
 492 * force kvm to use a small page size to map it, because a new shadow page will be
 493 * created when kvm establishes the shadow page table, which stops kvm from using a
 494 * large page size. Doing this early avoids an unnecessary #PF and emulation.
495 *
496 * @write_fault_to_shadow_pgtable will return true if the fault gfn is
497 * currently used as its page table.
498 *
 499 * Note: the PDPT page table is not checked for a PAE-32 bit guest. That is ok
 500 * since the PDPT is always shadowed; this means we cannot use a large page
 501 * size to map a gfn which is used as a PDPT.
502 */
503static bool
504FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
505 struct guest_walker *walker, int user_fault,
506 bool *write_fault_to_shadow_pgtable)
507{
508 int level;
509 gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
510 bool self_changed = false;
511
512 if (!(walker->pte_access & ACC_WRITE_MASK ||
513 (!is_write_protection(vcpu) && !user_fault)))
514 return false;
515
516 for (level = walker->level; level <= walker->max_level; level++) {
517 gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
518
519 self_changed |= !(gfn & mask);
520 *write_fault_to_shadow_pgtable |= !gfn;
521 }
522
523 return self_changed;
524}
525
494/* 526/*
495 * Page fault handler. There are several causes for a page fault: 527 * Page fault handler. There are several causes for a page fault:
496 * - there is no shadow pte for the guest pte 528 * - there is no shadow pte for the guest pte
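
A hedged, standalone illustration of the gfn test that FNAME(is_self_change_mapping) performs above: XOR the fault gfn with each table gfn of the walk and mask off the in-huge-page offset; a zero result means the fault would map (part of) its own page table. A 2 MiB huge page (512 small pages) is assumed for the mask:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t gfn_t;
	#define PAGES_PER_2M_HPAGE 512ULL	/* 2 MiB / 4 KiB */

	static bool maps_own_page_table(gfn_t fault_gfn, const gfn_t *table_gfns,
					int levels)
	{
		gfn_t mask = ~(PAGES_PER_2M_HPAGE - 1);
		bool self_changed = false;

		for (int i = 0; i < levels; i++)
			self_changed |= !((fault_gfn ^ table_gfns[i]) & mask);

		return self_changed;
	}

	int main(void)
	{
		gfn_t tables[] = { 0x1234, 0x5000 };

		printf("%d\n", maps_own_page_table(0x1200, tables, 2));	/* 1: same 2M region */
		printf("%d\n", maps_own_page_table(0x9000, tables, 2));	/* 0 */
		return 0;
	}
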
@@ -516,7 +548,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
516 int level = PT_PAGE_TABLE_LEVEL; 548 int level = PT_PAGE_TABLE_LEVEL;
517 int force_pt_level; 549 int force_pt_level;
518 unsigned long mmu_seq; 550 unsigned long mmu_seq;
519 bool map_writable; 551 bool map_writable, is_self_change_mapping;
520 552
521 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); 553 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
522 554
@@ -544,8 +576,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
544 return 0; 576 return 0;
545 } 577 }
546 578
579 vcpu->arch.write_fault_to_shadow_pgtable = false;
580
581 is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
582 &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
583
547 if (walker.level >= PT_DIRECTORY_LEVEL) 584 if (walker.level >= PT_DIRECTORY_LEVEL)
548 force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); 585 force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
586 || is_self_change_mapping;
549 else 587 else
550 force_pt_level = 1; 588 force_pt_level = 1;
551 if (!force_pt_level) { 589 if (!force_pt_level) {
@@ -564,6 +602,26 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
564 walker.gfn, pfn, walker.pte_access, &r)) 602 walker.gfn, pfn, walker.pte_access, &r))
565 return r; 603 return r;
566 604
605 /*
606 * Do not change pte_access if the pfn is a mmio page, otherwise
607 * we will cache the incorrect access into mmio spte.
608 */
609 if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) &&
610 !is_write_protection(vcpu) && !user_fault &&
611 !is_noslot_pfn(pfn)) {
612 walker.pte_access |= ACC_WRITE_MASK;
613 walker.pte_access &= ~ACC_USER_MASK;
614
615 /*
616 * If we converted a user page to a kernel page,
617 * so that the kernel can write to it when cr0.wp=0,
618 * then we should prevent the kernel from executing it
619 * if SMEP is enabled.
620 */
621 if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
622 walker.pte_access &= ~ACC_EXEC_MASK;
623 }
624
567 spin_lock(&vcpu->kvm->mmu_lock); 625 spin_lock(&vcpu->kvm->mmu_lock);
568 if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) 626 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
569 goto out_unlock; 627 goto out_unlock;
@@ -572,7 +630,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
572 kvm_mmu_free_some_pages(vcpu); 630 kvm_mmu_free_some_pages(vcpu);
573 if (!force_pt_level) 631 if (!force_pt_level)
574 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); 632 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
575 r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 633 r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
576 level, pfn, map_writable, prefault); 634 level, pfn, map_writable, prefault);
577 ++vcpu->stat.pf_fixed; 635 ++vcpu->stat.pf_fixed;
578 kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); 636 kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
@@ -747,7 +805,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
747 805
748 host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; 806 host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
749 807
750 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 808 set_spte(vcpu, &sp->spt[i], pte_access,
751 PT_PAGE_TABLE_LEVEL, gfn, 809 PT_PAGE_TABLE_LEVEL, gfn,
752 spte_to_pfn(sp->spt[i]), true, false, 810 spte_to_pfn(sp->spt[i]), true, false,
753 host_writable); 811 host_writable);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d29d3cd1c156..e1b1ce21bc00 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3571 set_cr_intercept(svm, INTERCEPT_CR8_WRITE); 3571 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3572} 3572}
3573 3573
3574static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
3575{
3576 return;
3577}
3578
3579static int svm_vm_has_apicv(struct kvm *kvm)
3580{
3581 return 0;
3582}
3583
3584static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
3585{
3586 return;
3587}
3588
3589static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
3590{
3591 return;
3592}
3593
3574static int svm_nmi_allowed(struct kvm_vcpu *vcpu) 3594static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3575{ 3595{
3576 struct vcpu_svm *svm = to_svm(vcpu); 3596 struct vcpu_svm *svm = to_svm(vcpu);
@@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = {
4290 .enable_nmi_window = enable_nmi_window, 4310 .enable_nmi_window = enable_nmi_window,
4291 .enable_irq_window = enable_irq_window, 4311 .enable_irq_window = enable_irq_window,
4292 .update_cr8_intercept = update_cr8_intercept, 4312 .update_cr8_intercept = update_cr8_intercept,
4313 .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
4314 .vm_has_apicv = svm_vm_has_apicv,
4315 .load_eoi_exitmap = svm_load_eoi_exitmap,
4316 .hwapic_isr_update = svm_hwapic_isr_update,
4293 4317
4294 .set_tss_addr = svm_set_tss_addr, 4318 .set_tss_addr = svm_set_tss_addr,
4295 .get_tdp_level = get_npt_level, 4319 .get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9120ae1901e4..6667042714cc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,6 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
84static bool __read_mostly fasteoi = 1; 84static bool __read_mostly fasteoi = 1;
85module_param(fasteoi, bool, S_IRUGO); 85module_param(fasteoi, bool, S_IRUGO);
86 86
87static bool __read_mostly enable_apicv_reg_vid;
88
87/* 89/*
88 * If nested=1, nested virtualization is supported, i.e., guests may use 90 * If nested=1, nested virtualization is supported, i.e., guests may use
89 * VMX and be a hypervisor for its own guests. If nested=0, guests may not 91 * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -92,12 +94,8 @@ module_param(fasteoi, bool, S_IRUGO);
92static bool __read_mostly nested = 0; 94static bool __read_mostly nested = 0;
93module_param(nested, bool, S_IRUGO); 95module_param(nested, bool, S_IRUGO);
94 96
95#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ 97#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
96 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) 98#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
97#define KVM_GUEST_CR0_MASK \
98 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
99#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
100 (X86_CR0_WP | X86_CR0_NE)
101#define KVM_VM_CR0_ALWAYS_ON \ 99#define KVM_VM_CR0_ALWAYS_ON \
102 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 100 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
103#define KVM_CR4_GUEST_OWNED_BITS \ 101#define KVM_CR4_GUEST_OWNED_BITS \
@@ -624,6 +622,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
624 struct kvm_segment *var, int seg); 622 struct kvm_segment *var, int seg);
625static void vmx_get_segment(struct kvm_vcpu *vcpu, 623static void vmx_get_segment(struct kvm_vcpu *vcpu,
626 struct kvm_segment *var, int seg); 624 struct kvm_segment *var, int seg);
625static bool guest_state_valid(struct kvm_vcpu *vcpu);
626static u32 vmx_segment_access_rights(struct kvm_segment *var);
627 627
628static DEFINE_PER_CPU(struct vmcs *, vmxarea); 628static DEFINE_PER_CPU(struct vmcs *, vmxarea);
629static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 629static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -638,6 +638,8 @@ static unsigned long *vmx_io_bitmap_a;
638static unsigned long *vmx_io_bitmap_b; 638static unsigned long *vmx_io_bitmap_b;
639static unsigned long *vmx_msr_bitmap_legacy; 639static unsigned long *vmx_msr_bitmap_legacy;
640static unsigned long *vmx_msr_bitmap_longmode; 640static unsigned long *vmx_msr_bitmap_longmode;
641static unsigned long *vmx_msr_bitmap_legacy_x2apic;
642static unsigned long *vmx_msr_bitmap_longmode_x2apic;
641 643
642static bool cpu_has_load_ia32_efer; 644static bool cpu_has_load_ia32_efer;
643static bool cpu_has_load_perf_global_ctrl; 645static bool cpu_has_load_perf_global_ctrl;
@@ -762,6 +764,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
762 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 764 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
763} 765}
764 766
767static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
768{
769 return vmcs_config.cpu_based_2nd_exec_ctrl &
770 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
771}
772
773static inline bool cpu_has_vmx_apic_register_virt(void)
774{
775 return vmcs_config.cpu_based_2nd_exec_ctrl &
776 SECONDARY_EXEC_APIC_REGISTER_VIRT;
777}
778
779static inline bool cpu_has_vmx_virtual_intr_delivery(void)
780{
781 return vmcs_config.cpu_based_2nd_exec_ctrl &
782 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
783}
784
765static inline bool cpu_has_vmx_flexpriority(void) 785static inline bool cpu_has_vmx_flexpriority(void)
766{ 786{
767 return cpu_has_vmx_tpr_shadow() && 787 return cpu_has_vmx_tpr_shadow() &&
@@ -1694,7 +1714,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
1694static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 1714static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1695{ 1715{
1696 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); 1716 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
1697 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
1698 to_vmx(vcpu)->rflags = rflags; 1717 to_vmx(vcpu)->rflags = rflags;
1699 if (to_vmx(vcpu)->rmode.vm86_active) { 1718 if (to_vmx(vcpu)->rmode.vm86_active) {
1700 to_vmx(vcpu)->rmode.save_rflags = rflags; 1719 to_vmx(vcpu)->rmode.save_rflags = rflags;
@@ -1820,6 +1839,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1820 vmx->guest_msrs[from] = tmp; 1839 vmx->guest_msrs[from] = tmp;
1821} 1840}
1822 1841
1842static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
1843{
1844 unsigned long *msr_bitmap;
1845
1846 if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
1847 if (is_long_mode(vcpu))
1848 msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
1849 else
1850 msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
1851 } else {
1852 if (is_long_mode(vcpu))
1853 msr_bitmap = vmx_msr_bitmap_longmode;
1854 else
1855 msr_bitmap = vmx_msr_bitmap_legacy;
1856 }
1857
1858 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
1859}
1860
1823/* 1861/*
1824 * Set up the vmcs to automatically save and restore system 1862 * Set up the vmcs to automatically save and restore system
1825 * msrs. Don't touch the 64-bit msrs if the guest is in legacy 1863 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
@@ -1828,7 +1866,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1828static void setup_msrs(struct vcpu_vmx *vmx) 1866static void setup_msrs(struct vcpu_vmx *vmx)
1829{ 1867{
1830 int save_nmsrs, index; 1868 int save_nmsrs, index;
1831 unsigned long *msr_bitmap;
1832 1869
1833 save_nmsrs = 0; 1870 save_nmsrs = 0;
1834#ifdef CONFIG_X86_64 1871#ifdef CONFIG_X86_64
@@ -1860,14 +1897,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
1860 1897
1861 vmx->save_nmsrs = save_nmsrs; 1898 vmx->save_nmsrs = save_nmsrs;
1862 1899
1863 if (cpu_has_vmx_msr_bitmap()) { 1900 if (cpu_has_vmx_msr_bitmap())
1864 if (is_long_mode(&vmx->vcpu)) 1901 vmx_set_msr_bitmap(&vmx->vcpu);
1865 msr_bitmap = vmx_msr_bitmap_longmode;
1866 else
1867 msr_bitmap = vmx_msr_bitmap_legacy;
1868
1869 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
1870 }
1871} 1902}
1872 1903
1873/* 1904/*
@@ -2533,13 +2564,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2533 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 2564 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
2534 min2 = 0; 2565 min2 = 0;
2535 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2566 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2567 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2536 SECONDARY_EXEC_WBINVD_EXITING | 2568 SECONDARY_EXEC_WBINVD_EXITING |
2537 SECONDARY_EXEC_ENABLE_VPID | 2569 SECONDARY_EXEC_ENABLE_VPID |
2538 SECONDARY_EXEC_ENABLE_EPT | 2570 SECONDARY_EXEC_ENABLE_EPT |
2539 SECONDARY_EXEC_UNRESTRICTED_GUEST | 2571 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2540 SECONDARY_EXEC_PAUSE_LOOP_EXITING | 2572 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
2541 SECONDARY_EXEC_RDTSCP | 2573 SECONDARY_EXEC_RDTSCP |
2542 SECONDARY_EXEC_ENABLE_INVPCID; 2574 SECONDARY_EXEC_ENABLE_INVPCID |
2575 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2576 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
2543 if (adjust_vmx_controls(min2, opt2, 2577 if (adjust_vmx_controls(min2, opt2,
2544 MSR_IA32_VMX_PROCBASED_CTLS2, 2578 MSR_IA32_VMX_PROCBASED_CTLS2,
2545 &_cpu_based_2nd_exec_control) < 0) 2579 &_cpu_based_2nd_exec_control) < 0)
@@ -2550,6 +2584,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2550 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 2584 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
2551 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; 2585 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
2552#endif 2586#endif
2587
2588 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2589 _cpu_based_2nd_exec_control &= ~(
2590 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2591 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2592 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
2593
2553 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { 2594 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
2554 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT 2595 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
2555 enabled */ 2596 enabled */
@@ -2747,6 +2788,15 @@ static __init int hardware_setup(void)
2747 if (!cpu_has_vmx_ple()) 2788 if (!cpu_has_vmx_ple())
2748 ple_gap = 0; 2789 ple_gap = 0;
2749 2790
2791 if (!cpu_has_vmx_apic_register_virt() ||
2792 !cpu_has_vmx_virtual_intr_delivery())
2793 enable_apicv_reg_vid = 0;
2794
2795 if (enable_apicv_reg_vid)
2796 kvm_x86_ops->update_cr8_intercept = NULL;
2797 else
2798 kvm_x86_ops->hwapic_irr_update = NULL;
2799
2750 if (nested) 2800 if (nested)
2751 nested_vmx_setup_ctls_msrs(); 2801 nested_vmx_setup_ctls_msrs();
2752 2802
@@ -2758,18 +2808,28 @@ static __exit void hardware_unsetup(void)
2758 free_kvm_area(); 2808 free_kvm_area();
2759} 2809}
2760 2810
2761static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) 2811static bool emulation_required(struct kvm_vcpu *vcpu)
2762{ 2812{
2763 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2813 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2764 struct kvm_segment tmp = *save; 2814}
2765 2815
2766 if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { 2816static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
2767 tmp.base = vmcs_readl(sf->base); 2817 struct kvm_segment *save)
2768 tmp.selector = vmcs_read16(sf->selector); 2818{
2769 tmp.dpl = tmp.selector & SELECTOR_RPL_MASK; 2819 if (!emulate_invalid_guest_state) {
2770 tmp.s = 1; 2820 /*
2821 * CS and SS RPL should be equal during guest entry according
2822 * to VMX spec, but in reality it is not always so. Since vcpu
2823 * is in the middle of the transition from real mode to
2824 * protected mode it is safe to assume that RPL 0 is a good
2825 * default value.
2826 */
2827 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
2828 save->selector &= ~SELECTOR_RPL_MASK;
2829 save->dpl = save->selector & SELECTOR_RPL_MASK;
2830 save->s = 1;
2771 } 2831 }
2772 vmx_set_segment(vcpu, &tmp, seg); 2832 vmx_set_segment(vcpu, save, seg);
2773} 2833}
2774 2834
2775static void enter_pmode(struct kvm_vcpu *vcpu) 2835static void enter_pmode(struct kvm_vcpu *vcpu)
@@ -2777,7 +2837,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
2777 unsigned long flags; 2837 unsigned long flags;
2778 struct vcpu_vmx *vmx = to_vmx(vcpu); 2838 struct vcpu_vmx *vmx = to_vmx(vcpu);
2779 2839
2780 vmx->emulation_required = 1; 2840 /*
 2841 * Update real mode segment cache. It may not be up to date if a segment
 2842 * register was written while the vcpu was in guest mode.
2843 */
2844 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2845 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2846 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2847 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2848 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2849 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2850
2781 vmx->rmode.vm86_active = 0; 2851 vmx->rmode.vm86_active = 0;
2782 2852
2783 vmx_segment_cache_clear(vmx); 2853 vmx_segment_cache_clear(vmx);
@@ -2794,22 +2864,16 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
2794 2864
2795 update_exception_bitmap(vcpu); 2865 update_exception_bitmap(vcpu);
2796 2866
2797 if (emulate_invalid_guest_state) 2867 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2798 return; 2868 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2799 2869 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2800 fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); 2870 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2801 fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); 2871 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2802 fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); 2872 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2803 fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2804
2805 vmx_segment_cache_clear(vmx);
2806 2873
2807 vmcs_write16(GUEST_SS_SELECTOR, 0); 2874 /* CPL is always 0 when CPU enters protected mode */
2808 vmcs_write32(GUEST_SS_AR_BYTES, 0x93); 2875 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
2809 2876 vmx->cpl = 0;
2810 vmcs_write16(GUEST_CS_SELECTOR,
2811 vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
2812 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
2813} 2877}
2814 2878
2815static gva_t rmode_tss_base(struct kvm *kvm) 2879static gva_t rmode_tss_base(struct kvm *kvm)
@@ -2831,36 +2895,51 @@ static gva_t rmode_tss_base(struct kvm *kvm)
2831static void fix_rmode_seg(int seg, struct kvm_segment *save) 2895static void fix_rmode_seg(int seg, struct kvm_segment *save)
2832{ 2896{
2833 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2897 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2834 2898 struct kvm_segment var = *save;
2835 vmcs_write16(sf->selector, save->base >> 4); 2899
2836 vmcs_write32(sf->base, save->base & 0xffff0); 2900 var.dpl = 0x3;
2837 vmcs_write32(sf->limit, 0xffff); 2901 if (seg == VCPU_SREG_CS)
2838 vmcs_write32(sf->ar_bytes, 0xf3); 2902 var.type = 0x3;
2839 if (save->base & 0xf) 2903
2840 printk_once(KERN_WARNING "kvm: segment base is not paragraph" 2904 if (!emulate_invalid_guest_state) {
2841 " aligned when entering protected mode (seg=%d)", 2905 var.selector = var.base >> 4;
2842 seg); 2906 var.base = var.base & 0xffff0;
2907 var.limit = 0xffff;
2908 var.g = 0;
2909 var.db = 0;
2910 var.present = 1;
2911 var.s = 1;
2912 var.l = 0;
2913 var.unusable = 0;
2914 var.type = 0x3;
2915 var.avl = 0;
2916 if (save->base & 0xf)
2917 printk_once(KERN_WARNING "kvm: segment base is not "
2918 "paragraph aligned when entering "
2919 "protected mode (seg=%d)", seg);
2920 }
2921
2922 vmcs_write16(sf->selector, var.selector);
2923 vmcs_write32(sf->base, var.base);
2924 vmcs_write32(sf->limit, var.limit);
2925 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
2843} 2926}
2844 2927
2845static void enter_rmode(struct kvm_vcpu *vcpu) 2928static void enter_rmode(struct kvm_vcpu *vcpu)
2846{ 2929{
2847 unsigned long flags; 2930 unsigned long flags;
2848 struct vcpu_vmx *vmx = to_vmx(vcpu); 2931 struct vcpu_vmx *vmx = to_vmx(vcpu);
2849 struct kvm_segment var;
2850
2851 if (enable_unrestricted_guest)
2852 return;
2853 2932
2854 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); 2933 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2855 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); 2934 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2856 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); 2935 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2857 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); 2936 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2858 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); 2937 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2938 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2939 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2859 2940
2860 vmx->emulation_required = 1;
2861 vmx->rmode.vm86_active = 1; 2941 vmx->rmode.vm86_active = 1;
2862 2942
2863
2864 /* 2943 /*
2865 * Very old userspace does not call KVM_SET_TSS_ADDR before entering 2944 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
2866 * vcpu. Call it here with phys address pointing 16M below 4G. 2945 * vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2888,28 +2967,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
2888 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); 2967 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
2889 update_exception_bitmap(vcpu); 2968 update_exception_bitmap(vcpu);
2890 2969
2891 if (emulate_invalid_guest_state) 2970 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2892 goto continue_rmode; 2971 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2893 2972 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2894 vmx_get_segment(vcpu, &var, VCPU_SREG_SS); 2973 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2895 vmx_set_segment(vcpu, &var, VCPU_SREG_SS); 2974 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2896 2975 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2897 vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
2898 vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
2899
2900 vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
2901 vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
2902
2903 vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
2904 vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
2905 2976
2906 vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
2907 vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
2908
2909 vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
2910 vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
2911
2912continue_rmode:
2913 kvm_mmu_reset_context(vcpu); 2977 kvm_mmu_reset_context(vcpu);
2914} 2978}
2915 2979
@@ -3068,17 +3132,18 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3068 struct vcpu_vmx *vmx = to_vmx(vcpu); 3132 struct vcpu_vmx *vmx = to_vmx(vcpu);
3069 unsigned long hw_cr0; 3133 unsigned long hw_cr0;
3070 3134
3135 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
3071 if (enable_unrestricted_guest) 3136 if (enable_unrestricted_guest)
3072 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) 3137 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
3073 | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; 3138 else {
3074 else 3139 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
3075 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
3076 3140
3077 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 3141 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
3078 enter_pmode(vcpu); 3142 enter_pmode(vcpu);
3079 3143
3080 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) 3144 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
3081 enter_rmode(vcpu); 3145 enter_rmode(vcpu);
3146 }
3082 3147
3083#ifdef CONFIG_X86_64 3148#ifdef CONFIG_X86_64
3084 if (vcpu->arch.efer & EFER_LME) { 3149 if (vcpu->arch.efer & EFER_LME) {
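
As a quick sketch of the hw_cr0 composition introduced above (CR0 bit positions are architectural; the macro names mirror the simplified ones in this patch, not the full kernel headers): guest NW and CD are always masked out, and either the unrestricted-guest or the full always-on set is ORed back in.

	#include <stdbool.h>
	#include <stdio.h>

	#define X86_CR0_PE (1UL << 0)
	#define X86_CR0_NE (1UL << 5)
	#define X86_CR0_WP (1UL << 16)
	#define X86_CR0_NW (1UL << 29)
	#define X86_CR0_CD (1UL << 30)
	#define X86_CR0_PG (1UL << 31)

	#define GUEST_CR0_MASK			(X86_CR0_NW | X86_CR0_CD)
	#define ALWAYS_ON_UNRESTRICTED_GUEST	(X86_CR0_WP | X86_CR0_NE)
	#define ALWAYS_ON	(ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)

	static unsigned long hw_cr0(unsigned long cr0, bool unrestricted_guest)
	{
		unsigned long hw = cr0 & ~GUEST_CR0_MASK;	/* guest never owns NW/CD */

		hw |= unrestricted_guest ? ALWAYS_ON_UNRESTRICTED_GUEST : ALWAYS_ON;
		return hw;
	}

	int main(void)
	{
		/* CR0 reset value 0x60000010: CD and NW get stripped, WP/NE/PG/PE forced. */
		printf("hw_cr0(0x%lx) = 0x%lx\n", 0x60000010UL,
		       hw_cr0(0x60000010UL, false));
		return 0;
	}
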
@@ -3098,7 +3163,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3098 vmcs_writel(CR0_READ_SHADOW, cr0); 3163 vmcs_writel(CR0_READ_SHADOW, cr0);
3099 vmcs_writel(GUEST_CR0, hw_cr0); 3164 vmcs_writel(GUEST_CR0, hw_cr0);
3100 vcpu->arch.cr0 = cr0; 3165 vcpu->arch.cr0 = cr0;
3101 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 3166
3167 /* depends on vcpu->arch.cr0 to be set to a new value */
3168 vmx->emulation_required = emulation_required(vcpu);
3102} 3169}
3103 3170
3104static u64 construct_eptp(unsigned long root_hpa) 3171static u64 construct_eptp(unsigned long root_hpa)
@@ -3155,6 +3222,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3155 if (!is_paging(vcpu)) { 3222 if (!is_paging(vcpu)) {
3156 hw_cr4 &= ~X86_CR4_PAE; 3223 hw_cr4 &= ~X86_CR4_PAE;
3157 hw_cr4 |= X86_CR4_PSE; 3224 hw_cr4 |= X86_CR4_PSE;
3225 /*
3226 * SMEP is disabled if CPU is in non-paging mode in
3227 * hardware. However KVM always uses paging mode to
3228 * emulate guest non-paging mode with TDP.
3229 * To emulate this behavior, SMEP needs to be manually
3230 * disabled when guest switches to non-paging mode.
3231 */
3232 hw_cr4 &= ~X86_CR4_SMEP;
3158 } else if (!(cr4 & X86_CR4_PAE)) { 3233 } else if (!(cr4 & X86_CR4_PAE)) {
3159 hw_cr4 &= ~X86_CR4_PAE; 3234 hw_cr4 &= ~X86_CR4_PAE;
3160 } 3235 }
@@ -3171,10 +3246,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
3171 struct vcpu_vmx *vmx = to_vmx(vcpu); 3246 struct vcpu_vmx *vmx = to_vmx(vcpu);
3172 u32 ar; 3247 u32 ar;
3173 3248
3174 if (vmx->rmode.vm86_active 3249 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3175 && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
3176 || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
3177 || seg == VCPU_SREG_GS)) {
3178 *var = vmx->rmode.segs[seg]; 3250 *var = vmx->rmode.segs[seg];
3179 if (seg == VCPU_SREG_TR 3251 if (seg == VCPU_SREG_TR
3180 || var->selector == vmx_read_guest_seg_selector(vmx, seg)) 3252 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
@@ -3187,8 +3259,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
3187 var->limit = vmx_read_guest_seg_limit(vmx, seg); 3259 var->limit = vmx_read_guest_seg_limit(vmx, seg);
3188 var->selector = vmx_read_guest_seg_selector(vmx, seg); 3260 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3189 ar = vmx_read_guest_seg_ar(vmx, seg); 3261 ar = vmx_read_guest_seg_ar(vmx, seg);
3190 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
3191 ar = 0;
3192 var->type = ar & 15; 3262 var->type = ar & 15;
3193 var->s = (ar >> 4) & 1; 3263 var->s = (ar >> 4) & 1;
3194 var->dpl = (ar >> 5) & 3; 3264 var->dpl = (ar >> 5) & 3;
@@ -3211,8 +3281,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3211 return vmx_read_guest_seg_base(to_vmx(vcpu), seg); 3281 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3212} 3282}
3213 3283
3214static int __vmx_get_cpl(struct kvm_vcpu *vcpu) 3284static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3215{ 3285{
3286 struct vcpu_vmx *vmx = to_vmx(vcpu);
3287
3216 if (!is_protmode(vcpu)) 3288 if (!is_protmode(vcpu))
3217 return 0; 3289 return 0;
3218 3290
@@ -3220,24 +3292,9 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
3220 && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ 3292 && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
3221 return 3; 3293 return 3;
3222 3294
3223 return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
3224}
3225
3226static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3227{
3228 struct vcpu_vmx *vmx = to_vmx(vcpu);
3229
3230 /*
3231 * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations
3232 * fail; use the cache instead.
3233 */
3234 if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) {
3235 return vmx->cpl;
3236 }
3237
3238 if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { 3295 if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
3239 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 3296 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3240 vmx->cpl = __vmx_get_cpl(vcpu); 3297 vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
3241 } 3298 }
3242 3299
3243 return vmx->cpl; 3300 return vmx->cpl;
@@ -3269,28 +3326,23 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3269{ 3326{
3270 struct vcpu_vmx *vmx = to_vmx(vcpu); 3327 struct vcpu_vmx *vmx = to_vmx(vcpu);
3271 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 3328 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3272 u32 ar;
3273 3329
3274 vmx_segment_cache_clear(vmx); 3330 vmx_segment_cache_clear(vmx);
3331 if (seg == VCPU_SREG_CS)
3332 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3275 3333
3276 if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { 3334 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3277 vmcs_write16(sf->selector, var->selector); 3335 vmx->rmode.segs[seg] = *var;
3278 vmx->rmode.segs[VCPU_SREG_TR] = *var; 3336 if (seg == VCPU_SREG_TR)
3279 return; 3337 vmcs_write16(sf->selector, var->selector);
3338 else if (var->s)
3339 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3340 goto out;
3280 } 3341 }
3342
3281 vmcs_writel(sf->base, var->base); 3343 vmcs_writel(sf->base, var->base);
3282 vmcs_write32(sf->limit, var->limit); 3344 vmcs_write32(sf->limit, var->limit);
3283 vmcs_write16(sf->selector, var->selector); 3345 vmcs_write16(sf->selector, var->selector);
3284 if (vmx->rmode.vm86_active && var->s) {
3285 vmx->rmode.segs[seg] = *var;
3286 /*
3287 * Hack real-mode segments into vm86 compatibility.
3288 */
3289 if (var->base == 0xffff0000 && var->selector == 0xf000)
3290 vmcs_writel(sf->base, 0xf0000);
3291 ar = 0xf3;
3292 } else
3293 ar = vmx_segment_access_rights(var);
3294 3346
3295 /* 3347 /*
3296 * Fix the "Accessed" bit in AR field of segment registers for older 3348 * Fix the "Accessed" bit in AR field of segment registers for older
@@ -3304,42 +3356,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3304 * kvm hack. 3356 * kvm hack.
3305 */ 3357 */
3306 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) 3358 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
3307 ar |= 0x1; /* Accessed */ 3359 var->type |= 0x1; /* Accessed */
3308 3360
3309 vmcs_write32(sf->ar_bytes, ar); 3361 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
3310 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3311 3362
3312 /* 3363out:
3313 * Fix segments for real mode guest in hosts that don't have 3364 vmx->emulation_required |= emulation_required(vcpu);
3314 * "unrestricted_mode" or it was disabled.
3315 * This is done to allow migration of the guests from hosts with
3316 * unrestricted guest like Westmere to older host that don't have
3317 * unrestricted guest like Nehelem.
3318 */
3319 if (vmx->rmode.vm86_active) {
3320 switch (seg) {
3321 case VCPU_SREG_CS:
3322 vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
3323 vmcs_write32(GUEST_CS_LIMIT, 0xffff);
3324 if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
3325 vmcs_writel(GUEST_CS_BASE, 0xf0000);
3326 vmcs_write16(GUEST_CS_SELECTOR,
3327 vmcs_readl(GUEST_CS_BASE) >> 4);
3328 break;
3329 case VCPU_SREG_ES:
3330 case VCPU_SREG_DS:
3331 case VCPU_SREG_GS:
3332 case VCPU_SREG_FS:
3333 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3334 break;
3335 case VCPU_SREG_SS:
3336 vmcs_write16(GUEST_SS_SELECTOR,
3337 vmcs_readl(GUEST_SS_BASE) >> 4);
3338 vmcs_write32(GUEST_SS_LIMIT, 0xffff);
3339 vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
3340 break;
3341 }
3342 }
3343} 3365}
3344 3366
3345static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 3367static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -3380,13 +3402,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
3380 u32 ar; 3402 u32 ar;
3381 3403
3382 vmx_get_segment(vcpu, &var, seg); 3404 vmx_get_segment(vcpu, &var, seg);
3405 var.dpl = 0x3;
3406 if (seg == VCPU_SREG_CS)
3407 var.type = 0x3;
3383 ar = vmx_segment_access_rights(&var); 3408 ar = vmx_segment_access_rights(&var);
3384 3409
3385 if (var.base != (var.selector << 4)) 3410 if (var.base != (var.selector << 4))
3386 return false; 3411 return false;
3387 if (var.limit < 0xffff) 3412 if (var.limit != 0xffff)
3388 return false; 3413 return false;
3389 if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) 3414 if (ar != 0xf3)
3390 return false; 3415 return false;
3391 3416
3392 return true; 3417 return true;
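
The tightened checks above reduce to three exact equalities for a real-mode data segment. A small illustrative checker (the flat struct layout is an assumption of this sketch, not struct kvm_segment): the base must equal selector << 4, the limit must be exactly 64 KiB - 1, and the access-rights value must be 0xf3 (present, DPL 3, writable expand-up data).

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct seg {
		uint64_t base;
		uint32_t limit;
		uint32_t ar;
		uint16_t selector;
	};

	static bool vm86_compatible(const struct seg *s)
	{
		return s->base == ((uint64_t)s->selector << 4) &&
		       s->limit == 0xffff &&
		       s->ar == 0xf3;
	}

	int main(void)
	{
		struct seg ds = { .base = 0xb8000, .limit = 0xffff,
				  .ar = 0xf3, .selector = 0xb800 };

		printf("vm86 compatible: %d\n", vm86_compatible(&ds));
		return 0;
	}
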
@@ -3521,6 +3546,9 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
3521 */ 3546 */
3522static bool guest_state_valid(struct kvm_vcpu *vcpu) 3547static bool guest_state_valid(struct kvm_vcpu *vcpu)
3523{ 3548{
3549 if (enable_unrestricted_guest)
3550 return true;
3551
3524 /* real mode guest state checks */ 3552 /* real mode guest state checks */
3525 if (!is_protmode(vcpu)) { 3553 if (!is_protmode(vcpu)) {
3526 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 3554 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
@@ -3644,12 +3672,9 @@ static void seg_setup(int seg)
3644 vmcs_write16(sf->selector, 0); 3672 vmcs_write16(sf->selector, 0);
3645 vmcs_writel(sf->base, 0); 3673 vmcs_writel(sf->base, 0);
3646 vmcs_write32(sf->limit, 0xffff); 3674 vmcs_write32(sf->limit, 0xffff);
3647 if (enable_unrestricted_guest) { 3675 ar = 0x93;
3648 ar = 0x93; 3676 if (seg == VCPU_SREG_CS)
3649 if (seg == VCPU_SREG_CS) 3677 ar |= 0x08; /* code segment */
3650 ar |= 0x08; /* code segment */
3651 } else
3652 ar = 0xf3;
3653 3678
3654 vmcs_write32(sf->ar_bytes, ar); 3679 vmcs_write32(sf->ar_bytes, ar);
3655} 3680}
@@ -3667,7 +3692,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
3667 kvm_userspace_mem.flags = 0; 3692 kvm_userspace_mem.flags = 0;
3668 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; 3693 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
3669 kvm_userspace_mem.memory_size = PAGE_SIZE; 3694 kvm_userspace_mem.memory_size = PAGE_SIZE;
3670 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); 3695 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
3671 if (r) 3696 if (r)
3672 goto out; 3697 goto out;
3673 3698
@@ -3697,7 +3722,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
3697 kvm_userspace_mem.guest_phys_addr = 3722 kvm_userspace_mem.guest_phys_addr =
3698 kvm->arch.ept_identity_map_addr; 3723 kvm->arch.ept_identity_map_addr;
3699 kvm_userspace_mem.memory_size = PAGE_SIZE; 3724 kvm_userspace_mem.memory_size = PAGE_SIZE;
3700 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); 3725 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
3701 if (r) 3726 if (r)
3702 goto out; 3727 goto out;
3703 3728
@@ -3739,7 +3764,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
3739 spin_unlock(&vmx_vpid_lock); 3764 spin_unlock(&vmx_vpid_lock);
3740} 3765}
3741 3766
3742static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) 3767#define MSR_TYPE_R 1
3768#define MSR_TYPE_W 2
3769static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3770 u32 msr, int type)
3743{ 3771{
3744 int f = sizeof(unsigned long); 3772 int f = sizeof(unsigned long);
3745 3773
@@ -3752,20 +3780,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
3752 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 3780 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3753 */ 3781 */
3754 if (msr <= 0x1fff) { 3782 if (msr <= 0x1fff) {
3755 __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ 3783 if (type & MSR_TYPE_R)
3756 __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ 3784 /* read-low */
3785 __clear_bit(msr, msr_bitmap + 0x000 / f);
3786
3787 if (type & MSR_TYPE_W)
3788 /* write-low */
3789 __clear_bit(msr, msr_bitmap + 0x800 / f);
3790
3757 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { 3791 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3758 msr &= 0x1fff; 3792 msr &= 0x1fff;
3759 __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ 3793 if (type & MSR_TYPE_R)
3760 __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ 3794 /* read-high */
3795 __clear_bit(msr, msr_bitmap + 0x400 / f);
3796
3797 if (type & MSR_TYPE_W)
3798 /* write-high */
3799 __clear_bit(msr, msr_bitmap + 0xc00 / f);
3800
3801 }
3802}
3803
3804static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
3805 u32 msr, int type)
3806{
3807 int f = sizeof(unsigned long);
3808
3809 if (!cpu_has_vmx_msr_bitmap())
3810 return;
3811
3812 /*
3813 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
3814 * have the write-low and read-high bitmap offsets the wrong way round.
3815 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3816 */
3817 if (msr <= 0x1fff) {
3818 if (type & MSR_TYPE_R)
3819 /* read-low */
3820 __set_bit(msr, msr_bitmap + 0x000 / f);
3821
3822 if (type & MSR_TYPE_W)
3823 /* write-low */
3824 __set_bit(msr, msr_bitmap + 0x800 / f);
3825
3826 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3827 msr &= 0x1fff;
3828 if (type & MSR_TYPE_R)
3829 /* read-high */
3830 __set_bit(msr, msr_bitmap + 0x400 / f);
3831
3832 if (type & MSR_TYPE_W)
3833 /* write-high */
3834 __set_bit(msr, msr_bitmap + 0xc00 / f);
3835
3761 } 3836 }
3762} 3837}
3763 3838
3764static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) 3839static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
3765{ 3840{
3766 if (!longmode_only) 3841 if (!longmode_only)
3767 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); 3842 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
3768 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); 3843 msr, MSR_TYPE_R | MSR_TYPE_W);
3844 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
3845 msr, MSR_TYPE_R | MSR_TYPE_W);
3846}
3847
3848static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
3849{
3850 __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3851 msr, MSR_TYPE_R);
3852 __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3853 msr, MSR_TYPE_R);
3854}
3855
3856static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
3857{
3858 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3859 msr, MSR_TYPE_R);
3860 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3861 msr, MSR_TYPE_R);
3862}
3863
3864static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
3865{
3866 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3867 msr, MSR_TYPE_W);
3868 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3869 msr, MSR_TYPE_W);
3769} 3870}
3770 3871
3771/* 3872/*
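
The read/write offsets used by the two helpers above come from the VMX MSR-bitmap layout: a 4 KiB page with read-low at 0x000, read-high at 0x400, write-low at 0x800 and write-high at 0xC00, one intercept bit per MSR. A minimal sketch of the byte/bit arithmetic (standalone, not the kernel helpers):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	static void msr_bitmap_pos(uint32_t msr, int write, size_t *byte, int *bit)
	{
		size_t base = write ? 0x800 : 0x000;	/* write bitmaps start at 0x800 */

		if (msr >= 0xc0000000) {	/* high range: 0xC0000000-0xC0001FFF */
			base += 0x400;
			msr &= 0x1fff;
		}
		*byte = base + msr / 8;
		*bit  = msr % 8;
	}

	int main(void)
	{
		size_t byte;
		int bit;

		/* 0x802 is the x2APIC ID register MSR used as an example. */
		msr_bitmap_pos(0x802, 0, &byte, &bit);
		printf("read intercept bit for MSR 0x802: byte 0x%zx, bit %d\n",
		       byte, bit);
		return 0;
	}

Clearing such a bit disables the VM exit for that access, which is what the x2apic variants above do selectively for reads or writes.
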
@@ -3844,6 +3945,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
3844 return exec_control; 3945 return exec_control;
3845} 3946}
3846 3947
3948static int vmx_vm_has_apicv(struct kvm *kvm)
3949{
3950 return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
3951}
3952
3847static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) 3953static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
3848{ 3954{
3849 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; 3955 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3861,6 +3967,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
3861 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 3967 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
3862 if (!ple_gap) 3968 if (!ple_gap)
3863 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; 3969 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
3970 if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
3971 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
3972 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3973 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
3864 return exec_control; 3974 return exec_control;
3865} 3975}
3866 3976
@@ -3905,6 +4015,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
3905 vmx_secondary_exec_control(vmx)); 4015 vmx_secondary_exec_control(vmx));
3906 } 4016 }
3907 4017
4018 if (enable_apicv_reg_vid) {
4019 vmcs_write64(EOI_EXIT_BITMAP0, 0);
4020 vmcs_write64(EOI_EXIT_BITMAP1, 0);
4021 vmcs_write64(EOI_EXIT_BITMAP2, 0);
4022 vmcs_write64(EOI_EXIT_BITMAP3, 0);
4023
4024 vmcs_write16(GUEST_INTR_STATUS, 0);
4025 }
4026
3908 if (ple_gap) { 4027 if (ple_gap) {
3909 vmcs_write32(PLE_GAP, ple_gap); 4028 vmcs_write32(PLE_GAP, ple_gap);
3910 vmcs_write32(PLE_WINDOW, ple_window); 4029 vmcs_write32(PLE_WINDOW, ple_window);
@@ -3990,14 +4109,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
3990 vmx_segment_cache_clear(vmx); 4109 vmx_segment_cache_clear(vmx);
3991 4110
3992 seg_setup(VCPU_SREG_CS); 4111 seg_setup(VCPU_SREG_CS);
3993 /* 4112 if (kvm_vcpu_is_bsp(&vmx->vcpu))
3994 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
3995 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
3996 */
3997 if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
3998 vmcs_write16(GUEST_CS_SELECTOR, 0xf000); 4113 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
3999 vmcs_writel(GUEST_CS_BASE, 0x000f0000); 4114 else {
4000 } else {
4001 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); 4115 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
4002 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); 4116 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
4003 } 4117 }
@@ -4073,9 +4187,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4073 4187
4074 ret = 0; 4188 ret = 0;
4075 4189
4076 /* HACK: Don't enable emulation on guest boot/reset */
4077 vmx->emulation_required = 0;
4078
4079 return ret; 4190 return ret;
4080} 4191}
4081 4192
@@ -4251,7 +4362,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4251 .flags = 0, 4362 .flags = 0,
4252 }; 4363 };
4253 4364
4254 ret = kvm_set_memory_region(kvm, &tss_mem, 0); 4365 ret = kvm_set_memory_region(kvm, &tss_mem, false);
4255 if (ret) 4366 if (ret)
4256 return ret; 4367 return ret;
4257 kvm->arch.tss_addr = addr; 4368 kvm->arch.tss_addr = addr;
@@ -4261,28 +4372,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4261 return 0; 4372 return 0;
4262} 4373}
4263 4374
4264static int handle_rmode_exception(struct kvm_vcpu *vcpu, 4375static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
4265 int vec, u32 err_code)
4266{ 4376{
4267 /*
4268 * Instruction with address size override prefix opcode 0x67
4269 * Cause the #SS fault with 0 error code in VM86 mode.
4270 */
4271 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
4272 if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
4273 return 1;
4274 /*
4275 * Forward all other exceptions that are valid in real mode.
4276 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
4277 * the required debugging infrastructure rework.
4278 */
4279 switch (vec) { 4377 switch (vec) {
4280 case DB_VECTOR:
4281 if (vcpu->guest_debug &
4282 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
4283 return 0;
4284 kvm_queue_exception(vcpu, vec);
4285 return 1;
4286 case BP_VECTOR: 4378 case BP_VECTOR:
4287 /* 4379 /*
4288 * Update instruction length as we may reinject the exception 4380 * Update instruction length as we may reinject the exception
@@ -4291,7 +4383,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4291 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = 4383 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
4292 vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 4384 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4293 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 4385 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
4294 return 0; 4386 return false;
4387 /* fall through */
4388 case DB_VECTOR:
4389 if (vcpu->guest_debug &
4390 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
4391 return false;
4295 /* fall through */ 4392 /* fall through */
4296 case DE_VECTOR: 4393 case DE_VECTOR:
4297 case OF_VECTOR: 4394 case OF_VECTOR:
@@ -4301,10 +4398,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4301 case SS_VECTOR: 4398 case SS_VECTOR:
4302 case GP_VECTOR: 4399 case GP_VECTOR:
4303 case MF_VECTOR: 4400 case MF_VECTOR:
4304 kvm_queue_exception(vcpu, vec); 4401 return true;
4305 return 1; 4402 break;
4306 } 4403 }
4307 return 0; 4404 return false;
4405}
4406
4407static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4408 int vec, u32 err_code)
4409{
4410 /*
 4411 * An instruction with the address-size override prefix (opcode 0x67)
 4412 * causes a #SS fault with error code 0 in VM86 mode.
4413 */
4414 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
4415 if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
4416 if (vcpu->arch.halt_request) {
4417 vcpu->arch.halt_request = 0;
4418 return kvm_emulate_halt(vcpu);
4419 }
4420 return 1;
4421 }
4422 return 0;
4423 }
4424
4425 /*
4426 * Forward all other exceptions that are valid in real mode.
4427 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
4428 * the required debugging infrastructure rework.
4429 */
4430 kvm_queue_exception(vcpu, vec);
4431 return 1;
4308} 4432}
4309 4433
4310/* 4434/*
@@ -4392,17 +4516,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4392 return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); 4516 return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
4393 } 4517 }
4394 4518
4395 if (vmx->rmode.vm86_active &&
4396 handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
4397 error_code)) {
4398 if (vcpu->arch.halt_request) {
4399 vcpu->arch.halt_request = 0;
4400 return kvm_emulate_halt(vcpu);
4401 }
4402 return 1;
4403 }
4404
4405 ex_no = intr_info & INTR_INFO_VECTOR_MASK; 4519 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
4520
4521 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
4522 return handle_rmode_exception(vcpu, ex_no, error_code);
4523
4406 switch (ex_no) { 4524 switch (ex_no) {
4407 case DB_VECTOR: 4525 case DB_VECTOR:
4408 dr6 = vmcs_readl(EXIT_QUALIFICATION); 4526 dr6 = vmcs_readl(EXIT_QUALIFICATION);
@@ -4820,6 +4938,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
4820 return emulate_instruction(vcpu, 0) == EMULATE_DONE; 4938 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
4821} 4939}
4822 4940
4941static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
4942{
4943 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4944 int vector = exit_qualification & 0xff;
4945
4946 /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
4947 kvm_apic_set_eoi_accelerated(vcpu, vector);
4948 return 1;
4949}
4950
4951static int handle_apic_write(struct kvm_vcpu *vcpu)
4952{
4953 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4954 u32 offset = exit_qualification & 0xfff;
4955
4956 /* APIC-write VM exit is trap-like and thus no need to adjust IP */
4957 kvm_apic_write_nodecode(vcpu, offset);
4958 return 1;
4959}
4960
4823static int handle_task_switch(struct kvm_vcpu *vcpu) 4961static int handle_task_switch(struct kvm_vcpu *vcpu)
4824{ 4962{
4825 struct vcpu_vmx *vmx = to_vmx(vcpu); 4963 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5065,7 +5203,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5065 schedule(); 5203 schedule();
5066 } 5204 }
5067 5205
5068 vmx->emulation_required = !guest_state_valid(vcpu); 5206 vmx->emulation_required = emulation_required(vcpu);
5069out: 5207out:
5070 return ret; 5208 return ret;
5071} 5209}
@@ -5754,6 +5892,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
5754 [EXIT_REASON_VMON] = handle_vmon, 5892 [EXIT_REASON_VMON] = handle_vmon,
5755 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 5893 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
5756 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 5894 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
5895 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
5896 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
5757 [EXIT_REASON_WBINVD] = handle_wbinvd, 5897 [EXIT_REASON_WBINVD] = handle_wbinvd,
5758 [EXIT_REASON_XSETBV] = handle_xsetbv, 5898 [EXIT_REASON_XSETBV] = handle_xsetbv,
5759 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 5899 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
@@ -5780,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
5780 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; 5920 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
5781 gpa_t bitmap; 5921 gpa_t bitmap;
5782 5922
5783 if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS)) 5923 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5784 return 1; 5924 return 1;
5785 5925
5786 /* 5926 /*
@@ -6008,7 +6148,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6008 u32 vectoring_info = vmx->idt_vectoring_info; 6148 u32 vectoring_info = vmx->idt_vectoring_info;
6009 6149
6010 /* If guest state is invalid, start emulating */ 6150 /* If guest state is invalid, start emulating */
6011 if (vmx->emulation_required && emulate_invalid_guest_state) 6151 if (vmx->emulation_required)
6012 return handle_invalid_guest_state(vcpu); 6152 return handle_invalid_guest_state(vcpu);
6013 6153
6014 /* 6154 /*
@@ -6103,6 +6243,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6103 vmcs_write32(TPR_THRESHOLD, irr); 6243 vmcs_write32(TPR_THRESHOLD, irr);
6104} 6244}
6105 6245
6246static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
6247{
6248 u32 sec_exec_control;
6249
6250 /*
 6251 * There is no point in enabling virtualize x2apic mode without
 6252 * enabling apicv.
6253 */
6254 if (!cpu_has_vmx_virtualize_x2apic_mode() ||
6255 !vmx_vm_has_apicv(vcpu->kvm))
6256 return;
6257
6258 if (!vm_need_tpr_shadow(vcpu->kvm))
6259 return;
6260
6261 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6262
6263 if (set) {
6264 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6265 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6266 } else {
6267 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6268 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6269 }
6270 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
6271
6272 vmx_set_msr_bitmap(vcpu);
6273}
6274
6275static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
6276{
6277 u16 status;
6278 u8 old;
6279
6280 if (!vmx_vm_has_apicv(kvm))
6281 return;
6282
6283 if (isr == -1)
6284 isr = 0;
6285
6286 status = vmcs_read16(GUEST_INTR_STATUS);
6287 old = status >> 8;
6288 if (isr != old) {
6289 status &= 0xff;
6290 status |= isr << 8;
6291 vmcs_write16(GUEST_INTR_STATUS, status);
6292 }
6293}
6294
6295static void vmx_set_rvi(int vector)
6296{
6297 u16 status;
6298 u8 old;
6299
6300 status = vmcs_read16(GUEST_INTR_STATUS);
6301 old = (u8)status & 0xff;
6302 if ((u8)vector != old) {
6303 status &= ~0xff;
6304 status |= (u8)vector;
6305 vmcs_write16(GUEST_INTR_STATUS, status);
6306 }
6307}
6308
6309static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6310{
6311 if (max_irr == -1)
6312 return;
6313
6314 vmx_set_rvi(max_irr);
6315}
6316
6317static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6318{
6319 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6320 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6321 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
6322 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
6323}
6324
6106static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) 6325static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
6107{ 6326{
6108 u32 exit_intr_info; 6327 u32 exit_intr_info;
@@ -6291,7 +6510,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6291 6510
6292 /* Don't enter VMX if guest state is invalid, let the exit handler 6511 /* Don't enter VMX if guest state is invalid, let the exit handler
6293 start emulation until we arrive back to a valid state */ 6512 start emulation until we arrive back to a valid state */
6294 if (vmx->emulation_required && emulate_invalid_guest_state) 6513 if (vmx->emulation_required)
6295 return; 6514 return;
6296 6515
6297 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) 6516 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -7366,6 +7585,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
7366 .enable_nmi_window = enable_nmi_window, 7585 .enable_nmi_window = enable_nmi_window,
7367 .enable_irq_window = enable_irq_window, 7586 .enable_irq_window = enable_irq_window,
7368 .update_cr8_intercept = update_cr8_intercept, 7587 .update_cr8_intercept = update_cr8_intercept,
7588 .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
7589 .vm_has_apicv = vmx_vm_has_apicv,
7590 .load_eoi_exitmap = vmx_load_eoi_exitmap,
7591 .hwapic_irr_update = vmx_hwapic_irr_update,
7592 .hwapic_isr_update = vmx_hwapic_isr_update,
7369 7593
7370 .set_tss_addr = vmx_set_tss_addr, 7594 .set_tss_addr = vmx_set_tss_addr,
7371 .get_tdp_level = get_ept_level, 7595 .get_tdp_level = get_ept_level,
@@ -7398,7 +7622,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
7398 7622
7399static int __init vmx_init(void) 7623static int __init vmx_init(void)
7400{ 7624{
7401 int r, i; 7625 int r, i, msr;
7402 7626
7403 rdmsrl_safe(MSR_EFER, &host_efer); 7627 rdmsrl_safe(MSR_EFER, &host_efer);
7404 7628
@@ -7419,11 +7643,19 @@ static int __init vmx_init(void)
7419 if (!vmx_msr_bitmap_legacy) 7643 if (!vmx_msr_bitmap_legacy)
7420 goto out1; 7644 goto out1;
7421 7645
7646 vmx_msr_bitmap_legacy_x2apic =
7647 (unsigned long *)__get_free_page(GFP_KERNEL);
7648 if (!vmx_msr_bitmap_legacy_x2apic)
7649 goto out2;
7422 7650
7423 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); 7651 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
7424 if (!vmx_msr_bitmap_longmode) 7652 if (!vmx_msr_bitmap_longmode)
7425 goto out2; 7653 goto out3;
7426 7654
7655 vmx_msr_bitmap_longmode_x2apic =
7656 (unsigned long *)__get_free_page(GFP_KERNEL);
7657 if (!vmx_msr_bitmap_longmode_x2apic)
7658 goto out4;
7427 7659
7428 /* 7660 /*
7429 * Allow direct access to the PC debug port (it is often used for I/O 7661 * Allow direct access to the PC debug port (it is often used for I/O
@@ -7455,6 +7687,28 @@ static int __init vmx_init(void)
7455 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); 7687 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
7456 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); 7688 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
7457 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); 7689 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
7690 memcpy(vmx_msr_bitmap_legacy_x2apic,
7691 vmx_msr_bitmap_legacy, PAGE_SIZE);
7692 memcpy(vmx_msr_bitmap_longmode_x2apic,
7693 vmx_msr_bitmap_longmode, PAGE_SIZE);
7694
7695 if (enable_apicv_reg_vid) {
7696 for (msr = 0x800; msr <= 0x8ff; msr++)
7697 vmx_disable_intercept_msr_read_x2apic(msr);
7698
 7699 /* According to the SDM, in x2apic mode the whole ID register is
 7700 * used, but KVM only uses the highest eight bits, so reads of it
 7701 * still need to be intercepted. */
7702 vmx_enable_intercept_msr_read_x2apic(0x802);
7703 /* TMCCT */
7704 vmx_enable_intercept_msr_read_x2apic(0x839);
7705 /* TPR */
7706 vmx_disable_intercept_msr_write_x2apic(0x808);
7707 /* EOI */
7708 vmx_disable_intercept_msr_write_x2apic(0x80b);
7709 /* SELF-IPI */
7710 vmx_disable_intercept_msr_write_x2apic(0x83f);
7711 }
7458 7712
7459 if (enable_ept) { 7713 if (enable_ept) {
7460 kvm_mmu_set_mask_ptes(0ull, 7714 kvm_mmu_set_mask_ptes(0ull,
@@ -7468,8 +7722,10 @@ static int __init vmx_init(void)
7468 7722
7469 return 0; 7723 return 0;
7470 7724
7471out3: 7725out4:
7472 free_page((unsigned long)vmx_msr_bitmap_longmode); 7726 free_page((unsigned long)vmx_msr_bitmap_longmode);
7727out3:
7728 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
7473out2: 7729out2:
7474 free_page((unsigned long)vmx_msr_bitmap_legacy); 7730 free_page((unsigned long)vmx_msr_bitmap_legacy);
7475out1: 7731out1:
@@ -7481,6 +7737,8 @@ out:
7481 7737
7482static void __exit vmx_exit(void) 7738static void __exit vmx_exit(void)
7483{ 7739{
7740 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
7741 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
7484 free_page((unsigned long)vmx_msr_bitmap_legacy); 7742 free_page((unsigned long)vmx_msr_bitmap_legacy);
7485 free_page((unsigned long)vmx_msr_bitmap_longmode); 7743 free_page((unsigned long)vmx_msr_bitmap_longmode);
7486 free_page((unsigned long)vmx_io_bitmap_b); 7744 free_page((unsigned long)vmx_io_bitmap_b);
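
The new vmx_set_rvi() and vmx_hwapic_isr_update() helpers above treat the 16-bit GUEST_INTR_STATUS field as RVI in the low byte and SVI in the high byte. A minimal standalone C sketch of that packing, illustrative only and not part of the patch (pack_guest_intr_status() and update_rvi() are hypothetical names):

#include <stdint.h>

/* Pack SVI into bits 15:8 and RVI into bits 7:0, as the VMCS field expects. */
static inline uint16_t pack_guest_intr_status(uint8_t rvi, uint8_t svi)
{
        return (uint16_t)((uint16_t)svi << 8 | rvi);
}

/* Replace only the RVI byte, leaving SVI untouched; skip the update when the
 * requested vector already matches, mirroring the check in vmx_set_rvi(). */
static inline uint16_t update_rvi(uint16_t status, uint8_t vector)
{
        if ((uint8_t)(status & 0xff) == vector)
                return status;
        return (uint16_t)((status & 0xff00) | vector);
}
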
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c243b81e3c74..f19ac0aca60d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
872 872
873 kvm_x86_ops->set_efer(vcpu, efer); 873 kvm_x86_ops->set_efer(vcpu, efer);
874 874
875 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
876
877 /* Update reserved bits */ 875 /* Update reserved bits */
878 if ((efer ^ old_efer) & EFER_NX) 876 if ((efer ^ old_efer) & EFER_NX)
879 kvm_mmu_reset_context(vcpu); 877 kvm_mmu_reset_context(vcpu);
@@ -1408,25 +1406,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1408 unsigned long flags, this_tsc_khz; 1406 unsigned long flags, this_tsc_khz;
1409 struct kvm_vcpu_arch *vcpu = &v->arch; 1407 struct kvm_vcpu_arch *vcpu = &v->arch;
1410 struct kvm_arch *ka = &v->kvm->arch; 1408 struct kvm_arch *ka = &v->kvm->arch;
1411 void *shared_kaddr;
1412 s64 kernel_ns, max_kernel_ns; 1409 s64 kernel_ns, max_kernel_ns;
1413 u64 tsc_timestamp, host_tsc; 1410 u64 tsc_timestamp, host_tsc;
1414 struct pvclock_vcpu_time_info *guest_hv_clock; 1411 struct pvclock_vcpu_time_info guest_hv_clock;
1415 u8 pvclock_flags; 1412 u8 pvclock_flags;
1416 bool use_master_clock; 1413 bool use_master_clock;
1417 1414
1418 kernel_ns = 0; 1415 kernel_ns = 0;
1419 host_tsc = 0; 1416 host_tsc = 0;
1420 1417
1421 /* Keep irq disabled to prevent changes to the clock */
1422 local_irq_save(flags);
1423 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1424 if (unlikely(this_tsc_khz == 0)) {
1425 local_irq_restore(flags);
1426 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1427 return 1;
1428 }
1429
1430 /* 1418 /*
1431 * If the host uses TSC clock, then passthrough TSC as stable 1419 * If the host uses TSC clock, then passthrough TSC as stable
1432 * to the guest. 1420 * to the guest.
@@ -1438,6 +1426,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1438 kernel_ns = ka->master_kernel_ns; 1426 kernel_ns = ka->master_kernel_ns;
1439 } 1427 }
1440 spin_unlock(&ka->pvclock_gtod_sync_lock); 1428 spin_unlock(&ka->pvclock_gtod_sync_lock);
1429
1430 /* Keep irq disabled to prevent changes to the clock */
1431 local_irq_save(flags);
1432 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1433 if (unlikely(this_tsc_khz == 0)) {
1434 local_irq_restore(flags);
1435 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1436 return 1;
1437 }
1441 if (!use_master_clock) { 1438 if (!use_master_clock) {
1442 host_tsc = native_read_tsc(); 1439 host_tsc = native_read_tsc();
1443 kernel_ns = get_kernel_ns(); 1440 kernel_ns = get_kernel_ns();
@@ -1465,7 +1462,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1465 1462
1466 local_irq_restore(flags); 1463 local_irq_restore(flags);
1467 1464
1468 if (!vcpu->time_page) 1465 if (!vcpu->pv_time_enabled)
1469 return 0; 1466 return 0;
1470 1467
1471 /* 1468 /*
@@ -1527,12 +1524,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1527 */ 1524 */
1528 vcpu->hv_clock.version += 2; 1525 vcpu->hv_clock.version += 2;
1529 1526
1530 shared_kaddr = kmap_atomic(vcpu->time_page); 1527 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
1531 1528 &guest_hv_clock, sizeof(guest_hv_clock))))
1532 guest_hv_clock = shared_kaddr + vcpu->time_offset; 1529 return 0;
1533 1530
1534 /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ 1531 /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
1535 pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED); 1532 pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
1536 1533
1537 if (vcpu->pvclock_set_guest_stopped_request) { 1534 if (vcpu->pvclock_set_guest_stopped_request) {
1538 pvclock_flags |= PVCLOCK_GUEST_STOPPED; 1535 pvclock_flags |= PVCLOCK_GUEST_STOPPED;
@@ -1545,12 +1542,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1545 1542
1546 vcpu->hv_clock.flags = pvclock_flags; 1543 vcpu->hv_clock.flags = pvclock_flags;
1547 1544
1548 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, 1545 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1549 sizeof(vcpu->hv_clock)); 1546 &vcpu->hv_clock,
1550 1547 sizeof(vcpu->hv_clock));
1551 kunmap_atomic(shared_kaddr);
1552
1553 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
1554 return 0; 1548 return 0;
1555} 1549}
1556 1550
@@ -1839,10 +1833,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1839 1833
1840static void kvmclock_reset(struct kvm_vcpu *vcpu) 1834static void kvmclock_reset(struct kvm_vcpu *vcpu)
1841{ 1835{
1842 if (vcpu->arch.time_page) { 1836 vcpu->arch.pv_time_enabled = false;
1843 kvm_release_page_dirty(vcpu->arch.time_page);
1844 vcpu->arch.time_page = NULL;
1845 }
1846} 1837}
1847 1838
1848static void accumulate_steal_time(struct kvm_vcpu *vcpu) 1839static void accumulate_steal_time(struct kvm_vcpu *vcpu)
@@ -1881,6 +1872,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1881 u64 data = msr_info->data; 1872 u64 data = msr_info->data;
1882 1873
1883 switch (msr) { 1874 switch (msr) {
1875 case MSR_AMD64_NB_CFG:
1876 case MSR_IA32_UCODE_REV:
1877 case MSR_IA32_UCODE_WRITE:
1878 case MSR_VM_HSAVE_PA:
1879 case MSR_AMD64_PATCH_LOADER:
1880 case MSR_AMD64_BU_CFG2:
1881 break;
1882
1884 case MSR_EFER: 1883 case MSR_EFER:
1885 return set_efer(vcpu, data); 1884 return set_efer(vcpu, data);
1886 case MSR_K7_HWCR: 1885 case MSR_K7_HWCR:
@@ -1900,8 +1899,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1900 return 1; 1899 return 1;
1901 } 1900 }
1902 break; 1901 break;
1903 case MSR_AMD64_NB_CFG:
1904 break;
1905 case MSR_IA32_DEBUGCTLMSR: 1902 case MSR_IA32_DEBUGCTLMSR:
1906 if (!data) { 1903 if (!data) {
1907 /* We support the non-activated case already */ 1904 /* We support the non-activated case already */
@@ -1914,11 +1911,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1914 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", 1911 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
1915 __func__, data); 1912 __func__, data);
1916 break; 1913 break;
1917 case MSR_IA32_UCODE_REV:
1918 case MSR_IA32_UCODE_WRITE:
1919 case MSR_VM_HSAVE_PA:
1920 case MSR_AMD64_PATCH_LOADER:
1921 break;
1922 case 0x200 ... 0x2ff: 1914 case 0x200 ... 0x2ff:
1923 return set_msr_mtrr(vcpu, msr, data); 1915 return set_msr_mtrr(vcpu, msr, data);
1924 case MSR_IA32_APICBASE: 1916 case MSR_IA32_APICBASE:
@@ -1948,6 +1940,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1948 break; 1940 break;
1949 case MSR_KVM_SYSTEM_TIME_NEW: 1941 case MSR_KVM_SYSTEM_TIME_NEW:
1950 case MSR_KVM_SYSTEM_TIME: { 1942 case MSR_KVM_SYSTEM_TIME: {
1943 u64 gpa_offset;
1951 kvmclock_reset(vcpu); 1944 kvmclock_reset(vcpu);
1952 1945
1953 vcpu->arch.time = data; 1946 vcpu->arch.time = data;
@@ -1957,14 +1950,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1957 if (!(data & 1)) 1950 if (!(data & 1))
1958 break; 1951 break;
1959 1952
1960 /* ...but clean it before doing the actual write */ 1953 gpa_offset = data & ~(PAGE_MASK | 1);
1961 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
1962 1954
1963 vcpu->arch.time_page = 1955 /* Check that the address is 32-byte aligned. */
1964 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); 1956 if (gpa_offset & (sizeof(struct pvclock_vcpu_time_info) - 1))
1957 break;
1965 1958
1966 if (is_error_page(vcpu->arch.time_page)) 1959 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
1967 vcpu->arch.time_page = NULL; 1960 &vcpu->arch.pv_time, data & ~1ULL))
1961 vcpu->arch.pv_time_enabled = false;
1962 else
1963 vcpu->arch.pv_time_enabled = true;
1968 1964
1969 break; 1965 break;
1970 } 1966 }
@@ -2253,6 +2249,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2253 case MSR_K8_INT_PENDING_MSG: 2249 case MSR_K8_INT_PENDING_MSG:
2254 case MSR_AMD64_NB_CFG: 2250 case MSR_AMD64_NB_CFG:
2255 case MSR_FAM10H_MMIO_CONF_BASE: 2251 case MSR_FAM10H_MMIO_CONF_BASE:
2252 case MSR_AMD64_BU_CFG2:
2256 data = 0; 2253 data = 0;
2257 break; 2254 break;
2258 case MSR_P6_PERFCTR0: 2255 case MSR_P6_PERFCTR0:
@@ -2520,7 +2517,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2520 r = KVM_MAX_VCPUS; 2517 r = KVM_MAX_VCPUS;
2521 break; 2518 break;
2522 case KVM_CAP_NR_MEMSLOTS: 2519 case KVM_CAP_NR_MEMSLOTS:
2523 r = KVM_MEMORY_SLOTS; 2520 r = KVM_USER_MEM_SLOTS;
2524 break; 2521 break;
2525 case KVM_CAP_PV_MMU: /* obsolete */ 2522 case KVM_CAP_PV_MMU: /* obsolete */
2526 r = 0; 2523 r = 0;
@@ -2967,7 +2964,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
2967 */ 2964 */
2968static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) 2965static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
2969{ 2966{
2970 if (!vcpu->arch.time_page) 2967 if (!vcpu->arch.pv_time_enabled)
2971 return -EINVAL; 2968 return -EINVAL;
2972 vcpu->arch.pvclock_set_guest_stopped_request = true; 2969 vcpu->arch.pvclock_set_guest_stopped_request = true;
2973 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 2970 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -3272,12 +3269,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3272 return -EINVAL; 3269 return -EINVAL;
3273 3270
3274 mutex_lock(&kvm->slots_lock); 3271 mutex_lock(&kvm->slots_lock);
3275 spin_lock(&kvm->mmu_lock);
3276 3272
3277 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 3273 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3278 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 3274 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3279 3275
3280 spin_unlock(&kvm->mmu_lock);
3281 mutex_unlock(&kvm->slots_lock); 3276 mutex_unlock(&kvm->slots_lock);
3282 return 0; 3277 return 0;
3283} 3278}
@@ -3437,7 +3432,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3437 mutex_lock(&kvm->slots_lock); 3432 mutex_lock(&kvm->slots_lock);
3438 3433
3439 r = -EINVAL; 3434 r = -EINVAL;
3440 if (log->slot >= KVM_MEMORY_SLOTS) 3435 if (log->slot >= KVM_USER_MEM_SLOTS)
3441 goto out; 3436 goto out;
3442 3437
3443 memslot = id_to_memslot(kvm->memslots, log->slot); 3438 memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -4493,8 +4488,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4493 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); 4488 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4494 *selector = var.selector; 4489 *selector = var.selector;
4495 4490
4496 if (var.unusable) 4491 if (var.unusable) {
4492 memset(desc, 0, sizeof(*desc));
4497 return false; 4493 return false;
4494 }
4498 4495
4499 if (var.g) 4496 if (var.g)
4500 var.limit >>= 12; 4497 var.limit >>= 12;
@@ -4755,26 +4752,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4755 return r; 4752 return r;
4756} 4753}
4757 4754
4758static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) 4755static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
4756 bool write_fault_to_shadow_pgtable)
4759{ 4757{
4760 gpa_t gpa; 4758 gpa_t gpa = cr2;
4761 pfn_t pfn; 4759 pfn_t pfn;
4762 4760
4763 if (tdp_enabled) 4761 if (!vcpu->arch.mmu.direct_map) {
4764 return false; 4762 /*
4765 4763 * Write permission should be allowed since only
4766 /* 4764 * write access needs to be emulated.
4767 * if emulation was due to access to shadowed page table 4765 */
4768 * and it failed try to unshadow page and re-enter the 4766 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4769 * guest to let CPU execute the instruction.
4770 */
4771 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4772 return true;
4773
4774 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4775 4767
4776 if (gpa == UNMAPPED_GVA) 4768 /*
4777 return true; /* let cpu generate fault */ 4769 * If the mapping is invalid in guest, let cpu retry
4770 * it to generate fault.
4771 */
4772 if (gpa == UNMAPPED_GVA)
4773 return true;
4774 }
4778 4775
4779 /* 4776 /*
4780 * Do not retry the unhandleable instruction if it faults on the 4777 * Do not retry the unhandleable instruction if it faults on the
@@ -4783,12 +4780,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4783 * instruction -> ... 4780 * instruction -> ...
4784 */ 4781 */
4785 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); 4782 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4786 if (!is_error_noslot_pfn(pfn)) { 4783
4787 kvm_release_pfn_clean(pfn); 4784 /*
 4785 * If the instruction failed on the error pfn, it cannot be fixed,
 4786 * so report the error to userspace.
4787 */
4788 if (is_error_noslot_pfn(pfn))
4789 return false;
4790
4791 kvm_release_pfn_clean(pfn);
4792
4793 /* The instructions are well-emulated on direct mmu. */
4794 if (vcpu->arch.mmu.direct_map) {
4795 unsigned int indirect_shadow_pages;
4796
4797 spin_lock(&vcpu->kvm->mmu_lock);
4798 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
4799 spin_unlock(&vcpu->kvm->mmu_lock);
4800
4801 if (indirect_shadow_pages)
4802 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4803
4788 return true; 4804 return true;
4789 } 4805 }
4790 4806
4791 return false; 4807 /*
4808 * if emulation was due to access to shadowed page table
4809 * and it failed try to unshadow page and re-enter the
4810 * guest to let CPU execute the instruction.
4811 */
4812 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4813
4814 /*
4815 * If the access faults on its page table, it can not
4816 * be fixed by unprotecting shadow page and it should
4817 * be reported to userspace.
4818 */
4819 return !write_fault_to_shadow_pgtable;
4792} 4820}
4793 4821
4794static bool retry_instruction(struct x86_emulate_ctxt *ctxt, 4822static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@@ -4830,7 +4858,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4830 if (!vcpu->arch.mmu.direct_map) 4858 if (!vcpu->arch.mmu.direct_map)
4831 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); 4859 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4832 4860
4833 kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 4861 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4834 4862
4835 return true; 4863 return true;
4836} 4864}
@@ -4847,7 +4875,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4847 int r; 4875 int r;
4848 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 4876 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4849 bool writeback = true; 4877 bool writeback = true;
4878 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
4850 4879
4880 /*
4881 * Clear write_fault_to_shadow_pgtable here to ensure it is
4882 * never reused.
4883 */
4884 vcpu->arch.write_fault_to_shadow_pgtable = false;
4851 kvm_clear_exception_queue(vcpu); 4885 kvm_clear_exception_queue(vcpu);
4852 4886
4853 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 4887 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
@@ -4866,7 +4900,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4866 if (r != EMULATION_OK) { 4900 if (r != EMULATION_OK) {
4867 if (emulation_type & EMULTYPE_TRAP_UD) 4901 if (emulation_type & EMULTYPE_TRAP_UD)
4868 return EMULATE_FAIL; 4902 return EMULATE_FAIL;
4869 if (reexecute_instruction(vcpu, cr2)) 4903 if (reexecute_instruction(vcpu, cr2,
4904 write_fault_to_spt))
4870 return EMULATE_DONE; 4905 return EMULATE_DONE;
4871 if (emulation_type & EMULTYPE_SKIP) 4906 if (emulation_type & EMULTYPE_SKIP)
4872 return EMULATE_FAIL; 4907 return EMULATE_FAIL;
@@ -4896,7 +4931,7 @@ restart:
4896 return EMULATE_DONE; 4931 return EMULATE_DONE;
4897 4932
4898 if (r == EMULATION_FAILED) { 4933 if (r == EMULATION_FAILED) {
4899 if (reexecute_instruction(vcpu, cr2)) 4934 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
4900 return EMULATE_DONE; 4935 return EMULATE_DONE;
4901 4936
4902 return handle_emulation_failure(vcpu); 4937 return handle_emulation_failure(vcpu);
@@ -5539,7 +5574,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
5539 vcpu->arch.nmi_injected = true; 5574 vcpu->arch.nmi_injected = true;
5540 kvm_x86_ops->set_nmi(vcpu); 5575 kvm_x86_ops->set_nmi(vcpu);
5541 } 5576 }
5542 } else if (kvm_cpu_has_interrupt(vcpu)) { 5577 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
5543 if (kvm_x86_ops->interrupt_allowed(vcpu)) { 5578 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5544 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), 5579 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5545 false); 5580 false);
@@ -5607,6 +5642,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
5607#endif 5642#endif
5608} 5643}
5609 5644
5645static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
5646{
5647 u64 eoi_exit_bitmap[4];
5648
5649 memset(eoi_exit_bitmap, 0, 32);
5650
5651 kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
5652 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
5653}
5654
5610static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5655static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5611{ 5656{
5612 int r; 5657 int r;
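
update_eoi_exitmap() above builds a 256-bit map, four u64 words and hence the 32-byte memset, with one bit per interrupt vector before handing it to kvm_x86_ops->load_eoi_exitmap(). A minimal sketch of how a vector maps onto its word and bit in such a bitmap, illustrative only (set_eoi_exit_bit() and test_eoi_exit_bit() are hypothetical helpers):

#include <stdbool.h>
#include <stdint.h>

#define EOI_EXIT_BITMAP_WORDS	4	/* 4 x 64 bits = 256 vectors */

/* Mark a vector as requiring an EOI-induced exit. */
static inline void set_eoi_exit_bit(uint64_t bitmap[EOI_EXIT_BITMAP_WORDS],
				    unsigned vec)
{
        bitmap[vec / 64] |= (uint64_t)1 << (vec % 64);
}

/* Query whether a vector is marked in the bitmap. */
static inline bool test_eoi_exit_bit(const uint64_t bitmap[EOI_EXIT_BITMAP_WORDS],
				     unsigned vec)
{
        return (bitmap[vec / 64] >> (vec % 64)) & 1;
}
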
@@ -5660,6 +5705,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5660 kvm_handle_pmu_event(vcpu); 5705 kvm_handle_pmu_event(vcpu);
5661 if (kvm_check_request(KVM_REQ_PMI, vcpu)) 5706 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5662 kvm_deliver_pmi(vcpu); 5707 kvm_deliver_pmi(vcpu);
5708 if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
5709 update_eoi_exitmap(vcpu);
5663 } 5710 }
5664 5711
5665 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 5712 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5668,10 +5715,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5668 /* enable NMI/IRQ window open exits if needed */ 5715 /* enable NMI/IRQ window open exits if needed */
5669 if (vcpu->arch.nmi_pending) 5716 if (vcpu->arch.nmi_pending)
5670 kvm_x86_ops->enable_nmi_window(vcpu); 5717 kvm_x86_ops->enable_nmi_window(vcpu);
5671 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) 5718 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
5672 kvm_x86_ops->enable_irq_window(vcpu); 5719 kvm_x86_ops->enable_irq_window(vcpu);
5673 5720
5674 if (kvm_lapic_enabled(vcpu)) { 5721 if (kvm_lapic_enabled(vcpu)) {
5722 /*
5723 * Update architecture specific hints for APIC
5724 * virtual interrupt delivery.
5725 */
5726 if (kvm_x86_ops->hwapic_irr_update)
5727 kvm_x86_ops->hwapic_irr_update(vcpu,
5728 kvm_lapic_find_highest_irr(vcpu));
5675 update_cr8_intercept(vcpu); 5729 update_cr8_intercept(vcpu);
5676 kvm_lapic_sync_to_vapic(vcpu); 5730 kvm_lapic_sync_to_vapic(vcpu);
5677 } 5731 }
@@ -6661,6 +6715,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6661 goto fail_free_wbinvd_dirty_mask; 6715 goto fail_free_wbinvd_dirty_mask;
6662 6716
6663 vcpu->arch.ia32_tsc_adjust_msr = 0x0; 6717 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
6718 vcpu->arch.pv_time_enabled = false;
6664 kvm_async_pf_hash_reset(vcpu); 6719 kvm_async_pf_hash_reset(vcpu);
6665 kvm_pmu_init(vcpu); 6720 kvm_pmu_init(vcpu);
6666 6721
@@ -6851,48 +6906,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
6851 struct kvm_memory_slot *memslot, 6906 struct kvm_memory_slot *memslot,
6852 struct kvm_memory_slot old, 6907 struct kvm_memory_slot old,
6853 struct kvm_userspace_memory_region *mem, 6908 struct kvm_userspace_memory_region *mem,
6854 int user_alloc) 6909 bool user_alloc)
6855{ 6910{
6856 int npages = memslot->npages; 6911 int npages = memslot->npages;
6857 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
6858
6859 /* Prevent internal slot pages from being moved by fork()/COW. */
6860 if (memslot->id >= KVM_MEMORY_SLOTS)
6861 map_flags = MAP_SHARED | MAP_ANONYMOUS;
6862 6912
6863 /*To keep backward compatibility with older userspace, 6913 /*
6864 *x86 needs to handle !user_alloc case. 6914 * Only private memory slots need to be mapped here since
6915 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
6865 */ 6916 */
6866 if (!user_alloc) { 6917 if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
6867 if (npages && !old.npages) { 6918 unsigned long userspace_addr;
6868 unsigned long userspace_addr;
6869 6919
6870 userspace_addr = vm_mmap(NULL, 0, 6920 /*
6871 npages * PAGE_SIZE, 6921 * MAP_SHARED to prevent internal slot pages from being moved
6872 PROT_READ | PROT_WRITE, 6922 * by fork()/COW.
6873 map_flags, 6923 */
6874 0); 6924 userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
6925 PROT_READ | PROT_WRITE,
6926 MAP_SHARED | MAP_ANONYMOUS, 0);
6875 6927
6876 if (IS_ERR((void *)userspace_addr)) 6928 if (IS_ERR((void *)userspace_addr))
6877 return PTR_ERR((void *)userspace_addr); 6929 return PTR_ERR((void *)userspace_addr);
6878 6930
6879 memslot->userspace_addr = userspace_addr; 6931 memslot->userspace_addr = userspace_addr;
6880 }
6881 } 6932 }
6882 6933
6883
6884 return 0; 6934 return 0;
6885} 6935}
6886 6936
6887void kvm_arch_commit_memory_region(struct kvm *kvm, 6937void kvm_arch_commit_memory_region(struct kvm *kvm,
6888 struct kvm_userspace_memory_region *mem, 6938 struct kvm_userspace_memory_region *mem,
6889 struct kvm_memory_slot old, 6939 struct kvm_memory_slot old,
6890 int user_alloc) 6940 bool user_alloc)
6891{ 6941{
6892 6942
6893 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; 6943 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6894 6944
6895 if (!user_alloc && !old.user_alloc && old.npages && !npages) { 6945 if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
6896 int ret; 6946 int ret;
6897 6947
6898 ret = vm_munmap(old.userspace_addr, 6948 ret = vm_munmap(old.userspace_addr,
@@ -6906,11 +6956,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
6906 if (!kvm->arch.n_requested_mmu_pages) 6956 if (!kvm->arch.n_requested_mmu_pages)
6907 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 6957 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
6908 6958
6909 spin_lock(&kvm->mmu_lock);
6910 if (nr_mmu_pages) 6959 if (nr_mmu_pages)
6911 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); 6960 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6912 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 6961 /*
6913 spin_unlock(&kvm->mmu_lock); 6962 * Write protect all pages for dirty logging.
6963 * Existing largepage mappings are destroyed here and new ones will
6964 * not be created until the end of the logging.
6965 */
6966 if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
6967 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6914 /* 6968 /*
6915 * If memory slot is created, or moved, we need to clear all 6969 * If memory slot is created, or moved, we need to clear all
6916 * mmio sptes. 6970 * mmio sptes.
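
The MSR_KVM_SYSTEM_TIME handling above now rejects a pvclock area whose offset within its page is not aligned to the size of struct pvclock_vcpu_time_info (32 bytes). A minimal sketch of that check in isolation, illustrative only (pvclock_offset_ok() is a hypothetical name and GUEST_PAGE_MASK assumes 4 KiB pages):

#include <stdbool.h>
#include <stdint.h>

#define GUEST_PAGE_MASK		(~0xfffULL)	/* assumes 4 KiB guest pages */
#define PVCLOCK_INFO_SIZE	32ULL		/* sizeof(struct pvclock_vcpu_time_info) */

static bool pvclock_offset_ok(uint64_t msr_data)
{
        /* Bit 0 is the enable bit; the remaining low bits are the offset of
         * the time info structure within its page. */
        uint64_t gpa_offset = msr_data & ~(GUEST_PAGE_MASK | 1);

        /* The structure size is a power of two, so a mask test checks the
         * 32-byte alignment. */
        return (gpa_offset & (PVCLOCK_INFO_SIZE - 1)) == 0;
}
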
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 7872a3330fb5..29043d2048a0 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -2,6 +2,7 @@ config LGUEST_GUEST
2 bool "Lguest guest support" 2 bool "Lguest guest support"
3 select PARAVIRT 3 select PARAVIRT
4 depends on X86_32 4 depends on X86_32
5 select TTY
5 select VIRTUALIZATION 6 select VIRTUALIZATION
6 select VIRTIO 7 select VIRTIO
7 select VIRTIO_CONSOLE 8 select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index df4176cdbb32..1cbd89ca5569 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -552,7 +552,8 @@ static void lguest_write_cr3(unsigned long cr3)
552 current_cr3 = cr3; 552 current_cr3 = cr3;
553 553
554 /* These two page tables are simple, linear, and used during boot */ 554 /* These two page tables are simple, linear, and used during boot */
555 if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) 555 if (cr3 != __pa_symbol(swapper_pg_dir) &&
556 cr3 != __pa_symbol(initial_page_table))
556 cr3_changed = true; 557 cr3_changed = true;
557} 558}
558 559
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 156b9c804670..a4512359656a 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -15,11 +15,10 @@
15 * __get_user_X 15 * __get_user_X
16 * 16 *
17 * Inputs: %[r|e]ax contains the address. 17 * Inputs: %[r|e]ax contains the address.
18 * The register is modified, but all changes are undone
19 * before returning because the C code doesn't know about it.
20 * 18 *
21 * Outputs: %[r|e]ax is error code (0 or -EFAULT) 19 * Outputs: %[r|e]ax is error code (0 or -EFAULT)
22 * %[r|e]dx contains zero-extended value 20 * %[r|e]dx contains zero-extended value
21 * %ecx contains the high half for 32-bit __get_user_8
23 * 22 *
24 * 23 *
25 * These functions should not modify any other registers, 24 * These functions should not modify any other registers,
@@ -42,7 +41,7 @@ ENTRY(__get_user_1)
42 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 41 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
43 jae bad_get_user 42 jae bad_get_user
44 ASM_STAC 43 ASM_STAC
451: movzb (%_ASM_AX),%edx 441: movzbl (%_ASM_AX),%edx
46 xor %eax,%eax 45 xor %eax,%eax
47 ASM_CLAC 46 ASM_CLAC
48 ret 47 ret
@@ -72,29 +71,42 @@ ENTRY(__get_user_4)
72 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 71 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
73 jae bad_get_user 72 jae bad_get_user
74 ASM_STAC 73 ASM_STAC
753: mov -3(%_ASM_AX),%edx 743: movl -3(%_ASM_AX),%edx
76 xor %eax,%eax 75 xor %eax,%eax
77 ASM_CLAC 76 ASM_CLAC
78 ret 77 ret
79 CFI_ENDPROC 78 CFI_ENDPROC
80ENDPROC(__get_user_4) 79ENDPROC(__get_user_4)
81 80
82#ifdef CONFIG_X86_64
83ENTRY(__get_user_8) 81ENTRY(__get_user_8)
84 CFI_STARTPROC 82 CFI_STARTPROC
83#ifdef CONFIG_X86_64
85 add $7,%_ASM_AX 84 add $7,%_ASM_AX
86 jc bad_get_user 85 jc bad_get_user
87 GET_THREAD_INFO(%_ASM_DX) 86 GET_THREAD_INFO(%_ASM_DX)
88 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 87 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
89 jae bad_get_user 88 jae bad_get_user
90 ASM_STAC 89 ASM_STAC
914: movq -7(%_ASM_AX),%_ASM_DX 904: movq -7(%_ASM_AX),%rdx
92 xor %eax,%eax 91 xor %eax,%eax
93 ASM_CLAC 92 ASM_CLAC
94 ret 93 ret
94#else
95 add $7,%_ASM_AX
96 jc bad_get_user_8
97 GET_THREAD_INFO(%_ASM_DX)
98 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
99 jae bad_get_user_8
100 ASM_STAC
1014: movl -7(%_ASM_AX),%edx
1025: movl -3(%_ASM_AX),%ecx
103 xor %eax,%eax
104 ASM_CLAC
105 ret
106#endif
95 CFI_ENDPROC 107 CFI_ENDPROC
96ENDPROC(__get_user_8) 108ENDPROC(__get_user_8)
97#endif 109
98 110
99bad_get_user: 111bad_get_user:
100 CFI_STARTPROC 112 CFI_STARTPROC
@@ -105,9 +117,24 @@ bad_get_user:
105 CFI_ENDPROC 117 CFI_ENDPROC
106END(bad_get_user) 118END(bad_get_user)
107 119
120#ifdef CONFIG_X86_32
121bad_get_user_8:
122 CFI_STARTPROC
123 xor %edx,%edx
124 xor %ecx,%ecx
125 mov $(-EFAULT),%_ASM_AX
126 ASM_CLAC
127 ret
128 CFI_ENDPROC
129END(bad_get_user_8)
130#endif
131
108 _ASM_EXTABLE(1b,bad_get_user) 132 _ASM_EXTABLE(1b,bad_get_user)
109 _ASM_EXTABLE(2b,bad_get_user) 133 _ASM_EXTABLE(2b,bad_get_user)
110 _ASM_EXTABLE(3b,bad_get_user) 134 _ASM_EXTABLE(3b,bad_get_user)
111#ifdef CONFIG_X86_64 135#ifdef CONFIG_X86_64
112 _ASM_EXTABLE(4b,bad_get_user) 136 _ASM_EXTABLE(4b,bad_get_user)
137#else
138 _ASM_EXTABLE(4b,bad_get_user_8)
139 _ASM_EXTABLE(5b,bad_get_user_8)
113#endif 140#endif
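
On 32-bit kernels the new __get_user_8 path above returns the low half of the value in %edx and the high half in %ecx. A minimal C sketch of how a caller would reassemble the 64-bit result from that register pair, illustrative only (reassemble_u64() is a hypothetical helper):

#include <stdint.h>

/* Combine the two 32-bit halves returned in %edx (low) and %ecx (high). */
static inline uint64_t reassemble_u64(uint32_t lo_edx, uint32_t hi_ecx)
{
        return ((uint64_t)hi_ecx << 32) | lo_edx;
}
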
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 05928aae911e..906fea315791 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -74,10 +74,10 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
74 char c; 74 char c;
75 unsigned zero_len; 75 unsigned zero_len;
76 76
77 for (; len; --len) { 77 for (; len; --len, to++) {
78 if (__get_user_nocheck(c, from++, sizeof(char))) 78 if (__get_user_nocheck(c, from++, sizeof(char)))
79 break; 79 break;
80 if (__put_user_nocheck(c, to++, sizeof(char))) 80 if (__put_user_nocheck(c, to, sizeof(char)))
81 break; 81 break;
82 } 82 }
83 83
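
The copy_user_handle_tail() hunk above moves the destination increment into the for-loop step, so 'to' only advances once a byte has actually been stored and still points at the first unwritten byte when a store faults. A plain-C sketch of the corrected loop shape, illustrative only (get_byte() and put_byte() are stand-ins for __get_user_nocheck()/__put_user_nocheck() and always succeed here):

#include <stdbool.h>

/* Stand-ins for the fallible user accessors; they never fail in this sketch. */
static bool get_byte(char *dst, const char *src) { *dst = *src; return false; }
static bool put_byte(char c, char *dst)          { *dst = c;    return false; }

static unsigned copy_tail(char *to, const char *from, unsigned len)
{
        char c;

        for (; len; --len, to++) {	/* 'to' advances only after a full iteration */
                if (get_byte(&c, from++))
                        break;
                if (put_byte(c, to))
                        break;
        }
        return len;			/* number of bytes left uncopied */
}
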
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 027088f2f7dd..2b97525246d4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -748,13 +748,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
748 return; 748 return;
749 } 749 }
750#endif 750#endif
751 /* Kernel addresses are always protection faults: */
752 if (address >= TASK_SIZE)
753 error_code |= PF_PROT;
751 754
752 if (unlikely(show_unhandled_signals)) 755 if (likely(show_unhandled_signals))
753 show_signal_msg(regs, error_code, address, tsk); 756 show_signal_msg(regs, error_code, address, tsk);
754 757
755 /* Kernel addresses are always protection faults: */
756 tsk->thread.cr2 = address; 758 tsk->thread.cr2 = address;
757 tsk->thread.error_code = error_code | (address >= TASK_SIZE); 759 tsk->thread.error_code = error_code;
758 tsk->thread.trap_nr = X86_TRAP_PF; 760 tsk->thread.trap_nr = X86_TRAP_PF;
759 761
760 force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); 762 force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
@@ -937,14 +939,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
937 if (pmd_large(*pmd)) 939 if (pmd_large(*pmd))
938 return spurious_fault_check(error_code, (pte_t *) pmd); 940 return spurious_fault_check(error_code, (pte_t *) pmd);
939 941
940 /*
941 * Note: don't use pte_present() here, since it returns true
942 * if the _PAGE_PROTNONE bit is set. However, this aliases the
943 * _PAGE_GLOBAL bit, which for kernel pages give false positives
944 * when CONFIG_DEBUG_PAGEALLOC is used.
945 */
946 pte = pte_offset_kernel(pmd, address); 942 pte = pte_offset_kernel(pmd, address);
947 if (!(pte_flags(*pte) & _PAGE_PRESENT)) 943 if (!pte_present(*pte))
948 return 0; 944 return 0;
949 945
950 ret = spurious_fault_check(error_code, pte); 946 ret = spurious_fault_check(error_code, pte);
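
The fault-path change above folds PF_PROT into the error code up front for kernel addresses, so the value printed by show_signal_msg() and the one saved in thread.error_code agree. A minimal sketch of that fixup, illustrative only (the TASK_SIZE value is an assumed 32-bit 3G/1G split):

#define PF_PROT		(1UL << 0)	/* fault was a protection violation */
#define TASK_SIZE	0xC0000000UL	/* assumed 32-bit user/kernel split */

/* Kernel addresses are always protection faults, so fold PF_PROT in before
 * the error code is reported or saved. */
static unsigned long fixup_error_code(unsigned long error_code,
				      unsigned long address)
{
        if (address >= TASK_SIZE)
                error_code |= PF_PROT;
        return error_code;
}
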
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d7aea41563b3..59b7fc453277 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -16,87 +16,134 @@
16#include <asm/tlb.h> 16#include <asm/tlb.h>
17#include <asm/proto.h> 17#include <asm/proto.h>
18#include <asm/dma.h> /* for MAX_DMA_PFN */ 18#include <asm/dma.h> /* for MAX_DMA_PFN */
19#include <asm/microcode.h>
19 20
20unsigned long __initdata pgt_buf_start; 21#include "mm_internal.h"
21unsigned long __meminitdata pgt_buf_end;
22unsigned long __meminitdata pgt_buf_top;
23 22
24int after_bootmem; 23static unsigned long __initdata pgt_buf_start;
24static unsigned long __initdata pgt_buf_end;
25static unsigned long __initdata pgt_buf_top;
25 26
26int direct_gbpages 27static unsigned long min_pfn_mapped;
27#ifdef CONFIG_DIRECT_GBPAGES
28 = 1
29#endif
30;
31 28
32struct map_range { 29static bool __initdata can_use_brk_pgt = true;
33 unsigned long start;
34 unsigned long end;
35 unsigned page_size_mask;
36};
37 30
38/* 31/*
39 * First calculate space needed for kernel direct mapping page tables to cover 32 * Pages returned are already directly mapped.
40 * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB 33 *
41 * pages. Then find enough contiguous space for those page tables. 34 * Changing that is likely to break Xen, see commit:
35 *
36 * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve
37 *
38 * for detailed information.
42 */ 39 */
43static void __init find_early_table_space(struct map_range *mr, int nr_range) 40__ref void *alloc_low_pages(unsigned int num)
44{ 41{
42 unsigned long pfn;
45 int i; 43 int i;
46 unsigned long puds = 0, pmds = 0, ptes = 0, tables;
47 unsigned long start = 0, good_end;
48 phys_addr_t base;
49 44
50 for (i = 0; i < nr_range; i++) { 45 if (after_bootmem) {
51 unsigned long range, extra; 46 unsigned int order;
52 47
53 range = mr[i].end - mr[i].start; 48 order = get_order((unsigned long)num << PAGE_SHIFT);
54 puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; 49 return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK |
50 __GFP_ZERO, order);
51 }
55 52
56 if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { 53 if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
57 extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); 54 unsigned long ret;
58 pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; 55 if (min_pfn_mapped >= max_pfn_mapped)
59 } else { 56 panic("alloc_low_page: ran out of memory");
60 pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; 57 ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
61 } 58 max_pfn_mapped << PAGE_SHIFT,
 59 PAGE_SIZE * num, PAGE_SIZE);
60 if (!ret)
61 panic("alloc_low_page: can not alloc memory");
62 memblock_reserve(ret, PAGE_SIZE * num);
63 pfn = ret >> PAGE_SHIFT;
64 } else {
65 pfn = pgt_buf_end;
66 pgt_buf_end += num;
67 printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
68 pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
69 }
62 70
63 if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { 71 for (i = 0; i < num; i++) {
64 extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); 72 void *adr;
65#ifdef CONFIG_X86_32 73
66 extra += PMD_SIZE; 74 adr = __va((pfn + i) << PAGE_SHIFT);
67#endif 75 clear_page(adr);
68 ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
69 } else {
70 ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT;
71 }
72 } 76 }
73 77
74 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); 78 return __va(pfn << PAGE_SHIFT);
75 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); 79}
76 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
77 80
78#ifdef CONFIG_X86_32 81/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */
79 /* for fixmap */ 82#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE)
80 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); 83RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
81#endif 84void __init early_alloc_pgt_buf(void)
82 good_end = max_pfn_mapped << PAGE_SHIFT; 85{
86 unsigned long tables = INIT_PGT_BUF_SIZE;
87 phys_addr_t base;
83 88
84 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); 89 base = __pa(extend_brk(tables, PAGE_SIZE));
85 if (!base)
86 panic("Cannot find space for the kernel page tables");
87 90
88 pgt_buf_start = base >> PAGE_SHIFT; 91 pgt_buf_start = base >> PAGE_SHIFT;
89 pgt_buf_end = pgt_buf_start; 92 pgt_buf_end = pgt_buf_start;
90 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); 93 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
94}
91 95
92 printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", 96int after_bootmem;
93 mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, 97
94 (pgt_buf_top << PAGE_SHIFT) - 1); 98int direct_gbpages
99#ifdef CONFIG_DIRECT_GBPAGES
100 = 1
101#endif
102;
103
104static void __init init_gbpages(void)
105{
106#ifdef CONFIG_X86_64
107 if (direct_gbpages && cpu_has_gbpages)
108 printk(KERN_INFO "Using GB pages for direct mapping\n");
109 else
110 direct_gbpages = 0;
111#endif
95} 112}
96 113
97void __init native_pagetable_reserve(u64 start, u64 end) 114struct map_range {
115 unsigned long start;
116 unsigned long end;
117 unsigned page_size_mask;
118};
119
120static int page_size_mask;
121
122static void __init probe_page_size_mask(void)
98{ 123{
99 memblock_reserve(start, end - start); 124 init_gbpages();
125
126#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
127 /*
128 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
129 * This will simplify cpa(), which otherwise needs to support splitting
130 * large pages into small in interrupt context, etc.
131 */
132 if (direct_gbpages)
133 page_size_mask |= 1 << PG_LEVEL_1G;
134 if (cpu_has_pse)
135 page_size_mask |= 1 << PG_LEVEL_2M;
136#endif
137
138 /* Enable PSE if available */
139 if (cpu_has_pse)
140 set_in_cr4(X86_CR4_PSE);
141
142 /* Enable PGE if available */
143 if (cpu_has_pge) {
144 set_in_cr4(X86_CR4_PGE);
145 __supported_pte_mask |= _PAGE_GLOBAL;
146 }
100} 147}
101 148
102#ifdef CONFIG_X86_32 149#ifdef CONFIG_X86_32
@@ -122,58 +169,51 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
122} 169}
123 170
124/* 171/*
125 * Setup the direct mapping of the physical memory at PAGE_OFFSET. 172 * adjust the page_size_mask for a small range so it can use a
126 * This runs before bootmem is initialized and gets pages directly from 173 * big page size instead of a small one if the nearby ranges are RAM too.
127 * the physical memory. To access them they are temporarily mapped.
128 */ 174 */
129unsigned long __init_refok init_memory_mapping(unsigned long start, 175static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
130 unsigned long end) 176 int nr_range)
131{ 177{
132 unsigned long page_size_mask = 0; 178 int i;
133 unsigned long start_pfn, end_pfn;
134 unsigned long ret = 0;
135 unsigned long pos;
136
137 struct map_range mr[NR_RANGE_MR];
138 int nr_range, i;
139 int use_pse, use_gbpages;
140 179
141 printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n", 180 for (i = 0; i < nr_range; i++) {
142 start, end - 1); 181 if ((page_size_mask & (1<<PG_LEVEL_2M)) &&
182 !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) {
183 unsigned long start = round_down(mr[i].start, PMD_SIZE);
184 unsigned long end = round_up(mr[i].end, PMD_SIZE);
143 185
144#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) 186#ifdef CONFIG_X86_32
145 /* 187 if ((end >> PAGE_SHIFT) > max_low_pfn)
146 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. 188 continue;
147 * This will simplify cpa(), which otherwise needs to support splitting
148 * large pages into small in interrupt context, etc.
149 */
150 use_pse = use_gbpages = 0;
151#else
152 use_pse = cpu_has_pse;
153 use_gbpages = direct_gbpages;
154#endif 189#endif
155 190
156 /* Enable PSE if available */ 191 if (memblock_is_region_memory(start, end - start))
157 if (cpu_has_pse) 192 mr[i].page_size_mask |= 1<<PG_LEVEL_2M;
158 set_in_cr4(X86_CR4_PSE); 193 }
194 if ((page_size_mask & (1<<PG_LEVEL_1G)) &&
195 !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) {
196 unsigned long start = round_down(mr[i].start, PUD_SIZE);
197 unsigned long end = round_up(mr[i].end, PUD_SIZE);
159 198
160 /* Enable PGE if available */ 199 if (memblock_is_region_memory(start, end - start))
161 if (cpu_has_pge) { 200 mr[i].page_size_mask |= 1<<PG_LEVEL_1G;
162 set_in_cr4(X86_CR4_PGE); 201 }
163 __supported_pte_mask |= _PAGE_GLOBAL;
164 } 202 }
203}
165 204
166 if (use_gbpages) 205static int __meminit split_mem_range(struct map_range *mr, int nr_range,
167 page_size_mask |= 1 << PG_LEVEL_1G; 206 unsigned long start,
168 if (use_pse) 207 unsigned long end)
169 page_size_mask |= 1 << PG_LEVEL_2M; 208{
209 unsigned long start_pfn, end_pfn, limit_pfn;
210 unsigned long pfn;
211 int i;
170 212
171 memset(mr, 0, sizeof(mr)); 213 limit_pfn = PFN_DOWN(end);
172 nr_range = 0;
173 214
174 /* head if not big page alignment ? */ 215 /* head if not big page alignment ? */
175 start_pfn = start >> PAGE_SHIFT; 216 pfn = start_pfn = PFN_DOWN(start);
176 pos = start_pfn << PAGE_SHIFT;
177#ifdef CONFIG_X86_32 217#ifdef CONFIG_X86_32
178 /* 218 /*
179 * Don't use a large page for the first 2/4MB of memory 219 * Don't use a large page for the first 2/4MB of memory
@@ -181,66 +221,60 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
181 * and overlapping MTRRs into large pages can cause 221 * and overlapping MTRRs into large pages can cause
182 * slowdowns. 222 * slowdowns.
183 */ 223 */
184 if (pos == 0) 224 if (pfn == 0)
185 end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); 225 end_pfn = PFN_DOWN(PMD_SIZE);
186 else 226 else
187 end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) 227 end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
188 << (PMD_SHIFT - PAGE_SHIFT);
189#else /* CONFIG_X86_64 */ 228#else /* CONFIG_X86_64 */
190 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) 229 end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
191 << (PMD_SHIFT - PAGE_SHIFT);
192#endif 230#endif
193 if (end_pfn > (end >> PAGE_SHIFT)) 231 if (end_pfn > limit_pfn)
194 end_pfn = end >> PAGE_SHIFT; 232 end_pfn = limit_pfn;
195 if (start_pfn < end_pfn) { 233 if (start_pfn < end_pfn) {
196 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 234 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
197 pos = end_pfn << PAGE_SHIFT; 235 pfn = end_pfn;
198 } 236 }
199 237
200 /* big page (2M) range */ 238 /* big page (2M) range */
201 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) 239 start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
202 << (PMD_SHIFT - PAGE_SHIFT);
203#ifdef CONFIG_X86_32 240#ifdef CONFIG_X86_32
204 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); 241 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
205#else /* CONFIG_X86_64 */ 242#else /* CONFIG_X86_64 */
206 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) 243 end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
207 << (PUD_SHIFT - PAGE_SHIFT); 244 if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE)))
208 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) 245 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
209 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
210#endif 246#endif
211 247
212 if (start_pfn < end_pfn) { 248 if (start_pfn < end_pfn) {
213 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 249 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
214 page_size_mask & (1<<PG_LEVEL_2M)); 250 page_size_mask & (1<<PG_LEVEL_2M));
215 pos = end_pfn << PAGE_SHIFT; 251 pfn = end_pfn;
216 } 252 }
217 253
218#ifdef CONFIG_X86_64 254#ifdef CONFIG_X86_64
219 /* big page (1G) range */ 255 /* big page (1G) range */
220 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) 256 start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
221 << (PUD_SHIFT - PAGE_SHIFT); 257 end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE));
222 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
223 if (start_pfn < end_pfn) { 258 if (start_pfn < end_pfn) {
224 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 259 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
225 page_size_mask & 260 page_size_mask &
226 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); 261 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
227 pos = end_pfn << PAGE_SHIFT; 262 pfn = end_pfn;
228 } 263 }
229 264
230 /* tail is not big page (1G) alignment */ 265 /* tail is not big page (1G) alignment */
231 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) 266 start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
232 << (PMD_SHIFT - PAGE_SHIFT); 267 end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
233 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
234 if (start_pfn < end_pfn) { 268 if (start_pfn < end_pfn) {
235 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 269 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
236 page_size_mask & (1<<PG_LEVEL_2M)); 270 page_size_mask & (1<<PG_LEVEL_2M));
237 pos = end_pfn << PAGE_SHIFT; 271 pfn = end_pfn;
238 } 272 }
239#endif 273#endif
240 274
241 /* tail is not big page (2M) alignment */ 275 /* tail is not big page (2M) alignment */
242 start_pfn = pos>>PAGE_SHIFT; 276 start_pfn = pfn;
243 end_pfn = end>>PAGE_SHIFT; 277 end_pfn = limit_pfn;
244 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 278 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
245 279
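To make the round_up()/round_down() arithmetic above easier to follow, here is a minimal standalone C sketch, not part of this patch; the 4k-page/2M-PMD constants and the sample addresses are assumptions for illustration. It splits a PFN range into a 4k head up to the first 2M boundary, a 2M-mappable middle, and a 4k tail, which is the shape split_mem_range() produces (on 64-bit the middle is split once more at the 1G PUD boundary):

#include <stdio.h>

/* Illustrative constants only; assume 4k pages and 2M PMDs as on x86. */
#define PAGE_SHIFT      12
#define PMD_SIZE        (1UL << 21)
#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)
#define round_up(x, y)   ((((x) + (y) - 1) / (y)) * (y))
#define round_down(x, y) (((x) / (y)) * (y))

int main(void)
{
        unsigned long pfn       = PFN_DOWN(0x100000UL);   /* start at 1 MiB */
        unsigned long limit_pfn = PFN_DOWN(0x7fe01000UL); /* end, not 2M aligned */
        unsigned long head_end, mid_end;

        /* 4k head: small pages up to the first 2M boundary */
        head_end = round_up(pfn, PFN_DOWN(PMD_SIZE));
        printf("4k head: %#lx - %#lx\n", pfn, head_end);
        pfn = head_end;

        /* 2M body: everything that fits between 2M boundaries */
        mid_end = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
        printf("2M body: %#lx - %#lx\n", pfn, mid_end);
        pfn = mid_end;

        /* 4k tail: whatever is left below the unaligned limit */
        printf("4k tail: %#lx - %#lx\n", pfn, limit_pfn);
        return 0;
}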
246 /* try to merge same page size and continuous */ 280 /* try to merge same page size and continuous */
@@ -257,59 +291,168 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
257 nr_range--; 291 nr_range--;
258 } 292 }
259 293
294 if (!after_bootmem)
295 adjust_range_page_size_mask(mr, nr_range);
296
260 for (i = 0; i < nr_range; i++) 297 for (i = 0; i < nr_range; i++)
261 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", 298 printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
262 mr[i].start, mr[i].end - 1, 299 mr[i].start, mr[i].end - 1,
263 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( 300 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
264 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); 301 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
265 302
266 /* 303 return nr_range;
267 * Find space for the kernel direct mapping tables. 304}
268 * 305
269 * Later we should allocate these tables in the local node of the 306struct range pfn_mapped[E820_X_MAX];
270 * memory mapped. Unfortunately this is done currently before the 307int nr_pfn_mapped;
271 * nodes are discovered. 308
272 */ 309static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
273 if (!after_bootmem) 310{
274 find_early_table_space(mr, nr_range); 311 nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
312 nr_pfn_mapped, start_pfn, end_pfn);
313 nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX);
314
315 max_pfn_mapped = max(max_pfn_mapped, end_pfn);
316
317 if (start_pfn < (1UL<<(32-PAGE_SHIFT)))
318 max_low_pfn_mapped = max(max_low_pfn_mapped,
319 min(end_pfn, 1UL<<(32-PAGE_SHIFT)));
320}
321
322bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
323{
324 int i;
325
326 for (i = 0; i < nr_pfn_mapped; i++)
327 if ((start_pfn >= pfn_mapped[i].start) &&
328 (end_pfn <= pfn_mapped[i].end))
329 return true;
330
331 return false;
332}
333
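The pfn_mapped[]/nr_pfn_mapped pair introduced here records which PFN ranges the direct mapping already covers, so later callers can simply ask pfn_range_is_mapped(). A toy userspace rendition of that bookkeeping follows; the merge logic is a deliberate simplification (one in-place merge pass on a fixed 16-entry array) rather than the add_range_with_merge()/clean_sort_range() helpers used above:

#include <stdio.h>
#include <stdbool.h>

struct range { unsigned long start, end; };

static struct range pfn_mapped[16];
static int nr_pfn_mapped;

/* Record [start, end), merging with a range that overlaps or touches it. */
static void add_pfn_range_mapped(unsigned long start, unsigned long end)
{
        int i;

        for (i = 0; i < nr_pfn_mapped; i++) {
                if (start <= pfn_mapped[i].end && end >= pfn_mapped[i].start) {
                        if (start < pfn_mapped[i].start)
                                pfn_mapped[i].start = start;
                        if (end > pfn_mapped[i].end)
                                pfn_mapped[i].end = end;
                        return;
                }
        }
        pfn_mapped[nr_pfn_mapped].start = start;
        pfn_mapped[nr_pfn_mapped].end = end;
        nr_pfn_mapped++;
}

/* True only if the whole query range lies inside one recorded range. */
static bool pfn_range_is_mapped(unsigned long start, unsigned long end)
{
        int i;

        for (i = 0; i < nr_pfn_mapped; i++)
                if (start >= pfn_mapped[i].start && end <= pfn_mapped[i].end)
                        return true;
        return false;
}

int main(void)
{
        add_pfn_range_mapped(0x000, 0x100);     /* e.g. the ISA range */
        add_pfn_range_mapped(0x100, 0x800);     /* touches, so it merges */
        printf("[0x80,0x200) mapped? %d\n", pfn_range_is_mapped(0x80, 0x200));
        printf("[0x700,0x900) mapped? %d\n", pfn_range_is_mapped(0x700, 0x900));
        return 0;
}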
334/*
 335 * Set up the direct mapping of the physical memory at PAGE_OFFSET.
336 * This runs before bootmem is initialized and gets pages directly from
337 * the physical memory. To access them they are temporarily mapped.
338 */
339unsigned long __init_refok init_memory_mapping(unsigned long start,
340 unsigned long end)
341{
342 struct map_range mr[NR_RANGE_MR];
343 unsigned long ret = 0;
344 int nr_range, i;
345
346 pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n",
347 start, end - 1);
348
349 memset(mr, 0, sizeof(mr));
350 nr_range = split_mem_range(mr, 0, start, end);
275 351
276 for (i = 0; i < nr_range; i++) 352 for (i = 0; i < nr_range; i++)
277 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, 353 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
278 mr[i].page_size_mask); 354 mr[i].page_size_mask);
279 355
280#ifdef CONFIG_X86_32 356 add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
281 early_ioremap_page_table_range_init();
282 357
283 load_cr3(swapper_pg_dir); 358 return ret >> PAGE_SHIFT;
284#endif 359}
285 360
286 __flush_tlb_all(); 361/*
 362 * The range may have holes in the middle or at the ends; only the RAM parts will be mapped.
363 */
364static unsigned long __init init_range_memory_mapping(
365 unsigned long r_start,
366 unsigned long r_end)
367{
368 unsigned long start_pfn, end_pfn;
369 unsigned long mapped_ram_size = 0;
370 int i;
287 371
288 /* 372 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
289 * Reserve the kernel pagetable pages we used (pgt_buf_start - 373 u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end);
290 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) 374 u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end);
291 * so that they can be reused for other purposes. 375 if (start >= end)
292 * 376 continue;
293 * On native it just means calling memblock_reserve, on Xen it also
294 * means marking RW the pagetable pages that we allocated before
295 * but that haven't been used.
296 *
297 * In fact on xen we mark RO the whole range pgt_buf_start -
298 * pgt_buf_top, because we have to make sure that when
299 * init_memory_mapping reaches the pagetable pages area, it maps
300 * RO all the pagetable pages, including the ones that are beyond
301 * pgt_buf_end at that time.
302 */
303 if (!after_bootmem && pgt_buf_end > pgt_buf_start)
304 x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
305 PFN_PHYS(pgt_buf_end));
306 377
307 if (!after_bootmem) 378 /*
308 early_memtest(start, end); 379 * If the range overlaps the brk page-table area, we need to
 380 * allocate the pgt buffer from memblock instead.
381 */
382 can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
383 min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
384 init_memory_mapping(start, end);
385 mapped_ram_size += end - start;
386 can_use_brk_pgt = true;
387 }
309 388
310 return ret >> PAGE_SHIFT; 389 return mapped_ram_size;
311} 390}
312 391
392/* (PUD_SHIFT-PMD_SHIFT)/2 */
393#define STEP_SIZE_SHIFT 5
394void __init init_mem_mapping(void)
395{
396 unsigned long end, real_end, start, last_start;
397 unsigned long step_size;
398 unsigned long addr;
399 unsigned long mapped_ram_size = 0;
400 unsigned long new_mapped_ram_size;
401
402 probe_page_size_mask();
403
404#ifdef CONFIG_X86_64
405 end = max_pfn << PAGE_SHIFT;
406#else
407 end = max_low_pfn << PAGE_SHIFT;
408#endif
409
410 /* the ISA range is always mapped regardless of memory holes */
411 init_memory_mapping(0, ISA_END_ADDRESS);
412
 413 /* Xen has a big reserved range near the end of RAM; skip it at first. */
414 addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE);
415 real_end = addr + PMD_SIZE;
416
 417 /* step_size needs to be small so the pgt_buf from BRK can cover it */
418 step_size = PMD_SIZE;
419 max_pfn_mapped = 0; /* will get exact value next */
420 min_pfn_mapped = real_end >> PAGE_SHIFT;
421 last_start = start = real_end;
422 while (last_start > ISA_END_ADDRESS) {
423 if (last_start > step_size) {
424 start = round_down(last_start - 1, step_size);
425 if (start < ISA_END_ADDRESS)
426 start = ISA_END_ADDRESS;
427 } else
428 start = ISA_END_ADDRESS;
429 new_mapped_ram_size = init_range_memory_mapping(start,
430 last_start);
431 last_start = start;
432 min_pfn_mapped = last_start >> PAGE_SHIFT;
 433 /* only increase step_size after a big range gets mapped */
434 if (new_mapped_ram_size > mapped_ram_size)
435 step_size <<= STEP_SIZE_SHIFT;
436 mapped_ram_size += new_mapped_ram_size;
437 }
438
439 if (real_end < end)
440 init_range_memory_mapping(real_end, end);
441
442#ifdef CONFIG_X86_64
443 if (max_pfn > max_low_pfn) {
 444 /* can we preserve max_low_pfn? */
445 max_low_pfn = max_pfn;
446 }
447#else
448 early_ioremap_page_table_range_init();
449#endif
450
451 load_cr3(swapper_pg_dir);
452 __flush_tlb_all();
453
454 early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
455}
313 456
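The new init_mem_mapping() walks RAM top-down in exponentially growing steps so that the small BRK page-table buffer always covers the next chunk to be mapped. Below is a compact userspace sketch of that loop, under assumed simplifications: 2 GiB of RAM, printf standing in for the real mapping, and an unconditional step growth, whereas the kernel only grows step_size after a pass actually mapped a larger range:

#include <stdio.h>

/* Assumed values for the sketch: 2 GiB of RAM and x86 ISA/PMD constants. */
#define ISA_END_ADDRESS 0x100000UL
#define PMD_SIZE        (1UL << 21)
#define STEP_SIZE_SHIFT 5
#define round_down(x, y) (((x) / (y)) * (y))

int main(void)
{
        unsigned long real_end   = 0x80000000UL;  /* pretend RAM ends at 2 GiB */
        unsigned long step_size  = PMD_SIZE;
        unsigned long last_start = real_end, start;

        while (last_start > ISA_END_ADDRESS) {
                if (last_start > step_size) {
                        start = round_down(last_start - 1, step_size);
                        if (start < ISA_END_ADDRESS)
                                start = ISA_END_ADDRESS;
                } else {
                        start = ISA_END_ADDRESS;
                }
                printf("map [%#010lx - %#010lx) step %#lx\n",
                       start, last_start, step_size);
                last_start = start;
                /*
                 * Grow the step unconditionally here; the kernel grows it
                 * only after a pass mapped more RAM than the previous one.
                 */
                step_size <<= STEP_SIZE_SHIFT;
        }
        return 0;
}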
314/* 457/*
315 * devmem_is_allowed() checks to see if /dev/mem access to a certain address 458 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
@@ -391,6 +534,15 @@ void free_initmem(void)
391#ifdef CONFIG_BLK_DEV_INITRD 534#ifdef CONFIG_BLK_DEV_INITRD
392void __init free_initrd_mem(unsigned long start, unsigned long end) 535void __init free_initrd_mem(unsigned long start, unsigned long end)
393{ 536{
537#ifdef CONFIG_MICROCODE_EARLY
538 /*
539 * Remember, initrd memory may contain microcode or other useful things.
540 * Before we lose initrd mem, we need to find a place to hold them
541 * now that normal virtual memory is enabled.
542 */
543 save_microcode_in_initrd();
544#endif
545
394 /* 546 /*
395 * end may not be aligned, and we cannot align it; the 547
396 * decompressor could be confused by an aligned initrd_end 548
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 745d66b843c8..2d19001151d5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -53,25 +53,14 @@
53#include <asm/page_types.h> 53#include <asm/page_types.h>
54#include <asm/init.h> 54#include <asm/init.h>
55 55
56#include "mm_internal.h"
57
56unsigned long highstart_pfn, highend_pfn; 58unsigned long highstart_pfn, highend_pfn;
57 59
58static noinline int do_test_wp_bit(void); 60static noinline int do_test_wp_bit(void);
59 61
60bool __read_mostly __vmalloc_start_set = false; 62bool __read_mostly __vmalloc_start_set = false;
61 63
62static __init void *alloc_low_page(void)
63{
64 unsigned long pfn = pgt_buf_end++;
65 void *adr;
66
67 if (pfn >= pgt_buf_top)
68 panic("alloc_low_page: ran out of memory");
69
70 adr = __va(pfn * PAGE_SIZE);
71 clear_page(adr);
72 return adr;
73}
74
75/* 64/*
76 * Creates a middle page table and puts a pointer to it in the 65 * Creates a middle page table and puts a pointer to it in the
77 * given global directory entry. This only returns the gd entry 66 * given global directory entry. This only returns the gd entry
@@ -84,10 +73,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
84 73
85#ifdef CONFIG_X86_PAE 74#ifdef CONFIG_X86_PAE
86 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 75 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
87 if (after_bootmem) 76 pmd_table = (pmd_t *)alloc_low_page();
88 pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
89 else
90 pmd_table = (pmd_t *)alloc_low_page();
91 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 77 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
92 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 78 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
93 pud = pud_offset(pgd, 0); 79 pud = pud_offset(pgd, 0);
@@ -109,17 +95,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
109static pte_t * __init one_page_table_init(pmd_t *pmd) 95static pte_t * __init one_page_table_init(pmd_t *pmd)
110{ 96{
111 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { 97 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
112 pte_t *page_table = NULL; 98 pte_t *page_table = (pte_t *)alloc_low_page();
113
114 if (after_bootmem) {
115#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
116 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
117#endif
118 if (!page_table)
119 page_table =
120 (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
121 } else
122 page_table = (pte_t *)alloc_low_page();
123 99
124 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); 100 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
125 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 101 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -146,8 +122,39 @@ pte_t * __init populate_extra_pte(unsigned long vaddr)
146 return one_page_table_init(pmd) + pte_idx; 122 return one_page_table_init(pmd) + pte_idx;
147} 123}
148 124
125static unsigned long __init
126page_table_range_init_count(unsigned long start, unsigned long end)
127{
128 unsigned long count = 0;
129#ifdef CONFIG_HIGHMEM
130 int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
131 int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
132 int pgd_idx, pmd_idx;
133 unsigned long vaddr;
134
135 if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
136 return 0;
137
138 vaddr = start;
139 pgd_idx = pgd_index(vaddr);
140
141 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
142 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
143 pmd_idx++) {
144 if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
145 (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
146 count++;
147 vaddr += PMD_SIZE;
148 }
149 pmd_idx = 0;
150 }
151#endif
152 return count;
153}
154
149static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, 155static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
150 unsigned long vaddr, pte_t *lastpte) 156 unsigned long vaddr, pte_t *lastpte,
157 void **adr)
151{ 158{
152#ifdef CONFIG_HIGHMEM 159#ifdef CONFIG_HIGHMEM
153 /* 160 /*
@@ -161,16 +168,15 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
161 168
162 if (pmd_idx_kmap_begin != pmd_idx_kmap_end 169 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
163 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin 170 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
164 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end 171 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
165 && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
166 || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
167 pte_t *newpte; 172 pte_t *newpte;
168 int i; 173 int i;
169 174
170 BUG_ON(after_bootmem); 175 BUG_ON(after_bootmem);
171 newpte = alloc_low_page(); 176 newpte = *adr;
172 for (i = 0; i < PTRS_PER_PTE; i++) 177 for (i = 0; i < PTRS_PER_PTE; i++)
173 set_pte(newpte + i, pte[i]); 178 set_pte(newpte + i, pte[i]);
179 *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
174 180
175 paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); 181 paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
176 set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); 182 set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
@@ -204,6 +210,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
204 pgd_t *pgd; 210 pgd_t *pgd;
205 pmd_t *pmd; 211 pmd_t *pmd;
206 pte_t *pte = NULL; 212 pte_t *pte = NULL;
213 unsigned long count = page_table_range_init_count(start, end);
214 void *adr = NULL;
215
216 if (count)
217 adr = alloc_low_pages(count);
207 218
208 vaddr = start; 219 vaddr = start;
209 pgd_idx = pgd_index(vaddr); 220 pgd_idx = pgd_index(vaddr);
@@ -216,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
216 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); 227 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
217 pmd++, pmd_idx++) { 228 pmd++, pmd_idx++) {
218 pte = page_table_kmap_check(one_page_table_init(pmd), 229 pte = page_table_kmap_check(one_page_table_init(pmd),
219 pmd, vaddr, pte); 230 pmd, vaddr, pte, &adr);
220 231
221 vaddr += PMD_SIZE; 232 vaddr += PMD_SIZE;
222 } 233 }
@@ -310,6 +321,7 @@ repeat:
310 __pgprot(PTE_IDENT_ATTR | 321 __pgprot(PTE_IDENT_ATTR |
311 _PAGE_PSE); 322 _PAGE_PSE);
312 323
324 pfn &= PMD_MASK >> PAGE_SHIFT;
313 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + 325 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
314 PAGE_OFFSET + PAGE_SIZE-1; 326 PAGE_OFFSET + PAGE_SIZE-1;
315 327
@@ -455,9 +467,14 @@ void __init native_pagetable_init(void)
455 467
456 /* 468 /*
457 * Remove any mappings which extend past the end of physical 469 * Remove any mappings which extend past the end of physical
458 * memory from the boot time page table: 470 * memory from the boot time page table.
 471 * In the virtual address space there should be at least two pages
 472 * between VMALLOC_END and pkmap or fixmap, according to the
 473 * VMALLOC_END definition, and max_low_pfn is set from the
 474 * VMALLOC_END physical address. If the initial memory mapping did
 475 * its job, the pte near max_low_pfn is in use or the pmd is not present.
459 */ 476 */
460 for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { 477 for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
461 va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); 478 va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
462 pgd = base + pgd_index(va); 479 pgd = base + pgd_index(va);
463 if (!pgd_present(*pgd)) 480 if (!pgd_present(*pgd))
@@ -468,10 +485,19 @@ void __init native_pagetable_init(void)
468 if (!pmd_present(*pmd)) 485 if (!pmd_present(*pmd))
469 break; 486 break;
470 487
 488 /* there should not be a large page here */
489 if (pmd_large(*pmd)) {
 490 pr_warn("trying to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but the pmd is a large page without a pte!\n",
491 pfn, pmd, __pa(pmd));
492 BUG_ON(1);
493 }
494
471 pte = pte_offset_kernel(pmd, va); 495 pte = pte_offset_kernel(pmd, va);
472 if (!pte_present(*pte)) 496 if (!pte_present(*pte))
473 break; 497 break;
474 498
499 printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n",
500 pfn, pmd, __pa(pmd), pte, __pa(pte));
475 pte_clear(NULL, va, pte); 501 pte_clear(NULL, va, pte);
476 } 502 }
477 paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); 503 paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
@@ -550,7 +576,7 @@ early_param("highmem", parse_highmem);
550 * artificially via the highmem=x boot parameter then create 576 * artificially via the highmem=x boot parameter then create
551 * it: 577 * it:
552 */ 578 */
553void __init lowmem_pfn_init(void) 579static void __init lowmem_pfn_init(void)
554{ 580{
555 /* max_low_pfn is 0, we already have early_res support */ 581 /* max_low_pfn is 0, we already have early_res support */
556 max_low_pfn = max_pfn; 582 max_low_pfn = max_pfn;
@@ -586,7 +612,7 @@ void __init lowmem_pfn_init(void)
586 * We have more RAM than fits into lowmem - we try to put it into 612 * We have more RAM than fits into lowmem - we try to put it into
587 * highmem, also taking the highmem=x boot parameter into account: 613 * highmem, also taking the highmem=x boot parameter into account:
588 */ 614 */
589void __init highmem_pfn_init(void) 615static void __init highmem_pfn_init(void)
590{ 616{
591 max_low_pfn = MAXMEM_PFN; 617 max_low_pfn = MAXMEM_PFN;
592 618
@@ -669,8 +695,6 @@ void __init setup_bootmem_allocator(void)
669 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 695 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
670 max_pfn_mapped<<PAGE_SHIFT); 696 max_pfn_mapped<<PAGE_SHIFT);
671 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); 697 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
672
673 after_bootmem = 1;
674} 698}
675 699
676/* 700/*
@@ -753,6 +777,8 @@ void __init mem_init(void)
753 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 777 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
754 reservedpages++; 778 reservedpages++;
755 779
780 after_bootmem = 1;
781
756 codesize = (unsigned long) &_etext - (unsigned long) &_text; 782 codesize = (unsigned long) &_etext - (unsigned long) &_text;
757 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 783 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
758 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 784 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
@@ -836,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
836 862
837 return __add_pages(nid, zone, start_pfn, nr_pages); 863 return __add_pages(nid, zone, start_pfn, nr_pages);
838} 864}
865
866#ifdef CONFIG_MEMORY_HOTREMOVE
867int arch_remove_memory(u64 start, u64 size)
868{
869 unsigned long start_pfn = start >> PAGE_SHIFT;
870 unsigned long nr_pages = size >> PAGE_SHIFT;
871 struct zone *zone;
872
873 zone = page_zone(pfn_to_page(start_pfn));
874 return __remove_pages(zone, start_pfn, nr_pages);
875}
876#endif
839#endif 877#endif
840 878
841/* 879/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2ead3c8a4c84..474e28f10815 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,82 @@
54#include <asm/uv/uv.h> 54#include <asm/uv/uv.h>
55#include <asm/setup.h> 55#include <asm/setup.h>
56 56
57#include "mm_internal.h"
58
59static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
60 unsigned long addr, unsigned long end)
61{
62 addr &= PMD_MASK;
63 for (; addr < end; addr += PMD_SIZE) {
64 pmd_t *pmd = pmd_page + pmd_index(addr);
65
66 if (!pmd_present(*pmd))
67 set_pmd(pmd, __pmd(addr | pmd_flag));
68 }
69}
70static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
71 unsigned long addr, unsigned long end)
72{
73 unsigned long next;
74
75 for (; addr < end; addr = next) {
76 pud_t *pud = pud_page + pud_index(addr);
77 pmd_t *pmd;
78
79 next = (addr & PUD_MASK) + PUD_SIZE;
80 if (next > end)
81 next = end;
82
83 if (pud_present(*pud)) {
84 pmd = pmd_offset(pud, 0);
85 ident_pmd_init(info->pmd_flag, pmd, addr, next);
86 continue;
87 }
88 pmd = (pmd_t *)info->alloc_pgt_page(info->context);
89 if (!pmd)
90 return -ENOMEM;
91 ident_pmd_init(info->pmd_flag, pmd, addr, next);
92 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
93 }
94
95 return 0;
96}
97
98int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
99 unsigned long addr, unsigned long end)
100{
101 unsigned long next;
102 int result;
103 int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
104
105 for (; addr < end; addr = next) {
106 pgd_t *pgd = pgd_page + pgd_index(addr) + off;
107 pud_t *pud;
108
109 next = (addr & PGDIR_MASK) + PGDIR_SIZE;
110 if (next > end)
111 next = end;
112
113 if (pgd_present(*pgd)) {
114 pud = pud_offset(pgd, 0);
115 result = ident_pud_init(info, pud, addr, next);
116 if (result)
117 return result;
118 continue;
119 }
120
121 pud = (pud_t *)info->alloc_pgt_page(info->context);
122 if (!pud)
123 return -ENOMEM;
124 result = ident_pud_init(info, pud, addr, next);
125 if (result)
126 return result;
127 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
128 }
129
130 return 0;
131}
132
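kernel_ident_mapping_init() obtains its page-table pages through the alloc_pgt_page()/context callback instead of a fixed allocator, which is what allows callers outside the direct-mapping path (kexec, for instance) to reuse it with their own page pools. The sketch below is a hypothetical userspace illustration of that callback pattern with a bump allocator; the struct layout and the flag value are stand-ins, not the kernel's x86_mapping_info definition:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Stand-in for the info structure consumed above; field names mirror the
 * usage (alloc_pgt_page, context, pmd_flag) but the layout is an assumption. */
struct mapping_info {
        void *(*alloc_pgt_page)(void *context); /* zeroed page or NULL */
        void *context;                          /* opaque allocator state */
        unsigned long pmd_flag;                 /* bits OR'ed into entries */
};

struct bump_pool {
        unsigned char *base;
        unsigned int next, max;
};

/* Hand out zeroed pages from a preallocated pool; NULL maps to -ENOMEM. */
static void *bump_alloc(void *context)
{
        struct bump_pool *p = context;

        if (p->next >= p->max)
                return NULL;
        return memset(p->base + (size_t)p->next++ * PAGE_SIZE, 0, PAGE_SIZE);
}

int main(void)
{
        struct bump_pool pool = { malloc(4 * PAGE_SIZE), 0, 4 };
        struct mapping_info info = {
                .alloc_pgt_page = bump_alloc,
                .context        = &pool,
                .pmd_flag       = 0x83, /* placeholder flag bits, illustrative */
        };
        void *pmd_page = info.alloc_pgt_page(info.context);

        printf("got page-table page %p, %u/%u pool pages used\n",
               pmd_page, pool.next, pool.max);
        free(pool.base);
        return 0;
}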
57static int __init parse_direct_gbpages_off(char *arg) 133static int __init parse_direct_gbpages_off(char *arg)
58{ 134{
59 direct_gbpages = 0; 135 direct_gbpages = 0;
@@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
302void __init cleanup_highmap(void) 378void __init cleanup_highmap(void)
303{ 379{
304 unsigned long vaddr = __START_KERNEL_map; 380 unsigned long vaddr = __START_KERNEL_map;
305 unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); 381 unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
306 unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; 382 unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
307 pmd_t *pmd = level2_kernel_pgt; 383 pmd_t *pmd = level2_kernel_pgt;
308 384
385 /*
386 * Native path, max_pfn_mapped is not set yet.
387 * Xen has valid max_pfn_mapped set in
388 * arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
389 */
390 if (max_pfn_mapped)
391 vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
392
309 for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { 393 for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
310 if (pmd_none(*pmd)) 394 if (pmd_none(*pmd))
311 continue; 395 continue;
@@ -314,69 +398,24 @@ void __init cleanup_highmap(void)
314 } 398 }
315} 399}
316 400
317static __ref void *alloc_low_page(unsigned long *phys)
318{
319 unsigned long pfn = pgt_buf_end++;
320 void *adr;
321
322 if (after_bootmem) {
323 adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
324 *phys = __pa(adr);
325
326 return adr;
327 }
328
329 if (pfn >= pgt_buf_top)
330 panic("alloc_low_page: ran out of memory");
331
332 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
333 clear_page(adr);
334 *phys = pfn * PAGE_SIZE;
335 return adr;
336}
337
338static __ref void *map_low_page(void *virt)
339{
340 void *adr;
341 unsigned long phys, left;
342
343 if (after_bootmem)
344 return virt;
345
346 phys = __pa(virt);
347 left = phys & (PAGE_SIZE - 1);
348 adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
349 adr = (void *)(((unsigned long)adr) | left);
350
351 return adr;
352}
353
354static __ref void unmap_low_page(void *adr)
355{
356 if (after_bootmem)
357 return;
358
359 early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
360}
361
362static unsigned long __meminit 401static unsigned long __meminit
363phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, 402phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
364 pgprot_t prot) 403 pgprot_t prot)
365{ 404{
366 unsigned pages = 0; 405 unsigned long pages = 0, next;
367 unsigned long last_map_addr = end; 406 unsigned long last_map_addr = end;
368 int i; 407 int i;
369 408
370 pte_t *pte = pte_page + pte_index(addr); 409 pte_t *pte = pte_page + pte_index(addr);
371 410
372 for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { 411 for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
373 412 next = (addr & PAGE_MASK) + PAGE_SIZE;
374 if (addr >= end) { 413 if (addr >= end) {
375 if (!after_bootmem) { 414 if (!after_bootmem &&
376 for(; i < PTRS_PER_PTE; i++, pte++) 415 !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
377 set_pte(pte, __pte(0)); 416 !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
378 } 417 set_pte(pte, __pte(0));
379 break; 418 continue;
380 } 419 }
381 420
382 /* 421 /*
@@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
414 int i = pmd_index(address); 453 int i = pmd_index(address);
415 454
416 for (; i < PTRS_PER_PMD; i++, address = next) { 455 for (; i < PTRS_PER_PMD; i++, address = next) {
417 unsigned long pte_phys;
418 pmd_t *pmd = pmd_page + pmd_index(address); 456 pmd_t *pmd = pmd_page + pmd_index(address);
419 pte_t *pte; 457 pte_t *pte;
420 pgprot_t new_prot = prot; 458 pgprot_t new_prot = prot;
421 459
460 next = (address & PMD_MASK) + PMD_SIZE;
422 if (address >= end) { 461 if (address >= end) {
423 if (!after_bootmem) { 462 if (!after_bootmem &&
424 for (; i < PTRS_PER_PMD; i++, pmd++) 463 !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
425 set_pmd(pmd, __pmd(0)); 464 !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
426 } 465 set_pmd(pmd, __pmd(0));
427 break; 466 continue;
428 } 467 }
429 468
430 next = (address & PMD_MASK) + PMD_SIZE;
431
432 if (pmd_val(*pmd)) { 469 if (pmd_val(*pmd)) {
433 if (!pmd_large(*pmd)) { 470 if (!pmd_large(*pmd)) {
434 spin_lock(&init_mm.page_table_lock); 471 spin_lock(&init_mm.page_table_lock);
435 pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd)); 472 pte = (pte_t *)pmd_page_vaddr(*pmd);
436 last_map_addr = phys_pte_init(pte, address, 473 last_map_addr = phys_pte_init(pte, address,
437 end, prot); 474 end, prot);
438 unmap_low_page(pte);
439 spin_unlock(&init_mm.page_table_lock); 475 spin_unlock(&init_mm.page_table_lock);
440 continue; 476 continue;
441 } 477 }
@@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
464 pages++; 500 pages++;
465 spin_lock(&init_mm.page_table_lock); 501 spin_lock(&init_mm.page_table_lock);
466 set_pte((pte_t *)pmd, 502 set_pte((pte_t *)pmd,
467 pfn_pte(address >> PAGE_SHIFT, 503 pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
468 __pgprot(pgprot_val(prot) | _PAGE_PSE))); 504 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
469 spin_unlock(&init_mm.page_table_lock); 505 spin_unlock(&init_mm.page_table_lock);
470 last_map_addr = next; 506 last_map_addr = next;
471 continue; 507 continue;
472 } 508 }
473 509
474 pte = alloc_low_page(&pte_phys); 510 pte = alloc_low_page();
475 last_map_addr = phys_pte_init(pte, address, end, new_prot); 511 last_map_addr = phys_pte_init(pte, address, end, new_prot);
476 unmap_low_page(pte);
477 512
478 spin_lock(&init_mm.page_table_lock); 513 spin_lock(&init_mm.page_table_lock);
479 pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); 514 pmd_populate_kernel(&init_mm, pmd, pte);
480 spin_unlock(&init_mm.page_table_lock); 515 spin_unlock(&init_mm.page_table_lock);
481 } 516 }
482 update_page_count(PG_LEVEL_2M, pages); 517 update_page_count(PG_LEVEL_2M, pages);
@@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
492 int i = pud_index(addr); 527 int i = pud_index(addr);
493 528
494 for (; i < PTRS_PER_PUD; i++, addr = next) { 529 for (; i < PTRS_PER_PUD; i++, addr = next) {
495 unsigned long pmd_phys;
496 pud_t *pud = pud_page + pud_index(addr); 530 pud_t *pud = pud_page + pud_index(addr);
497 pmd_t *pmd; 531 pmd_t *pmd;
498 pgprot_t prot = PAGE_KERNEL; 532 pgprot_t prot = PAGE_KERNEL;
499 533
500 if (addr >= end)
501 break;
502
503 next = (addr & PUD_MASK) + PUD_SIZE; 534 next = (addr & PUD_MASK) + PUD_SIZE;
504 535 if (addr >= end) {
505 if (!after_bootmem && !e820_any_mapped(addr, next, 0)) { 536 if (!after_bootmem &&
506 set_pud(pud, __pud(0)); 537 !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
538 !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
539 set_pud(pud, __pud(0));
507 continue; 540 continue;
508 } 541 }
509 542
510 if (pud_val(*pud)) { 543 if (pud_val(*pud)) {
511 if (!pud_large(*pud)) { 544 if (!pud_large(*pud)) {
512 pmd = map_low_page(pmd_offset(pud, 0)); 545 pmd = pmd_offset(pud, 0);
513 last_map_addr = phys_pmd_init(pmd, addr, end, 546 last_map_addr = phys_pmd_init(pmd, addr, end,
514 page_size_mask, prot); 547 page_size_mask, prot);
515 unmap_low_page(pmd);
516 __flush_tlb_all(); 548 __flush_tlb_all();
517 continue; 549 continue;
518 } 550 }
@@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
541 pages++; 573 pages++;
542 spin_lock(&init_mm.page_table_lock); 574 spin_lock(&init_mm.page_table_lock);
543 set_pte((pte_t *)pud, 575 set_pte((pte_t *)pud,
544 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 576 pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
577 PAGE_KERNEL_LARGE));
545 spin_unlock(&init_mm.page_table_lock); 578 spin_unlock(&init_mm.page_table_lock);
546 last_map_addr = next; 579 last_map_addr = next;
547 continue; 580 continue;
548 } 581 }
549 582
550 pmd = alloc_low_page(&pmd_phys); 583 pmd = alloc_low_page();
551 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, 584 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
552 prot); 585 prot);
553 unmap_low_page(pmd);
554 586
555 spin_lock(&init_mm.page_table_lock); 587 spin_lock(&init_mm.page_table_lock);
556 pud_populate(&init_mm, pud, __va(pmd_phys)); 588 pud_populate(&init_mm, pud, pmd);
557 spin_unlock(&init_mm.page_table_lock); 589 spin_unlock(&init_mm.page_table_lock);
558 } 590 }
559 __flush_tlb_all(); 591 __flush_tlb_all();
@@ -578,34 +610,29 @@ kernel_physical_mapping_init(unsigned long start,
578 610
579 for (; start < end; start = next) { 611 for (; start < end; start = next) {
580 pgd_t *pgd = pgd_offset_k(start); 612 pgd_t *pgd = pgd_offset_k(start);
581 unsigned long pud_phys;
582 pud_t *pud; 613 pud_t *pud;
583 614
584 next = (start + PGDIR_SIZE) & PGDIR_MASK; 615 next = (start & PGDIR_MASK) + PGDIR_SIZE;
585 if (next > end)
586 next = end;
587 616
588 if (pgd_val(*pgd)) { 617 if (pgd_val(*pgd)) {
589 pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd)); 618 pud = (pud_t *)pgd_page_vaddr(*pgd);
590 last_map_addr = phys_pud_init(pud, __pa(start), 619 last_map_addr = phys_pud_init(pud, __pa(start),
591 __pa(end), page_size_mask); 620 __pa(end), page_size_mask);
592 unmap_low_page(pud);
593 continue; 621 continue;
594 } 622 }
595 623
596 pud = alloc_low_page(&pud_phys); 624 pud = alloc_low_page();
597 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), 625 last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
598 page_size_mask); 626 page_size_mask);
599 unmap_low_page(pud);
600 627
601 spin_lock(&init_mm.page_table_lock); 628 spin_lock(&init_mm.page_table_lock);
602 pgd_populate(&init_mm, pgd, __va(pud_phys)); 629 pgd_populate(&init_mm, pgd, pud);
603 spin_unlock(&init_mm.page_table_lock); 630 spin_unlock(&init_mm.page_table_lock);
604 pgd_changed = true; 631 pgd_changed = true;
605 } 632 }
606 633
607 if (pgd_changed) 634 if (pgd_changed)
608 sync_global_pgds(addr, end); 635 sync_global_pgds(addr, end - 1);
609 636
610 __flush_tlb_all(); 637 __flush_tlb_all();
611 638
@@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size)
664{ 691{
665 struct pglist_data *pgdat = NODE_DATA(nid); 692 struct pglist_data *pgdat = NODE_DATA(nid);
666 struct zone *zone = pgdat->node_zones + ZONE_NORMAL; 693 struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
667 unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT; 694 unsigned long start_pfn = start >> PAGE_SHIFT;
668 unsigned long nr_pages = size >> PAGE_SHIFT; 695 unsigned long nr_pages = size >> PAGE_SHIFT;
669 int ret; 696 int ret;
670 697
671 last_mapped_pfn = init_memory_mapping(start, start + size); 698 init_memory_mapping(start, start + size);
672 if (last_mapped_pfn > max_pfn_mapped)
673 max_pfn_mapped = last_mapped_pfn;
674 699
675 ret = __add_pages(nid, zone, start_pfn, nr_pages); 700 ret = __add_pages(nid, zone, start_pfn, nr_pages);
676 WARN_ON_ONCE(ret); 701 WARN_ON_ONCE(ret);
@@ -682,10 +707,357 @@ int arch_add_memory(int nid, u64 start, u64 size)
682} 707}
683EXPORT_SYMBOL_GPL(arch_add_memory); 708EXPORT_SYMBOL_GPL(arch_add_memory);
684 709
710#define PAGE_INUSE 0xFD
711
712static void __meminit free_pagetable(struct page *page, int order)
713{
714 struct zone *zone;
715 bool bootmem = false;
716 unsigned long magic;
717 unsigned int nr_pages = 1 << order;
718
719 /* bootmem page has reserved flag */
720 if (PageReserved(page)) {
721 __ClearPageReserved(page);
722 bootmem = true;
723
724 magic = (unsigned long)page->lru.next;
725 if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
726 while (nr_pages--)
727 put_page_bootmem(page++);
728 } else
729 __free_pages_bootmem(page, order);
730 } else
731 free_pages((unsigned long)page_address(page), order);
732
733 /*
734 * SECTION_INFO pages and MIX_SECTION_INFO pages
735 * are all allocated by bootmem.
736 */
737 if (bootmem) {
738 zone = page_zone(page);
739 zone_span_writelock(zone);
740 zone->present_pages += nr_pages;
741 zone_span_writeunlock(zone);
742 totalram_pages += nr_pages;
743 }
744}
745
746static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
747{
748 pte_t *pte;
749 int i;
750
751 for (i = 0; i < PTRS_PER_PTE; i++) {
752 pte = pte_start + i;
753 if (pte_val(*pte))
754 return;
755 }
756
 757 /* free a pte table */
758 free_pagetable(pmd_page(*pmd), 0);
759 spin_lock(&init_mm.page_table_lock);
760 pmd_clear(pmd);
761 spin_unlock(&init_mm.page_table_lock);
762}
763
764static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
765{
766 pmd_t *pmd;
767 int i;
768
769 for (i = 0; i < PTRS_PER_PMD; i++) {
770 pmd = pmd_start + i;
771 if (pmd_val(*pmd))
772 return;
773 }
774
 775 /* free a pmd table */
776 free_pagetable(pud_page(*pud), 0);
777 spin_lock(&init_mm.page_table_lock);
778 pud_clear(pud);
779 spin_unlock(&init_mm.page_table_lock);
780}
781
782/* Return true if pgd is changed, otherwise return false. */
783static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
784{
785 pud_t *pud;
786 int i;
787
788 for (i = 0; i < PTRS_PER_PUD; i++) {
789 pud = pud_start + i;
790 if (pud_val(*pud))
791 return false;
792 }
793
794 /* free a pud table */
795 free_pagetable(pgd_page(*pgd), 0);
796 spin_lock(&init_mm.page_table_lock);
797 pgd_clear(pgd);
798 spin_unlock(&init_mm.page_table_lock);
799
800 return true;
801}
802
803static void __meminit
804remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
805 bool direct)
806{
807 unsigned long next, pages = 0;
808 pte_t *pte;
809 void *page_addr;
810 phys_addr_t phys_addr;
811
812 pte = pte_start + pte_index(addr);
813 for (; addr < end; addr = next, pte++) {
814 next = (addr + PAGE_SIZE) & PAGE_MASK;
815 if (next > end)
816 next = end;
817
818 if (!pte_present(*pte))
819 continue;
820
821 /*
822 * We mapped [0,1G) memory as identity mapping when
823 * initializing, in arch/x86/kernel/head_64.S. These
824 * pagetables cannot be removed.
825 */
826 phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
827 if (phys_addr < (phys_addr_t)0x40000000)
828 return;
829
830 if (IS_ALIGNED(addr, PAGE_SIZE) &&
831 IS_ALIGNED(next, PAGE_SIZE)) {
832 /*
833 * Do not free direct mapping pages since they were
 834 * freed when offlining, or simply not in use.
835 */
836 if (!direct)
837 free_pagetable(pte_page(*pte), 0);
838
839 spin_lock(&init_mm.page_table_lock);
840 pte_clear(&init_mm, addr, pte);
841 spin_unlock(&init_mm.page_table_lock);
842
 843 /* For non-direct mappings, the pages count means nothing. */
844 pages++;
845 } else {
846 /*
847 * If we are here, we are freeing vmemmap pages since
848 * direct mapped memory ranges to be freed are aligned.
849 *
850 * If we are not removing the whole page, it means
851 * other page structs in this page are being used and
 852 * we cannot remove them. So fill the unused page_structs
853 * with 0xFD, and remove the page when it is wholly
854 * filled with 0xFD.
855 */
856 memset((void *)addr, PAGE_INUSE, next - addr);
857
858 page_addr = page_address(pte_page(*pte));
859 if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
860 free_pagetable(pte_page(*pte), 0);
861
862 spin_lock(&init_mm.page_table_lock);
863 pte_clear(&init_mm, addr, pte);
864 spin_unlock(&init_mm.page_table_lock);
865 }
866 }
867 }
868
869 /* Call free_pte_table() in remove_pmd_table(). */
870 flush_tlb_all();
871 if (direct)
872 update_page_count(PG_LEVEL_4K, -pages);
873}
874
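remove_pte_table() handles partially-freed vmemmap pages by poisoning the freed portion with PAGE_INUSE (0xFD) and only releasing the page once memchr_inv() finds nothing else in it. A self-contained userspace sketch of that idea follows; memchr_inv() is reimplemented locally for the example and PAGE_SIZE is assumed to be 4096:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE  4096
#define PAGE_INUSE 0xFD

/* Local reimplementation for the sketch: NULL if the whole area holds 'c'. */
static void *memchr_inv(const void *start, int c, size_t bytes)
{
        const unsigned char *p = start;
        size_t i;

        for (i = 0; i < bytes; i++)
                if (p[i] != (unsigned char)c)
                        return (void *)(p + i);
        return NULL;
}

int main(void)
{
        unsigned char *page = malloc(PAGE_SIZE);

        memset(page, 0, PAGE_SIZE);

        /* "Free" only the first half: poison it instead of freeing the page. */
        memset(page, PAGE_INUSE, PAGE_SIZE / 2);
        printf("after first half:  %s\n",
               memchr_inv(page, PAGE_INUSE, PAGE_SIZE) ? "keep page" : "free page");

        /* "Free" the second half too: the whole page is now poisoned. */
        memset(page + PAGE_SIZE / 2, PAGE_INUSE, PAGE_SIZE / 2);
        printf("after second half: %s\n",
               memchr_inv(page, PAGE_INUSE, PAGE_SIZE) ? "keep page" : "free page");

        free(page);
        return 0;
}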
875static void __meminit
876remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
877 bool direct)
878{
879 unsigned long next, pages = 0;
880 pte_t *pte_base;
881 pmd_t *pmd;
882 void *page_addr;
883
884 pmd = pmd_start + pmd_index(addr);
885 for (; addr < end; addr = next, pmd++) {
886 next = pmd_addr_end(addr, end);
887
888 if (!pmd_present(*pmd))
889 continue;
890
891 if (pmd_large(*pmd)) {
892 if (IS_ALIGNED(addr, PMD_SIZE) &&
893 IS_ALIGNED(next, PMD_SIZE)) {
894 if (!direct)
895 free_pagetable(pmd_page(*pmd),
896 get_order(PMD_SIZE));
897
898 spin_lock(&init_mm.page_table_lock);
899 pmd_clear(pmd);
900 spin_unlock(&init_mm.page_table_lock);
901 pages++;
902 } else {
903 /* If here, we are freeing vmemmap pages. */
904 memset((void *)addr, PAGE_INUSE, next - addr);
905
906 page_addr = page_address(pmd_page(*pmd));
907 if (!memchr_inv(page_addr, PAGE_INUSE,
908 PMD_SIZE)) {
909 free_pagetable(pmd_page(*pmd),
910 get_order(PMD_SIZE));
911
912 spin_lock(&init_mm.page_table_lock);
913 pmd_clear(pmd);
914 spin_unlock(&init_mm.page_table_lock);
915 }
916 }
917
918 continue;
919 }
920
921 pte_base = (pte_t *)pmd_page_vaddr(*pmd);
922 remove_pte_table(pte_base, addr, next, direct);
923 free_pte_table(pte_base, pmd);
924 }
925
926 /* Call free_pmd_table() in remove_pud_table(). */
927 if (direct)
928 update_page_count(PG_LEVEL_2M, -pages);
929}
930
931static void __meminit
932remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
933 bool direct)
934{
935 unsigned long next, pages = 0;
936 pmd_t *pmd_base;
937 pud_t *pud;
938 void *page_addr;
939
940 pud = pud_start + pud_index(addr);
941 for (; addr < end; addr = next, pud++) {
942 next = pud_addr_end(addr, end);
943
944 if (!pud_present(*pud))
945 continue;
946
947 if (pud_large(*pud)) {
948 if (IS_ALIGNED(addr, PUD_SIZE) &&
949 IS_ALIGNED(next, PUD_SIZE)) {
950 if (!direct)
951 free_pagetable(pud_page(*pud),
952 get_order(PUD_SIZE));
953
954 spin_lock(&init_mm.page_table_lock);
955 pud_clear(pud);
956 spin_unlock(&init_mm.page_table_lock);
957 pages++;
958 } else {
959 /* If here, we are freeing vmemmap pages. */
960 memset((void *)addr, PAGE_INUSE, next - addr);
961
962 page_addr = page_address(pud_page(*pud));
963 if (!memchr_inv(page_addr, PAGE_INUSE,
964 PUD_SIZE)) {
965 free_pagetable(pud_page(*pud),
966 get_order(PUD_SIZE));
967
968 spin_lock(&init_mm.page_table_lock);
969 pud_clear(pud);
970 spin_unlock(&init_mm.page_table_lock);
971 }
972 }
973
974 continue;
975 }
976
977 pmd_base = (pmd_t *)pud_page_vaddr(*pud);
978 remove_pmd_table(pmd_base, addr, next, direct);
979 free_pmd_table(pmd_base, pud);
980 }
981
982 if (direct)
983 update_page_count(PG_LEVEL_1G, -pages);
984}
985
 986/* start and end are both virtual addresses. */
987static void __meminit
988remove_pagetable(unsigned long start, unsigned long end, bool direct)
989{
990 unsigned long next;
991 pgd_t *pgd;
992 pud_t *pud;
993 bool pgd_changed = false;
994
995 for (; start < end; start = next) {
996 next = pgd_addr_end(start, end);
997
998 pgd = pgd_offset_k(start);
999 if (!pgd_present(*pgd))
1000 continue;
1001
1002 pud = (pud_t *)pgd_page_vaddr(*pgd);
1003 remove_pud_table(pud, start, next, direct);
1004 if (free_pud_table(pud, pgd))
1005 pgd_changed = true;
1006 }
1007
1008 if (pgd_changed)
1009 sync_global_pgds(start, end - 1);
1010
1011 flush_tlb_all();
1012}
1013
1014void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
1015{
1016 unsigned long start = (unsigned long)memmap;
1017 unsigned long end = (unsigned long)(memmap + nr_pages);
1018
1019 remove_pagetable(start, end, false);
1020}
1021
1022static void __meminit
1023kernel_physical_mapping_remove(unsigned long start, unsigned long end)
1024{
1025 start = (unsigned long)__va(start);
1026 end = (unsigned long)__va(end);
1027
1028 remove_pagetable(start, end, true);
1029}
1030
1031#ifdef CONFIG_MEMORY_HOTREMOVE
1032int __ref arch_remove_memory(u64 start, u64 size)
1033{
1034 unsigned long start_pfn = start >> PAGE_SHIFT;
1035 unsigned long nr_pages = size >> PAGE_SHIFT;
1036 struct zone *zone;
1037 int ret;
1038
1039 zone = page_zone(pfn_to_page(start_pfn));
1040 kernel_physical_mapping_remove(start, start + size);
1041 ret = __remove_pages(zone, start_pfn, nr_pages);
1042 WARN_ON_ONCE(ret);
1043
1044 return ret;
1045}
1046#endif
685#endif /* CONFIG_MEMORY_HOTPLUG */ 1047#endif /* CONFIG_MEMORY_HOTPLUG */
686 1048
687static struct kcore_list kcore_vsyscall; 1049static struct kcore_list kcore_vsyscall;
688 1050
1051static void __init register_page_bootmem_info(void)
1052{
1053#ifdef CONFIG_NUMA
1054 int i;
1055
1056 for_each_online_node(i)
1057 register_page_bootmem_info_node(NODE_DATA(i));
1058#endif
1059}
1060
689void __init mem_init(void) 1061void __init mem_init(void)
690{ 1062{
691 long codesize, reservedpages, datasize, initsize; 1063 long codesize, reservedpages, datasize, initsize;
@@ -698,11 +1070,8 @@ void __init mem_init(void)
698 reservedpages = 0; 1070 reservedpages = 0;
699 1071
700 /* this will put all low memory onto the freelists */ 1072 /* this will put all low memory onto the freelists */
701#ifdef CONFIG_NUMA 1073 register_page_bootmem_info();
702 totalram_pages = numa_free_all_bootmem();
703#else
704 totalram_pages = free_all_bootmem(); 1074 totalram_pages = free_all_bootmem();
705#endif
706 1075
707 absent_pages = absent_pages_in_range(0, max_pfn); 1076 absent_pages = absent_pages_in_range(0, max_pfn);
708 reservedpages = max_pfn - totalram_pages - absent_pages; 1077 reservedpages = max_pfn - totalram_pages - absent_pages;
@@ -772,12 +1141,11 @@ void set_kernel_text_ro(void)
772void mark_rodata_ro(void) 1141void mark_rodata_ro(void)
773{ 1142{
774 unsigned long start = PFN_ALIGN(_text); 1143 unsigned long start = PFN_ALIGN(_text);
775 unsigned long rodata_start = 1144 unsigned long rodata_start = PFN_ALIGN(__start_rodata);
776 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
777 unsigned long end = (unsigned long) &__end_rodata_hpage_align; 1145 unsigned long end = (unsigned long) &__end_rodata_hpage_align;
778 unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); 1146 unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
779 unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); 1147 unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
780 unsigned long data_start = (unsigned long) &_sdata; 1148 unsigned long all_end = PFN_ALIGN(&_end);
781 1149
782 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 1150 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
783 (end - start) >> 10); 1151 (end - start) >> 10);
@@ -786,10 +1154,10 @@ void mark_rodata_ro(void)
786 kernel_set_to_readonly = 1; 1154 kernel_set_to_readonly = 1;
787 1155
788 /* 1156 /*
789 * The rodata section (but not the kernel text!) should also be 1157 * The rodata/data/bss/brk section (but not the kernel text!)
790 * not-executable. 1158 * should also be not-executable.
791 */ 1159 */
792 set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); 1160 set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
793 1161
794 rodata_test(); 1162 rodata_test();
795 1163
@@ -802,12 +1170,12 @@ void mark_rodata_ro(void)
802#endif 1170#endif
803 1171
804 free_init_pages("unused kernel memory", 1172 free_init_pages("unused kernel memory",
805 (unsigned long) page_address(virt_to_page(text_end)), 1173 (unsigned long) __va(__pa_symbol(text_end)),
806 (unsigned long) 1174 (unsigned long) __va(__pa_symbol(rodata_start)));
807 page_address(virt_to_page(rodata_start))); 1175
808 free_init_pages("unused kernel memory", 1176 free_init_pages("unused kernel memory",
809 (unsigned long) page_address(virt_to_page(rodata_end)), 1177 (unsigned long) __va(__pa_symbol(rodata_end)),
810 (unsigned long) page_address(virt_to_page(data_start))); 1178 (unsigned long) __va(__pa_symbol(_sdata)));
811} 1179}
812 1180
813#endif 1181#endif
@@ -831,6 +1199,9 @@ int kern_addr_valid(unsigned long addr)
831 if (pud_none(*pud)) 1199 if (pud_none(*pud))
832 return 0; 1200 return 0;
833 1201
1202 if (pud_large(*pud))
1203 return pfn_valid(pud_pfn(*pud));
1204
834 pmd = pmd_offset(pud, addr); 1205 pmd = pmd_offset(pud, addr);
835 if (pmd_none(*pmd)) 1206 if (pmd_none(*pmd))
836 return 0; 1207 return 0;
@@ -981,10 +1352,70 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
981 } 1352 }
982 1353
983 } 1354 }
984 sync_global_pgds((unsigned long)start_page, end); 1355 sync_global_pgds((unsigned long)start_page, end - 1);
985 return 0; 1356 return 0;
986} 1357}
987 1358
1359#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
1360void register_page_bootmem_memmap(unsigned long section_nr,
1361 struct page *start_page, unsigned long size)
1362{
1363 unsigned long addr = (unsigned long)start_page;
1364 unsigned long end = (unsigned long)(start_page + size);
1365 unsigned long next;
1366 pgd_t *pgd;
1367 pud_t *pud;
1368 pmd_t *pmd;
1369 unsigned int nr_pages;
1370 struct page *page;
1371
1372 for (; addr < end; addr = next) {
1373 pte_t *pte = NULL;
1374
1375 pgd = pgd_offset_k(addr);
1376 if (pgd_none(*pgd)) {
1377 next = (addr + PAGE_SIZE) & PAGE_MASK;
1378 continue;
1379 }
1380 get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
1381
1382 pud = pud_offset(pgd, addr);
1383 if (pud_none(*pud)) {
1384 next = (addr + PAGE_SIZE) & PAGE_MASK;
1385 continue;
1386 }
1387 get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
1388
1389 if (!cpu_has_pse) {
1390 next = (addr + PAGE_SIZE) & PAGE_MASK;
1391 pmd = pmd_offset(pud, addr);
1392 if (pmd_none(*pmd))
1393 continue;
1394 get_page_bootmem(section_nr, pmd_page(*pmd),
1395 MIX_SECTION_INFO);
1396
1397 pte = pte_offset_kernel(pmd, addr);
1398 if (pte_none(*pte))
1399 continue;
1400 get_page_bootmem(section_nr, pte_page(*pte),
1401 SECTION_INFO);
1402 } else {
1403 next = pmd_addr_end(addr, end);
1404
1405 pmd = pmd_offset(pud, addr);
1406 if (pmd_none(*pmd))
1407 continue;
1408
1409 nr_pages = 1 << (get_order(PMD_SIZE));
1410 page = pmd_page(*pmd);
1411 while (nr_pages--)
1412 get_page_bootmem(section_nr, page++,
1413 SECTION_INFO);
1414 }
1415 }
1416}
1417#endif
1418
988void __meminit vmemmap_populate_print_last(void) 1419void __meminit vmemmap_populate_print_last(void)
989{ 1420{
990 if (p_start) { 1421 if (p_start) {
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index c80b9fb95734..8dabbed409ee 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -9,6 +9,7 @@
9#include <linux/memblock.h> 9#include <linux/memblock.h>
10 10
11static u64 patterns[] __initdata = { 11static u64 patterns[] __initdata = {
12 /* The first entry has to be 0 to leave memtest with zeroed memory */
12 0, 13 0,
13 0xffffffffffffffffULL, 14 0xffffffffffffffffULL,
14 0x5555555555555555ULL, 15 0x5555555555555555ULL,
@@ -110,15 +111,8 @@ void __init early_memtest(unsigned long start, unsigned long end)
110 return; 111 return;
111 112
112 printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); 113 printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
113 for (i = 0; i < memtest_pattern; i++) { 114 for (i = memtest_pattern-1; i < UINT_MAX; --i) {
114 idx = i % ARRAY_SIZE(patterns); 115 idx = i % ARRAY_SIZE(patterns);
115 do_one_pass(patterns[idx], start, end); 116 do_one_pass(patterns[idx], start, end);
116 } 117 }
117
118 if (idx > 0) {
119 printk(KERN_INFO "early_memtest: wipe out "
120 "test pattern from memory\n");
121 /* additional test with pattern 0 will do this */
122 do_one_pass(0, start, end);
123 }
124} 118}
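The rewritten early_memtest() loop counts its unsigned index down from memtest_pattern-1, so the final pass always uses patterns[0] == 0 and leaves memory zeroed, which replaces the explicit wipe-out pass removed above. A small standalone sketch of that loop shape, with an assumed pass count and only the first three pattern values:

#include <stdio.h>
#include <limits.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* First three patterns from the table above; the rest are omitted. */
static const unsigned long long patterns[] = {
        0,
        0xffffffffffffffffULL,
        0x5555555555555555ULL,
};

int main(void)
{
        unsigned int memtest_pattern = 4;       /* e.g. memtest=4 on the cmdline */
        unsigned int i, idx;

        /* i is unsigned: after the i == 0 pass it wraps to UINT_MAX and the
         * loop stops; counting down guarantees the last pass is pattern 0. */
        for (i = memtest_pattern - 1; i < UINT_MAX; --i) {
                idx = i % ARRAY_SIZE(patterns);
                printf("pass %u: pattern %#llx\n", i, patterns[idx]);
        }
        return 0;
}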
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
new file mode 100644
index 000000000000..6b563a118891
--- /dev/null
+++ b/arch/x86/mm/mm_internal.h
@@ -0,0 +1,19 @@
1#ifndef __X86_MM_INTERNAL_H
2#define __X86_MM_INTERNAL_H
3
4void *alloc_low_pages(unsigned int num);
5static inline void *alloc_low_page(void)
6{
7 return alloc_low_pages(1);
8}
9
10void early_ioremap_page_table_range_init(void);
11
12unsigned long kernel_physical_mapping_init(unsigned long start,
13 unsigned long end,
14 unsigned long page_size_mask);
15void zone_sizes_init(void);
16
17extern int after_bootmem;
18
19#endif /* __X86_MM_INTERNAL_H */
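The new mm_internal.h pairs one batched allocator, alloc_low_pages(num), with a one-page static inline wrapper, which is the interface both init_32.c and init_64.c now rely on. A trivial userspace sketch of the same pattern, with calloc standing in for the BRK pgt_buf / memblock backing store:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096

/* calloc stands in for the kernel's BRK/memblock-backed page-table pages. */
static void *alloc_low_pages(unsigned int num)
{
        return calloc(num, PAGE_SIZE);          /* zeroed, contiguous "pages" */
}

static inline void *alloc_low_page(void)
{
        return alloc_low_pages(1);
}

int main(void)
{
        void *one  = alloc_low_page();
        void *many = alloc_low_pages(8);

        printf("one page at %p, eight pages at %p\n", one, many);
        free(one);
        free(many);
        return 0;
}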
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 2d125be1bae9..72fe01e9e414 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -56,7 +56,7 @@ early_param("numa", numa_setup);
56/* 56/*
57 * apicid, cpu, node mappings 57 * apicid, cpu, node mappings
58 */ 58 */
59s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { 59s16 __apicid_to_node[MAX_LOCAL_APIC] = {
60 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE 60 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
61}; 61};
62 62
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
78DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 78DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
79EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 79EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
80 80
81void __cpuinit numa_set_node(int cpu, int node) 81void numa_set_node(int cpu, int node)
82{ 82{
83 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 83 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
84 84
@@ -97,11 +97,10 @@ void __cpuinit numa_set_node(int cpu, int node)
97#endif 97#endif
98 per_cpu(x86_cpu_to_node_map, cpu) = node; 98 per_cpu(x86_cpu_to_node_map, cpu) = node;
99 99
100 if (node != NUMA_NO_NODE) 100 set_cpu_numa_node(cpu, node);
101 set_cpu_numa_node(cpu, node);
102} 101}
103 102
104void __cpuinit numa_clear_node(int cpu) 103void numa_clear_node(int cpu)
105{ 104{
106 numa_set_node(cpu, NUMA_NO_NODE); 105 numa_set_node(cpu, NUMA_NO_NODE);
107} 106}
@@ -193,7 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
193static void __init setup_node_data(int nid, u64 start, u64 end) 192static void __init setup_node_data(int nid, u64 start, u64 end)
194{ 193{
195 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 194 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
196 bool remapped = false;
197 u64 nd_pa; 195 u64 nd_pa;
198 void *nd; 196 void *nd;
199 int tnid; 197 int tnid;
@@ -205,37 +203,28 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
205 if (end && (end - start) < NODE_MIN_SIZE) 203 if (end && (end - start) < NODE_MIN_SIZE)
206 return; 204 return;
207 205
208 /* initialize remap allocator before aligning to ZONE_ALIGN */
209 init_alloc_remap(nid, start, end);
210
211 start = roundup(start, ZONE_ALIGN); 206 start = roundup(start, ZONE_ALIGN);
212 207
213 printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", 208 printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
214 nid, start, end - 1); 209 nid, start, end - 1);
215 210
216 /* 211 /*
217 * Allocate node data. Try remap allocator first, node-local 212 * Allocate node data. Try node-local memory and then any node.
218 * memory and then any node. Never allocate in DMA zone. 213 * Never allocate in DMA zone.
219 */ 214 */
220 nd = alloc_remap(nid, nd_size); 215 nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
221 if (nd) { 216 if (!nd_pa) {
222 nd_pa = __pa(nd); 217 pr_err("Cannot find %zu bytes in node %d\n",
223 remapped = true; 218 nd_size, nid);
224 } else { 219 return;
225 nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
226 if (!nd_pa) {
227 pr_err("Cannot find %zu bytes in node %d\n",
228 nd_size, nid);
229 return;
230 }
231 nd = __va(nd_pa);
232 } 220 }
221 nd = __va(nd_pa);
233 222
234 /* report and initialize */ 223 /* report and initialize */
235 printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n", 224 printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n",
236 nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); 225 nd_pa, nd_pa + nd_size - 1);
237 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 226 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
238 if (!remapped && tnid != nid) 227 if (tnid != nid)
239 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); 228 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);
240 229
241 node_data[nid] = nd; 230 node_data[nid] = nd;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 534255a36b6b..73a6d7395bd3 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
73 73
74extern unsigned long highend_pfn, highstart_pfn; 74extern unsigned long highend_pfn, highstart_pfn;
75 75
76#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
77
78static void *node_remap_start_vaddr[MAX_NUMNODES];
79void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
80
81/*
82 * Remap memory allocator
83 */
84static unsigned long node_remap_start_pfn[MAX_NUMNODES];
85static void *node_remap_end_vaddr[MAX_NUMNODES];
86static void *node_remap_alloc_vaddr[MAX_NUMNODES];
87
88/**
89 * alloc_remap - Allocate remapped memory
90 * @nid: NUMA node to allocate memory from
91 * @size: The size of allocation
92 *
93 * Allocate @size bytes from the remap area of NUMA node @nid. The
94 * size of the remap area is predetermined by init_alloc_remap() and
95 * only the callers considered there should call this function. For
96 * more info, please read the comment on top of init_alloc_remap().
97 *
98 * The caller must be ready to handle allocation failure from this
99 * function and fall back to regular memory allocator in such cases.
100 *
101 * CONTEXT:
102 * Single CPU early boot context.
103 *
104 * RETURNS:
105 * Pointer to the allocated memory on success, %NULL on failure.
106 */
107void *alloc_remap(int nid, unsigned long size)
108{
109 void *allocation = node_remap_alloc_vaddr[nid];
110
111 size = ALIGN(size, L1_CACHE_BYTES);
112
113 if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
114 return NULL;
115
116 node_remap_alloc_vaddr[nid] += size;
117 memset(allocation, 0, size);
118
119 return allocation;
120}
121
122#ifdef CONFIG_HIBERNATION
123/**
124 * resume_map_numa_kva - add KVA mapping to the temporary page tables created
125 * during resume from hibernation
126 * @pgd_base - temporary resume page directory
127 */
128void resume_map_numa_kva(pgd_t *pgd_base)
129{
130 int node;
131
132 for_each_online_node(node) {
133 unsigned long start_va, start_pfn, nr_pages, pfn;
134
135 start_va = (unsigned long)node_remap_start_vaddr[node];
136 start_pfn = node_remap_start_pfn[node];
137 nr_pages = (node_remap_end_vaddr[node] -
138 node_remap_start_vaddr[node]) >> PAGE_SHIFT;
139
140 printk(KERN_DEBUG "%s: node %d\n", __func__, node);
141
142 for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
143 unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
144 pgd_t *pgd = pgd_base + pgd_index(vaddr);
145 pud_t *pud = pud_offset(pgd, vaddr);
146 pmd_t *pmd = pmd_offset(pud, vaddr);
147
148 set_pmd(pmd, pfn_pmd(start_pfn + pfn,
149 PAGE_KERNEL_LARGE_EXEC));
150
151 printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
152 __func__, vaddr, start_pfn + pfn);
153 }
154 }
155}
156#endif
157
158/**
159 * init_alloc_remap - Initialize remap allocator for a NUMA node
160 * @nid: NUMA node to initizlie remap allocator for
161 *
162 * NUMA nodes may end up without any lowmem. As allocating pgdat and
163 * memmap on a different node with lowmem is inefficient, a special
164 * remap allocator is implemented which can be used by alloc_remap().
165 *
166 * For each node, the amount of memory which will be necessary for
167 * pgdat and memmap is calculated and two memory areas of the size are
168 * allocated - one in the node and the other in lowmem; then, the area
169 * in the node is remapped to the lowmem area.
170 *
171 * As pgdat and memmap must be allocated in lowmem anyway, this
172 * doesn't waste lowmem address space; however, the actual lowmem
173 * which gets remapped over is wasted. The amount shouldn't be
174 * problematic on machines this feature will be used.
175 * problematic on machines where this feature will be used.
176 * Initialization failure isn't fatal. alloc_remap() is used
177 * opportunistically and the callers will fall back to other memory
178 * allocation mechanisms on failure.
179 */
180void __init init_alloc_remap(int nid, u64 start, u64 end)
181{
182 unsigned long start_pfn = start >> PAGE_SHIFT;
183 unsigned long end_pfn = end >> PAGE_SHIFT;
184 unsigned long size, pfn;
185 u64 node_pa, remap_pa;
186 void *remap_va;
187
188 /*
189 * The acpi/srat node info can show hot-add memory zones where
190 * memory could be added but not currently present.
191 */
192 printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
193 nid, start_pfn, end_pfn);
194
195 /* calculate the necessary space aligned to large page size */
196 size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
197 size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
198 size = ALIGN(size, LARGE_PAGE_BYTES);
199
200 /* allocate node memory and the lowmem remap area */
201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
202 if (!node_pa) {
203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
204 size, nid);
205 return;
206 }
207 memblock_reserve(node_pa, size);
208
209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
210 max_low_pfn << PAGE_SHIFT,
211 size, LARGE_PAGE_BYTES);
212 if (!remap_pa) {
213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
214 size, nid);
215 memblock_free(node_pa, size);
216 return;
217 }
218 memblock_reserve(remap_pa, size);
219 remap_va = phys_to_virt(remap_pa);
220
221 /* perform actual remap */
222 for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
223 set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
224 (node_pa >> PAGE_SHIFT) + pfn,
225 PAGE_KERNEL_LARGE);
226
227 /* initialize remap allocator parameters */
228 node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
229 node_remap_start_vaddr[nid] = remap_va;
230 node_remap_end_vaddr[nid] = remap_va + size;
231 node_remap_alloc_vaddr[nid] = remap_va;
232
233 printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
234 nid, node_pa, node_pa + size, remap_va, remap_va + size);
235}
236
237void __init initmem_init(void) 76void __init initmem_init(void)
238{ 77{
239 x86_numa_init(); 78 x86_numa_init();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 92e27119ee1a..9405ffc91502 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -10,16 +10,3 @@ void __init initmem_init(void)
10{ 10{
11 x86_numa_init(); 11 x86_numa_init();
12} 12}
13
14unsigned long __init numa_free_all_bootmem(void)
15{
16 unsigned long pages = 0;
17 int i;
18
19 for_each_online_node(i)
20 pages += free_all_bootmem_node(NODE_DATA(i));
21
22 pages += free_low_memory_core_early(MAX_NUMNODES);
23
24 return pages;
25}
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index 7178c3afe05e..ad86ec91e640 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -21,12 +21,6 @@ void __init numa_reset_distance(void);
21 21
22void __init x86_numa_init(void); 22void __init x86_numa_init(void);
23 23
24#ifdef CONFIG_X86_64
25static inline void init_alloc_remap(int nid, u64 start, u64 end) { }
26#else
27void __init init_alloc_remap(int nid, u64 start, u64 end);
28#endif
29
30#ifdef CONFIG_NUMA_EMU 24#ifdef CONFIG_NUMA_EMU
31void __init numa_emulation(struct numa_meminfo *numa_meminfo, 25void __init numa_emulation(struct numa_meminfo *numa_meminfo,
32 int numa_dist_cnt); 26 int numa_dist_cnt);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d23503..091934e1d0d9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -94,12 +94,12 @@ static inline void split_page_count(int level) { }
94 94
95static inline unsigned long highmap_start_pfn(void) 95static inline unsigned long highmap_start_pfn(void)
96{ 96{
97 return __pa(_text) >> PAGE_SHIFT; 97 return __pa_symbol(_text) >> PAGE_SHIFT;
98} 98}
99 99
100static inline unsigned long highmap_end_pfn(void) 100static inline unsigned long highmap_end_pfn(void)
101{ 101{
102 return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; 102 return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
103} 103}
104 104
105#endif 105#endif
@@ -276,8 +276,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
276 * The .rodata section needs to be read-only. Using the pfn 276 * The .rodata section needs to be read-only. Using the pfn
277 * catches all aliases. 277 * catches all aliases.
278 */ 278 */
279 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, 279 if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
280 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) 280 __pa_symbol(__end_rodata) >> PAGE_SHIFT))
281 pgprot_val(forbidden) |= _PAGE_RW; 281 pgprot_val(forbidden) |= _PAGE_RW;
282 282
283#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) 283#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
@@ -364,6 +364,37 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
364EXPORT_SYMBOL_GPL(lookup_address); 364EXPORT_SYMBOL_GPL(lookup_address);
365 365
366/* 366/*
367 * This is necessary because __pa() does not work on some
368 * kinds of memory, like vmalloc() or the alloc_remap()
369 * areas on 32-bit NUMA systems. The percpu areas can
370 * end up in this kind of memory, for instance.
371 *
372 * This could be optimized, but it is only intended to be
373 * used at initialization time, and keeping it
374 * unoptimized should increase the testing coverage for
375 * the more obscure platforms.
376 */
377phys_addr_t slow_virt_to_phys(void *__virt_addr)
378{
379 unsigned long virt_addr = (unsigned long)__virt_addr;
380 phys_addr_t phys_addr;
381 unsigned long offset;
382 enum pg_level level;
383 unsigned long psize;
384 unsigned long pmask;
385 pte_t *pte;
386
387 pte = lookup_address(virt_addr, &level);
388 BUG_ON(!pte);
389 psize = page_level_size(level);
390 pmask = page_level_mask(level);
391 offset = virt_addr & ~pmask;
392 phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
393 return (phys_addr | offset);
394}
395EXPORT_SYMBOL_GPL(slow_virt_to_phys);
396
397/*
367 * Set the new pmd in all the pgds we know about: 398 * Set the new pmd in all the pgds we know about:
368 */ 399 */
369static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 400static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
@@ -396,7 +427,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
396 pte_t new_pte, old_pte, *tmp; 427 pte_t new_pte, old_pte, *tmp;
397 pgprot_t old_prot, new_prot, req_prot; 428 pgprot_t old_prot, new_prot, req_prot;
398 int i, do_split = 1; 429 int i, do_split = 1;
399 unsigned int level; 430 enum pg_level level;
400 431
401 if (cpa->force_split) 432 if (cpa->force_split)
402 return 1; 433 return 1;
@@ -412,15 +443,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
412 443
413 switch (level) { 444 switch (level) {
414 case PG_LEVEL_2M: 445 case PG_LEVEL_2M:
415 psize = PMD_PAGE_SIZE;
416 pmask = PMD_PAGE_MASK;
417 break;
418#ifdef CONFIG_X86_64 446#ifdef CONFIG_X86_64
419 case PG_LEVEL_1G: 447 case PG_LEVEL_1G:
420 psize = PUD_PAGE_SIZE;
421 pmask = PUD_PAGE_MASK;
422 break;
423#endif 448#endif
449 psize = page_level_size(level);
450 pmask = page_level_mask(level);
451 break;
424 default: 452 default:
425 do_split = -EINVAL; 453 do_split = -EINVAL;
426 goto out_unlock; 454 goto out_unlock;
@@ -445,6 +473,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
445 pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); 473 pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
446 474
447 /* 475 /*
476 * Set the PSE and GLOBAL flags only if the PRESENT flag is
477 * set otherwise pmd_present/pmd_huge will return true even on
478 * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL
479 * for the ancient hardware that doesn't support it.
480 */
481 if (pgprot_val(new_prot) & _PAGE_PRESENT)
482 pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
483 else
484 pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
485
486 new_prot = canon_pgprot(new_prot);
487
488 /*
448 * old_pte points to the large page base address. So we need 489 * old_pte points to the large page base address. So we need
449 * to add the offset of the virtual address: 490 * to add the offset of the virtual address:
450 */ 491 */
@@ -489,7 +530,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
489 * The address is aligned and the number of pages 530 * The address is aligned and the number of pages
490 * covers the full page. 531 * covers the full page.
491 */ 532 */
492 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); 533 new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
493 __set_pmd_pte(kpte, address, new_pte); 534 __set_pmd_pte(kpte, address, new_pte);
494 cpa->flags |= CPA_FLUSHTLB; 535 cpa->flags |= CPA_FLUSHTLB;
495 do_split = 0; 536 do_split = 0;
@@ -501,21 +542,13 @@ out_unlock:
501 return do_split; 542 return do_split;
502} 543}
503 544
504static int split_large_page(pte_t *kpte, unsigned long address) 545int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
505{ 546{
506 unsigned long pfn, pfninc = 1; 547 unsigned long pfn, pfninc = 1;
507 unsigned int i, level; 548 unsigned int i, level;
508 pte_t *pbase, *tmp; 549 pte_t *tmp;
509 pgprot_t ref_prot; 550 pgprot_t ref_prot;
510 struct page *base; 551 struct page *base = virt_to_page(pbase);
511
512 if (!debug_pagealloc)
513 spin_unlock(&cpa_lock);
514 base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
515 if (!debug_pagealloc)
516 spin_lock(&cpa_lock);
517 if (!base)
518 return -ENOMEM;
519 552
520 spin_lock(&pgd_lock); 553 spin_lock(&pgd_lock);
521 /* 554 /*
@@ -523,10 +556,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
523 * up for us already: 556 * up for us already:
524 */ 557 */
525 tmp = lookup_address(address, &level); 558 tmp = lookup_address(address, &level);
526 if (tmp != kpte) 559 if (tmp != kpte) {
527 goto out_unlock; 560 spin_unlock(&pgd_lock);
561 return 1;
562 }
528 563
529 pbase = (pte_t *)page_address(base);
530 paravirt_alloc_pte(&init_mm, page_to_pfn(base)); 564 paravirt_alloc_pte(&init_mm, page_to_pfn(base));
531 ref_prot = pte_pgprot(pte_clrhuge(*kpte)); 565 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
532 /* 566 /*
@@ -540,27 +574,40 @@ static int split_large_page(pte_t *kpte, unsigned long address)
540#ifdef CONFIG_X86_64 574#ifdef CONFIG_X86_64
541 if (level == PG_LEVEL_1G) { 575 if (level == PG_LEVEL_1G) {
542 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; 576 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
543 pgprot_val(ref_prot) |= _PAGE_PSE; 577 /*
578 * Set the PSE flags only if the PRESENT flag is set
579 * otherwise pmd_present/pmd_huge will return true
580 * even on a non present pmd.
581 */
582 if (pgprot_val(ref_prot) & _PAGE_PRESENT)
583 pgprot_val(ref_prot) |= _PAGE_PSE;
584 else
585 pgprot_val(ref_prot) &= ~_PAGE_PSE;
544 } 586 }
545#endif 587#endif
546 588
547 /* 589 /*
590 * Set the GLOBAL flags only if the PRESENT flag is set
591 * otherwise pmd/pte_present will return true even on a non
592 * present pmd/pte. The canon_pgprot will clear _PAGE_GLOBAL
593 * for the ancient hardware that doesn't support it.
594 */
595 if (pgprot_val(ref_prot) & _PAGE_PRESENT)
596 pgprot_val(ref_prot) |= _PAGE_GLOBAL;
597 else
598 pgprot_val(ref_prot) &= ~_PAGE_GLOBAL;
599
600 /*
548 * Get the target pfn from the original entry: 601 * Get the target pfn from the original entry:
549 */ 602 */
550 pfn = pte_pfn(*kpte); 603 pfn = pte_pfn(*kpte);
551 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) 604 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
552 set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); 605 set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
553 606
554 if (address >= (unsigned long)__va(0) && 607 if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
555 address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) 608 PFN_DOWN(__pa(address)) + 1))
556 split_page_count(level); 609 split_page_count(level);
557 610
558#ifdef CONFIG_X86_64
559 if (address >= (unsigned long)__va(1UL<<32) &&
560 address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
561 split_page_count(level);
562#endif
563
564 /* 611 /*
565 * Install the new, split up pagetable. 612 * Install the new, split up pagetable.
566 * 613 *
@@ -579,17 +626,27 @@ static int split_large_page(pte_t *kpte, unsigned long address)
579 * going on. 626 * going on.
580 */ 627 */
581 __flush_tlb_all(); 628 __flush_tlb_all();
629 spin_unlock(&pgd_lock);
582 630
583 base = NULL; 631 return 0;
632}
584 633
585out_unlock: 634static int split_large_page(pte_t *kpte, unsigned long address)
586 /* 635{
587 * If we dropped out via the lookup_address check under 636 pte_t *pbase;
588 * pgd_lock then stick the page back into the pool: 637 struct page *base;
589 */ 638
590 if (base) 639 if (!debug_pagealloc)
640 spin_unlock(&cpa_lock);
641 base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
642 if (!debug_pagealloc)
643 spin_lock(&cpa_lock);
644 if (!base)
645 return -ENOMEM;
646
647 pbase = (pte_t *)page_address(base);
648 if (__split_large_page(kpte, address, pbase))
591 __free_page(base); 649 __free_page(base);
592 spin_unlock(&pgd_lock);
593 650
594 return 0; 651 return 0;
595} 652}
@@ -660,6 +717,18 @@ repeat:
660 new_prot = static_protections(new_prot, address, pfn); 717 new_prot = static_protections(new_prot, address, pfn);
661 718
662 /* 719 /*
720 * Set the GLOBAL flags only if the PRESENT flag is
721 * set otherwise pte_present will return true even on
722 * a non present pte. The canon_pgprot will clear
723 * _PAGE_GLOBAL for the ancient hardware that doesn't
724 * support it.
725 */
726 if (pgprot_val(new_prot) & _PAGE_PRESENT)
727 pgprot_val(new_prot) |= _PAGE_GLOBAL;
728 else
729 pgprot_val(new_prot) &= ~_PAGE_GLOBAL;
730
731 /*
663 * We need to keep the pfn from the existing PTE, 732 * We need to keep the pfn from the existing PTE,
664 * after all we're only going to change it's attributes 733 * after all we're only going to change it's attributes
665 * not the memory it points to 734 * not the memory it points to
@@ -729,13 +798,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
729 unsigned long vaddr; 798 unsigned long vaddr;
730 int ret; 799 int ret;
731 800
732 if (cpa->pfn >= max_pfn_mapped) 801 if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
733 return 0; 802 return 0;
734 803
735#ifdef CONFIG_X86_64
736 if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
737 return 0;
738#endif
739 /* 804 /*
740 * No need to redo, when the primary call touched the direct 805 * No need to redo, when the primary call touched the direct
741 * mapping already: 806 * mapping already:
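The slow_virt_to_phys() helper added in the pageattr.c hunk above resolves an address by looking up its page-table entry, taking the frame number from that entry, and OR-ing in whatever part of the virtual address lies below the mapping level's mask. A minimal userspace sketch of that arithmetic follows; the level sizes are the usual x86 ones, while the sample pfn and address are made-up inputs standing in for a lookup_address() walk.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

enum pg_level { PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G };

static uint64_t page_level_size(enum pg_level level)
{
	switch (level) {
	case PG_LEVEL_2M: return 1ULL << 21;
	case PG_LEVEL_1G: return 1ULL << 30;
	default:          return 1ULL << PAGE_SHIFT;
	}
}

static uint64_t page_level_mask(enum pg_level level)
{
	return ~(page_level_size(level) - 1);
}

/* pfn and level would normally come from the page-table walk */
static uint64_t virt_to_phys_sketch(uint64_t vaddr, uint64_t pfn, enum pg_level level)
{
	uint64_t offset = vaddr & ~page_level_mask(level);	/* bits below the level mask */

	return (pfn << PAGE_SHIFT) | offset;
}

int main(void)
{
	/* assumed: the address sits in a 2M mapping whose base frame is 0x1200 */
	uint64_t phys = virt_to_phys_sketch(0xffffc90000012345ULL, 0x1200, PG_LEVEL_2M);

	printf("phys = %#llx\n", (unsigned long long)phys);
	return 0;
}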
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 0eb572eda406..657438858e83 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -560,10 +560,17 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
560{ 560{
561 unsigned long id_sz; 561 unsigned long id_sz;
562 562
563 if (base >= __pa(high_memory)) 563 if (base > __pa(high_memory-1))
564 return 0; 564 return 0;
565 565
566 id_sz = (__pa(high_memory) < base + size) ? 566 /*
567 * some areas in the middle of the kernel identity range
568 * are not mapped, like the PCI space.
569 */
570 if (!page_is_ram(base >> PAGE_SHIFT))
571 return 0;
572
573 id_sz = (__pa(high_memory-1) <= base + size) ?
567 __pa(high_memory) - base : 574 __pa(high_memory) - base :
568 size; 575 size;
569 576
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e27fbf887f3b..193350b51f90 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -334,7 +334,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
334 if (changed && dirty) { 334 if (changed && dirty) {
335 *pmdp = entry; 335 *pmdp = entry;
336 pmd_update_defer(vma->vm_mm, address, pmdp); 336 pmd_update_defer(vma->vm_mm, address, pmdp);
337 flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); 337 /*
338 * We had a write-protection fault here and changed the pmd
339 * to be more permissive. No need to flush the TLB for that,
340 * #PF is architecturally guaranteed to do that and in the
341 * worst-case we'll generate a spurious fault.
342 */
338 } 343 }
339 344
340 return changed; 345 return changed;
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index d2e2735327b4..e666cbbb9261 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -1,3 +1,4 @@
1#include <linux/bootmem.h>
1#include <linux/mmdebug.h> 2#include <linux/mmdebug.h>
2#include <linux/module.h> 3#include <linux/module.h>
3#include <linux/mm.h> 4#include <linux/mm.h>
@@ -8,33 +9,54 @@
8 9
9#ifdef CONFIG_X86_64 10#ifdef CONFIG_X86_64
10 11
12#ifdef CONFIG_DEBUG_VIRTUAL
11unsigned long __phys_addr(unsigned long x) 13unsigned long __phys_addr(unsigned long x)
12{ 14{
13 if (x >= __START_KERNEL_map) { 15 unsigned long y = x - __START_KERNEL_map;
14 x -= __START_KERNEL_map; 16
15 VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); 17 /* use the carry flag to determine if x was < __START_KERNEL_map */
16 x += phys_base; 18 if (unlikely(x > y)) {
19 x = y + phys_base;
20
21 VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
17 } else { 22 } else {
18 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 23 x = y + (__START_KERNEL_map - PAGE_OFFSET);
19 x -= PAGE_OFFSET; 24
20 VIRTUAL_BUG_ON(!phys_addr_valid(x)); 25 /* carry flag will be set if starting x was >= PAGE_OFFSET */
26 VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x));
21 } 27 }
28
22 return x; 29 return x;
23} 30}
24EXPORT_SYMBOL(__phys_addr); 31EXPORT_SYMBOL(__phys_addr);
25 32
33unsigned long __phys_addr_symbol(unsigned long x)
34{
35 unsigned long y = x - __START_KERNEL_map;
36
37 /* only check upper bounds since lower bounds will trigger carry */
38 VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
39
40 return y + phys_base;
41}
42EXPORT_SYMBOL(__phys_addr_symbol);
43#endif
44
26bool __virt_addr_valid(unsigned long x) 45bool __virt_addr_valid(unsigned long x)
27{ 46{
28 if (x >= __START_KERNEL_map) { 47 unsigned long y = x - __START_KERNEL_map;
29 x -= __START_KERNEL_map; 48
30 if (x >= KERNEL_IMAGE_SIZE) 49 /* use the carry flag to determine if x was < __START_KERNEL_map */
50 if (unlikely(x > y)) {
51 x = y + phys_base;
52
53 if (y >= KERNEL_IMAGE_SIZE)
31 return false; 54 return false;
32 x += phys_base;
33 } else { 55 } else {
34 if (x < PAGE_OFFSET) 56 x = y + (__START_KERNEL_map - PAGE_OFFSET);
35 return false; 57
36 x -= PAGE_OFFSET; 58 /* carry flag will be set if starting x was >= PAGE_OFFSET */
37 if (!phys_addr_valid(x)) 59 if ((x > y) || !phys_addr_valid(x))
38 return false; 60 return false;
39 } 61 }
40 62
@@ -47,10 +69,16 @@ EXPORT_SYMBOL(__virt_addr_valid);
47#ifdef CONFIG_DEBUG_VIRTUAL 69#ifdef CONFIG_DEBUG_VIRTUAL
48unsigned long __phys_addr(unsigned long x) 70unsigned long __phys_addr(unsigned long x)
49{ 71{
72 unsigned long phys_addr = x - PAGE_OFFSET;
50 /* VMALLOC_* aren't constants */ 73 /* VMALLOC_* aren't constants */
51 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 74 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
52 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); 75 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
53 return x - PAGE_OFFSET; 76 /* max_low_pfn is set early, but not _that_ early */
77 if (max_low_pfn) {
78 VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn);
79 BUG_ON(slow_virt_to_phys((void *)x) != phys_addr);
80 }
81 return phys_addr;
54} 82}
55EXPORT_SYMBOL(__phys_addr); 83EXPORT_SYMBOL(__phys_addr);
56#endif 84#endif
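The rewritten __phys_addr() and __virt_addr_valid() above replace the explicit range compare with an unsigned wrap-around test: after y = x - __START_KERNEL_map, the condition x > y holds exactly when x started at or above the kernel mapping base, which on x86 is the carry-flag trick the new comments mention. A standalone sketch of the same test, with a stand-in base constant:

#include <stdio.h>
#include <stdint.h>

/* stand-in for __START_KERNEL_map; the exact value is not the point */
#define KERNEL_MAP_BASE 0xffffffff80000000ULL

static int in_kernel_map(uint64_t x)
{
	uint64_t y = x - KERNEL_MAP_BASE;

	/* if x was below the base, the subtraction wrapped and y ended up above x */
	return x > y;
}

int main(void)
{
	printf("%d\n", in_kernel_map(0xffffffff81000000ULL));	/* 1: above the base */
	printf("%d\n", in_kernel_map(0xffff880000000000ULL));	/* 0: a direct-map style address */
	return 0;
}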
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 4ddf497ca65b..cdd0da9dd530 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -149,39 +149,40 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
149 int node, pxm; 149 int node, pxm;
150 150
151 if (srat_disabled()) 151 if (srat_disabled())
152 return -1; 152 goto out_err;
153 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) { 153 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
154 bad_srat(); 154 goto out_err_bad_srat;
155 return -1;
156 }
157 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) 155 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
158 return -1; 156 goto out_err;
159
160 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) 157 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
161 return -1; 158 goto out_err;
159
162 start = ma->base_address; 160 start = ma->base_address;
163 end = start + ma->length; 161 end = start + ma->length;
164 pxm = ma->proximity_domain; 162 pxm = ma->proximity_domain;
165 if (acpi_srat_revision <= 1) 163 if (acpi_srat_revision <= 1)
166 pxm &= 0xff; 164 pxm &= 0xff;
165
167 node = setup_node(pxm); 166 node = setup_node(pxm);
168 if (node < 0) { 167 if (node < 0) {
169 printk(KERN_ERR "SRAT: Too many proximity domains.\n"); 168 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
170 bad_srat(); 169 goto out_err_bad_srat;
171 return -1;
172 } 170 }
173 171
174 if (numa_add_memblk(node, start, end) < 0) { 172 if (numa_add_memblk(node, start, end) < 0)
175 bad_srat(); 173 goto out_err_bad_srat;
176 return -1;
177 }
178 174
179 node_set(node, numa_nodes_parsed); 175 node_set(node, numa_nodes_parsed);
180 176
181 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", 177 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
182 node, pxm, 178 node, pxm,
183 (unsigned long long) start, (unsigned long long) end - 1); 179 (unsigned long long) start, (unsigned long long) end - 1);
180
184 return 0; 181 return 0;
182out_err_bad_srat:
183 bad_srat();
184out_err:
185 return -1;
185} 186}
186 187
187void __init acpi_numa_arch_fixup(void) {} 188void __init acpi_numa_arch_fixup(void) {}
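The SRAT change above funnels every failure through two labels: out_err_bad_srat calls bad_srat() to poison the table and then falls through to the plain out_err return. A small sketch of that error-path shape under assumed names (parse_entry() and mark_table_bad() are illustrative, not kernel functions):

#include <stdio.h>
#include <stddef.h>

static void mark_table_bad(void)
{
	/* stands in for bad_srat(): flag the whole table as unusable */
	printf("table marked bad\n");
}

static int parse_entry(const void *entry, size_t len, size_t expected)
{
	if (!entry)
		goto out_err;		/* simple failure, nothing extra to undo */
	if (len != expected)
		goto out_err_bad;	/* malformed entry: also poison the table */

	return 0;

out_err_bad:
	mark_table_bad();
out_err:
	return -1;
}

int main(void)
{
	int dummy = 0;

	printf("%d\n", parse_entry(&dummy, sizeof(dummy), sizeof(dummy)));	/* 0 */
	printf("%d\n", parse_entry(NULL, 0, sizeof(dummy)));			/* -1, no poisoning */
	printf("%d\n", parse_entry(&dummy, 1, sizeof(dummy)));			/* -1 after poisoning */
	return 0;
}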
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 13a6b29e2e5d..282375f13c7e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -335,7 +335,7 @@ static const struct file_operations fops_tlbflush = {
335 .llseek = default_llseek, 335 .llseek = default_llseek,
336}; 336};
337 337
338static int __cpuinit create_tlb_flushall_shift(void) 338static int __init create_tlb_flushall_shift(void)
339{ 339{
340 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, 340 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
341 arch_debugfs_dir, NULL, &fops_tlbflush); 341 arch_debugfs_dir, NULL, &fops_tlbflush);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d11a47099d33..3cbe45381bbb 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,6 +1,6 @@
1/* bpf_jit_comp.c : BPF JIT compiler 1/* bpf_jit_comp.c : BPF JIT compiler
2 * 2 *
3 * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) 3 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License 6 * modify it under the terms of the GNU General Public License
@@ -124,6 +124,26 @@ static inline void bpf_flush_icache(void *start, void *end)
124#define CHOOSE_LOAD_FUNC(K, func) \ 124#define CHOOSE_LOAD_FUNC(K, func) \
125 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) 125 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
126 126
127/* Helper to find the offset of pkt_type in sk_buff
128 * We want to make sure it's still a 3bit field starting at a byte boundary.
129 */
130#define PKT_TYPE_MAX 7
131static int pkt_type_offset(void)
132{
133 struct sk_buff skb_probe = {
134 .pkt_type = ~0,
135 };
136 char *ct = (char *)&skb_probe;
137 unsigned int off;
138
139 for (off = 0; off < sizeof(struct sk_buff); off++) {
140 if (ct[off] == PKT_TYPE_MAX)
141 return off;
142 }
143 pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
144 return -1;
145}
146
127void bpf_jit_compile(struct sk_filter *fp) 147void bpf_jit_compile(struct sk_filter *fp)
128{ 148{
129 u8 temp[64]; 149 u8 temp[64];
@@ -216,6 +236,7 @@ void bpf_jit_compile(struct sk_filter *fp)
216 case BPF_S_ANC_VLAN_TAG: 236 case BPF_S_ANC_VLAN_TAG:
217 case BPF_S_ANC_VLAN_TAG_PRESENT: 237 case BPF_S_ANC_VLAN_TAG_PRESENT:
218 case BPF_S_ANC_QUEUE: 238 case BPF_S_ANC_QUEUE:
239 case BPF_S_ANC_PKTTYPE:
219 case BPF_S_LD_W_ABS: 240 case BPF_S_LD_W_ABS:
220 case BPF_S_LD_H_ABS: 241 case BPF_S_LD_H_ABS:
221 case BPF_S_LD_B_ABS: 242 case BPF_S_LD_B_ABS:
@@ -536,6 +557,23 @@ void bpf_jit_compile(struct sk_filter *fp)
536 EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */ 557 EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */
537 } 558 }
538 break; 559 break;
560 case BPF_S_ANC_PKTTYPE:
561 {
562 int off = pkt_type_offset();
563
564 if (off < 0)
565 goto out;
566 if (is_imm8(off)) {
567 /* movzbl off8(%rdi),%eax */
568 EMIT4(0x0f, 0xb6, 0x47, off);
569 } else {
570 /* movbl off32(%rdi),%eax */
571 /* movzbl off32(%rdi),%eax */
572 EMIT(off, 4);
573 }
574 EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */
575 break;
576 }
539 case BPF_S_LD_W_ABS: 577 case BPF_S_LD_W_ABS:
540 func = CHOOSE_LOAD_FUNC(K, sk_load_word); 578 func = CHOOSE_LOAD_FUNC(K, sk_load_word);
541common_load: seen |= SEEN_DATAREF; 579common_load: seen |= SEEN_DATAREF;
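pkt_type_offset() above needs the byte offset of a bitfield, which offsetof() cannot give, so it writes the field's all-ones value into an otherwise zeroed sk_buff and scans for the resulting byte. A userspace sketch of the same probe against a made-up structure (struct toy is not the real sk_buff layout):

#include <stdio.h>
#include <string.h>

#define FIELD_MAX 7			/* all ones for a 3-bit field */

struct toy {
	long		a;
	char		b;
	unsigned char	kind:3,		/* the bitfield we want to locate */
			other:5;
	int		c;
};

static int kind_offset(void)
{
	struct toy probe;
	const unsigned char *p = (const unsigned char *)&probe;
	size_t off;

	memset(&probe, 0, sizeof(probe));
	probe.kind = ~0;		/* truncates to the field's maximum, 7 */

	for (off = 0; off < sizeof(probe); off++)
		if (p[off] == FIELD_MAX)
			return (int)off;
	return -1;			/* field not byte-aligned: give up */
}

int main(void)
{
	printf("kind lives at byte offset %d\n", kind_offset());
	return 0;
}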
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 53ea60458e01..3e724256dbee 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -521,6 +521,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
521 sd = &info->sd; 521 sd = &info->sd;
522 sd->domain = domain; 522 sd->domain = domain;
523 sd->node = node; 523 sd->node = node;
524 sd->acpi = device->handle;
524 /* 525 /*
525 * Maybe the desired pci bus has been already scanned. In such case 526 * Maybe the desired pci bus has been already scanned. In such case
526 * it is unnecessary to scan the pci bus with the given domain,busnum. 527 * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -592,6 +593,14 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
592 return bus; 593 return bus;
593} 594}
594 595
596int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
597{
598 struct pci_sysdata *sd = bridge->bus->sysdata;
599
600 ACPI_HANDLE_SET(&bridge->dev, sd->acpi);
601 return 0;
602}
603
595int __init pci_acpi_init(void) 604int __init pci_acpi_init(void)
596{ 605{
597 struct pci_dev *dev = NULL; 606 struct pci_dev *dev = NULL;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index ccd0ab3ab899..901177d75ff5 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -34,7 +34,6 @@ int noioapicreroute = 1;
34#endif 34#endif
35int pcibios_last_bus = -1; 35int pcibios_last_bus = -1;
36unsigned long pirq_table_addr; 36unsigned long pirq_table_addr;
37struct pci_bus *pci_root_bus;
38const struct pci_raw_ops *__read_mostly raw_pci_ops; 37const struct pci_raw_ops *__read_mostly raw_pci_ops;
39const struct pci_raw_ops *__read_mostly raw_pci_ext_ops; 38const struct pci_raw_ops *__read_mostly raw_pci_ext_ops;
40 39
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index dd8ca6f7223b..94919e307f8e 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -51,6 +51,7 @@ struct pcibios_fwaddrmap {
51 51
52static LIST_HEAD(pcibios_fwaddrmappings); 52static LIST_HEAD(pcibios_fwaddrmappings);
53static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock); 53static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock);
54static bool pcibios_fw_addr_done;
54 55
55/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */ 56/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */
56static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) 57static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
@@ -72,6 +73,9 @@ pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr)
72 unsigned long flags; 73 unsigned long flags;
73 struct pcibios_fwaddrmap *map; 74 struct pcibios_fwaddrmap *map;
74 75
76 if (pcibios_fw_addr_done)
77 return;
78
75 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); 79 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
76 map = pcibios_fwaddrmap_lookup(dev); 80 map = pcibios_fwaddrmap_lookup(dev);
77 if (!map) { 81 if (!map) {
@@ -97,6 +101,9 @@ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
97 struct pcibios_fwaddrmap *map; 101 struct pcibios_fwaddrmap *map;
98 resource_size_t fw_addr = 0; 102 resource_size_t fw_addr = 0;
99 103
104 if (pcibios_fw_addr_done)
105 return 0;
106
100 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); 107 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
101 map = pcibios_fwaddrmap_lookup(dev); 108 map = pcibios_fwaddrmap_lookup(dev);
102 if (map) 109 if (map)
@@ -106,7 +113,7 @@ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
106 return fw_addr; 113 return fw_addr;
107} 114}
108 115
109static void pcibios_fw_addr_list_del(void) 116static void __init pcibios_fw_addr_list_del(void)
110{ 117{
111 unsigned long flags; 118 unsigned long flags;
112 struct pcibios_fwaddrmap *entry, *next; 119 struct pcibios_fwaddrmap *entry, *next;
@@ -118,6 +125,7 @@ static void pcibios_fw_addr_list_del(void)
118 kfree(entry); 125 kfree(entry);
119 } 126 }
120 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); 127 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
128 pcibios_fw_addr_done = true;
121} 129}
122 130
123static int 131static int
@@ -193,46 +201,46 @@ EXPORT_SYMBOL(pcibios_align_resource);
193 * as well. 201 * as well.
194 */ 202 */
195 203
196static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) 204static void pcibios_allocate_bridge_resources(struct pci_dev *dev)
197{ 205{
198 struct pci_bus *bus;
199 struct pci_dev *dev;
200 int idx; 206 int idx;
201 struct resource *r; 207 struct resource *r;
202 208
203 /* Depth-First Search on bus tree */ 209 for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
204 list_for_each_entry(bus, bus_list, node) { 210 r = &dev->resource[idx];
205 if ((dev = bus->self)) { 211 if (!r->flags)
206 for (idx = PCI_BRIDGE_RESOURCES; 212 continue;
207 idx < PCI_NUM_RESOURCES; idx++) { 213 if (!r->start || pci_claim_resource(dev, idx) < 0) {
208 r = &dev->resource[idx]; 214 /*
209 if (!r->flags) 215 * Something is wrong with the region.
210 continue; 216 * Invalidate the resource to prevent
211 if (!r->start || 217 * child resource allocations in this
212 pci_claim_resource(dev, idx) < 0) { 218 * range.
213 /* 219 */
214 * Something is wrong with the region. 220 r->start = r->end = 0;
215 * Invalidate the resource to prevent 221 r->flags = 0;
216 * child resource allocations in this
217 * range.
218 */
219 r->start = r->end = 0;
220 r->flags = 0;
221 }
222 }
223 } 222 }
224 pcibios_allocate_bus_resources(&bus->children);
225 } 223 }
226} 224}
227 225
226static void pcibios_allocate_bus_resources(struct pci_bus *bus)
227{
228 struct pci_bus *child;
229
230 /* Depth-First Search on bus tree */
231 if (bus->self)
232 pcibios_allocate_bridge_resources(bus->self);
233 list_for_each_entry(child, &bus->children, node)
234 pcibios_allocate_bus_resources(child);
235}
236
228struct pci_check_idx_range { 237struct pci_check_idx_range {
229 int start; 238 int start;
230 int end; 239 int end;
231}; 240};
232 241
233static void __init pcibios_allocate_resources(int pass) 242static void pcibios_allocate_dev_resources(struct pci_dev *dev, int pass)
234{ 243{
235 struct pci_dev *dev = NULL;
236 int idx, disabled, i; 244 int idx, disabled, i;
237 u16 command; 245 u16 command;
238 struct resource *r; 246 struct resource *r;
@@ -244,14 +252,13 @@ static void __init pcibios_allocate_resources(int pass)
244#endif 252#endif
245 }; 253 };
246 254
247 for_each_pci_dev(dev) { 255 pci_read_config_word(dev, PCI_COMMAND, &command);
248 pci_read_config_word(dev, PCI_COMMAND, &command); 256 for (i = 0; i < ARRAY_SIZE(idx_range); i++)
249 for (i = 0; i < ARRAY_SIZE(idx_range); i++)
250 for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { 257 for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) {
251 r = &dev->resource[idx]; 258 r = &dev->resource[idx];
252 if (r->parent) /* Already allocated */ 259 if (r->parent) /* Already allocated */
253 continue; 260 continue;
254 if (!r->start) /* Address not assigned at all */ 261 if (!r->start) /* Address not assigned at all */
255 continue; 262 continue;
256 if (r->flags & IORESOURCE_IO) 263 if (r->flags & IORESOURCE_IO)
257 disabled = !(command & PCI_COMMAND_IO); 264 disabled = !(command & PCI_COMMAND_IO);
@@ -270,44 +277,74 @@ static void __init pcibios_allocate_resources(int pass)
270 } 277 }
271 } 278 }
272 } 279 }
273 if (!pass) { 280 if (!pass) {
274 r = &dev->resource[PCI_ROM_RESOURCE]; 281 r = &dev->resource[PCI_ROM_RESOURCE];
275 if (r->flags & IORESOURCE_ROM_ENABLE) { 282 if (r->flags & IORESOURCE_ROM_ENABLE) {
276 /* Turn the ROM off, leave the resource region, 283 /* Turn the ROM off, leave the resource region,
277 * but keep it unregistered. */ 284 * but keep it unregistered. */
278 u32 reg; 285 u32 reg;
279 dev_dbg(&dev->dev, "disabling ROM %pR\n", r); 286 dev_dbg(&dev->dev, "disabling ROM %pR\n", r);
280 r->flags &= ~IORESOURCE_ROM_ENABLE; 287 r->flags &= ~IORESOURCE_ROM_ENABLE;
281 pci_read_config_dword(dev, 288 pci_read_config_dword(dev, dev->rom_base_reg, &reg);
282 dev->rom_base_reg, &reg); 289 pci_write_config_dword(dev, dev->rom_base_reg,
283 pci_write_config_dword(dev, dev->rom_base_reg,
284 reg & ~PCI_ROM_ADDRESS_ENABLE); 290 reg & ~PCI_ROM_ADDRESS_ENABLE);
285 }
286 } 291 }
287 } 292 }
288} 293}
289 294
290static int __init pcibios_assign_resources(void) 295static void pcibios_allocate_resources(struct pci_bus *bus, int pass)
296{
297 struct pci_dev *dev;
298 struct pci_bus *child;
299
300 list_for_each_entry(dev, &bus->devices, bus_list) {
301 pcibios_allocate_dev_resources(dev, pass);
302
303 child = dev->subordinate;
304 if (child)
305 pcibios_allocate_resources(child, pass);
306 }
307}
308
309static void pcibios_allocate_dev_rom_resource(struct pci_dev *dev)
291{ 310{
292 struct pci_dev *dev = NULL;
293 struct resource *r; 311 struct resource *r;
294 312
295 if (!(pci_probe & PCI_ASSIGN_ROMS)) { 313 /*
296 /* 314 * Try to use BIOS settings for ROMs, otherwise let
297 * Try to use BIOS settings for ROMs, otherwise let 315 * pci_assign_unassigned_resources() allocate the new
298 * pci_assign_unassigned_resources() allocate the new 316 * addresses.
299 * addresses. 317 */
300 */ 318 r = &dev->resource[PCI_ROM_RESOURCE];
301 for_each_pci_dev(dev) { 319 if (!r->flags || !r->start)
302 r = &dev->resource[PCI_ROM_RESOURCE]; 320 return;
303 if (!r->flags || !r->start) 321
304 continue; 322 if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) {
305 if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { 323 r->end -= r->start;
306 r->end -= r->start; 324 r->start = 0;
307 r->start = 0;
308 }
309 }
310 } 325 }
326}
327static void pcibios_allocate_rom_resources(struct pci_bus *bus)
328{
329 struct pci_dev *dev;
330 struct pci_bus *child;
331
332 list_for_each_entry(dev, &bus->devices, bus_list) {
333 pcibios_allocate_dev_rom_resource(dev);
334
335 child = dev->subordinate;
336 if (child)
337 pcibios_allocate_rom_resources(child);
338 }
339}
340
341static int __init pcibios_assign_resources(void)
342{
343 struct pci_bus *bus;
344
345 if (!(pci_probe & PCI_ASSIGN_ROMS))
346 list_for_each_entry(bus, &pci_root_buses, node)
347 pcibios_allocate_rom_resources(bus);
311 348
312 pci_assign_unassigned_resources(); 349 pci_assign_unassigned_resources();
313 pcibios_fw_addr_list_del(); 350 pcibios_fw_addr_list_del();
@@ -315,12 +352,32 @@ static int __init pcibios_assign_resources(void)
315 return 0; 352 return 0;
316} 353}
317 354
355void pcibios_resource_survey_bus(struct pci_bus *bus)
356{
357 dev_printk(KERN_DEBUG, &bus->dev, "Allocating resources\n");
358
359 pcibios_allocate_bus_resources(bus);
360
361 pcibios_allocate_resources(bus, 0);
362 pcibios_allocate_resources(bus, 1);
363
364 if (!(pci_probe & PCI_ASSIGN_ROMS))
365 pcibios_allocate_rom_resources(bus);
366}
367
318void __init pcibios_resource_survey(void) 368void __init pcibios_resource_survey(void)
319{ 369{
370 struct pci_bus *bus;
371
320 DBG("PCI: Allocating resources\n"); 372 DBG("PCI: Allocating resources\n");
321 pcibios_allocate_bus_resources(&pci_root_buses); 373
322 pcibios_allocate_resources(0); 374 list_for_each_entry(bus, &pci_root_buses, node)
323 pcibios_allocate_resources(1); 375 pcibios_allocate_bus_resources(bus);
376
377 list_for_each_entry(bus, &pci_root_buses, node)
378 pcibios_allocate_resources(bus, 0);
379 list_for_each_entry(bus, &pci_root_buses, node)
380 pcibios_allocate_resources(bus, 1);
324 381
325 e820_reserve_resources_late(); 382 e820_reserve_resources_late();
326 /* 383 /*
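The i386 PCI rework above splits the old global passes over pci_root_buses and for_each_pci_dev() into helpers that take a bus and recurse into its subordinate buses, which is what allows the new pcibios_resource_survey_bus() to survey a single root bus on its own (for example one that appears after boot). A toy sketch of that traversal shape; the types and claim_resources() are stand-ins, not the PCI core API:

#include <stdio.h>

struct toy_bus;

struct toy_dev {
	const char	*name;
	struct toy_bus	*subordinate;	/* set when the device is a bridge */
};

struct toy_bus {
	struct toy_dev	*devs;
	int		ndevs;
};

static void claim_resources(struct toy_dev *dev)
{
	printf("claiming resources of %s\n", dev->name);
}

static void survey_bus(struct toy_bus *bus)
{
	for (int i = 0; i < bus->ndevs; i++) {
		claim_resources(&bus->devs[i]);
		if (bus->devs[i].subordinate)	/* descend depth-first */
			survey_bus(bus->devs[i].subordinate);
	}
}

int main(void)
{
	struct toy_dev leaf[] = { { "01:00.0", NULL } };
	struct toy_bus child = { leaf, 1 };
	struct toy_dev root_devs[] = { { "00:1c.0 (bridge)", &child }, { "00:1f.2", NULL } };
	struct toy_bus root = { root_devs, 2 };

	survey_bus(&root);
	return 0;
}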
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 4a2ab9cb3659..4db96fb1c232 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -30,7 +30,7 @@ int __init pci_legacy_init(void)
30 } 30 }
31 31
32 printk("PCI: Probing PCI hardware\n"); 32 printk("PCI: Probing PCI hardware\n");
33 pci_root_bus = pcibios_scan_root(0); 33 pcibios_scan_root(0);
34 return 0; 34 return 0;
35} 35}
36 36
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index fb29968a7cd5..082e88129712 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -548,8 +548,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
548 if (cfg->address < 0xFFFFFFFF) 548 if (cfg->address < 0xFFFFFFFF)
549 return 0; 549 return 0;
550 550
551 if (!strcmp(mcfg->header.oem_id, "SGI") || 551 if (!strncmp(mcfg->header.oem_id, "SGI", 3))
552 !strcmp(mcfg->header.oem_id, "SGI2"))
553 return 0; 552 return 0;
554 553
555 if (mcfg->header.revision >= 1) { 554 if (mcfg->header.revision >= 1) {
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index b96b14c250b6..72c229f9ebcf 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -152,7 +152,7 @@ int __init pci_numaq_init(void)
152 152
153 raw_pci_ops = &pci_direct_conf1_mq; 153 raw_pci_ops = &pci_direct_conf1_mq;
154 154
155 pci_root_bus = pcibios_scan_root(0); 155 pcibios_scan_root(0);
156 if (num_online_nodes() > 1) 156 if (num_online_nodes() > 1)
157 for_each_online_node(quad) { 157 for_each_online_node(quad) {
158 if (quad == 0) 158 if (quad == 0)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 56ab74989cf1..94e76620460f 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -162,6 +162,9 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
162 struct msi_desc *msidesc; 162 struct msi_desc *msidesc;
163 int *v; 163 int *v;
164 164
165 if (type == PCI_CAP_ID_MSI && nvec > 1)
166 return 1;
167
165 v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); 168 v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
166 if (!v) 169 if (!v)
167 return -ENOMEM; 170 return -ENOMEM;
@@ -220,6 +223,9 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
220 struct msi_desc *msidesc; 223 struct msi_desc *msidesc;
221 struct msi_msg msg; 224 struct msi_msg msg;
222 225
226 if (type == PCI_CAP_ID_MSI && nvec > 1)
227 return 1;
228
223 list_for_each_entry(msidesc, &dev->msi_list, list) { 229 list_for_each_entry(msidesc, &dev->msi_list, list) {
224 __read_msi_msg(msidesc, &msg); 230 __read_msi_msg(msidesc, &msg);
225 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | 231 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
@@ -263,6 +269,9 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
263 int ret = 0; 269 int ret = 0;
264 struct msi_desc *msidesc; 270 struct msi_desc *msidesc;
265 271
272 if (type == PCI_CAP_ID_MSI && nvec > 1)
273 return 1;
274
266 list_for_each_entry(msidesc, &dev->msi_list, list) { 275 list_for_each_entry(msidesc, &dev->msi_list, list) {
267 struct physdev_map_pirq map_irq; 276 struct physdev_map_pirq map_irq;
268 domid_t domid; 277 domid_t domid;
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 8d874396cb29..01e0231a113e 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -2,10 +2,12 @@
2obj-y += ce4100/ 2obj-y += ce4100/
3obj-y += efi/ 3obj-y += efi/
4obj-y += geode/ 4obj-y += geode/
5obj-y += goldfish/
5obj-y += iris/ 6obj-y += iris/
6obj-y += mrst/ 7obj-y += mrst/
7obj-y += olpc/ 8obj-y += olpc/
8obj-y += scx200/ 9obj-y += scx200/
9obj-y += sfi/ 10obj-y += sfi/
11obj-y += ts5500/
10obj-y += visws/ 12obj-y += visws/
11obj-y += uv/ 13obj-y += uv/
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d9c1b95af17c..7145ec63c520 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -11,20 +11,21 @@
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
12 */ 12 */
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/init.h>
14#include <linux/acpi.h> 15#include <linux/acpi.h>
15#include <linux/efi.h> 16#include <linux/efi.h>
16#include <linux/efi-bgrt.h> 17#include <linux/efi-bgrt.h>
17 18
18struct acpi_table_bgrt *bgrt_tab; 19struct acpi_table_bgrt *bgrt_tab;
19void *bgrt_image; 20void *__initdata bgrt_image;
20size_t bgrt_image_size; 21size_t __initdata bgrt_image_size;
21 22
22struct bmp_header { 23struct bmp_header {
23 u16 id; 24 u16 id;
24 u32 size; 25 u32 size;
25} __packed; 26} __packed;
26 27
27void efi_bgrt_init(void) 28void __init efi_bgrt_init(void)
28{ 29{
29 acpi_status status; 30 acpi_status status;
30 void __iomem *image; 31 void __iomem *image;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 430cd784a0de..fff986da6239 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -80,9 +80,10 @@ int efi_enabled(int facility)
80} 80}
81EXPORT_SYMBOL(efi_enabled); 81EXPORT_SYMBOL(efi_enabled);
82 82
83static bool __initdata disable_runtime = false;
83static int __init setup_noefi(char *arg) 84static int __init setup_noefi(char *arg)
84{ 85{
85 clear_bit(EFI_BOOT, &x86_efi_facility); 86 disable_runtime = true;
86 return 0; 87 return 0;
87} 88}
88early_param("noefi", setup_noefi); 89early_param("noefi", setup_noefi);
@@ -412,8 +413,8 @@ void __init efi_reserve_boot_services(void)
412 * - Not within any part of the kernel 413 * - Not within any part of the kernel
413 * - Not the bios reserved area 414 * - Not the bios reserved area
414 */ 415 */
415 if ((start+size >= virt_to_phys(_text) 416 if ((start+size >= __pa_symbol(_text)
416 && start <= virt_to_phys(_end)) || 417 && start <= __pa_symbol(_end)) ||
417 !e820_all_mapped(start, start+size, E820_RAM) || 418 !e820_all_mapped(start, start+size, E820_RAM) ||
418 memblock_is_region_reserved(start, size)) { 419 memblock_is_region_reserved(start, size)) {
419 /* Could not reserve, skip it */ 420 /* Could not reserve, skip it */
@@ -730,7 +731,7 @@ void __init efi_init(void)
730 if (!efi_is_native()) 731 if (!efi_is_native())
731 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); 732 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
732 else { 733 else {
733 if (efi_runtime_init()) 734 if (disable_runtime || efi_runtime_init())
734 return; 735 return;
735 set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); 736 set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
736 } 737 }
@@ -839,7 +840,7 @@ void __init efi_enter_virtual_mode(void)
839 efi_memory_desc_t *md, *prev_md = NULL; 840 efi_memory_desc_t *md, *prev_md = NULL;
840 efi_status_t status; 841 efi_status_t status;
841 unsigned long size; 842 unsigned long size;
842 u64 end, systab, end_pfn; 843 u64 end, systab, start_pfn, end_pfn;
843 void *p, *va, *new_memmap = NULL; 844 void *p, *va, *new_memmap = NULL;
844 int count = 0; 845 int count = 0;
845 846
@@ -892,10 +893,9 @@ void __init efi_enter_virtual_mode(void)
892 size = md->num_pages << EFI_PAGE_SHIFT; 893 size = md->num_pages << EFI_PAGE_SHIFT;
893 end = md->phys_addr + size; 894 end = md->phys_addr + size;
894 895
896 start_pfn = PFN_DOWN(md->phys_addr);
895 end_pfn = PFN_UP(end); 897 end_pfn = PFN_UP(end);
896 if (end_pfn <= max_low_pfn_mapped 898 if (pfn_range_is_mapped(start_pfn, end_pfn)) {
897 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
898 && end_pfn <= max_pfn_mapped)) {
899 va = __va(md->phys_addr); 899 va = __va(md->phys_addr);
900 900
901 if (!(md->attribute & EFI_MEMORY_WB)) 901 if (!(md->attribute & EFI_MEMORY_WB))
diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile
new file mode 100644
index 000000000000..f030b532fdf3
--- /dev/null
+++ b/arch/x86/platform/goldfish/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_GOLDFISH) += goldfish.o
diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c
new file mode 100644
index 000000000000..1693107a518e
--- /dev/null
+++ b/arch/x86/platform/goldfish/goldfish.c
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2007 Google, Inc.
3 * Copyright (C) 2011 Intel, Inc.
4 * Copyright (C) 2013 Intel, Inc.
5 *
6 * This software is licensed under the terms of the GNU General Public
7 * License version 2, as published by the Free Software Foundation, and
8 * may be copied, distributed, and modified under those terms.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */
16
17#include <linux/kernel.h>
18#include <linux/irq.h>
19#include <linux/platform_device.h>
20
21/*
22 * Where in virtual device memory the IO devices (timers, system controllers
23 * and so on)
24 */
25
26#define GOLDFISH_PDEV_BUS_BASE (0xff001000)
27#define GOLDFISH_PDEV_BUS_END (0xff7fffff)
28#define GOLDFISH_PDEV_BUS_IRQ (4)
29
30#define GOLDFISH_TTY_BASE (0x2000)
31
32static struct resource goldfish_pdev_bus_resources[] = {
33 {
34 .start = GOLDFISH_PDEV_BUS_BASE,
35 .end = GOLDFISH_PDEV_BUS_END,
36 .flags = IORESOURCE_MEM,
37 },
38 {
39 .start = GOLDFISH_PDEV_BUS_IRQ,
40 .end = GOLDFISH_PDEV_BUS_IRQ,
41 .flags = IORESOURCE_IRQ,
42 }
43};
44
45static int __init goldfish_init(void)
46{
47 platform_device_register_simple("goldfish_pdev_bus", -1,
48 goldfish_pdev_bus_resources, 2);
49 return 0;
50}
51device_initcall(goldfish_init);
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 2fdca25905ae..fef7d0ba7e3a 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -195,7 +195,7 @@ err_sysfs:
195 return r; 195 return r;
196} 196}
197 197
198static int xo15_sci_remove(struct acpi_device *device, int type) 198static int xo15_sci_remove(struct acpi_device *device)
199{ 199{
200 acpi_disable_gpe(NULL, xo15_sci_gpe); 200 acpi_disable_gpe(NULL, xo15_sci_gpe);
201 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler); 201 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index 7785b72ecc3a..bcd1a703e3e6 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -35,7 +35,7 @@
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36 36
37/* All CPUs enumerated by SFI must be present and enabled */ 37/* All CPUs enumerated by SFI must be present and enabled */
38static void __cpuinit mp_sfi_register_lapic(u8 id) 38static void __init mp_sfi_register_lapic(u8 id)
39{ 39{
40 if (MAX_LOCAL_APIC - id <= 0) { 40 if (MAX_LOCAL_APIC - id <= 0) {
41 pr_warning("Processor #%d invalid (max %d)\n", 41 pr_warning("Processor #%d invalid (max %d)\n",
diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile
new file mode 100644
index 000000000000..c54e348c96a7
--- /dev/null
+++ b/arch/x86/platform/ts5500/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_TS5500) += ts5500.o
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
new file mode 100644
index 000000000000..39febb214e8c
--- /dev/null
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -0,0 +1,339 @@
1/*
2 * Technologic Systems TS-5500 Single Board Computer support
3 *
4 * Copyright (C) 2013 Savoir-faire Linux Inc.
5 * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option) any later
10 * version.
11 *
12 *
13 * This driver registers the Technologic Systems TS-5500 Single Board Computer
14 * (SBC) and its devices, and exposes information to userspace such as jumpers'
15 * state or available options. For further information about sysfs entries, see
16 * Documentation/ABI/testing/sysfs-platform-ts5500.
17 *
18 * This code actually supports the TS-5500 platform, but it may be extended to
19 * support similar Technologic Systems x86-based platforms, such as the TS-5600.
20 */
21
22#include <linux/delay.h>
23#include <linux/io.h>
24#include <linux/kernel.h>
25#include <linux/leds.h>
26#include <linux/module.h>
27#include <linux/platform_data/gpio-ts5500.h>
28#include <linux/platform_data/max197.h>
29#include <linux/platform_device.h>
30#include <linux/slab.h>
31
32/* Product code register */
33#define TS5500_PRODUCT_CODE_ADDR 0x74
34#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */
35
36/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */
37#define TS5500_SRAM_RS485_ADC_ADDR 0x75
38#define TS5500_SRAM BIT(0) /* SRAM option */
39#define TS5500_RS485 BIT(1) /* RS-485 option */
40#define TS5500_ADC BIT(2) /* A/D converter option */
41#define TS5500_RS485_RTS BIT(6) /* RTS for RS-485 */
42#define TS5500_RS485_AUTO BIT(7) /* Automatic RS-485 */
43
44/* External Reset/Industrial Temperature Range options register */
45#define TS5500_ERESET_ITR_ADDR 0x76
46#define TS5500_ERESET BIT(0) /* External Reset option */
47#define TS5500_ITR BIT(1) /* Indust. Temp. Range option */
48
49/* LED/Jumpers register */
50#define TS5500_LED_JP_ADDR 0x77
51#define TS5500_LED BIT(0) /* LED flag */
52#define TS5500_JP1 BIT(1) /* Automatic CMOS */
53#define TS5500_JP2 BIT(2) /* Enable Serial Console */
54#define TS5500_JP3 BIT(3) /* Write Enable Drive A */
55#define TS5500_JP4 BIT(4) /* Fast Console (115K baud) */
56#define TS5500_JP5 BIT(5) /* User Jumper */
57#define TS5500_JP6 BIT(6) /* Console on COM1 (req. JP2) */
58#define TS5500_JP7 BIT(7) /* Undocumented (Unused) */
59
60/* A/D Converter registers */
61#define TS5500_ADC_CONV_BUSY_ADDR 0x195 /* Conversion state register */
62#define TS5500_ADC_CONV_BUSY BIT(0)
63#define TS5500_ADC_CONV_INIT_LSB_ADDR 0x196 /* Start conv. / LSB register */
64#define TS5500_ADC_CONV_MSB_ADDR 0x197 /* MSB register */
65#define TS5500_ADC_CONV_DELAY 12 /* usec */
66
67/**
68 * struct ts5500_sbc - TS-5500 board description
69 * @id: Board product ID.
70 * @sram: Flag for SRAM option.
71 * @rs485: Flag for RS-485 option.
72 * @adc: Flag for Analog/Digital converter option.
73 * @ereset: Flag for External Reset option.
74 * @itr: Flag for Industrial Temperature Range option.
75 * @jumpers: Bitfield for jumpers' state.
76 */
77struct ts5500_sbc {
78 int id;
79 bool sram;
80 bool rs485;
81 bool adc;
82 bool ereset;
83 bool itr;
84 u8 jumpers;
85};
86
87/* Board signatures in BIOS shadow RAM */
88static const struct {
89 const char * const string;
90 const ssize_t offset;
91} ts5500_signatures[] __initdata = {
92 { "TS-5x00 AMD Elan", 0xb14 },
93};
94
95static int __init ts5500_check_signature(void)
96{
97 void __iomem *bios;
98 int i, ret = -ENODEV;
99
100 bios = ioremap(0xf0000, 0x10000);
101 if (!bios)
102 return -ENOMEM;
103
104 for (i = 0; i < ARRAY_SIZE(ts5500_signatures); i++) {
105 if (check_signature(bios + ts5500_signatures[i].offset,
106 ts5500_signatures[i].string,
107 strlen(ts5500_signatures[i].string))) {
108 ret = 0;
109 break;
110 }
111 }
112
113 iounmap(bios);
114 return ret;
115}
116
117static int __init ts5500_detect_config(struct ts5500_sbc *sbc)
118{
119 u8 tmp;
120 int ret = 0;
121
122 if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500"))
123 return -EBUSY;
124
125 tmp = inb(TS5500_PRODUCT_CODE_ADDR);
126 if (tmp != TS5500_PRODUCT_CODE) {
127 pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp);
128 ret = -ENODEV;
129 goto cleanup;
130 }
131 sbc->id = tmp;
132
133 tmp = inb(TS5500_SRAM_RS485_ADC_ADDR);
134 sbc->sram = tmp & TS5500_SRAM;
135 sbc->rs485 = tmp & TS5500_RS485;
136 sbc->adc = tmp & TS5500_ADC;
137
138 tmp = inb(TS5500_ERESET_ITR_ADDR);
139 sbc->ereset = tmp & TS5500_ERESET;
140 sbc->itr = tmp & TS5500_ITR;
141
142 tmp = inb(TS5500_LED_JP_ADDR);
143 sbc->jumpers = tmp & ~TS5500_LED;
144
145cleanup:
146 release_region(TS5500_PRODUCT_CODE_ADDR, 4);
147 return ret;
148}
149
150static ssize_t ts5500_show_id(struct device *dev,
151 struct device_attribute *attr, char *buf)
152{
153 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
154
155 return sprintf(buf, "0x%.2x\n", sbc->id);
156}
157
158static ssize_t ts5500_show_jumpers(struct device *dev,
159 struct device_attribute *attr,
160 char *buf)
161{
162 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
163
164 return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1);
165}
166
167#define TS5500_SHOW(field) \
168 static ssize_t ts5500_show_##field(struct device *dev, \
169 struct device_attribute *attr, \
170 char *buf) \
171 { \
172 struct ts5500_sbc *sbc = dev_get_drvdata(dev); \
173 return sprintf(buf, "%d\n", sbc->field); \
174 }
175
176TS5500_SHOW(sram)
177TS5500_SHOW(rs485)
178TS5500_SHOW(adc)
179TS5500_SHOW(ereset)
180TS5500_SHOW(itr)
181
182static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL);
183static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL);
184static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL);
185static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL);
186static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL);
187static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL);
188static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL);
189
190static struct attribute *ts5500_attributes[] = {
191 &dev_attr_id.attr,
192 &dev_attr_jumpers.attr,
193 &dev_attr_sram.attr,
194 &dev_attr_rs485.attr,
195 &dev_attr_adc.attr,
196 &dev_attr_ereset.attr,
197 &dev_attr_itr.attr,
198 NULL
199};
200
201static const struct attribute_group ts5500_attr_group = {
202 .attrs = ts5500_attributes,
203};
204
205static struct resource ts5500_dio1_resource[] = {
206 DEFINE_RES_IRQ_NAMED(7, "DIO1 interrupt"),
207};
208
209static struct platform_device ts5500_dio1_pdev = {
210 .name = "ts5500-dio1",
211 .id = -1,
212 .resource = ts5500_dio1_resource,
213 .num_resources = 1,
214};
215
216static struct resource ts5500_dio2_resource[] = {
217 DEFINE_RES_IRQ_NAMED(6, "DIO2 interrupt"),
218};
219
220static struct platform_device ts5500_dio2_pdev = {
221 .name = "ts5500-dio2",
222 .id = -1,
223 .resource = ts5500_dio2_resource,
224 .num_resources = 1,
225};
226
227static void ts5500_led_set(struct led_classdev *led_cdev,
228 enum led_brightness brightness)
229{
230 outb(!!brightness, TS5500_LED_JP_ADDR);
231}
232
233static enum led_brightness ts5500_led_get(struct led_classdev *led_cdev)
234{
235 return (inb(TS5500_LED_JP_ADDR) & TS5500_LED) ? LED_FULL : LED_OFF;
236}
237
238static struct led_classdev ts5500_led_cdev = {
239 .name = "ts5500:green:",
240 .brightness_set = ts5500_led_set,
241 .brightness_get = ts5500_led_get,
242};
243
244static int ts5500_adc_convert(u8 ctrl)
245{
246 u8 lsb, msb;
247
248 /* Start conversion (ensure the 3 MSB are set to 0) */
249 outb(ctrl & 0x1f, TS5500_ADC_CONV_INIT_LSB_ADDR);
250
251 /*
252 * The platform has CPLD logic driving the A/D converter.
253 * The conversion must complete within 11 microseconds,
254 * otherwise we have to re-initiate a conversion.
255 */
256 udelay(TS5500_ADC_CONV_DELAY);
257 if (inb(TS5500_ADC_CONV_BUSY_ADDR) & TS5500_ADC_CONV_BUSY)
258 return -EBUSY;
259
260 /* Read the raw data */
261 lsb = inb(TS5500_ADC_CONV_INIT_LSB_ADDR);
262 msb = inb(TS5500_ADC_CONV_MSB_ADDR);
263
264 return (msb << 8) | lsb;
265}
266
267static struct max197_platform_data ts5500_adc_pdata = {
268 .convert = ts5500_adc_convert,
269};
270
271static struct platform_device ts5500_adc_pdev = {
272 .name = "max197",
273 .id = -1,
274 .dev = {
275 .platform_data = &ts5500_adc_pdata,
276 },
277};
278
279static int __init ts5500_init(void)
280{
281 struct platform_device *pdev;
282 struct ts5500_sbc *sbc;
283 int err;
284
285 /*
286 * There is no DMI available or PCI bridge subvendor info,
287 * only the BIOS provides a 16-bit identification call.
288 * It is safer to find a signature in the BIOS shadow RAM.
289 */
290 err = ts5500_check_signature();
291 if (err)
292 return err;
293
294 pdev = platform_device_register_simple("ts5500", -1, NULL, 0);
295 if (IS_ERR(pdev))
296 return PTR_ERR(pdev);
297
298 sbc = devm_kzalloc(&pdev->dev, sizeof(struct ts5500_sbc), GFP_KERNEL);
299 if (!sbc) {
300 err = -ENOMEM;
301 goto error;
302 }
303
304 err = ts5500_detect_config(sbc);
305 if (err)
306 goto error;
307
308 platform_set_drvdata(pdev, sbc);
309
310 err = sysfs_create_group(&pdev->dev.kobj, &ts5500_attr_group);
311 if (err)
312 goto error;
313
314 ts5500_dio1_pdev.dev.parent = &pdev->dev;
315 if (platform_device_register(&ts5500_dio1_pdev))
316 dev_warn(&pdev->dev, "DIO1 block registration failed\n");
317 ts5500_dio2_pdev.dev.parent = &pdev->dev;
318 if (platform_device_register(&ts5500_dio2_pdev))
319 dev_warn(&pdev->dev, "DIO2 block registration failed\n");
320
321 if (led_classdev_register(&pdev->dev, &ts5500_led_cdev))
322 dev_warn(&pdev->dev, "LED registration failed\n");
323
324 if (sbc->adc) {
325 ts5500_adc_pdev.dev.parent = &pdev->dev;
326 if (platform_device_register(&ts5500_adc_pdev))
327 dev_warn(&pdev->dev, "ADC registration failed\n");
328 }
329
330 return 0;
331error:
332 platform_device_unregister(pdev);
333 return err;
334}
335device_initcall(ts5500_init);
336
337MODULE_LICENSE("GPL");
338MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>");
339MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver");
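
The TS5500_SHOW() macro in the file above stamps out one read-only show callback per configuration field, and sysfs_create_group() publishes them alongside the id and jumpers attributes on the ts5500 platform device. As a rough userspace sketch, assuming the device ends up at /sys/devices/platform/ts5500/ (the path is an assumption about how the platform bus names it, not something stated by the driver):

    #include <stdio.h>

    /* Hypothetical reader for the attributes registered above; only the
     * attribute names are taken from the driver, the sysfs path is assumed. */
    static void read_attr(const char *name)
    {
        char path[128], value[32];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/devices/platform/ts5500/%s", name);
        f = fopen(path, "r");
        if (!f)
            return;
        if (fgets(value, sizeof(value), f))
            printf("%s: %s", name, value);
        fclose(f);
    }

    int main(void)
    {
        const char *attrs[] = { "id", "jumpers", "sram", "rs485",
                                "adc", "ereset", "itr" };
        size_t i;

        for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
            read_attr(attrs[i]);
        return 0;
    }
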
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index dbbdca5f508c..0f92173a12b6 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1467,7 +1467,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
1467 } 1467 }
1468 1468
1469 if (input_arg == 0) { 1469 if (input_arg == 0) {
1470 elements = sizeof(stat_description)/sizeof(*stat_description); 1470 elements = ARRAY_SIZE(stat_description);
1471 printk(KERN_DEBUG "# cpu: cpu number\n"); 1471 printk(KERN_DEBUG "# cpu: cpu number\n");
1472 printk(KERN_DEBUG "Sender statistics:\n"); 1472 printk(KERN_DEBUG "Sender statistics:\n");
1473 for (i = 0; i < elements; i++) 1473 for (i = 0; i < elements; i++)
@@ -1508,7 +1508,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr,
1508 char *q; 1508 char *q;
1509 int cnt = 0; 1509 int cnt = 0;
1510 int val; 1510 int val;
1511 int e = sizeof(tunables) / sizeof(*tunables); 1511 int e = ARRAY_SIZE(tunables);
1512 1512
1513 p = instr + strspn(instr, WHITESPACE); 1513 p = instr + strspn(instr, WHITESPACE);
1514 q = p; 1514 q = p;
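
Both hunks above replace the open-coded sizeof(x)/sizeof(*x) element count with ARRAY_SIZE(), which is behaviourally identical but self-describing; the kernel's version of the macro also adds a compile-time check that its argument is a real array rather than a pointer. A standalone illustration of the idiom:

    #include <stdio.h>

    /* Plain-C form of the idiom; the kernel macro additionally rejects
     * pointer arguments at compile time. */
    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    int main(void)
    {
        int tunables[] = { 10, 20, 30, 40, 50 };

        printf("%zu elements\n", ARRAY_SIZE(tunables));
        return 0;
    }
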
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 5032e0d19b86..98718f604eb6 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -15,7 +15,7 @@
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 * 17 *
18 * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. 18 * Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Dimitri Sivanich 19 * Copyright (c) Dimitri Sivanich
20 */ 20 */
21#include <linux/clockchips.h> 21#include <linux/clockchips.h>
@@ -102,9 +102,10 @@ static int uv_intr_pending(int pnode)
102 if (is_uv1_hub()) 102 if (is_uv1_hub())
103 return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & 103 return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
104 UV1H_EVENT_OCCURRED0_RTC1_MASK; 104 UV1H_EVENT_OCCURRED0_RTC1_MASK;
105 else 105 else if (is_uvx_hub())
106 return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) & 106 return uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) &
107 UV2H_EVENT_OCCURRED2_RTC_1_MASK; 107 UVXH_EVENT_OCCURRED2_RTC_1_MASK;
108 return 0;
108} 109}
109 110
110/* Setup interrupt and return non-zero if early expiration occurred. */ 111/* Setup interrupt and return non-zero if early expiration occurred. */
@@ -122,8 +123,8 @@ static int uv_setup_intr(int cpu, u64 expires)
122 uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, 123 uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
123 UV1H_EVENT_OCCURRED0_RTC1_MASK); 124 UV1H_EVENT_OCCURRED0_RTC1_MASK);
124 else 125 else
125 uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS, 126 uv_write_global_mmr64(pnode, UVXH_EVENT_OCCURRED2_ALIAS,
126 UV2H_EVENT_OCCURRED2_RTC_1_MASK); 127 UVXH_EVENT_OCCURRED2_RTC_1_MASK);
127 128
128 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | 129 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
129 ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); 130 ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 120cee1c3f8d..3c68768d7a75 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -11,6 +11,7 @@
11#include <linux/suspend.h> 11#include <linux/suspend.h>
12#include <linux/export.h> 12#include <linux/export.h>
13#include <linux/smp.h> 13#include <linux/smp.h>
14#include <linux/perf_event.h>
14 15
15#include <asm/pgtable.h> 16#include <asm/pgtable.h>
16#include <asm/proto.h> 17#include <asm/proto.h>
@@ -228,6 +229,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
228 do_fpu_end(); 229 do_fpu_end();
229 x86_platform.restore_sched_clock_state(); 230 x86_platform.restore_sched_clock_state();
230 mtrr_bp_restore(); 231 mtrr_bp_restore();
232 perf_restore_debug_store();
231} 233}
232 234
233/* Needed by apm.c */ 235/* Needed by apm.c */
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 74202c1910cd..7d28c885d238 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -129,8 +129,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
129 } 129 }
130 } 130 }
131 131
132 resume_map_numa_kva(pgd_base);
133
134 return 0; 132 return 0;
135} 133}
136 134
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 460f314d13e5..a0fde91c16cf 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -11,6 +11,8 @@
11#include <linux/gfp.h> 11#include <linux/gfp.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/suspend.h> 13#include <linux/suspend.h>
14
15#include <asm/init.h>
14#include <asm/proto.h> 16#include <asm/proto.h>
15#include <asm/page.h> 17#include <asm/page.h>
16#include <asm/pgtable.h> 18#include <asm/pgtable.h>
@@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt;
39 41
40void *relocated_restore_code; 42void *relocated_restore_code;
41 43
42static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) 44static void *alloc_pgt_page(void *context)
43{ 45{
44 long i, j; 46 return (void *)get_safe_page(GFP_ATOMIC);
45
46 i = pud_index(address);
47 pud = pud + i;
48 for (; i < PTRS_PER_PUD; pud++, i++) {
49 unsigned long paddr;
50 pmd_t *pmd;
51
52 paddr = address + i*PUD_SIZE;
53 if (paddr >= end)
54 break;
55
56 pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
57 if (!pmd)
58 return -ENOMEM;
59 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
60 for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
61 unsigned long pe;
62
63 if (paddr >= end)
64 break;
65 pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
66 pe &= __supported_pte_mask;
67 set_pmd(pmd, __pmd(pe));
68 }
69 }
70 return 0;
71} 47}
72 48
73static int set_up_temporary_mappings(void) 49static int set_up_temporary_mappings(void)
74{ 50{
75 unsigned long start, end, next; 51 struct x86_mapping_info info = {
76 int error; 52 .alloc_pgt_page = alloc_pgt_page,
53 .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
54 .kernel_mapping = true,
55 };
56 unsigned long mstart, mend;
57 int result;
58 int i;
77 59
78 temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); 60 temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
79 if (!temp_level4_pgt) 61 if (!temp_level4_pgt)
@@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void)
84 init_level4_pgt[pgd_index(__START_KERNEL_map)]); 66 init_level4_pgt[pgd_index(__START_KERNEL_map)]);
85 67
86 /* Set up the direct mapping from scratch */ 68 /* Set up the direct mapping from scratch */
87 start = (unsigned long)pfn_to_kaddr(0); 69 for (i = 0; i < nr_pfn_mapped; i++) {
88 end = (unsigned long)pfn_to_kaddr(max_pfn); 70 mstart = pfn_mapped[i].start << PAGE_SHIFT;
89 71 mend = pfn_mapped[i].end << PAGE_SHIFT;
90 for (; start < end; start = next) { 72
91 pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); 73 result = kernel_ident_mapping_init(&info, temp_level4_pgt,
92 if (!pud) 74 mstart, mend);
93 return -ENOMEM; 75
94 next = start + PGDIR_SIZE; 76 if (result)
95 if (next > end) 77 return result;
96 next = end;
97 if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
98 return error;
99 set_pgd(temp_level4_pgt + pgd_index(start),
100 mk_kernel_pgd(__pa(pud)));
101 } 78 }
79
102 return 0; 80 return 0;
103} 81}
104 82
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index cbca565af5bd..a44f457e70a1 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -8,9 +8,26 @@
8struct real_mode_header *real_mode_header; 8struct real_mode_header *real_mode_header;
9u32 *trampoline_cr4_features; 9u32 *trampoline_cr4_features;
10 10
11void __init setup_real_mode(void) 11void __init reserve_real_mode(void)
12{ 12{
13 phys_addr_t mem; 13 phys_addr_t mem;
14 unsigned char *base;
15 size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
16
17 /* Has to be under 1M so we can execute real-mode AP code. */
18 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
19 if (!mem)
20 panic("Cannot allocate trampoline\n");
21
22 base = __va(mem);
23 memblock_reserve(mem, size);
24 real_mode_header = (struct real_mode_header *) base;
25 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
26 base, (unsigned long long)mem, size);
27}
28
29void __init setup_real_mode(void)
30{
14 u16 real_mode_seg; 31 u16 real_mode_seg;
15 u32 *rel; 32 u32 *rel;
16 u32 count; 33 u32 count;
@@ -25,16 +42,7 @@ void __init setup_real_mode(void)
25 u64 efer; 42 u64 efer;
26#endif 43#endif
27 44
28 /* Has to be in very low memory so we can execute real-mode AP code. */ 45 base = (unsigned char *)real_mode_header;
29 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
30 if (!mem)
31 panic("Cannot allocate trampoline\n");
32
33 base = __va(mem);
34 memblock_reserve(mem, size);
35 real_mode_header = (struct real_mode_header *) base;
36 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
37 base, (unsigned long long)mem, size);
38 46
39 memcpy(base, real_mode_blob, size); 47 memcpy(base, real_mode_blob, size);
40 48
@@ -62,9 +70,9 @@ void __init setup_real_mode(void)
62 __va(real_mode_header->trampoline_header); 70 __va(real_mode_header->trampoline_header);
63 71
64#ifdef CONFIG_X86_32 72#ifdef CONFIG_X86_32
65 trampoline_header->start = __pa(startup_32_smp); 73 trampoline_header->start = __pa_symbol(startup_32_smp);
66 trampoline_header->gdt_limit = __BOOT_DS + 7; 74 trampoline_header->gdt_limit = __BOOT_DS + 7;
67 trampoline_header->gdt_base = __pa(boot_gdt); 75 trampoline_header->gdt_base = __pa_symbol(boot_gdt);
68#else 76#else
69 /* 77 /*
70 * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR 78 * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR
@@ -78,16 +86,18 @@ void __init setup_real_mode(void)
78 *trampoline_cr4_features = read_cr4(); 86 *trampoline_cr4_features = read_cr4();
79 87
80 trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); 88 trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
81 trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; 89 trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
82 trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; 90 trampoline_pgd[511] = init_level4_pgt[511].pgd;
83#endif 91#endif
84} 92}
85 93
86/* 94/*
87 * set_real_mode_permissions() gets called very early, to guarantee the 95 * reserve_real_mode() gets called very early, to guarantee the
88 * availability of low memory. This is before the proper kernel page 96 * availability of low memory. This is before the proper kernel page
89 * tables are set up, so we cannot set page permissions in that 97 * tables are set up, so we cannot set page permissions in that
90 * function. Thus, we use an arch_initcall instead. 98 * function. Also trampoline code will be executed by APs so we
99 * need to mark it executable at do_pre_smp_initcalls() at least,
100 * thus run it as an early_initcall().
91 */ 101 */
92static int __init set_real_mode_permissions(void) 102static int __init set_real_mode_permissions(void)
93{ 103{
@@ -111,5 +121,4 @@ static int __init set_real_mode_permissions(void)
111 121
112 return 0; 122 return 0;
113} 123}
114 124early_initcall(set_real_mode_permissions);
115arch_initcall(set_real_mode_permissions);
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 28e3fa9056ea..e6d55f0064df 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -25,7 +25,7 @@
2516 i386 lchown sys_lchown16 2516 i386 lchown sys_lchown16
2617 i386 break 2617 i386 break
2718 i386 oldstat sys_stat 2718 i386 oldstat sys_stat
2819 i386 lseek sys_lseek sys32_lseek 2819 i386 lseek sys_lseek compat_sys_lseek
2920 i386 getpid sys_getpid 2920 i386 getpid sys_getpid
3021 i386 mount sys_mount compat_sys_mount 3021 i386 mount sys_mount compat_sys_mount
3122 i386 umount sys_oldumount 3122 i386 umount sys_oldumount
@@ -73,12 +73,12 @@
7364 i386 getppid sys_getppid 7364 i386 getppid sys_getppid
7465 i386 getpgrp sys_getpgrp 7465 i386 getpgrp sys_getpgrp
7566 i386 setsid sys_setsid 7566 i386 setsid sys_setsid
7667 i386 sigaction sys_sigaction sys32_sigaction 7667 i386 sigaction sys_sigaction compat_sys_sigaction
7768 i386 sgetmask sys_sgetmask 7768 i386 sgetmask sys_sgetmask
7869 i386 ssetmask sys_ssetmask 7869 i386 ssetmask sys_ssetmask
7970 i386 setreuid sys_setreuid16 7970 i386 setreuid sys_setreuid16
8071 i386 setregid sys_setregid16 8071 i386 setregid sys_setregid16
8172 i386 sigsuspend sys_sigsuspend sys32_sigsuspend 8172 i386 sigsuspend sys_sigsuspend sys_sigsuspend
8273 i386 sigpending sys_sigpending compat_sys_sigpending 8273 i386 sigpending sys_sigpending compat_sys_sigpending
8374 i386 sethostname sys_sethostname 8374 i386 sethostname sys_sethostname
8475 i386 setrlimit sys_setrlimit compat_sys_setrlimit 8475 i386 setrlimit sys_setrlimit compat_sys_setrlimit
@@ -98,8 +98,8 @@
9889 i386 readdir sys_old_readdir compat_sys_old_readdir 9889 i386 readdir sys_old_readdir compat_sys_old_readdir
9990 i386 mmap sys_old_mmap sys32_mmap 9990 i386 mmap sys_old_mmap sys32_mmap
10091 i386 munmap sys_munmap 10091 i386 munmap sys_munmap
10192 i386 truncate sys_truncate 10192 i386 truncate sys_truncate compat_sys_truncate
10293 i386 ftruncate sys_ftruncate 10293 i386 ftruncate sys_ftruncate compat_sys_ftruncate
10394 i386 fchmod sys_fchmod 10394 i386 fchmod sys_fchmod
10495 i386 fchown sys_fchown16 10495 i386 fchown sys_fchown16
10596 i386 getpriority sys_getpriority 10596 i386 getpriority sys_getpriority
@@ -116,16 +116,16 @@
116107 i386 lstat sys_newlstat compat_sys_newlstat 116107 i386 lstat sys_newlstat compat_sys_newlstat
117108 i386 fstat sys_newfstat compat_sys_newfstat 117108 i386 fstat sys_newfstat compat_sys_newfstat
118109 i386 olduname sys_uname 118109 i386 olduname sys_uname
119110 i386 iopl ptregs_iopl stub32_iopl 119110 i386 iopl sys_iopl
120111 i386 vhangup sys_vhangup 120111 i386 vhangup sys_vhangup
121112 i386 idle 121112 i386 idle
122113 i386 vm86old ptregs_vm86old sys32_vm86_warning 122113 i386 vm86old sys_vm86old sys32_vm86_warning
123114 i386 wait4 sys_wait4 compat_sys_wait4 123114 i386 wait4 sys_wait4 compat_sys_wait4
124115 i386 swapoff sys_swapoff 124115 i386 swapoff sys_swapoff
125116 i386 sysinfo sys_sysinfo compat_sys_sysinfo 125116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
126117 i386 ipc sys_ipc sys32_ipc 126117 i386 ipc sys_ipc sys32_ipc
127118 i386 fsync sys_fsync 127118 i386 fsync sys_fsync
128119 i386 sigreturn ptregs_sigreturn stub32_sigreturn 128119 i386 sigreturn sys_sigreturn stub32_sigreturn
129120 i386 clone sys_clone stub32_clone 129120 i386 clone sys_clone stub32_clone
130121 i386 setdomainname sys_setdomainname 130121 i386 setdomainname sys_setdomainname
131122 i386 uname sys_newuname 131122 i386 uname sys_newuname
@@ -167,24 +167,24 @@
167158 i386 sched_yield sys_sched_yield 167158 i386 sched_yield sys_sched_yield
168159 i386 sched_get_priority_max sys_sched_get_priority_max 168159 i386 sched_get_priority_max sys_sched_get_priority_max
169160 i386 sched_get_priority_min sys_sched_get_priority_min 169160 i386 sched_get_priority_min sys_sched_get_priority_min
170161 i386 sched_rr_get_interval sys_sched_rr_get_interval sys32_sched_rr_get_interval 170161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
171162 i386 nanosleep sys_nanosleep compat_sys_nanosleep 171162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
172163 i386 mremap sys_mremap 172163 i386 mremap sys_mremap
173164 i386 setresuid sys_setresuid16 173164 i386 setresuid sys_setresuid16
174165 i386 getresuid sys_getresuid16 174165 i386 getresuid sys_getresuid16
175166 i386 vm86 ptregs_vm86 sys32_vm86_warning 175166 i386 vm86 sys_vm86 sys32_vm86_warning
176167 i386 query_module 176167 i386 query_module
177168 i386 poll sys_poll 177168 i386 poll sys_poll
178169 i386 nfsservctl 178169 i386 nfsservctl
179170 i386 setresgid sys_setresgid16 179170 i386 setresgid sys_setresgid16
180171 i386 getresgid sys_getresgid16 180171 i386 getresgid sys_getresgid16
181172 i386 prctl sys_prctl 181172 i386 prctl sys_prctl
182173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn 182173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn
183174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction 183174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
184175 i386 rt_sigprocmask sys_rt_sigprocmask 184175 i386 rt_sigprocmask sys_rt_sigprocmask
185176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending 185176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
186177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 186177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
187178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo 187178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
188179 i386 rt_sigsuspend sys_rt_sigsuspend 188179 i386 rt_sigsuspend sys_rt_sigsuspend
189180 i386 pread64 sys_pread64 sys32_pread 189180 i386 pread64 sys_pread64 sys32_pread
190181 i386 pwrite64 sys_pwrite64 sys32_pwrite 190181 i386 pwrite64 sys_pwrite64 sys32_pwrite
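
Each row of the 32-bit table above follows the layout "<number> <abi> <name> <entry point> [<compat entry point>]", where the final column, when present, is the handler used for 32-bit tasks running on a 64-bit kernel. The change swaps the x86-private sys32_*/ptregs_* wrappers for the generic sys_*/compat_sys_* handlers, so the affected rows now read, for example:

    19   i386  lseek         sys_lseek          compat_sys_lseek
    67   i386  sigaction     sys_sigaction      compat_sys_sigaction
    174  i386  rt_sigaction  sys_rt_sigaction   compat_sys_rt_sigaction
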
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index dc97328bd90a..38ae65dfd14f 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,7 +325,7 @@
325# x32-specific system call numbers start at 512 to avoid cache impact 325# x32-specific system call numbers start at 512 to avoid cache impact
326# for native 64-bit operation. 326# for native 64-bit operation.
327# 327#
328512 x32 rt_sigaction sys32_rt_sigaction 328512 x32 rt_sigaction compat_sys_rt_sigaction
329513 x32 rt_sigreturn stub_x32_rt_sigreturn 329513 x32 rt_sigreturn stub_x32_rt_sigreturn
330514 x32 ioctl compat_sys_ioctl 330514 x32 ioctl compat_sys_ioctl
331515 x32 readv compat_sys_readv 331515 x32 readv compat_sys_readv
@@ -335,9 +335,9 @@
335519 x32 recvmsg compat_sys_recvmsg 335519 x32 recvmsg compat_sys_recvmsg
336520 x32 execve stub_x32_execve 336520 x32 execve stub_x32_execve
337521 x32 ptrace compat_sys_ptrace 337521 x32 ptrace compat_sys_ptrace
338522 x32 rt_sigpending sys32_rt_sigpending 338522 x32 rt_sigpending compat_sys_rt_sigpending
339523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait 339523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
340524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo 340524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
341525 x32 sigaltstack compat_sys_sigaltstack 341525 x32 sigaltstack compat_sys_sigaltstack
342526 x32 timer_create compat_sys_timer_create 342526 x32 timer_create compat_sys_timer_create
343527 x32 mq_notify compat_sys_mq_notify 343527 x32 mq_notify compat_sys_mq_notify
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index cc2f8c131286..872eb60e7806 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -55,7 +55,7 @@ static FILE *input_file; /* Input file name */
55static void usage(const char *err) 55static void usage(const char *err)
56{ 56{
57 if (err) 57 if (err)
58 fprintf(stderr, "Error: %s\n\n", err); 58 fprintf(stderr, "%s: Error: %s\n\n", prog, err);
59 fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog); 59 fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
60 fprintf(stderr, "\t-y 64bit mode\n"); 60 fprintf(stderr, "\t-y 64bit mode\n");
61 fprintf(stderr, "\t-n 32bit mode\n"); 61 fprintf(stderr, "\t-n 32bit mode\n");
@@ -269,7 +269,13 @@ int main(int argc, char **argv)
269 insns++; 269 insns++;
270 } 270 }
271 271
272 fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); 272 fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
273 prog,
274 (errors) ? "Failure" : "Success",
275 insns,
276 (input_file) ? "given" : "random",
277 errors,
278 seed);
273 279
274 return errors ? 1 : 0; 280 return errors ? 1 : 0;
275} 281}
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 53c90fd412d1..14ef8d1dbc33 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,7 +13,6 @@ endmenu
13config UML_X86 13config UML_X86
14 def_bool y 14 def_bool y
15 select GENERIC_FIND_FIRST_BIT 15 select GENERIC_FIND_FIRST_BIT
16 select GENERIC_SIGALTSTACK
17 16
18config 64BIT 17config 64BIT
19 bool "64-bit kernel" if SUBARCH = "x86" 18 bool "64-bit kernel" if SUBARCH = "x86"
@@ -25,6 +24,8 @@ config X86_32
25 select ARCH_WANT_IPC_PARSE_VERSION 24 select ARCH_WANT_IPC_PARSE_VERSION
26 select MODULES_USE_ELF_REL 25 select MODULES_USE_ELF_REL
27 select CLONE_BACKWARDS 26 select CLONE_BACKWARDS
27 select OLD_SIGSUSPEND3
28 select OLD_SIGACTION
28 29
29config X86_64 30config X86_64
30 def_bool 64BIT 31 def_bool 64BIT
@@ -37,9 +38,8 @@ config RWSEM_GENERIC_SPINLOCK
37 def_bool !RWSEM_XCHGADD_ALGORITHM 38 def_bool !RWSEM_XCHGADD_ALGORITHM
38 39
39config 3_LEVEL_PGTABLES 40config 3_LEVEL_PGTABLES
40 bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT 41 bool "Three-level pagetables" if !64BIT
41 default 64BIT 42 default 64BIT
42 depends on EXPERIMENTAL
43 help 43 help
44 Three-level pagetables will let UML have more than 4G of physical 44 Three-level pagetables will let UML have more than 4G of physical
45 memory. All the memory that can't be mapped directly will be treated 45 memory. All the memory that can't be mapped directly will be treated
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 5d065b2222d3..eafa324eb7a5 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -10,7 +10,7 @@ endif
10 10
11obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ 11obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
12 ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ 12 ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
13 stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \ 13 stub_$(BITS).o stub_segv.o \
14 sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ 14 sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
15 mem_$(BITS).o subarch.o os-$(OS)/ 15 mem_$(BITS).o subarch.o os-$(OS)/
16 16
@@ -25,7 +25,7 @@ subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o
25 25
26else 26else
27 27
28obj-y += vdso/ 28obj-y += syscalls_64.o vdso/
29 29
30subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \ 30subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \
31 ../lib/rwsem.o 31 ../lib/rwsem.o
diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c
index 8784ab30d91b..84ac7f7b0257 100644
--- a/arch/x86/um/fault.c
+++ b/arch/x86/um/fault.c
@@ -20,7 +20,7 @@ int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
20 const struct exception_table_entry *fixup; 20 const struct exception_table_entry *fixup;
21 21
22 fixup = search_exception_tables(address); 22 fixup = search_exception_tables(address);
23 if (fixup != 0) { 23 if (fixup) {
24 UPT_IP(regs) = fixup->fixup; 24 UPT_IP(regs) = fixup->fixup;
25 return 1; 25 return 1;
26 } 26 }
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 8436079be914..68fd2cf526fd 100644
--- a/arch/x86/um/shared/sysdep/syscalls_32.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
@@ -8,11 +8,6 @@
8 8
9typedef long syscall_handler_t(struct pt_regs); 9typedef long syscall_handler_t(struct pt_regs);
10 10
11/* Not declared on x86, incompatible declarations on x86_64, so these have
12 * to go here rather than in sys_call_table.c
13 */
14extern syscall_handler_t sys_rt_sigaction;
15
16extern syscall_handler_t *sys_call_table[]; 11extern syscall_handler_t *sys_call_table[];
17 12
18#define EXECUTE_SYSCALL(syscall, regs) \ 13#define EXECUTE_SYSCALL(syscall, regs) \
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 71cef48ea5cd..ae7319db18ee 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -464,7 +464,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
464 return 0; 464 return 0;
465} 465}
466 466
467long sys_sigreturn(struct pt_regs *regs) 467long sys_sigreturn(void)
468{ 468{
469 unsigned long sp = PT_REGS_SP(&current->thread.regs); 469 unsigned long sp = PT_REGS_SP(&current->thread.regs);
470 struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); 470 struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
@@ -577,7 +577,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
577} 577}
578#endif 578#endif
579 579
580long sys_rt_sigreturn(struct pt_regs *regs) 580long sys_rt_sigreturn(void)
581{ 581{
582 unsigned long sp = PT_REGS_SP(&current->thread.regs); 582 unsigned long sp = PT_REGS_SP(&current->thread.regs);
583 struct rt_sigframe __user *frame = 583 struct rt_sigframe __user *frame =
@@ -601,14 +601,3 @@ long sys_rt_sigreturn(struct pt_regs *regs)
601 force_sig(SIGSEGV, current); 601 force_sig(SIGSEGV, current);
602 return 0; 602 return 0;
603} 603}
604
605#ifdef CONFIG_X86_32
606long ptregs_sigreturn(void)
607{
608 return sys_sigreturn(NULL);
609}
610long ptregs_rt_sigreturn(void)
611{
612 return sys_rt_sigreturn(NULL);
613}
614#endif
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index a0c3b0d1a122..531d4269e2e3 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -24,10 +24,6 @@
24 24
25#define old_mmap sys_old_mmap 25#define old_mmap sys_old_mmap
26 26
27#define ptregs_iopl sys_iopl
28#define ptregs_vm86old sys_vm86old
29#define ptregs_vm86 sys_vm86
30
31#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; 27#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
32#include <asm/syscalls_32.h> 28#include <asm/syscalls_32.h>
33 29
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
deleted file mode 100644
index e8bcea99acdb..000000000000
--- a/arch/x86/um/syscalls_32.c
+++ /dev/null
@@ -1,38 +0,0 @@
1/*
2 * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <linux/syscalls.h>
7#include <sysdep/syscalls.h>
8
9long sys_sigaction(int sig, const struct old_sigaction __user *act,
10 struct old_sigaction __user *oact)
11{
12 struct k_sigaction new_ka, old_ka;
13 int ret;
14
15 if (act) {
16 old_sigset_t mask;
17 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
18 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
19 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
20 __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
21 __get_user(mask, &act->sa_mask))
22 return -EFAULT;
23 siginitset(&new_ka.sa.sa_mask, mask);
24 }
25
26 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
27
28 if (!ret && oact) {
29 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
30 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
31 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
32 __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
33 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
34 return -EFAULT;
35 }
36
37 return ret;
38}
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 205ad328aa52..c74436e687bf 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -60,7 +60,7 @@ notrace static cycle_t vread_tsc(void)
60 60
61static notrace cycle_t vread_hpet(void) 61static notrace cycle_t vread_hpet(void)
62{ 62{
63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
64} 64}
65 65
66#ifdef CONFIG_PARAVIRT_CLOCK 66#ifdef CONFIG_PARAVIRT_CLOCK
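
The vDSO hunk above replaces the bare 0xf0 offset with HPET_COUNTER; the two refer to the same register (the HPET main counter), so the change only substitutes the named constant for the magic number. A minimal sketch of reading a counter through such a named offset, where read_reg32() is an illustrative stand-in for the kernel's readl() and the register block is faked so the example runs standalone:

    #include <stdint.h>
    #include <stdio.h>

    #define HPET_COUNTER 0x0f0        /* main counter register offset */

    /* Illustrative readl(): a 32-bit read at base + offset. */
    static uint32_t read_reg32(const volatile void *base, unsigned long off)
    {
        return *(const volatile uint32_t *)((const volatile char *)base + off);
    }

    int main(void)
    {
        static uint32_t regs[0x100];        /* fake register block */

        regs[HPET_COUNTER / 4] = 0x12345678;
        printf("counter = 0x%x\n", read_reg32(regs, HPET_COUNTER));
        return 0;
    }
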
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 138e5667409a..c8e1c7b95c3b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -67,6 +67,7 @@
67#include <asm/hypervisor.h> 67#include <asm/hypervisor.h>
68#include <asm/mwait.h> 68#include <asm/mwait.h>
69#include <asm/pci_x86.h> 69#include <asm/pci_x86.h>
70#include <asm/pat.h>
70 71
71#ifdef CONFIG_ACPI 72#ifdef CONFIG_ACPI
72#include <linux/acpi.h> 73#include <linux/acpi.h>
@@ -1417,7 +1418,14 @@ asmlinkage void __init xen_start_kernel(void)
1417 */ 1418 */
1418 acpi_numa = -1; 1419 acpi_numa = -1;
1419#endif 1420#endif
1420 1421#ifdef CONFIG_X86_PAT
1422 /*
1423 * For now, disable the PAT. We should remove this once
1424 * git commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1
1425 * (xen/pat: Disable PAT support for now) is reverted.
1426 */
1427 pat_enabled = 0;
1428#endif
1421 /* Don't do the full vcpu_info placement stuff until we have a 1429 /* Don't do the full vcpu_info placement stuff until we have a
1422 possible map and a non-dummy shared_info. */ 1430 possible map and a non-dummy shared_info. */
1423 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1431 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1517,72 +1525,51 @@ asmlinkage void __init xen_start_kernel(void)
1517#endif 1525#endif
1518} 1526}
1519 1527
1520#ifdef CONFIG_XEN_PVHVM 1528void __ref xen_hvm_init_shared_info(void)
1521#define HVM_SHARED_INFO_ADDR 0xFE700000UL
1522static struct shared_info *xen_hvm_shared_info;
1523static unsigned long xen_hvm_sip_phys;
1524static int xen_major, xen_minor;
1525
1526static void xen_hvm_connect_shared_info(unsigned long pfn)
1527{ 1529{
1530 int cpu;
1528 struct xen_add_to_physmap xatp; 1531 struct xen_add_to_physmap xatp;
1532 static struct shared_info *shared_info_page = 0;
1529 1533
1534 if (!shared_info_page)
1535 shared_info_page = (struct shared_info *)
1536 extend_brk(PAGE_SIZE, PAGE_SIZE);
1530 xatp.domid = DOMID_SELF; 1537 xatp.domid = DOMID_SELF;
1531 xatp.idx = 0; 1538 xatp.idx = 0;
1532 xatp.space = XENMAPSPACE_shared_info; 1539 xatp.space = XENMAPSPACE_shared_info;
1533 xatp.gpfn = pfn; 1540 xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
1534 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 1541 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
1535 BUG(); 1542 BUG();
1536 1543
1537} 1544 HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
1538static void __init xen_hvm_set_shared_info(struct shared_info *sip)
1539{
1540 int cpu;
1541
1542 HYPERVISOR_shared_info = sip;
1543 1545
1544 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info 1546 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
1545 * page, we use it in the event channel upcall and in some pvclock 1547 * page, we use it in the event channel upcall and in some pvclock
1546 * related functions. We don't need the vcpu_info placement 1548 * related functions. We don't need the vcpu_info placement
1547 * optimizations because we don't use any pv_mmu or pv_irq op on 1549 * optimizations because we don't use any pv_mmu or pv_irq op on
1548 * HVM. */ 1550 * HVM.
1549 for_each_online_cpu(cpu) 1551 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
1552 * online but xen_hvm_init_shared_info is run at resume time too and
1553 * in that case multiple vcpus might be online. */
1554 for_each_online_cpu(cpu) {
1550 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1555 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1551}
1552
1553/* Reconnect the shared_info pfn to a (new) mfn */
1554void xen_hvm_resume_shared_info(void)
1555{
1556 xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
1557}
1558
1559/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage.
1560 * On these old tools the shared info page will be placed in E820_Ram.
1561 * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects
1562 * that nothing is mapped up to HVM_SHARED_INFO_ADDR.
1563 * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used
1564 * here for the shared info page. */
1565static void __init xen_hvm_init_shared_info(void)
1566{
1567 if (xen_major < 4) {
1568 xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
1569 xen_hvm_sip_phys = __pa(xen_hvm_shared_info);
1570 } else {
1571 xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR;
1572 set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys);
1573 xen_hvm_shared_info =
1574 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
1575 } 1556 }
1576 xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
1577 xen_hvm_set_shared_info(xen_hvm_shared_info);
1578} 1557}
1579 1558
1559#ifdef CONFIG_XEN_PVHVM
1580static void __init init_hvm_pv_info(void) 1560static void __init init_hvm_pv_info(void)
1581{ 1561{
1582 uint32_t ecx, edx, pages, msr, base; 1562 int major, minor;
1563 uint32_t eax, ebx, ecx, edx, pages, msr, base;
1583 u64 pfn; 1564 u64 pfn;
1584 1565
1585 base = xen_cpuid_base(); 1566 base = xen_cpuid_base();
1567 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1568
1569 major = eax >> 16;
1570 minor = eax & 0xffff;
1571 printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
1572
1586 cpuid(base + 2, &pages, &msr, &ecx, &edx); 1573 cpuid(base + 2, &pages, &msr, &ecx, &edx);
1587 1574
1588 pfn = __pa(hypercall_page); 1575 pfn = __pa(hypercall_page);
@@ -1633,22 +1620,12 @@ static void __init xen_hvm_guest_init(void)
1633 1620
1634static bool __init xen_hvm_platform(void) 1621static bool __init xen_hvm_platform(void)
1635{ 1622{
1636 uint32_t eax, ebx, ecx, edx, base;
1637
1638 if (xen_pv_domain()) 1623 if (xen_pv_domain())
1639 return false; 1624 return false;
1640 1625
1641 base = xen_cpuid_base(); 1626 if (!xen_cpuid_base())
1642 if (!base)
1643 return false; 1627 return false;
1644 1628
1645 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1646
1647 xen_major = eax >> 16;
1648 xen_minor = eax & 0xffff;
1649
1650 printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor);
1651
1652 return true; 1629 return true;
1653} 1630}
1654 1631
@@ -1668,6 +1645,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
1668 .name = "Xen HVM", 1645 .name = "Xen HVM",
1669 .detect = xen_hvm_platform, 1646 .detect = xen_hvm_platform,
1670 .init_platform = xen_hvm_guest_init, 1647 .init_platform = xen_hvm_guest_init,
1648 .x2apic_available = xen_x2apic_para_available,
1671}; 1649};
1672EXPORT_SYMBOL(x86_hyper_xen_hvm); 1650EXPORT_SYMBOL(x86_hyper_xen_hvm);
1673#endif 1651#endif
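
The reworked xen_hvm_init_shared_info() above folds the old connect/set helper pair into a single function that allocates the shared_info page from the brk area exactly once and then re-registers that same page with the hypervisor on every call, which lets it serve both boot and resume. A standalone sketch of that allocate-once shape, with get_page() as a stand-in for extend_brk():

    #include <stdio.h>
    #include <stdlib.h>

    static void *get_page(void)
    {
        return calloc(1, 4096);        /* stand-in for extend_brk() */
    }

    static void *init_shared_info(void)
    {
        static void *shared_info_page;

        if (!shared_info_page)         /* first call (boot): allocate */
            shared_info_page = get_page();
        /* later calls (resume) reuse and re-register the same page */
        return shared_info_page;
    }

    int main(void)
    {
        printf("same page: %d\n", init_shared_info() == init_shared_info());
        return 0;
    }
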
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 01de35c77221..6afbb2ca9a0a 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1178,20 +1178,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
1178 1178
1179static void xen_post_allocator_init(void); 1179static void xen_post_allocator_init(void);
1180 1180
1181static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1182{
1183 /* reserve the range used */
1184 native_pagetable_reserve(start, end);
1185
1186 /* set as RW the rest */
1187 printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
1188 PFN_PHYS(pgt_buf_top));
1189 while (end < PFN_PHYS(pgt_buf_top)) {
1190 make_lowmem_page_readwrite(__va(end));
1191 end += PAGE_SIZE;
1192 }
1193}
1194
1195#ifdef CONFIG_X86_64 1181#ifdef CONFIG_X86_64
1196static void __init xen_cleanhighmap(unsigned long vaddr, 1182static void __init xen_cleanhighmap(unsigned long vaddr,
1197 unsigned long vaddr_end) 1183 unsigned long vaddr_end)
@@ -1422,7 +1408,6 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
1422 xen_mc_callback(set_current_cr3, (void *)cr3); 1408 xen_mc_callback(set_current_cr3, (void *)cr3);
1423 } 1409 }
1424} 1410}
1425
1426static void xen_write_cr3(unsigned long cr3) 1411static void xen_write_cr3(unsigned long cr3)
1427{ 1412{
1428 BUG_ON(preemptible()); 1413 BUG_ON(preemptible());
@@ -1448,6 +1433,43 @@ static void xen_write_cr3(unsigned long cr3)
1448 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ 1433 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1449} 1434}
1450 1435
1436#ifdef CONFIG_X86_64
1437/*
1438 * At the start of the day - when Xen launches a guest, it has already
1439 * built pagetables for the guest. We diligently look over them
1439 * in xen_setup_kernel_pagetable and graft them, as appropriate, into the
1441 * init_level4_pgt and its friends. Then when we are happy we load
1442 * the new init_level4_pgt - and continue on.
1443 *
1444 * The generic code starts (start_kernel) and 'init_mem_mapping' sets
1445 * up the rest of the pagetables. When it has completed it loads the cr3.
1446 * N.B. that baremetal would start at 'start_kernel' (and the early
1447 * #PF handler would create bootstrap pagetables) - so we are running
1448 * with the same assumptions as what to do when write_cr3 is executed
1449 * at this point.
1450 *
1451 * Since there are no user-page tables at all, we have two variants
1452 * of xen_write_cr3 - the early bootup (this one), and the late one
1453 * (xen_write_cr3). The reason we have to do that is that in 64-bit
1454 * the Linux kernel and user-space are both in ring 3 while the
1455 * hypervisor is in ring 0.
1456 */
1457static void __init xen_write_cr3_init(unsigned long cr3)
1458{
1459 BUG_ON(preemptible());
1460
1461 xen_mc_batch(); /* disables interrupts */
1462
1463 /* Update while interrupts are disabled, so its atomic with
1464 respect to ipis */
1465 this_cpu_write(xen_cr3, cr3);
1466
1467 __xen_write_cr3(true, cr3);
1468
1469 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1470}
1471#endif
1472
1451static int xen_pgd_alloc(struct mm_struct *mm) 1473static int xen_pgd_alloc(struct mm_struct *mm)
1452{ 1474{
1453 pgd_t *pgd = mm->pgd; 1475 pgd_t *pgd = mm->pgd;
@@ -1503,19 +1525,6 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1503#else /* CONFIG_X86_64 */ 1525#else /* CONFIG_X86_64 */
1504static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) 1526static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1505{ 1527{
1506 unsigned long pfn = pte_pfn(pte);
1507
1508 /*
1509 * If the new pfn is within the range of the newly allocated
1510 * kernel pagetable, and it isn't being mapped into an
1511 * early_ioremap fixmap slot as a freshly allocated page, make sure
1512 * it is RO.
1513 */
1514 if (((!is_early_ioremap_ptep(ptep) &&
1515 pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
1516 (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
1517 pte = pte_wrprotect(pte);
1518
1519 return pte; 1528 return pte;
1520} 1529}
1521#endif /* CONFIG_X86_64 */ 1530#endif /* CONFIG_X86_64 */
@@ -2111,6 +2120,7 @@ static void __init xen_post_allocator_init(void)
2111#endif 2120#endif
2112 2121
2113#ifdef CONFIG_X86_64 2122#ifdef CONFIG_X86_64
2123 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2114 SetPagePinned(virt_to_page(level3_user_vsyscall)); 2124 SetPagePinned(virt_to_page(level3_user_vsyscall));
2115#endif 2125#endif
2116 xen_mark_init_mm_pinned(); 2126 xen_mark_init_mm_pinned();
@@ -2129,11 +2139,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2129 .write_cr2 = xen_write_cr2, 2139 .write_cr2 = xen_write_cr2,
2130 2140
2131 .read_cr3 = xen_read_cr3, 2141 .read_cr3 = xen_read_cr3,
2132#ifdef CONFIG_X86_32
2133 .write_cr3 = xen_write_cr3_init, 2142 .write_cr3 = xen_write_cr3_init,
2134#else
2135 .write_cr3 = xen_write_cr3,
2136#endif
2137 2143
2138 .flush_tlb_user = xen_flush_tlb, 2144 .flush_tlb_user = xen_flush_tlb,
2139 .flush_tlb_kernel = xen_flush_tlb, 2145 .flush_tlb_kernel = xen_flush_tlb,
@@ -2197,7 +2203,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2197 2203
2198void __init xen_init_mmu_ops(void) 2204void __init xen_init_mmu_ops(void)
2199{ 2205{
2200 x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
2201 x86_init.paging.pagetable_init = xen_pagetable_init; 2206 x86_init.paging.pagetable_init = xen_pagetable_init;
2202 pv_mmu_ops = xen_mmu_ops; 2207 pv_mmu_ops = xen_mmu_ops;
2203 2208
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8971a26d21ab..94eac5c85cdc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -556,12 +556,9 @@ void __init xen_arch_setup(void)
556 COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); 556 COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
557 557
558 /* Set up idle, making sure it calls safe_halt() pvop */ 558 /* Set up idle, making sure it calls safe_halt() pvop */
559#ifdef CONFIG_X86_32
560 boot_cpu_data.hlt_works_ok = 1;
561#endif
562 disable_cpuidle(); 559 disable_cpuidle();
563 disable_cpufreq(); 560 disable_cpufreq();
564 WARN_ON(set_pm_idle_to_default()); 561 WARN_ON(xen_set_default_idle());
565 fiddle_vdso(); 562 fiddle_vdso();
566#ifdef CONFIG_NUMA 563#ifdef CONFIG_NUMA
567 numa_off = 1; 564 numa_off = 1;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 34bc4cee8887..09ea61d2e02f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -300,8 +300,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
300 gdt = get_cpu_gdt_table(cpu); 300 gdt = get_cpu_gdt_table(cpu);
301 301
302 ctxt->flags = VGCF_IN_KERNEL; 302 ctxt->flags = VGCF_IN_KERNEL;
303 ctxt->user_regs.ds = __USER_DS;
304 ctxt->user_regs.es = __USER_DS;
305 ctxt->user_regs.ss = __KERNEL_DS; 303 ctxt->user_regs.ss = __KERNEL_DS;
306#ifdef CONFIG_X86_32 304#ifdef CONFIG_X86_32
307 ctxt->user_regs.fs = __KERNEL_PERCPU; 305 ctxt->user_regs.fs = __KERNEL_PERCPU;
@@ -310,35 +308,41 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
310 ctxt->gs_base_kernel = per_cpu_offset(cpu); 308 ctxt->gs_base_kernel = per_cpu_offset(cpu);
311#endif 309#endif
312 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; 310 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
313 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
314 311
315 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); 312 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
316 313
317 xen_copy_trap_info(ctxt->trap_ctxt); 314 {
315 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
316 ctxt->user_regs.ds = __USER_DS;
317 ctxt->user_regs.es = __USER_DS;
318 318
319 ctxt->ldt_ents = 0; 319 xen_copy_trap_info(ctxt->trap_ctxt);
320 320
321 BUG_ON((unsigned long)gdt & ~PAGE_MASK); 321 ctxt->ldt_ents = 0;
322 322
323 gdt_mfn = arbitrary_virt_to_mfn(gdt); 323 BUG_ON((unsigned long)gdt & ~PAGE_MASK);
324 make_lowmem_page_readonly(gdt);
325 make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
326 324
327 ctxt->gdt_frames[0] = gdt_mfn; 325 gdt_mfn = arbitrary_virt_to_mfn(gdt);
328 ctxt->gdt_ents = GDT_ENTRIES; 326 make_lowmem_page_readonly(gdt);
327 make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
329 328
330 ctxt->user_regs.cs = __KERNEL_CS; 329 ctxt->gdt_frames[0] = gdt_mfn;
331 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); 330 ctxt->gdt_ents = GDT_ENTRIES;
332 331
333 ctxt->kernel_ss = __KERNEL_DS; 332 ctxt->kernel_ss = __KERNEL_DS;
334 ctxt->kernel_sp = idle->thread.sp0; 333 ctxt->kernel_sp = idle->thread.sp0;
335 334
336#ifdef CONFIG_X86_32 335#ifdef CONFIG_X86_32
337 ctxt->event_callback_cs = __KERNEL_CS; 336 ctxt->event_callback_cs = __KERNEL_CS;
338 ctxt->failsafe_callback_cs = __KERNEL_CS; 337 ctxt->failsafe_callback_cs = __KERNEL_CS;
339#endif 338#endif
340 ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; 339 ctxt->event_callback_eip =
341 ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; 340 (unsigned long)xen_hypervisor_callback;
341 ctxt->failsafe_callback_eip =
342 (unsigned long)xen_failsafe_callback;
343 }
344 ctxt->user_regs.cs = __KERNEL_CS;
345 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
342 346
343 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); 347 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
344 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); 348 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 83e866d714ce..f7a080ef0354 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -328,7 +328,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
328 if (per_cpu(lock_spinners, cpu) == xl) { 328 if (per_cpu(lock_spinners, cpu) == xl) {
329 ADD_STATS(released_slow_kicked, 1); 329 ADD_STATS(released_slow_kicked, 1);
330 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 330 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
331 break;
332 } 331 }
333 } 332 }
334} 333}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index ae8a00c39de4..45329c8c226e 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM 31#ifdef CONFIG_XEN_PVHVM
32 int cpu; 32 int cpu;
33 xen_hvm_resume_shared_info(); 33 xen_hvm_init_shared_info();
34 xen_callback_vector(); 34 xen_callback_vector();
35 xen_unplug_emulated_devices(); 35 xen_unplug_emulated_devices();
36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) { 36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index f9643fc50de5..33ca6e42a4ca 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -89,11 +89,11 @@ ENTRY(xen_iret)
89 */ 89 */
90#ifdef CONFIG_SMP 90#ifdef CONFIG_SMP
91 GET_THREAD_INFO(%eax) 91 GET_THREAD_INFO(%eax)
92 movl TI_cpu(%eax), %eax 92 movl %ss:TI_cpu(%eax), %eax
93 movl __per_cpu_offset(,%eax,4), %eax 93 movl %ss:__per_cpu_offset(,%eax,4), %eax
94 mov xen_vcpu(%eax), %eax 94 mov %ss:xen_vcpu(%eax), %eax
95#else 95#else
96 movl xen_vcpu, %eax 96 movl %ss:xen_vcpu, %eax
97#endif 97#endif
98 98
99 /* check IF state we're restoring */ 99 /* check IF state we're restoring */
@@ -106,11 +106,11 @@ ENTRY(xen_iret)
106 * resuming the code, so we don't have to be worried about 106 * resuming the code, so we don't have to be worried about
107 * being preempted to another CPU. 107 * being preempted to another CPU.
108 */ 108 */
109 setz XEN_vcpu_info_mask(%eax) 109 setz %ss:XEN_vcpu_info_mask(%eax)
110xen_iret_start_crit: 110xen_iret_start_crit:
111 111
112 /* check for unmasked and pending */ 112 /* check for unmasked and pending */
113 cmpw $0x0001, XEN_vcpu_info_pending(%eax) 113 cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
114 114
115 /* 115 /*
116 * If there's something pending, mask events again so we can 116 * If there's something pending, mask events again so we can
@@ -118,7 +118,7 @@ xen_iret_start_crit:
118 * touch XEN_vcpu_info_mask. 118 * touch XEN_vcpu_info_mask.
119 */ 119 */
120 jne 1f 120 jne 1f
121 movb $1, XEN_vcpu_info_mask(%eax) 121 movb $1, %ss:XEN_vcpu_info_mask(%eax)
122 122
1231: popl %eax 1231: popl %eax
124 124
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d2e73d19d366..a95b41744ad0 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -40,7 +40,7 @@ void xen_enable_syscall(void);
40void xen_vcpu_restore(void); 40void xen_vcpu_restore(void);
41 41
42void xen_callback_vector(void); 42void xen_callback_vector(void);
43void xen_hvm_resume_shared_info(void); 43void xen_hvm_init_shared_info(void);
44void xen_unplug_emulated_devices(void); 44void xen_unplug_emulated_devices(void);
45 45
46void __init xen_build_dynamic_phys_to_machine(void); 46void __init xen_build_dynamic_phys_to_machine(void);