Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig63
-rw-r--r--arch/x86/Kconfig.cpu73
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/Makefile_32.cpu1
-rw-r--r--arch/x86/boot/.gitignore1
-rw-r--r--arch/x86/boot/compressed/eboot.c118
-rw-r--r--arch/x86/crypto/Makefile5
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S1102
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c558
-rw-r--r--arch/x86/crypto/camellia_glue.c92
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S348
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c79
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S206
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c77
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c (renamed from arch/x86/crypto/crc32c-intel.c)81
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S460
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx.S91
-rw-r--r--arch/x86/crypto/glue_helper.c12
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S166
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c49
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c12
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S208
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c73
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c20
-rw-r--r--arch/x86/ia32/ia32_aout.c5
-rw-r--r--arch/x86/ia32/ia32entry.S7
-rw-r--r--arch/x86/ia32/sys_ia32.c11
-rw-r--r--arch/x86/include/asm/Kbuild26
-rw-r--r--arch/x86/include/asm/atomic.h16
-rw-r--r--arch/x86/include/asm/boot.h9
-rw-r--r--arch/x86/include/asm/clocksource.h1
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h55
-rw-r--r--arch/x86/include/asm/context_tracking.h (renamed from arch/x86/include/asm/rcu.h)15
-rw-r--r--arch/x86/include/asm/cpu.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h8
-rw-r--r--arch/x86/include/asm/crypto/camellia.h82
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h28
-rw-r--r--arch/x86/include/asm/crypto/serpent-avx.h27
-rw-r--r--arch/x86/include/asm/crypto/twofish.h4
-rw-r--r--arch/x86/include/asm/debugreg.h79
-rw-r--r--arch/x86/include/asm/device.h3
-rw-r--r--arch/x86/include/asm/e820.h74
-rw-r--r--arch/x86/include/asm/elf.h6
-rw-r--r--arch/x86/include/asm/fixmap.h5
-rw-r--r--arch/x86/include/asm/futex.h12
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h5
-rw-r--r--arch/x86/include/asm/ist.h17
-rw-r--r--arch/x86/include/asm/kexec.h3
-rw-r--r--arch/x86/include/asm/kvm_guest.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h24
-rw-r--r--arch/x86/include/asm/kvm_para.h99
-rw-r--r--arch/x86/include/asm/local.h18
-rw-r--r--arch/x86/include/asm/mce.h138
-rw-r--r--arch/x86/include/asm/module.h2
-rw-r--r--arch/x86/include/asm/msr.h11
-rw-r--r--arch/x86/include/asm/mtrr.h93
-rw-r--r--arch/x86/include/asm/numachip/numachip.h19
-rw-r--r--arch/x86/include/asm/paravirt.h2
-rw-r--r--arch/x86/include/asm/pci.h12
-rw-r--r--arch/x86/include/asm/percpu.h3
-rw-r--r--arch/x86/include/asm/pgtable.h17
-rw-r--r--arch/x86/include/asm/pgtable_types.h20
-rw-r--r--arch/x86/include/asm/posix_types.h10
-rw-r--r--arch/x86/include/asm/processor-flags.h97
-rw-r--r--arch/x86/include/asm/processor.h37
-rw-r--r--arch/x86/include/asm/ptrace.h84
-rw-r--r--arch/x86/include/asm/pvclock.h47
-rw-r--r--arch/x86/include/asm/setup.h5
-rw-r--r--arch/x86/include/asm/sigcontext.h216
-rw-r--r--arch/x86/include/asm/signal.h142
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/svm.h132
-rw-r--r--arch/x86/include/asm/sys_ia32.h2
-rw-r--r--arch/x86/include/asm/syscalls.h9
-rw-r--r--arch/x86/include/asm/tlbflush.h3
-rw-r--r--arch/x86/include/asm/trace_clock.h20
-rw-r--r--arch/x86/include/asm/uaccess.h42
-rw-r--r--arch/x86/include/asm/unistd.h17
-rw-r--r--arch/x86/include/asm/vm86.h128
-rw-r--r--arch/x86/include/asm/vmx.h92
-rw-r--r--arch/x86/include/asm/vsyscall.h34
-rw-r--r--arch/x86/include/asm/xen/interface.h1
-rw-r--r--arch/x86/include/uapi/asm/Kbuild58
-rw-r--r--arch/x86/include/uapi/asm/a.out.h (renamed from arch/x86/include/asm/a.out.h)0
-rw-r--r--arch/x86/include/uapi/asm/auxvec.h (renamed from arch/x86/include/asm/auxvec.h)0
-rw-r--r--arch/x86/include/uapi/asm/bitsperlong.h (renamed from arch/x86/include/asm/bitsperlong.h)0
-rw-r--r--arch/x86/include/uapi/asm/boot.h10
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h (renamed from arch/x86/include/asm/bootparam.h)1
-rw-r--r--arch/x86/include/uapi/asm/byteorder.h (renamed from arch/x86/include/asm/byteorder.h)0
-rw-r--r--arch/x86/include/uapi/asm/debugreg.h80
-rw-r--r--arch/x86/include/uapi/asm/e820.h75
-rw-r--r--arch/x86/include/uapi/asm/errno.h (renamed from arch/x86/include/asm/errno.h)0
-rw-r--r--arch/x86/include/uapi/asm/fcntl.h (renamed from arch/x86/include/asm/fcntl.h)0
-rw-r--r--arch/x86/include/uapi/asm/hw_breakpoint.h1
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h (renamed from arch/x86/include/asm/hyperv.h)0
-rw-r--r--arch/x86/include/uapi/asm/ioctl.h (renamed from arch/x86/include/asm/ioctl.h)0
-rw-r--r--arch/x86/include/uapi/asm/ioctls.h (renamed from arch/x86/include/asm/ioctls.h)0
-rw-r--r--arch/x86/include/uapi/asm/ipcbuf.h (renamed from arch/x86/include/asm/ipcbuf.h)0
-rw-r--r--arch/x86/include/uapi/asm/ist.h29
-rw-r--r--arch/x86/include/uapi/asm/kvm.h (renamed from arch/x86/include/asm/kvm.h)0
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h100
-rw-r--r--arch/x86/include/uapi/asm/ldt.h (renamed from arch/x86/include/asm/ldt.h)0
-rw-r--r--arch/x86/include/uapi/asm/mce.h121
-rw-r--r--arch/x86/include/uapi/asm/mman.h (renamed from arch/x86/include/asm/mman.h)3
-rw-r--r--arch/x86/include/uapi/asm/msgbuf.h (renamed from arch/x86/include/asm/msgbuf.h)0
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h (renamed from arch/x86/include/asm/msr-index.h)40
-rw-r--r--arch/x86/include/uapi/asm/msr.h15
-rw-r--r--arch/x86/include/uapi/asm/mtrr.h117
-rw-r--r--arch/x86/include/uapi/asm/param.h (renamed from arch/x86/include/asm/param.h)0
-rw-r--r--arch/x86/include/uapi/asm/perf_regs.h (renamed from arch/x86/include/asm/perf_regs.h)0
-rw-r--r--arch/x86/include/uapi/asm/poll.h (renamed from arch/x86/include/asm/poll.h)0
-rw-r--r--arch/x86/include/uapi/asm/posix_types.h9
-rw-r--r--arch/x86/include/uapi/asm/posix_types_32.h (renamed from arch/x86/include/asm/posix_types_32.h)0
-rw-r--r--arch/x86/include/uapi/asm/posix_types_64.h (renamed from arch/x86/include/asm/posix_types_64.h)0
-rw-r--r--arch/x86/include/uapi/asm/posix_types_x32.h (renamed from arch/x86/include/asm/posix_types_x32.h)0
-rw-r--r--arch/x86/include/uapi/asm/prctl.h (renamed from arch/x86/include/asm/prctl.h)0
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h99
-rw-r--r--arch/x86/include/uapi/asm/ptrace-abi.h (renamed from arch/x86/include/asm/ptrace-abi.h)0
-rw-r--r--arch/x86/include/uapi/asm/ptrace.h78
-rw-r--r--arch/x86/include/uapi/asm/resource.h (renamed from arch/x86/include/asm/resource.h)0
-rw-r--r--arch/x86/include/uapi/asm/sembuf.h (renamed from arch/x86/include/asm/sembuf.h)0
-rw-r--r--arch/x86/include/uapi/asm/setup.h1
-rw-r--r--arch/x86/include/uapi/asm/shmbuf.h (renamed from arch/x86/include/asm/shmbuf.h)0
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h221
-rw-r--r--arch/x86/include/uapi/asm/sigcontext32.h (renamed from arch/x86/include/asm/sigcontext32.h)0
-rw-r--r--arch/x86/include/uapi/asm/siginfo.h (renamed from arch/x86/include/asm/siginfo.h)0
-rw-r--r--arch/x86/include/uapi/asm/signal.h145
-rw-r--r--arch/x86/include/uapi/asm/socket.h (renamed from arch/x86/include/asm/socket.h)0
-rw-r--r--arch/x86/include/uapi/asm/sockios.h (renamed from arch/x86/include/asm/sockios.h)0
-rw-r--r--arch/x86/include/uapi/asm/stat.h (renamed from arch/x86/include/asm/stat.h)0
-rw-r--r--arch/x86/include/uapi/asm/statfs.h (renamed from arch/x86/include/asm/statfs.h)0
-rw-r--r--arch/x86/include/uapi/asm/svm.h132
-rw-r--r--arch/x86/include/uapi/asm/swab.h (renamed from arch/x86/include/asm/swab.h)29
-rw-r--r--arch/x86/include/uapi/asm/termbits.h (renamed from arch/x86/include/asm/termbits.h)0
-rw-r--r--arch/x86/include/uapi/asm/termios.h (renamed from arch/x86/include/asm/termios.h)0
-rw-r--r--arch/x86/include/uapi/asm/types.h (renamed from arch/x86/include/asm/types.h)0
-rw-r--r--arch/x86/include/uapi/asm/ucontext.h (renamed from arch/x86/include/asm/ucontext.h)0
-rw-r--r--arch/x86/include/uapi/asm/unistd.h17
-rw-r--r--arch/x86/include/uapi/asm/vm86.h129
-rw-r--r--arch/x86/include/uapi/asm/vmx.h109
-rw-r--r--arch/x86/include/uapi/asm/vsyscall.h17
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c12
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/apic/apic.c73
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c35
-rw-r--r--arch/x86/kernel/cpu/amd.c12
-rw-r--r--arch/x86/kernel/cpu/bugs.c41
-rw-r--r--arch/x86/kernel/cpu/common.c14
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c75
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c209
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event.c121
-rw-r--r--arch/x86/kernel/cpu/perf_event.h5
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c2
-rw-r--r--arch/x86/kernel/crash.c32
-rw-r--r--arch/x86/kernel/entry_32.S18
-rw-r--r--arch/x86/kernel/entry_64.S65
-rw-r--r--arch/x86/kernel/head_32.S13
-rw-r--r--arch/x86/kernel/head_64.S16
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/i387.c6
-rw-r--r--arch/x86/kernel/kvm.c20
-rw-r--r--arch/x86/kernel/kvmclock.c88
-rw-r--r--arch/x86/kernel/process.c69
-rw-r--r--arch/x86/kernel/process_32.c12
-rw-r--r--arch/x86/kernel/process_64.c10
-rw-r--r--arch/x86/kernel/ptrace.c7
-rw-r--r--arch/x86/kernel/pvclock.c143
-rw-r--r--arch/x86/kernel/rtc.c6
-rw-r--r--arch/x86/kernel/setup.c8
-rw-r--r--arch/x86/kernel/signal.c5
-rw-r--r--arch/x86/kernel/smpboot.c151
-rw-r--r--arch/x86/kernel/sys_x86_64.c151
-rw-r--r--arch/x86/kernel/topology.c101
-rw-r--r--arch/x86/kernel/trace_clock.c21
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/tsc.c6
-rw-r--r--arch/x86/kernel/uprobes.c54
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vsyscall_64.c110
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c5
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c65
-rw-r--r--arch/x86/kvm/paging_tmpl.h115
-rw-r--r--arch/x86/kvm/svm.c48
-rw-r--r--arch/x86/kvm/trace.h63
-rw-r--r--arch/x86/kvm/vmx.c203
-rw-r--r--arch/x86/kvm/x86.c548
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/cmpxchg.c54
-rw-r--r--arch/x86/lib/copy_page_64.S120
-rw-r--r--arch/x86/lib/usercopy_32.c57
-rw-r--r--arch/x86/mm/fault.c25
-rw-r--r--arch/x86/mm/hugetlbpage.c130
-rw-r--r--arch/x86/mm/init_32.c5
-rw-r--r--arch/x86/mm/init_64.c4
-rw-r--r--arch/x86/mm/pgtable.c10
-rw-r--r--arch/x86/mm/tlb.c8
-rw-r--r--arch/x86/net/bpf_jit_comp.c21
-rw-r--r--arch/x86/pci/Makefile1
-rw-r--r--arch/x86/pci/acpi.c46
-rw-r--r--arch/x86/pci/common.c32
-rw-r--r--arch/x86/pci/numachip.c129
-rw-r--r--arch/x86/platform/ce4100/ce4100.c3
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c2
-rw-r--r--arch/x86/power/cpu.c82
-rw-r--r--arch/x86/syscalls/syscall_32.tbl6
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk6
-rw-r--r--arch/x86/um/Kconfig3
-rw-r--r--arch/x86/um/shared/sysdep/syscalls.h2
-rw-r--r--arch/x86/um/sys_call_table_32.c3
-rw-r--r--arch/x86/um/syscalls_32.c15
-rw-r--r--arch/x86/vdso/vclock_gettime.c81
-rw-r--r--arch/x86/vdso/vgetcpu.c11
-rw-r--r--arch/x86/vdso/vma.c2
-rw-r--r--arch/x86/xen/Kconfig3
-rw-r--r--arch/x86/xen/enlighten.c109
-rw-r--r--arch/x86/xen/mmu.c17
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/x86/xen/suspend.c2
-rw-r--r--arch/x86/xen/xen-ops.h2
233 files changed, 7771 insertions, 3680 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff3ced2..97f8c5ad8c2d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,8 @@ config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
@@ -69,8 +71,8 @@ config X86
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_DEBUG_KMEMLEAK
 	select ANON_INODES
-	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
-	select HAVE_CMPXCHG_LOCAL if !M386
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+	select HAVE_CMPXCHG_LOCAL
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
@@ -106,12 +108,13 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
-	select HAVE_RCU_USER_QS if X86_64
+	select HAVE_CONTEXT_TRACKING if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select GENERIC_KERNEL_THREAD
 	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_REL if X86_32
 	select MODULES_USE_ELF_RELA if X86_64
+	select CLONE_BACKWARDS if X86_32
 
 config INSTRUCTION_DECODER
 	def_bool y
@@ -171,13 +174,8 @@ config ARCH_MAY_HAVE_PC_FDC
 	def_bool y
 	depends on ISA_DMA_API
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-	depends on !X86_XADD
-
 config RWSEM_XCHGADD_ALGORITHM
 	def_bool y
-	depends on X86_XADD
 
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
@@ -310,7 +308,7 @@ config X86_X2APIC
 	  If you don't know what to do here, say N.
 
 config X86_MPPARSE
-	bool "Enable MPS table" if ACPI
+	bool "Enable MPS table" if ACPI || SFI
 	default y
 	depends on X86_LOCAL_APIC
 	---help---
@@ -374,6 +372,7 @@ config X86_NUMACHIP
 	depends on NUMA
 	depends on SMP
 	depends on X86_X2APIC
+	depends on PCI_MMCONFIG
 	---help---
 	  Adds support for Numascale NumaChip large-SMP systems. Needed to
 	  enable more than ~168 cores.
@@ -1100,7 +1099,7 @@ config HIGHMEM4G
 
 config HIGHMEM64G
 	bool "64GB"
-	depends on !M386 && !M486
+	depends on !M486
 	select X86_PAE
 	---help---
 	  Select this if you have a 32-bit processor and more than 4
@@ -1698,6 +1697,50 @@ config HOTPLUG_CPU
 	  automatically on SMP systems. )
 	  Say N if you want to disable CPU hotplug.
 
+config BOOTPARAM_HOTPLUG_CPU0
+	bool "Set default setting of cpu0_hotpluggable"
+	default n
+	depends on HOTPLUG_CPU && EXPERIMENTAL
+	---help---
+	  Set whether the default state of cpu0_hotpluggable is on or off.
+
+	  Say Y here to enable CPU0 hotplug by default. If this switch
+	  is turned on, there is no need to pass the cpu0_hotplug kernel
+	  parameter; the CPU0 hotplug feature is enabled by default.
+
+	  Please note: there are two known CPU0 dependencies if you want
+	  to enable the CPU0 hotplug feature either by this switch or by
+	  the cpu0_hotplug kernel parameter.
+
+	  First, resume from hibernate or suspend always starts from CPU0.
+	  So hibernate and suspend are prevented if CPU0 is offline.
+
+	  Second, PIC interrupts always go to CPU0. CPU0 cannot be taken
+	  offline if any interrupt cannot be migrated away from it. There
+	  may be other CPU0 dependencies.
+
+	  Please make sure the dependencies are under your control before
+	  you enable this feature.
+
+	  Say N if you don't want to enable CPU0 hotplug by default. You
+	  can still enable the CPU0 hotplug feature at boot with the
+	  cpu0_hotplug kernel parameter.
+
+config DEBUG_HOTPLUG_CPU0
+	def_bool n
+	prompt "Debug CPU0 hotplug"
+	depends on HOTPLUG_CPU && EXPERIMENTAL
+	---help---
+	  Enabling this option offlines CPU0 (if CPU0 can be offlined) as
+	  soon as possible and boots up userspace with CPU0 offlined. The
+	  user can bring CPU0 back online after boot.
+
+	  To debug CPU0 hotplug, you need to enable the CPU0 offline/online
+	  feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during
+	  compilation or giving the cpu0_hotplug kernel parameter at boot.
+
+	  If unsure, say N.
+
 config COMPAT_VDSO
 	def_bool y
 	prompt "Compat VDSO support"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f3b86d0df44e..c026cca5602c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -4,23 +4,24 @@ choice
 	default M686 if X86_32
 	default GENERIC_CPU if X86_64
 
-config M386
-	bool "386"
-	depends on X86_32 && !UML
+config M486
+	bool "486"
+	depends on X86_32
 	---help---
-	  This is the processor type of your CPU. This information is used for
-	  optimizing purposes. In order to compile a kernel that can run on
-	  all x86 CPU types (albeit not optimally fast), you can specify
-	  "386" here.
+	  This is the processor type of your CPU. This information is
+	  used for optimizing purposes. In order to compile a kernel
+	  that can run on all supported x86 CPU types (albeit not
+	  optimally fast), you can specify "486" here.
+
+	  Note that the 386 is no longer supported, this includes
+	  AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI 486DLC/DLC2,
+	  UMC 486SX-S and the NexGen Nx586.
 
 	  The kernel will not necessarily run on earlier architectures than
 	  the one you have chosen, e.g. a Pentium optimized kernel will run on
 	  a PPro, but not necessarily on a i486.
 
 	  Here are the settings recommended for greatest speed:
-	  - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
-	    486DLC/DLC2, and UMC 486SX-S. Only "386" kernels will run on a 386
-	    class machine.
 	  - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
 	    SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
 	  - "586" for generic Pentium CPUs lacking the TSC
@@ -43,16 +44,7 @@ config M386
 	  - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
 	  - "VIA C7" for VIA C7.
 
-	  If you don't know what to do, choose "386".
-
-config M486
-	bool "486"
-	depends on X86_32
-	---help---
-	  Select this for a 486 series processor, either Intel or one of the
-	  compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX,
-	  DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
-	  U5S.
+	  If you don't know what to do, choose "486".
 
 config M586
 	bool "586/K5/5x86/6x86/6x86MX"
@@ -305,24 +297,16 @@ config X86_INTERNODE_CACHE_SHIFT
 	default "12" if X86_VSMP
 	default X86_L1_CACHE_SHIFT
 
-config X86_CMPXCHG
-	def_bool y
-	depends on X86_64 || (X86_32 && !M386)
-
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
 	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-	default "4" if MELAN || M486 || M386 || MGEODEGX1
+	default "4" if MELAN || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
-config X86_XADD
-	def_bool y
-	depends on !M386
-
 config X86_PPRO_FENCE
 	bool "PentiumPro memory ordering errata workaround"
-	depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
+	depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
 	---help---
 	  Old PentiumPro multiprocessor systems had errata that could cause
 	  memory operations to violate the x86 ordering standard in rare cases.
@@ -335,27 +319,11 @@ config X86_PPRO_FENCE
 
 config X86_F00F_BUG
 	def_bool y
-	depends on M586MMX || M586TSC || M586 || M486 || M386
+	depends on M586MMX || M586TSC || M586 || M486
 
 config X86_INVD_BUG
 	def_bool y
-	depends on M486 || M386
-
-config X86_WP_WORKS_OK
-	def_bool y
-	depends on !M386
-
-config X86_INVLPG
-	def_bool y
-	depends on X86_32 && !M386
-
-config X86_BSWAP
-	def_bool y
-	depends on X86_32 && !M386
-
-config X86_POPAD_OK
-	def_bool y
-	depends on X86_32 && !M386
+	depends on M486
 
 config X86_ALIGNMENT_16
 	def_bool y
@@ -412,12 +380,11 @@ config X86_MINIMUM_CPU_FAMILY
 	default "64" if X86_64
 	default "6" if X86_32 && X86_P6_NOP
 	default "5" if X86_32 && X86_CMPXCHG64
-	default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
-	default "3"
+	default "4"
 
 config X86_DEBUGCTLMSR
 	def_bool y
-	depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML
+	depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486) && !UML
 
 menuconfig PROCESSOR_SELECT
 	bool "Supported processor vendors" if EXPERT
@@ -441,7 +408,7 @@ config CPU_SUP_INTEL
 config CPU_SUP_CYRIX_32
 	default y
 	bool "Support Cyrix processors" if PROCESSOR_SELECT
-	depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
+	depends on M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
 	---help---
 	  This enables detection, tunings and quirks for Cyrix processors
 
@@ -495,7 +462,7 @@ config CPU_SUP_TRANSMETA_32
 config CPU_SUP_UMC_32
 	default y
 	bool "Support UMC processors" if PROCESSOR_SELECT
-	depends on M386 || M486 || (EXPERT && !64BIT)
+	depends on M486 || (EXPERT && !64BIT)
 	---help---
 	  This enables detection, tunings and quirks for UMC processors
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 05afcca66de6..e71fc4279aab 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -123,9 +123,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
+avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
 
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 86cee7b749e1..6647ed49c66c 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -10,7 +10,6 @@ tune = $(call cc-option,-mcpu=$(1),$(2))
 endif
 
 align := $(cc-option-align)
-cflags-$(CONFIG_M386)		+= -march=i386
 cflags-$(CONFIG_M486)		+= -march=i486
 cflags-$(CONFIG_M586)		+= -march=i586
 cflags-$(CONFIG_M586TSC)	+= -march=i586
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 851fe936d242..e3cf9f682be5 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -2,7 +2,6 @@ bootsect
 bzImage
 cpustr.h
 mkcpustr
-offsets.h
 voffset.h
 zoffset.h
 setup
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index e87b0cac14b5..b1942e222768 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -8,6 +8,7 @@
  * ----------------------------------------------------------------------- */
 
 #include <linux/efi.h>
+#include <linux/pci.h>
 #include <asm/efi.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
@@ -245,6 +246,121 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
 	*size = len;
 }
 
+static efi_status_t setup_efi_pci(struct boot_params *params)
+{
+	efi_pci_io_protocol *pci;
+	efi_status_t status;
+	void **pci_handle;
+	efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
+	unsigned long nr_pci, size = 0;
+	int i;
+	struct setup_data *data;
+
+	data = (struct setup_data *)params->hdr.setup_data;
+
+	while (data && data->next)
+		data = (struct setup_data *)data->next;
+
+	status = efi_call_phys5(sys_table->boottime->locate_handle,
+				EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+				NULL, &size, pci_handle);
+
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		status = efi_call_phys3(sys_table->boottime->allocate_pool,
+					EFI_LOADER_DATA, size, &pci_handle);
+
+		if (status != EFI_SUCCESS)
+			return status;
+
+		status = efi_call_phys5(sys_table->boottime->locate_handle,
+					EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+					NULL, &size, pci_handle);
+	}
+
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	nr_pci = size / sizeof(void *);
+	for (i = 0; i < nr_pci; i++) {
+		void *h = pci_handle[i];
+		uint64_t attributes;
+		struct pci_setup_rom *rom;
+
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					h, &pci_proto, &pci);
+
+		if (status != EFI_SUCCESS)
+			continue;
+
+		if (!pci)
+			continue;
+
+		status = efi_call_phys4(pci->attributes, pci,
+					EfiPciIoAttributeOperationGet, 0,
+					&attributes);
+
+		if (status != EFI_SUCCESS)
+			continue;
+
+		if (!(attributes & EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM))
+			continue;
+
+		if (!pci->romimage || !pci->romsize)
+			continue;
+
+		size = pci->romsize + sizeof(*rom);
+
+		status = efi_call_phys3(sys_table->boottime->allocate_pool,
+					EFI_LOADER_DATA, size, &rom);
+
+		if (status != EFI_SUCCESS)
+			continue;
+
+		rom->data.type = SETUP_PCI;
+		rom->data.len = size - sizeof(struct setup_data);
+		rom->data.next = 0;
+		rom->pcilen = pci->romsize;
+
+		status = efi_call_phys5(pci->pci.read, pci,
+					EfiPciIoWidthUint16, PCI_VENDOR_ID,
+					1, &(rom->vendor));
+
+		if (status != EFI_SUCCESS)
+			goto free_struct;
+
+		status = efi_call_phys5(pci->pci.read, pci,
+					EfiPciIoWidthUint16, PCI_DEVICE_ID,
+					1, &(rom->devid));
+
+		if (status != EFI_SUCCESS)
+			goto free_struct;
+
+		status = efi_call_phys5(pci->get_location, pci,
+					&(rom->segment), &(rom->bus),
+					&(rom->device), &(rom->function));
+
+		if (status != EFI_SUCCESS)
+			goto free_struct;
+
+		memcpy(rom->romdata, pci->romimage, pci->romsize);
+
+		if (data)
+			data->next = (uint64_t)rom;
+		else
+			params->hdr.setup_data = (uint64_t)rom;
+
+		data = (struct setup_data *)rom;
+
+		continue;
+	free_struct:
+		efi_call_phys1(sys_table->boottime->free_pool, rom);
+	}
+
+free_handle:
+	efi_call_phys1(sys_table->boottime->free_pool, pci_handle);
+	return status;
+}
+
 /*
  * See if we have Graphics Output Protocol
  */
@@ -1028,6 +1144,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
 
 	setup_graphics(boot_params);
 
+	setup_efi_pci(boot_params);
+
 	status = efi_call_phys3(sys_table->boottime->allocate_pool,
 				EFI_LOADER_DATA, sizeof(*gdt),
 				(void **)&gdt);
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5bacb4a226ac..e0ca7c9ac383 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
@@ -34,6 +35,8 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
+camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
+				camellia_aesni_avx_glue.o
 cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
 cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
@@ -47,3 +50,5 @@ serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+crc32c-intel-y := crc32c-intel_glue.o
+crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
new file mode 100644
index 000000000000..2306d2e4816f
--- /dev/null
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -0,0 +1,1102 @@
1/*
2 * x86_64/AVX/AES-NI assembler implementation of Camellia
3 *
4 * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13/*
14 * Version licensed under 2-clause BSD License is available at:
15 * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
16 */
17
18#define CAMELLIA_TABLE_BYTE_LEN 272
19
20/* struct camellia_ctx: */
21#define key_table 0
22#define key_length CAMELLIA_TABLE_BYTE_LEN
23
24/* register macros */
25#define CTX %rdi
26
27/**********************************************************************
28 16-way camellia
29 **********************************************************************/
30#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
31 vpand x, mask4bit, tmp0; \
32 vpandn x, mask4bit, x; \
33 vpsrld $4, x, x; \
34 \
35 vpshufb tmp0, lo_t, tmp0; \
36 vpshufb x, hi_t, x; \
37 vpxor tmp0, x, x;
38
39/*
40 * IN:
41 * x0..x7: byte-sliced AB state
42 * mem_cd: register pointer storing CD state
43 * key: index for key material
44 * OUT:
45 * x0..x7: new byte-sliced CD state
46 */
47#define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \
48 t7, mem_cd, key) \
49 /* \
50 * S-function with AES subbytes \
51 */ \
52 vmovdqa .Linv_shift_row, t4; \
53 vbroadcastss .L0f0f0f0f, t7; \
54 vmovdqa .Lpre_tf_lo_s1, t0; \
55 vmovdqa .Lpre_tf_hi_s1, t1; \
56 \
57 /* AES inverse shift rows */ \
58 vpshufb t4, x0, x0; \
59 vpshufb t4, x7, x7; \
60 vpshufb t4, x1, x1; \
61 vpshufb t4, x4, x4; \
62 vpshufb t4, x2, x2; \
63 vpshufb t4, x5, x5; \
64 vpshufb t4, x3, x3; \
65 vpshufb t4, x6, x6; \
66 \
67 /* prefilter sboxes 1, 2 and 3 */ \
68 vmovdqa .Lpre_tf_lo_s4, t2; \
69 vmovdqa .Lpre_tf_hi_s4, t3; \
70 filter_8bit(x0, t0, t1, t7, t6); \
71 filter_8bit(x7, t0, t1, t7, t6); \
72 filter_8bit(x1, t0, t1, t7, t6); \
73 filter_8bit(x4, t0, t1, t7, t6); \
74 filter_8bit(x2, t0, t1, t7, t6); \
75 filter_8bit(x5, t0, t1, t7, t6); \
76 \
77 /* prefilter sbox 4 */ \
78 vpxor t4, t4, t4; \
79 filter_8bit(x3, t2, t3, t7, t6); \
80 filter_8bit(x6, t2, t3, t7, t6); \
81 \
82 /* AES subbytes + AES shift rows */ \
83 vmovdqa .Lpost_tf_lo_s1, t0; \
84 vmovdqa .Lpost_tf_hi_s1, t1; \
85 vaesenclast t4, x0, x0; \
86 vaesenclast t4, x7, x7; \
87 vaesenclast t4, x1, x1; \
88 vaesenclast t4, x4, x4; \
89 vaesenclast t4, x2, x2; \
90 vaesenclast t4, x5, x5; \
91 vaesenclast t4, x3, x3; \
92 vaesenclast t4, x6, x6; \
93 \
94 /* postfilter sboxes 1 and 4 */ \
95 vmovdqa .Lpost_tf_lo_s3, t2; \
96 vmovdqa .Lpost_tf_hi_s3, t3; \
97 filter_8bit(x0, t0, t1, t7, t6); \
98 filter_8bit(x7, t0, t1, t7, t6); \
99 filter_8bit(x3, t0, t1, t7, t6); \
100 filter_8bit(x6, t0, t1, t7, t6); \
101 \
102 /* postfilter sbox 3 */ \
103 vmovdqa .Lpost_tf_lo_s2, t4; \
104 vmovdqa .Lpost_tf_hi_s2, t5; \
105 filter_8bit(x2, t2, t3, t7, t6); \
106 filter_8bit(x5, t2, t3, t7, t6); \
107 \
108 vpxor t6, t6, t6; \
109 vmovq key, t0; \
110 \
111 /* postfilter sbox 2 */ \
112 filter_8bit(x1, t4, t5, t7, t2); \
113 filter_8bit(x4, t4, t5, t7, t2); \
114 \
115 vpsrldq $5, t0, t5; \
116 vpsrldq $1, t0, t1; \
117 vpsrldq $2, t0, t2; \
118 vpsrldq $3, t0, t3; \
119 vpsrldq $4, t0, t4; \
120 vpshufb t6, t0, t0; \
121 vpshufb t6, t1, t1; \
122 vpshufb t6, t2, t2; \
123 vpshufb t6, t3, t3; \
124 vpshufb t6, t4, t4; \
125 vpsrldq $2, t5, t7; \
126 vpshufb t6, t7, t7; \
127 \
128 /* \
129 * P-function \
130 */ \
131 vpxor x5, x0, x0; \
132 vpxor x6, x1, x1; \
133 vpxor x7, x2, x2; \
134 vpxor x4, x3, x3; \
135 \
136 vpxor x2, x4, x4; \
137 vpxor x3, x5, x5; \
138 vpxor x0, x6, x6; \
139 vpxor x1, x7, x7; \
140 \
141 vpxor x7, x0, x0; \
142 vpxor x4, x1, x1; \
143 vpxor x5, x2, x2; \
144 vpxor x6, x3, x3; \
145 \
146 vpxor x3, x4, x4; \
147 vpxor x0, x5, x5; \
148 vpxor x1, x6, x6; \
149 vpxor x2, x7, x7; /* note: high and low parts swapped */ \
150 \
151 /* \
152 * Add key material and result to CD (x becomes new CD) \
153 */ \
154 \
155 vpxor t3, x4, x4; \
156 vpxor 0 * 16(mem_cd), x4, x4; \
157 \
158 vpxor t2, x5, x5; \
159 vpxor 1 * 16(mem_cd), x5, x5; \
160 \
161 vpsrldq $1, t5, t3; \
162 vpshufb t6, t5, t5; \
163 vpshufb t6, t3, t6; \
164 \
165 vpxor t1, x6, x6; \
166 vpxor 2 * 16(mem_cd), x6, x6; \
167 \
168 vpxor t0, x7, x7; \
169 vpxor 3 * 16(mem_cd), x7, x7; \
170 \
171 vpxor t7, x0, x0; \
172 vpxor 4 * 16(mem_cd), x0, x0; \
173 \
174 vpxor t6, x1, x1; \
175 vpxor 5 * 16(mem_cd), x1, x1; \
176 \
177 vpxor t5, x2, x2; \
178 vpxor 6 * 16(mem_cd), x2, x2; \
179 \
180 vpxor t4, x3, x3; \
181 vpxor 7 * 16(mem_cd), x3, x3;
182
183/*
184 * Size optimization... with inlined roundsm16, binary would be over 5 times
185 * larger and would only be 0.5% faster (on sandy-bridge).
186 */
187.align 8
188roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
189 roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
190 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
191 %rcx, (%r9));
192 ret;
193
194.align 8
195roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
196 roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
197 %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
198 %rax, (%r9));
199 ret;
200
201/*
202 * IN/OUT:
203 * x0..x7: byte-sliced AB state preloaded
204 * mem_ab: byte-sliced AB state in memory
 205 * mem_cd: byte-sliced CD state in memory
206 */
207#define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
208 y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
209 leaq (key_table + (i) * 8)(CTX), %r9; \
210 call roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd; \
211 \
212 vmovdqu x4, 0 * 16(mem_cd); \
213 vmovdqu x5, 1 * 16(mem_cd); \
214 vmovdqu x6, 2 * 16(mem_cd); \
215 vmovdqu x7, 3 * 16(mem_cd); \
216 vmovdqu x0, 4 * 16(mem_cd); \
217 vmovdqu x1, 5 * 16(mem_cd); \
218 vmovdqu x2, 6 * 16(mem_cd); \
219 vmovdqu x3, 7 * 16(mem_cd); \
220 \
221 leaq (key_table + ((i) + (dir)) * 8)(CTX), %r9; \
222 call roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab; \
223 \
224 store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
225
226#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
227
228#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
229 /* Store new AB state */ \
230 vmovdqu x0, 0 * 16(mem_ab); \
231 vmovdqu x1, 1 * 16(mem_ab); \
232 vmovdqu x2, 2 * 16(mem_ab); \
233 vmovdqu x3, 3 * 16(mem_ab); \
234 vmovdqu x4, 4 * 16(mem_ab); \
235 vmovdqu x5, 5 * 16(mem_ab); \
236 vmovdqu x6, 6 * 16(mem_ab); \
237 vmovdqu x7, 7 * 16(mem_ab);
238
239#define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
240 y6, y7, mem_ab, mem_cd, i) \
241 two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
242 y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
243 two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
244 y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
245 two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
246 y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
247
248#define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
249 y6, y7, mem_ab, mem_cd, i) \
250 two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
251 y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
252 two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
253 y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
254 two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
255 y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
256
257/*
258 * IN:
259 * v0..3: byte-sliced 32-bit integers
260 * OUT:
261 * v0..3: (IN <<< 1)
262 */
263#define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \
264 vpcmpgtb v0, zero, t0; \
265 vpaddb v0, v0, v0; \
266 vpabsb t0, t0; \
267 \
268 vpcmpgtb v1, zero, t1; \
269 vpaddb v1, v1, v1; \
270 vpabsb t1, t1; \
271 \
272 vpcmpgtb v2, zero, t2; \
273 vpaddb v2, v2, v2; \
274 vpabsb t2, t2; \
275 \
276 vpor t0, v1, v1; \
277 \
278 vpcmpgtb v3, zero, t0; \
279 vpaddb v3, v3, v3; \
280 vpabsb t0, t0; \
281 \
282 vpor t1, v2, v2; \
283 vpor t2, v3, v3; \
284 vpor t0, v0, v0;
285
286/*
287 * IN:
288 * r: byte-sliced AB state in memory
289 * l: byte-sliced CD state in memory
290 * OUT:
291 * x0..x7: new byte-sliced CD state
292 */
293#define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \
294 tt1, tt2, tt3, kll, klr, krl, krr) \
295 /* \
296 * t0 = kll; \
297 * t0 &= ll; \
298 * lr ^= rol32(t0, 1); \
299 */ \
300 vpxor tt0, tt0, tt0; \
301 vmovd kll, t0; \
302 vpshufb tt0, t0, t3; \
303 vpsrldq $1, t0, t0; \
304 vpshufb tt0, t0, t2; \
305 vpsrldq $1, t0, t0; \
306 vpshufb tt0, t0, t1; \
307 vpsrldq $1, t0, t0; \
308 vpshufb tt0, t0, t0; \
309 \
310 vpand l0, t0, t0; \
311 vpand l1, t1, t1; \
312 vpand l2, t2, t2; \
313 vpand l3, t3, t3; \
314 \
315 rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
316 \
317 vpxor l4, t0, l4; \
318 vmovdqu l4, 4 * 16(l); \
319 vpxor l5, t1, l5; \
320 vmovdqu l5, 5 * 16(l); \
321 vpxor l6, t2, l6; \
322 vmovdqu l6, 6 * 16(l); \
323 vpxor l7, t3, l7; \
324 vmovdqu l7, 7 * 16(l); \
325 \
326 /* \
327 * t2 = krr; \
328 * t2 |= rr; \
329 * rl ^= t2; \
330 */ \
331 \
332 vmovd krr, t0; \
333 vpshufb tt0, t0, t3; \
334 vpsrldq $1, t0, t0; \
335 vpshufb tt0, t0, t2; \
336 vpsrldq $1, t0, t0; \
337 vpshufb tt0, t0, t1; \
338 vpsrldq $1, t0, t0; \
339 vpshufb tt0, t0, t0; \
340 \
341 vpor 4 * 16(r), t0, t0; \
342 vpor 5 * 16(r), t1, t1; \
343 vpor 6 * 16(r), t2, t2; \
344 vpor 7 * 16(r), t3, t3; \
345 \
346 vpxor 0 * 16(r), t0, t0; \
347 vpxor 1 * 16(r), t1, t1; \
348 vpxor 2 * 16(r), t2, t2; \
349 vpxor 3 * 16(r), t3, t3; \
350 vmovdqu t0, 0 * 16(r); \
351 vmovdqu t1, 1 * 16(r); \
352 vmovdqu t2, 2 * 16(r); \
353 vmovdqu t3, 3 * 16(r); \
354 \
355 /* \
356 * t2 = krl; \
357 * t2 &= rl; \
358 * rr ^= rol32(t2, 1); \
359 */ \
360 vmovd krl, t0; \
361 vpshufb tt0, t0, t3; \
362 vpsrldq $1, t0, t0; \
363 vpshufb tt0, t0, t2; \
364 vpsrldq $1, t0, t0; \
365 vpshufb tt0, t0, t1; \
366 vpsrldq $1, t0, t0; \
367 vpshufb tt0, t0, t0; \
368 \
369 vpand 0 * 16(r), t0, t0; \
370 vpand 1 * 16(r), t1, t1; \
371 vpand 2 * 16(r), t2, t2; \
372 vpand 3 * 16(r), t3, t3; \
373 \
374 rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
375 \
376 vpxor 4 * 16(r), t0, t0; \
377 vpxor 5 * 16(r), t1, t1; \
378 vpxor 6 * 16(r), t2, t2; \
379 vpxor 7 * 16(r), t3, t3; \
380 vmovdqu t0, 4 * 16(r); \
381 vmovdqu t1, 5 * 16(r); \
382 vmovdqu t2, 6 * 16(r); \
383 vmovdqu t3, 7 * 16(r); \
384 \
385 /* \
386 * t0 = klr; \
387 * t0 |= lr; \
388 * ll ^= t0; \
389 */ \
390 \
391 vmovd klr, t0; \
392 vpshufb tt0, t0, t3; \
393 vpsrldq $1, t0, t0; \
394 vpshufb tt0, t0, t2; \
395 vpsrldq $1, t0, t0; \
396 vpshufb tt0, t0, t1; \
397 vpsrldq $1, t0, t0; \
398 vpshufb tt0, t0, t0; \
399 \
400 vpor l4, t0, t0; \
401 vpor l5, t1, t1; \
402 vpor l6, t2, t2; \
403 vpor l7, t3, t3; \
404 \
405 vpxor l0, t0, l0; \
406 vmovdqu l0, 0 * 16(l); \
407 vpxor l1, t1, l1; \
408 vmovdqu l1, 1 * 16(l); \
409 vpxor l2, t2, l2; \
410 vmovdqu l2, 2 * 16(l); \
411 vpxor l3, t3, l3; \
412 vmovdqu l3, 3 * 16(l);
413
414#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
415 vpunpckhdq x1, x0, t2; \
416 vpunpckldq x1, x0, x0; \
417 \
418 vpunpckldq x3, x2, t1; \
419 vpunpckhdq x3, x2, x2; \
420 \
421 vpunpckhqdq t1, x0, x1; \
422 vpunpcklqdq t1, x0, x0; \
423 \
424 vpunpckhqdq x2, t2, x3; \
425 vpunpcklqdq x2, t2, x2;
426
427#define byteslice_16x16b(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, \
428 b3, c3, d3, st0, st1) \
429 vmovdqu d2, st0; \
430 vmovdqu d3, st1; \
431 transpose_4x4(a0, a1, a2, a3, d2, d3); \
432 transpose_4x4(b0, b1, b2, b3, d2, d3); \
433 vmovdqu st0, d2; \
434 vmovdqu st1, d3; \
435 \
436 vmovdqu a0, st0; \
437 vmovdqu a1, st1; \
438 transpose_4x4(c0, c1, c2, c3, a0, a1); \
439 transpose_4x4(d0, d1, d2, d3, a0, a1); \
440 \
441 vmovdqu .Lshufb_16x16b, a0; \
442 vmovdqu st1, a1; \
443 vpshufb a0, a2, a2; \
444 vpshufb a0, a3, a3; \
445 vpshufb a0, b0, b0; \
446 vpshufb a0, b1, b1; \
447 vpshufb a0, b2, b2; \
448 vpshufb a0, b3, b3; \
449 vpshufb a0, a1, a1; \
450 vpshufb a0, c0, c0; \
451 vpshufb a0, c1, c1; \
452 vpshufb a0, c2, c2; \
453 vpshufb a0, c3, c3; \
454 vpshufb a0, d0, d0; \
455 vpshufb a0, d1, d1; \
456 vpshufb a0, d2, d2; \
457 vpshufb a0, d3, d3; \
458 vmovdqu d3, st1; \
459 vmovdqu st0, d3; \
460 vpshufb a0, d3, a0; \
461 vmovdqu d2, st0; \
462 \
463 transpose_4x4(a0, b0, c0, d0, d2, d3); \
464 transpose_4x4(a1, b1, c1, d1, d2, d3); \
465 vmovdqu st0, d2; \
466 vmovdqu st1, d3; \
467 \
468 vmovdqu b0, st0; \
469 vmovdqu b1, st1; \
470 transpose_4x4(a2, b2, c2, d2, b0, b1); \
471 transpose_4x4(a3, b3, c3, d3, b0, b1); \
472 vmovdqu st0, b0; \
473 vmovdqu st1, b1; \
474 /* does not adjust output bytes inside vectors */
475
476/* load blocks to registers and apply pre-whitening */
477#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
478 y6, y7, rio, key) \
479 vmovq key, x0; \
480 vpshufb .Lpack_bswap, x0, x0; \
481 \
482 vpxor 0 * 16(rio), x0, y7; \
483 vpxor 1 * 16(rio), x0, y6; \
484 vpxor 2 * 16(rio), x0, y5; \
485 vpxor 3 * 16(rio), x0, y4; \
486 vpxor 4 * 16(rio), x0, y3; \
487 vpxor 5 * 16(rio), x0, y2; \
488 vpxor 6 * 16(rio), x0, y1; \
489 vpxor 7 * 16(rio), x0, y0; \
490 vpxor 8 * 16(rio), x0, x7; \
491 vpxor 9 * 16(rio), x0, x6; \
492 vpxor 10 * 16(rio), x0, x5; \
493 vpxor 11 * 16(rio), x0, x4; \
494 vpxor 12 * 16(rio), x0, x3; \
495 vpxor 13 * 16(rio), x0, x2; \
496 vpxor 14 * 16(rio), x0, x1; \
497 vpxor 15 * 16(rio), x0, x0;
498
499/* byteslice pre-whitened blocks and store to temporary memory */
500#define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
501 y6, y7, mem_ab, mem_cd) \
502 byteslice_16x16b(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
503 y5, y6, y7, (mem_ab), (mem_cd)); \
504 \
505 vmovdqu x0, 0 * 16(mem_ab); \
506 vmovdqu x1, 1 * 16(mem_ab); \
507 vmovdqu x2, 2 * 16(mem_ab); \
508 vmovdqu x3, 3 * 16(mem_ab); \
509 vmovdqu x4, 4 * 16(mem_ab); \
510 vmovdqu x5, 5 * 16(mem_ab); \
511 vmovdqu x6, 6 * 16(mem_ab); \
512 vmovdqu x7, 7 * 16(mem_ab); \
513 vmovdqu y0, 0 * 16(mem_cd); \
514 vmovdqu y1, 1 * 16(mem_cd); \
515 vmovdqu y2, 2 * 16(mem_cd); \
516 vmovdqu y3, 3 * 16(mem_cd); \
517 vmovdqu y4, 4 * 16(mem_cd); \
518 vmovdqu y5, 5 * 16(mem_cd); \
519 vmovdqu y6, 6 * 16(mem_cd); \
520 vmovdqu y7, 7 * 16(mem_cd);
521
522/* de-byteslice, apply post-whitening and store blocks */
523#define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
524 y5, y6, y7, key, stack_tmp0, stack_tmp1) \
525 byteslice_16x16b(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, y3, \
526 y7, x3, x7, stack_tmp0, stack_tmp1); \
527 \
528 vmovdqu x0, stack_tmp0; \
529 \
530 vmovq key, x0; \
531 vpshufb .Lpack_bswap, x0, x0; \
532 \
533 vpxor x0, y7, y7; \
534 vpxor x0, y6, y6; \
535 vpxor x0, y5, y5; \
536 vpxor x0, y4, y4; \
537 vpxor x0, y3, y3; \
538 vpxor x0, y2, y2; \
539 vpxor x0, y1, y1; \
540 vpxor x0, y0, y0; \
541 vpxor x0, x7, x7; \
542 vpxor x0, x6, x6; \
543 vpxor x0, x5, x5; \
544 vpxor x0, x4, x4; \
545 vpxor x0, x3, x3; \
546 vpxor x0, x2, x2; \
547 vpxor x0, x1, x1; \
548 vpxor stack_tmp0, x0, x0;
549
550#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
551 y6, y7, rio) \
552 vmovdqu x0, 0 * 16(rio); \
553 vmovdqu x1, 1 * 16(rio); \
554 vmovdqu x2, 2 * 16(rio); \
555 vmovdqu x3, 3 * 16(rio); \
556 vmovdqu x4, 4 * 16(rio); \
557 vmovdqu x5, 5 * 16(rio); \
558 vmovdqu x6, 6 * 16(rio); \
559 vmovdqu x7, 7 * 16(rio); \
560 vmovdqu y0, 8 * 16(rio); \
561 vmovdqu y1, 9 * 16(rio); \
562 vmovdqu y2, 10 * 16(rio); \
563 vmovdqu y3, 11 * 16(rio); \
564 vmovdqu y4, 12 * 16(rio); \
565 vmovdqu y5, 13 * 16(rio); \
566 vmovdqu y6, 14 * 16(rio); \
567 vmovdqu y7, 15 * 16(rio);
568
569.data
570.align 16
571
572#define SHUFB_BYTES(idx) \
573 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
574
575.Lshufb_16x16b:
576 .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3);
577
578.Lpack_bswap:
579 .long 0x00010203
580 .long 0x04050607
581 .long 0x80808080
582 .long 0x80808080
583
584/* For CTR-mode IV byteswap */
585.Lbswap128_mask:
586 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
587
588/*
589 * pre-SubByte transform
590 *
591 * pre-lookup for sbox1, sbox2, sbox3:
592 * swap_bitendianness(
593 * isom_map_camellia_to_aes(
594 * camellia_f(
 595 * swap_bitendianness(in)
596 * )
597 * )
598 * )
599 *
600 * (note: '⊕ 0xc5' inside camellia_f())
601 */
602.Lpre_tf_lo_s1:
603 .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
604 .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
605.Lpre_tf_hi_s1:
606 .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
607 .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23
608
609/*
610 * pre-SubByte transform
611 *
612 * pre-lookup for sbox4:
613 * swap_bitendianness(
614 * isom_map_camellia_to_aes(
615 * camellia_f(
 616 * swap_bitendianness(in <<< 1)
617 * )
618 * )
619 * )
620 *
621 * (note: '⊕ 0xc5' inside camellia_f())
622 */
623.Lpre_tf_lo_s4:
624 .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
625 .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
626.Lpre_tf_hi_s4:
627 .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
628 .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf
629
630/*
631 * post-SubByte transform
632 *
633 * post-lookup for sbox1, sbox4:
634 * swap_bitendianness(
635 * camellia_h(
636 * isom_map_aes_to_camellia(
637 * swap_bitendianness(
638 * aes_inverse_affine_transform(in)
639 * )
640 * )
641 * )
642 * )
643 *
644 * (note: '⊕ 0x6e' inside camellia_h())
645 */
646.Lpost_tf_lo_s1:
647 .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
648 .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
649.Lpost_tf_hi_s1:
650 .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
651 .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c
652
653/*
654 * post-SubByte transform
655 *
656 * post-lookup for sbox2:
657 * swap_bitendianness(
658 * camellia_h(
659 * isom_map_aes_to_camellia(
660 * swap_bitendianness(
661 * aes_inverse_affine_transform(in)
662 * )
663 * )
664 * )
665 * ) <<< 1
666 *
667 * (note: '⊕ 0x6e' inside camellia_h())
668 */
669.Lpost_tf_lo_s2:
670 .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
671 .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
672.Lpost_tf_hi_s2:
673 .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
674 .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
675
676/*
677 * post-SubByte transform
678 *
679 * post-lookup for sbox3:
680 * swap_bitendianness(
681 * camellia_h(
682 * isom_map_aes_to_camellia(
683 * swap_bitendianness(
684 * aes_inverse_affine_transform(in)
685 * )
686 * )
687 * )
688 * ) >>> 1
689 *
690 * (note: '⊕ 0x6e' inside camellia_h())
691 */
692.Lpost_tf_lo_s3:
693 .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
694 .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
695.Lpost_tf_hi_s3:
696 .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
697 .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
698
699/* For isolating SubBytes from AESENCLAST, inverse shift row */
700.Linv_shift_row:
701 .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
702 .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
703
704/* 4-bit mask */
705.align 4
706.L0f0f0f0f:
707 .long 0x0f0f0f0f
708
709.text
710
711.align 8
712.type __camellia_enc_blk16,@function;
713
714__camellia_enc_blk16:
715 /* input:
716 * %rdi: ctx, CTX
717 * %rax: temporary storage, 256 bytes
718 * %xmm0..%xmm15: 16 plaintext blocks
719 * output:
720 * %xmm0..%xmm15: 16 encrypted blocks, order swapped:
 721 * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
722 */
723
724 leaq 8 * 16(%rax), %rcx;
725
726 inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
727 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
728 %xmm15, %rax, %rcx);
729
730 enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
731 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
732 %xmm15, %rax, %rcx, 0);
733
734 fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
735 %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
736 %xmm15,
737 ((key_table + (8) * 8) + 0)(CTX),
738 ((key_table + (8) * 8) + 4)(CTX),
739 ((key_table + (8) * 8) + 8)(CTX),
740 ((key_table + (8) * 8) + 12)(CTX));
741
742 enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
743 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
744 %xmm15, %rax, %rcx, 8);
745
746 fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
747 %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
748 %xmm15,
749 ((key_table + (16) * 8) + 0)(CTX),
750 ((key_table + (16) * 8) + 4)(CTX),
751 ((key_table + (16) * 8) + 8)(CTX),
752 ((key_table + (16) * 8) + 12)(CTX));
753
754 enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
755 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
756 %xmm15, %rax, %rcx, 16);
757
758 movl $24, %r8d;
759 cmpl $16, key_length(CTX);
760 jne .Lenc_max32;
761
762.Lenc_done:
763 /* load CD for output */
764 vmovdqu 0 * 16(%rcx), %xmm8;
765 vmovdqu 1 * 16(%rcx), %xmm9;
766 vmovdqu 2 * 16(%rcx), %xmm10;
767 vmovdqu 3 * 16(%rcx), %xmm11;
768 vmovdqu 4 * 16(%rcx), %xmm12;
769 vmovdqu 5 * 16(%rcx), %xmm13;
770 vmovdqu 6 * 16(%rcx), %xmm14;
771 vmovdqu 7 * 16(%rcx), %xmm15;
772
773 outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
774 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
775 %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
776
777 ret;
778
779.align 8
780.Lenc_max32:
781 movl $32, %r8d;
782
783 fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
784 %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
785 %xmm15,
786 ((key_table + (24) * 8) + 0)(CTX),
787 ((key_table + (24) * 8) + 4)(CTX),
788 ((key_table + (24) * 8) + 8)(CTX),
789 ((key_table + (24) * 8) + 12)(CTX));
790
791 enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
792 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
793 %xmm15, %rax, %rcx, 24);
794
795 jmp .Lenc_done;
796
797.align 8
798.type __camellia_dec_blk16,@function;
799
800__camellia_dec_blk16:
801 /* input:
802 * %rdi: ctx, CTX
803 * %rax: temporary storage, 256 bytes
804 * %r8d: 24 for 16 byte key, 32 for larger
805 * %xmm0..%xmm15: 16 encrypted blocks
806 * output:
807 * %xmm0..%xmm15: 16 plaintext blocks, order swapped:
 808 * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
809 */
810
811 leaq 8 * 16(%rax), %rcx;
812
813 inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
814 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
815 %xmm15, %rax, %rcx);
816
817 cmpl $32, %r8d;
818 je .Ldec_max32;
819
820.Ldec_max24:
821 dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
822 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
823 %xmm15, %rax, %rcx, 16);
824
825 fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
826 %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
827 %xmm15,
828 ((key_table + (16) * 8) + 8)(CTX),
829 ((key_table + (16) * 8) + 12)(CTX),
830 ((key_table + (16) * 8) + 0)(CTX),
831 ((key_table + (16) * 8) + 4)(CTX));
832
833 dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
834 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
835 %xmm15, %rax, %rcx, 8);
836
837 fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
838 %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
839 %xmm15,
840 ((key_table + (8) * 8) + 8)(CTX),
841 ((key_table + (8) * 8) + 12)(CTX),
842 ((key_table + (8) * 8) + 0)(CTX),
843 ((key_table + (8) * 8) + 4)(CTX));
844
845 dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
846 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
847 %xmm15, %rax, %rcx, 0);
848
849 /* load CD for output */
850 vmovdqu 0 * 16(%rcx), %xmm8;
851 vmovdqu 1 * 16(%rcx), %xmm9;
852 vmovdqu 2 * 16(%rcx), %xmm10;
853 vmovdqu 3 * 16(%rcx), %xmm11;
854 vmovdqu 4 * 16(%rcx), %xmm12;
855 vmovdqu 5 * 16(%rcx), %xmm13;
856 vmovdqu 6 * 16(%rcx), %xmm14;
857 vmovdqu 7 * 16(%rcx), %xmm15;
858
859 outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
860 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
861 %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
862
863 ret;
864
865.align 8
866.Ldec_max32:
867 dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
868 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
869 %xmm15, %rax, %rcx, 24);
870
871 fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
872 %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
873 %xmm15,
874 ((key_table + (24) * 8) + 8)(CTX),
875 ((key_table + (24) * 8) + 12)(CTX),
876 ((key_table + (24) * 8) + 0)(CTX),
877 ((key_table + (24) * 8) + 4)(CTX));
878
879 jmp .Ldec_max24;
880
881.align 8
882.global camellia_ecb_enc_16way
883.type camellia_ecb_enc_16way,@function;
884
885camellia_ecb_enc_16way:
886 /* input:
887 * %rdi: ctx, CTX
888 * %rsi: dst (16 blocks)
889 * %rdx: src (16 blocks)
890 */
891
892 inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
893 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
894 %xmm15, %rdx, (key_table)(CTX));
895
896 /* now dst can be used as temporary buffer (even in src == dst case) */
897 movq %rsi, %rax;
898
899 call __camellia_enc_blk16;
900
901 write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
902 %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
903 %xmm8, %rsi);
904
905 ret;
906
907.align 8
908.global camellia_ecb_dec_16way
909.type camellia_ecb_dec_16way,@function;
910
911camellia_ecb_dec_16way:
912 /* input:
913 * %rdi: ctx, CTX
914 * %rsi: dst (16 blocks)
915 * %rdx: src (16 blocks)
916 */
917
918 cmpl $16, key_length(CTX);
919 movl $32, %r8d;
920 movl $24, %eax;
921 cmovel %eax, %r8d; /* max */
922
923 inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
924 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
925 %xmm15, %rdx, (key_table)(CTX, %r8, 8));
926
927 /* now dst can be used as temporary buffer (even in src == dst case) */
928 movq %rsi, %rax;
929
930 call __camellia_dec_blk16;
931
932 write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
933 %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
934 %xmm8, %rsi);
935
936 ret;
937
938.align 8
939.global camellia_cbc_dec_16way
940.type camellia_cbc_dec_16way,@function;
941
942camellia_cbc_dec_16way:
943 /* input:
944 * %rdi: ctx, CTX
945 * %rsi: dst (16 blocks)
946 * %rdx: src (16 blocks)
947 */
948
949 cmpl $16, key_length(CTX);
950 movl $32, %r8d;
951 movl $24, %eax;
952 cmovel %eax, %r8d; /* max */
953
954 inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
955 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
956 %xmm15, %rdx, (key_table)(CTX, %r8, 8));
957
958 /*
959 * dst might still be in-use (in case dst == src), so use stack for
960 * temporary storage.
961 */
962 subq $(16 * 16), %rsp;
963 movq %rsp, %rax;
964
965 call __camellia_dec_blk16;
966
967 addq $(16 * 16), %rsp;
968
969 vpxor (0 * 16)(%rdx), %xmm6, %xmm6;
970 vpxor (1 * 16)(%rdx), %xmm5, %xmm5;
971 vpxor (2 * 16)(%rdx), %xmm4, %xmm4;
972 vpxor (3 * 16)(%rdx), %xmm3, %xmm3;
973 vpxor (4 * 16)(%rdx), %xmm2, %xmm2;
974 vpxor (5 * 16)(%rdx), %xmm1, %xmm1;
975 vpxor (6 * 16)(%rdx), %xmm0, %xmm0;
976 vpxor (7 * 16)(%rdx), %xmm15, %xmm15;
977 vpxor (8 * 16)(%rdx), %xmm14, %xmm14;
978 vpxor (9 * 16)(%rdx), %xmm13, %xmm13;
979 vpxor (10 * 16)(%rdx), %xmm12, %xmm12;
980 vpxor (11 * 16)(%rdx), %xmm11, %xmm11;
981 vpxor (12 * 16)(%rdx), %xmm10, %xmm10;
982 vpxor (13 * 16)(%rdx), %xmm9, %xmm9;
983 vpxor (14 * 16)(%rdx), %xmm8, %xmm8;
984 write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
985 %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
986 %xmm8, %rsi);
987
988 ret;
989
990#define inc_le128(x, minus_one, tmp) \
991 vpcmpeqq minus_one, x, tmp; \
992 vpsubq minus_one, x, x; \
993 vpslldq $8, tmp, tmp; \
994 vpsubq tmp, x, x;
995
996.align 8
997.global camellia_ctr_16way
998.type camellia_ctr_16way,@function;
999
1000camellia_ctr_16way:
1001 /* input:
1002 * %rdi: ctx, CTX
1003 * %rsi: dst (16 blocks)
1004 * %rdx: src (16 blocks)
1005 * %rcx: iv (little endian, 128bit)
1006 */
1007
1008 subq $(16 * 16), %rsp;
1009 movq %rsp, %rax;
1010
1011 vmovdqa .Lbswap128_mask, %xmm14;
1012
1013 /* load IV and byteswap */
1014 vmovdqu (%rcx), %xmm0;
1015 vpshufb %xmm14, %xmm0, %xmm15;
1016 vmovdqu %xmm15, 15 * 16(%rax);
1017
1018 vpcmpeqd %xmm15, %xmm15, %xmm15;
1019 vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */
1020
1021 /* construct IVs */
1022 inc_le128(%xmm0, %xmm15, %xmm13);
1023 vpshufb %xmm14, %xmm0, %xmm13;
1024 vmovdqu %xmm13, 14 * 16(%rax);
1025 inc_le128(%xmm0, %xmm15, %xmm13);
1026 vpshufb %xmm14, %xmm0, %xmm13;
1027 vmovdqu %xmm13, 13 * 16(%rax);
1028 inc_le128(%xmm0, %xmm15, %xmm13);
1029 vpshufb %xmm14, %xmm0, %xmm12;
1030 inc_le128(%xmm0, %xmm15, %xmm13);
1031 vpshufb %xmm14, %xmm0, %xmm11;
1032 inc_le128(%xmm0, %xmm15, %xmm13);
1033 vpshufb %xmm14, %xmm0, %xmm10;
1034 inc_le128(%xmm0, %xmm15, %xmm13);
1035 vpshufb %xmm14, %xmm0, %xmm9;
1036 inc_le128(%xmm0, %xmm15, %xmm13);
1037 vpshufb %xmm14, %xmm0, %xmm8;
1038 inc_le128(%xmm0, %xmm15, %xmm13);
1039 vpshufb %xmm14, %xmm0, %xmm7;
1040 inc_le128(%xmm0, %xmm15, %xmm13);
1041 vpshufb %xmm14, %xmm0, %xmm6;
1042 inc_le128(%xmm0, %xmm15, %xmm13);
1043 vpshufb %xmm14, %xmm0, %xmm5;
1044 inc_le128(%xmm0, %xmm15, %xmm13);
1045 vpshufb %xmm14, %xmm0, %xmm4;
1046 inc_le128(%xmm0, %xmm15, %xmm13);
1047 vpshufb %xmm14, %xmm0, %xmm3;
1048 inc_le128(%xmm0, %xmm15, %xmm13);
1049 vpshufb %xmm14, %xmm0, %xmm2;
1050 inc_le128(%xmm0, %xmm15, %xmm13);
1051 vpshufb %xmm14, %xmm0, %xmm1;
1052 inc_le128(%xmm0, %xmm15, %xmm13);
1053 vmovdqa %xmm0, %xmm13;
1054 vpshufb %xmm14, %xmm0, %xmm0;
1055 inc_le128(%xmm13, %xmm15, %xmm14);
1056 vmovdqu %xmm13, (%rcx);
1057
1058 /* inpack16_pre: */
1059 vmovq (key_table)(CTX), %xmm15;
1060 vpshufb .Lpack_bswap, %xmm15, %xmm15;
1061 vpxor %xmm0, %xmm15, %xmm0;
1062 vpxor %xmm1, %xmm15, %xmm1;
1063 vpxor %xmm2, %xmm15, %xmm2;
1064 vpxor %xmm3, %xmm15, %xmm3;
1065 vpxor %xmm4, %xmm15, %xmm4;
1066 vpxor %xmm5, %xmm15, %xmm5;
1067 vpxor %xmm6, %xmm15, %xmm6;
1068 vpxor %xmm7, %xmm15, %xmm7;
1069 vpxor %xmm8, %xmm15, %xmm8;
1070 vpxor %xmm9, %xmm15, %xmm9;
1071 vpxor %xmm10, %xmm15, %xmm10;
1072 vpxor %xmm11, %xmm15, %xmm11;
1073 vpxor %xmm12, %xmm15, %xmm12;
1074 vpxor 13 * 16(%rax), %xmm15, %xmm13;
1075 vpxor 14 * 16(%rax), %xmm15, %xmm14;
1076 vpxor 15 * 16(%rax), %xmm15, %xmm15;
1077
1078 call __camellia_enc_blk16;
1079
1080 addq $(16 * 16), %rsp;
1081
1082 vpxor 0 * 16(%rdx), %xmm7, %xmm7;
1083 vpxor 1 * 16(%rdx), %xmm6, %xmm6;
1084 vpxor 2 * 16(%rdx), %xmm5, %xmm5;
1085 vpxor 3 * 16(%rdx), %xmm4, %xmm4;
1086 vpxor 4 * 16(%rdx), %xmm3, %xmm3;
1087 vpxor 5 * 16(%rdx), %xmm2, %xmm2;
1088 vpxor 6 * 16(%rdx), %xmm1, %xmm1;
1089 vpxor 7 * 16(%rdx), %xmm0, %xmm0;
1090 vpxor 8 * 16(%rdx), %xmm15, %xmm15;
1091 vpxor 9 * 16(%rdx), %xmm14, %xmm14;
1092 vpxor 10 * 16(%rdx), %xmm13, %xmm13;
1093 vpxor 11 * 16(%rdx), %xmm12, %xmm12;
1094 vpxor 12 * 16(%rdx), %xmm11, %xmm11;
1095 vpxor 13 * 16(%rdx), %xmm10, %xmm10;
1096 vpxor 14 * 16(%rdx), %xmm9, %xmm9;
1097 vpxor 15 * 16(%rdx), %xmm8, %xmm8;
1098 write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
1099 %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
1100 %xmm8, %rsi);
1101
1102 ret;
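
The inc_le128() macro defined above (and used repeatedly by camellia_ctr_16way) keeps the CTR counter as a 128-bit little-endian value in an xmm register: vpcmpeqq detects a low half that is all ones, vpsubq of the {-1, 0} constant adds one to the low half, and the byte shift plus the second vpsubq folds the carry into the high half. A minimal scalar sketch of the same update, assuming a made-up two-word struct purely for illustration:

#include <linux/types.h>

/* Scalar equivalent of inc_le128(): bump a 128-bit little-endian
 * counter held as two 64-bit halves, carrying from low into high. */
struct le128_ctr {
	u64 lo;		/* low 64 bits */
	u64 hi;		/* high 64 bits */
};

static inline void inc_le128_scalar(struct le128_ctr *ctr)
{
	ctr->lo++;
	if (ctr->lo == 0)	/* low half wrapped: propagate the carry */
		ctr->hi++;
}
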
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
new file mode 100644
index 000000000000..96cbb6068fce
--- /dev/null
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -0,0 +1,558 @@
1/*
2 * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
3 *
4 * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/crypto.h>
16#include <linux/err.h>
17#include <crypto/algapi.h>
18#include <crypto/ctr.h>
19#include <crypto/lrw.h>
20#include <crypto/xts.h>
21#include <asm/xcr.h>
22#include <asm/xsave.h>
23#include <asm/crypto/camellia.h>
24#include <asm/crypto/ablk_helper.h>
25#include <asm/crypto/glue_helper.h>
26
27#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
28
29/* 16-way AES-NI parallel cipher functions */
30asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
31 const u8 *src);
32asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
33 const u8 *src);
34
35asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
36 const u8 *src);
37asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
38 const u8 *src, le128 *iv);
39
40static const struct common_glue_ctx camellia_enc = {
41 .num_funcs = 3,
42 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
43
44 .funcs = { {
45 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
46 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
47 }, {
48 .num_blocks = 2,
49 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
50 }, {
51 .num_blocks = 1,
52 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
53 } }
54};
55
56static const struct common_glue_ctx camellia_ctr = {
57 .num_funcs = 3,
58 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
59
60 .funcs = { {
61 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
62 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
63 }, {
64 .num_blocks = 2,
65 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
66 }, {
67 .num_blocks = 1,
68 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
69 } }
70};
71
72static const struct common_glue_ctx camellia_dec = {
73 .num_funcs = 3,
74 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
75
76 .funcs = { {
77 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
78 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
79 }, {
80 .num_blocks = 2,
81 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
82 }, {
83 .num_blocks = 1,
84 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
85 } }
86};
87
88static const struct common_glue_ctx camellia_dec_cbc = {
89 .num_funcs = 3,
90 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
91
92 .funcs = { {
93 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
94 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
95 }, {
96 .num_blocks = 2,
97 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
98 }, {
99 .num_blocks = 1,
100 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
101 } }
102};
103
104static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
105 struct scatterlist *src, unsigned int nbytes)
106{
107 return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
108}
109
110static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
111 struct scatterlist *src, unsigned int nbytes)
112{
113 return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
114}
115
116static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
117 struct scatterlist *src, unsigned int nbytes)
118{
119 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
120 dst, src, nbytes);
121}
122
123static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
124 struct scatterlist *src, unsigned int nbytes)
125{
126 return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
127 nbytes);
128}
129
130static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
131 struct scatterlist *src, unsigned int nbytes)
132{
133 return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
134}
135
136static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
137{
138 return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
139 CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
140 nbytes);
141}
142
143static inline void camellia_fpu_end(bool fpu_enabled)
144{
145 glue_fpu_end(fpu_enabled);
146}
147
148static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
149 unsigned int key_len)
150{
151 return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
152 &tfm->crt_flags);
153}
154
155struct crypt_priv {
156 struct camellia_ctx *ctx;
157 bool fpu_enabled;
158};
159
160static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
161{
162 const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
163 struct crypt_priv *ctx = priv;
164 int i;
165
166 ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
167
168 if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
169 camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
170 srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
171 nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
172 }
173
174 while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
175 camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
176 srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
177 nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
178 }
179
180 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
181 camellia_enc_blk(ctx->ctx, srcdst, srcdst);
182}
183
184static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
185{
186 const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
187 struct crypt_priv *ctx = priv;
188 int i;
189
190 ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
191
192 if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
193 camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
194 srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
195 nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
196 }
197
198 while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
199 camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
200 srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
201 nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
202 }
203
204 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
205 camellia_dec_blk(ctx->ctx, srcdst, srcdst);
206}
207
208static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
209 struct scatterlist *src, unsigned int nbytes)
210{
211 struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
212 be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
213 struct crypt_priv crypt_ctx = {
214 .ctx = &ctx->camellia_ctx,
215 .fpu_enabled = false,
216 };
217 struct lrw_crypt_req req = {
218 .tbuf = buf,
219 .tbuflen = sizeof(buf),
220
221 .table_ctx = &ctx->lrw_table,
222 .crypt_ctx = &crypt_ctx,
223 .crypt_fn = encrypt_callback,
224 };
225 int ret;
226
227 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
228 ret = lrw_crypt(desc, dst, src, nbytes, &req);
229 camellia_fpu_end(crypt_ctx.fpu_enabled);
230
231 return ret;
232}
233
234static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
235 struct scatterlist *src, unsigned int nbytes)
236{
237 struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
238 be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
239 struct crypt_priv crypt_ctx = {
240 .ctx = &ctx->camellia_ctx,
241 .fpu_enabled = false,
242 };
243 struct lrw_crypt_req req = {
244 .tbuf = buf,
245 .tbuflen = sizeof(buf),
246
247 .table_ctx = &ctx->lrw_table,
248 .crypt_ctx = &crypt_ctx,
249 .crypt_fn = decrypt_callback,
250 };
251 int ret;
252
253 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
254 ret = lrw_crypt(desc, dst, src, nbytes, &req);
255 camellia_fpu_end(crypt_ctx.fpu_enabled);
256
257 return ret;
258}
259
260static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
261 struct scatterlist *src, unsigned int nbytes)
262{
263 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
264 be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
265 struct crypt_priv crypt_ctx = {
266 .ctx = &ctx->crypt_ctx,
267 .fpu_enabled = false,
268 };
269 struct xts_crypt_req req = {
270 .tbuf = buf,
271 .tbuflen = sizeof(buf),
272
273 .tweak_ctx = &ctx->tweak_ctx,
274 .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
275 .crypt_ctx = &crypt_ctx,
276 .crypt_fn = encrypt_callback,
277 };
278 int ret;
279
280 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
281 ret = xts_crypt(desc, dst, src, nbytes, &req);
282 camellia_fpu_end(crypt_ctx.fpu_enabled);
283
284 return ret;
285}
286
287static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
288 struct scatterlist *src, unsigned int nbytes)
289{
290 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
291 be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
292 struct crypt_priv crypt_ctx = {
293 .ctx = &ctx->crypt_ctx,
294 .fpu_enabled = false,
295 };
296 struct xts_crypt_req req = {
297 .tbuf = buf,
298 .tbuflen = sizeof(buf),
299
300 .tweak_ctx = &ctx->tweak_ctx,
301 .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
302 .crypt_ctx = &crypt_ctx,
303 .crypt_fn = decrypt_callback,
304 };
305 int ret;
306
307 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
308 ret = xts_crypt(desc, dst, src, nbytes, &req);
309 camellia_fpu_end(crypt_ctx.fpu_enabled);
310
311 return ret;
312}
313
314static struct crypto_alg cmll_algs[10] = { {
315 .cra_name = "__ecb-camellia-aesni",
316 .cra_driver_name = "__driver-ecb-camellia-aesni",
317 .cra_priority = 0,
318 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
319 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
320 .cra_ctxsize = sizeof(struct camellia_ctx),
321 .cra_alignmask = 0,
322 .cra_type = &crypto_blkcipher_type,
323 .cra_module = THIS_MODULE,
324 .cra_u = {
325 .blkcipher = {
326 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
327 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
328 .setkey = camellia_setkey,
329 .encrypt = ecb_encrypt,
330 .decrypt = ecb_decrypt,
331 },
332 },
333}, {
334 .cra_name = "__cbc-camellia-aesni",
335 .cra_driver_name = "__driver-cbc-camellia-aesni",
336 .cra_priority = 0,
337 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
338 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
339 .cra_ctxsize = sizeof(struct camellia_ctx),
340 .cra_alignmask = 0,
341 .cra_type = &crypto_blkcipher_type,
342 .cra_module = THIS_MODULE,
343 .cra_u = {
344 .blkcipher = {
345 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
346 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
347 .setkey = camellia_setkey,
348 .encrypt = cbc_encrypt,
349 .decrypt = cbc_decrypt,
350 },
351 },
352}, {
353 .cra_name = "__ctr-camellia-aesni",
354 .cra_driver_name = "__driver-ctr-camellia-aesni",
355 .cra_priority = 0,
356 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
357 .cra_blocksize = 1,
358 .cra_ctxsize = sizeof(struct camellia_ctx),
359 .cra_alignmask = 0,
360 .cra_type = &crypto_blkcipher_type,
361 .cra_module = THIS_MODULE,
362 .cra_u = {
363 .blkcipher = {
364 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
365 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
366 .ivsize = CAMELLIA_BLOCK_SIZE,
367 .setkey = camellia_setkey,
368 .encrypt = ctr_crypt,
369 .decrypt = ctr_crypt,
370 },
371 },
372}, {
373 .cra_name = "__lrw-camellia-aesni",
374 .cra_driver_name = "__driver-lrw-camellia-aesni",
375 .cra_priority = 0,
376 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
377 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
378 .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
379 .cra_alignmask = 0,
380 .cra_type = &crypto_blkcipher_type,
381 .cra_module = THIS_MODULE,
382 .cra_exit = lrw_camellia_exit_tfm,
383 .cra_u = {
384 .blkcipher = {
385 .min_keysize = CAMELLIA_MIN_KEY_SIZE +
386 CAMELLIA_BLOCK_SIZE,
387 .max_keysize = CAMELLIA_MAX_KEY_SIZE +
388 CAMELLIA_BLOCK_SIZE,
389 .ivsize = CAMELLIA_BLOCK_SIZE,
390 .setkey = lrw_camellia_setkey,
391 .encrypt = lrw_encrypt,
392 .decrypt = lrw_decrypt,
393 },
394 },
395}, {
396 .cra_name = "__xts-camellia-aesni",
397 .cra_driver_name = "__driver-xts-camellia-aesni",
398 .cra_priority = 0,
399 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
400 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
401 .cra_ctxsize = sizeof(struct camellia_xts_ctx),
402 .cra_alignmask = 0,
403 .cra_type = &crypto_blkcipher_type,
404 .cra_module = THIS_MODULE,
405 .cra_u = {
406 .blkcipher = {
407 .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
408 .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
409 .ivsize = CAMELLIA_BLOCK_SIZE,
410 .setkey = xts_camellia_setkey,
411 .encrypt = xts_encrypt,
412 .decrypt = xts_decrypt,
413 },
414 },
415}, {
416 .cra_name = "ecb(camellia)",
417 .cra_driver_name = "ecb-camellia-aesni",
418 .cra_priority = 400,
419 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
420 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
421 .cra_ctxsize = sizeof(struct async_helper_ctx),
422 .cra_alignmask = 0,
423 .cra_type = &crypto_ablkcipher_type,
424 .cra_module = THIS_MODULE,
425 .cra_init = ablk_init,
426 .cra_exit = ablk_exit,
427 .cra_u = {
428 .ablkcipher = {
429 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
430 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
431 .setkey = ablk_set_key,
432 .encrypt = ablk_encrypt,
433 .decrypt = ablk_decrypt,
434 },
435 },
436}, {
437 .cra_name = "cbc(camellia)",
438 .cra_driver_name = "cbc-camellia-aesni",
439 .cra_priority = 400,
440 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
441 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
442 .cra_ctxsize = sizeof(struct async_helper_ctx),
443 .cra_alignmask = 0,
444 .cra_type = &crypto_ablkcipher_type,
445 .cra_module = THIS_MODULE,
446 .cra_init = ablk_init,
447 .cra_exit = ablk_exit,
448 .cra_u = {
449 .ablkcipher = {
450 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
451 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
452 .ivsize = CAMELLIA_BLOCK_SIZE,
453 .setkey = ablk_set_key,
454 .encrypt = __ablk_encrypt,
455 .decrypt = ablk_decrypt,
456 },
457 },
458}, {
459 .cra_name = "ctr(camellia)",
460 .cra_driver_name = "ctr-camellia-aesni",
461 .cra_priority = 400,
462 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
463 .cra_blocksize = 1,
464 .cra_ctxsize = sizeof(struct async_helper_ctx),
465 .cra_alignmask = 0,
466 .cra_type = &crypto_ablkcipher_type,
467 .cra_module = THIS_MODULE,
468 .cra_init = ablk_init,
469 .cra_exit = ablk_exit,
470 .cra_u = {
471 .ablkcipher = {
472 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
473 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
474 .ivsize = CAMELLIA_BLOCK_SIZE,
475 .setkey = ablk_set_key,
476 .encrypt = ablk_encrypt,
477 .decrypt = ablk_encrypt,
478 .geniv = "chainiv",
479 },
480 },
481}, {
482 .cra_name = "lrw(camellia)",
483 .cra_driver_name = "lrw-camellia-aesni",
484 .cra_priority = 400,
485 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
486 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
487 .cra_ctxsize = sizeof(struct async_helper_ctx),
488 .cra_alignmask = 0,
489 .cra_type = &crypto_ablkcipher_type,
490 .cra_module = THIS_MODULE,
491 .cra_init = ablk_init,
492 .cra_exit = ablk_exit,
493 .cra_u = {
494 .ablkcipher = {
495 .min_keysize = CAMELLIA_MIN_KEY_SIZE +
496 CAMELLIA_BLOCK_SIZE,
497 .max_keysize = CAMELLIA_MAX_KEY_SIZE +
498 CAMELLIA_BLOCK_SIZE,
499 .ivsize = CAMELLIA_BLOCK_SIZE,
500 .setkey = ablk_set_key,
501 .encrypt = ablk_encrypt,
502 .decrypt = ablk_decrypt,
503 },
504 },
505}, {
506 .cra_name = "xts(camellia)",
507 .cra_driver_name = "xts-camellia-aesni",
508 .cra_priority = 400,
509 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
510 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
511 .cra_ctxsize = sizeof(struct async_helper_ctx),
512 .cra_alignmask = 0,
513 .cra_type = &crypto_ablkcipher_type,
514 .cra_module = THIS_MODULE,
515 .cra_init = ablk_init,
516 .cra_exit = ablk_exit,
517 .cra_u = {
518 .ablkcipher = {
519 .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
520 .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
521 .ivsize = CAMELLIA_BLOCK_SIZE,
522 .setkey = ablk_set_key,
523 .encrypt = ablk_encrypt,
524 .decrypt = ablk_decrypt,
525 },
526 },
527} };
528
529static int __init camellia_aesni_init(void)
530{
531 u64 xcr0;
532
533 if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
534 pr_info("AVX or AES-NI instructions are not detected.\n");
535 return -ENODEV;
536 }
537
538 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
539 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
540 pr_info("AVX detected but unusable.\n");
541 return -ENODEV;
542 }
543
544 return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
545}
546
547static void __exit camellia_aesni_fini(void)
548{
549 crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
550}
551
552module_init(camellia_aesni_init);
553module_exit(camellia_aesni_fini);
554
555MODULE_LICENSE("GPL");
556MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized");
557MODULE_ALIAS("camellia");
558MODULE_ALIAS("camellia-asm");
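
The camellia_enc, camellia_dec, camellia_ctr and camellia_dec_cbc tables above stack the new 16-way AVX routines on top of the existing 2-way and single-block fallbacks. The glue helpers walk such a table from the widest entry downwards and keep using an entry while enough whole blocks remain, so bulk data takes the AVX path and the tail is finished two blocks, then one block, at a time. A rough sketch of that cascade for the ECB case, assuming only the field layout shown above (dispatch_ecb() is invented for illustration and is not the real glue_helper code):

/* Walk a common_glue_ctx style table from the widest batch size down,
 * using each function while enough whole blocks are still left. */
static unsigned int dispatch_ecb(const struct common_glue_ctx *gctx,
				 void *ctx, u8 *dst, const u8 *src,
				 unsigned int nblocks, unsigned int bsize)
{
	unsigned int i;

	for (i = 0; i < gctx->num_funcs; i++) {
		unsigned int n = gctx->funcs[i].num_blocks;

		while (nblocks >= n) {
			gctx->funcs[i].fn_u.ecb(ctx, dst, src);
			src += n * bsize;
			dst += n * bsize;
			nblocks -= n;
		}
	}

	return nblocks;	/* 0 once every block has been consumed */
}

The same ordering shows up in encrypt_callback()/decrypt_callback() above, which hand 16-block chunks to camellia_ecb_enc_16way()/camellia_ecb_dec_16way() before falling back to the 2-way and single-block functions.
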
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 42ffd2bbab5b..5cb86ccd4acb 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -32,53 +32,24 @@
32#include <crypto/algapi.h> 32#include <crypto/algapi.h>
33#include <crypto/lrw.h> 33#include <crypto/lrw.h>
34#include <crypto/xts.h> 34#include <crypto/xts.h>
35#include <asm/crypto/camellia.h>
35#include <asm/crypto/glue_helper.h> 36#include <asm/crypto/glue_helper.h>
36 37
37#define CAMELLIA_MIN_KEY_SIZE 16
38#define CAMELLIA_MAX_KEY_SIZE 32
39#define CAMELLIA_BLOCK_SIZE 16
40#define CAMELLIA_TABLE_BYTE_LEN 272
41
42struct camellia_ctx {
43 u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
44 u32 key_length;
45};
46
47/* regular block cipher functions */ 38/* regular block cipher functions */
48asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, 39asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
49 const u8 *src, bool xor); 40 const u8 *src, bool xor);
41EXPORT_SYMBOL_GPL(__camellia_enc_blk);
50asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, 42asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
51 const u8 *src); 43 const u8 *src);
44EXPORT_SYMBOL_GPL(camellia_dec_blk);
52 45
53/* 2-way parallel cipher functions */ 46/* 2-way parallel cipher functions */
54asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, 47asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
55 const u8 *src, bool xor); 48 const u8 *src, bool xor);
49EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way);
56asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, 50asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
57 const u8 *src); 51 const u8 *src);
58 52EXPORT_SYMBOL_GPL(camellia_dec_blk_2way);
59static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
60 const u8 *src)
61{
62 __camellia_enc_blk(ctx, dst, src, false);
63}
64
65static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
66 const u8 *src)
67{
68 __camellia_enc_blk(ctx, dst, src, true);
69}
70
71static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
72 const u8 *src)
73{
74 __camellia_enc_blk_2way(ctx, dst, src, false);
75}
76
77static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
78 const u8 *src)
79{
80 __camellia_enc_blk_2way(ctx, dst, src, true);
81}
82 53
83static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) 54static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
84{ 55{
@@ -1275,9 +1246,8 @@ static void camellia_setup192(const unsigned char *key, u64 *subkey)
1275 camellia_setup256(kk, subkey); 1246 camellia_setup256(kk, subkey);
1276} 1247}
1277 1248
1278static int __camellia_setkey(struct camellia_ctx *cctx, 1249int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
1279 const unsigned char *key, 1250 unsigned int key_len, u32 *flags)
1280 unsigned int key_len, u32 *flags)
1281{ 1251{
1282 if (key_len != 16 && key_len != 24 && key_len != 32) { 1252 if (key_len != 16 && key_len != 24 && key_len != 32) {
1283 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; 1253 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
@@ -1300,6 +1270,7 @@ static int __camellia_setkey(struct camellia_ctx *cctx,
1300 1270
1301 return 0; 1271 return 0;
1302} 1272}
1273EXPORT_SYMBOL_GPL(__camellia_setkey);
1303 1274
1304static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, 1275static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
1305 unsigned int key_len) 1276 unsigned int key_len)
@@ -1308,7 +1279,7 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
1308 &tfm->crt_flags); 1279 &tfm->crt_flags);
1309} 1280}
1310 1281
1311static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) 1282void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
1312{ 1283{
1313 u128 iv = *src; 1284 u128 iv = *src;
1314 1285
@@ -1316,22 +1287,23 @@ static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
1316 1287
1317 u128_xor(&dst[1], &dst[1], &iv); 1288 u128_xor(&dst[1], &dst[1], &iv);
1318} 1289}
1290EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
1319 1291
1320static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) 1292void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
1321{ 1293{
1322 be128 ctrblk; 1294 be128 ctrblk;
1323 1295
1324 if (dst != src) 1296 if (dst != src)
1325 *dst = *src; 1297 *dst = *src;
1326 1298
1327 u128_to_be128(&ctrblk, iv); 1299 le128_to_be128(&ctrblk, iv);
1328 u128_inc(iv); 1300 le128_inc(iv);
1329 1301
1330 camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); 1302 camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
1331} 1303}
1304EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
1332 1305
1333static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, 1306void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv)
1334 u128 *iv)
1335{ 1307{
1336 be128 ctrblks[2]; 1308 be128 ctrblks[2];
1337 1309
@@ -1340,13 +1312,14 @@ static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
1340 dst[1] = src[1]; 1312 dst[1] = src[1];
1341 } 1313 }
1342 1314
1343 u128_to_be128(&ctrblks[0], iv); 1315 le128_to_be128(&ctrblks[0], iv);
1344 u128_inc(iv); 1316 le128_inc(iv);
1345 u128_to_be128(&ctrblks[1], iv); 1317 le128_to_be128(&ctrblks[1], iv);
1346 u128_inc(iv); 1318 le128_inc(iv);
1347 1319
1348 camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); 1320 camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
1349} 1321}
1322EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way);
1350 1323
1351static const struct common_glue_ctx camellia_enc = { 1324static const struct common_glue_ctx camellia_enc = {
1352 .num_funcs = 2, 1325 .num_funcs = 2,
@@ -1464,13 +1437,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
1464 camellia_dec_blk(ctx, srcdst, srcdst); 1437 camellia_dec_blk(ctx, srcdst, srcdst);
1465} 1438}
1466 1439
1467struct camellia_lrw_ctx { 1440int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
1468 struct lrw_table_ctx lrw_table; 1441 unsigned int keylen)
1469 struct camellia_ctx camellia_ctx;
1470};
1471
1472static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
1473 unsigned int keylen)
1474{ 1442{
1475 struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); 1443 struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
1476 int err; 1444 int err;
@@ -1484,6 +1452,7 @@ static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
1484 return lrw_init_table(&ctx->lrw_table, 1452 return lrw_init_table(&ctx->lrw_table,
1485 key + keylen - CAMELLIA_BLOCK_SIZE); 1453 key + keylen - CAMELLIA_BLOCK_SIZE);
1486} 1454}
1455EXPORT_SYMBOL_GPL(lrw_camellia_setkey);
1487 1456
1488static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1457static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1489 struct scatterlist *src, unsigned int nbytes) 1458 struct scatterlist *src, unsigned int nbytes)
@@ -1519,20 +1488,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1519 return lrw_crypt(desc, dst, src, nbytes, &req); 1488 return lrw_crypt(desc, dst, src, nbytes, &req);
1520} 1489}
1521 1490
1522static void lrw_exit_tfm(struct crypto_tfm *tfm) 1491void lrw_camellia_exit_tfm(struct crypto_tfm *tfm)
1523{ 1492{
1524 struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); 1493 struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
1525 1494
1526 lrw_free_table(&ctx->lrw_table); 1495 lrw_free_table(&ctx->lrw_table);
1527} 1496}
1497EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm);
1528 1498
1529struct camellia_xts_ctx { 1499int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
1530 struct camellia_ctx tweak_ctx; 1500 unsigned int keylen)
1531 struct camellia_ctx crypt_ctx;
1532};
1533
1534static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
1535 unsigned int keylen)
1536{ 1501{
1537 struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); 1502 struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
1538 u32 *flags = &tfm->crt_flags; 1503 u32 *flags = &tfm->crt_flags;
@@ -1555,6 +1520,7 @@ static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
1555 return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, 1520 return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
1556 flags); 1521 flags);
1557} 1522}
1523EXPORT_SYMBOL_GPL(xts_camellia_setkey);
1558 1524
1559static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1525static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1560 struct scatterlist *src, unsigned int nbytes) 1526 struct scatterlist *src, unsigned int nbytes)
@@ -1679,7 +1645,7 @@ static struct crypto_alg camellia_algs[6] = { {
1679 .cra_alignmask = 0, 1645 .cra_alignmask = 0,
1680 .cra_type = &crypto_blkcipher_type, 1646 .cra_type = &crypto_blkcipher_type,
1681 .cra_module = THIS_MODULE, 1647 .cra_module = THIS_MODULE,
1682 .cra_exit = lrw_exit_tfm, 1648 .cra_exit = lrw_camellia_exit_tfm,
1683 .cra_u = { 1649 .cra_u = {
1684 .blkcipher = { 1650 .blkcipher = {
1685 .min_keysize = CAMELLIA_MIN_KEY_SIZE + 1651 .min_keysize = CAMELLIA_MIN_KEY_SIZE +
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index a41a3aaba220..15b00ac7cbd3 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -25,10 +25,10 @@
25 25
26.file "cast5-avx-x86_64-asm_64.S" 26.file "cast5-avx-x86_64-asm_64.S"
27 27
28.extern cast5_s1 28.extern cast_s1
29.extern cast5_s2 29.extern cast_s2
30.extern cast5_s3 30.extern cast_s3
31.extern cast5_s4 31.extern cast_s4
32 32
33/* structure of crypto context */ 33/* structure of crypto context */
34#define km 0 34#define km 0
@@ -36,10 +36,10 @@
36#define rr ((16*4)+16) 36#define rr ((16*4)+16)
37 37
38/* s-boxes */ 38/* s-boxes */
39#define s1 cast5_s1 39#define s1 cast_s1
40#define s2 cast5_s2 40#define s2 cast_s2
41#define s3 cast5_s3 41#define s3 cast_s3
42#define s4 cast5_s4 42#define s4 cast_s4
43 43
44/********************************************************************** 44/**********************************************************************
45 16-way AVX cast5 45 16-way AVX cast5
@@ -180,31 +180,17 @@
180 vpunpcklqdq t1, t0, x0; \ 180 vpunpcklqdq t1, t0, x0; \
181 vpunpckhqdq t1, t0, x1; 181 vpunpckhqdq t1, t0, x1;
182 182
183#define inpack_blocks(in, x0, x1, t0, t1, rmask) \ 183#define inpack_blocks(x0, x1, t0, t1, rmask) \
184 vmovdqu (0*4*4)(in), x0; \
185 vmovdqu (1*4*4)(in), x1; \
186 vpshufb rmask, x0, x0; \ 184 vpshufb rmask, x0, x0; \
187 vpshufb rmask, x1, x1; \ 185 vpshufb rmask, x1, x1; \
188 \ 186 \
189 transpose_2x4(x0, x1, t0, t1) 187 transpose_2x4(x0, x1, t0, t1)
190 188
191#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \ 189#define outunpack_blocks(x0, x1, t0, t1, rmask) \
192 transpose_2x4(x0, x1, t0, t1) \ 190 transpose_2x4(x0, x1, t0, t1) \
193 \ 191 \
194 vpshufb rmask, x0, x0; \ 192 vpshufb rmask, x0, x0; \
195 vpshufb rmask, x1, x1; \ 193 vpshufb rmask, x1, x1;
196 vmovdqu x0, (0*4*4)(out); \
197 vmovdqu x1, (1*4*4)(out);
198
199#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \
200 transpose_2x4(x0, x1, t0, t1) \
201 \
202 vpshufb rmask, x0, x0; \
203 vpshufb rmask, x1, x1; \
204 vpxor (0*4*4)(out), x0, x0; \
205 vmovdqu x0, (0*4*4)(out); \
206 vpxor (1*4*4)(out), x1, x1; \
207 vmovdqu x1, (1*4*4)(out);
208 194
209.data 195.data
210 196
@@ -213,6 +199,8 @@
213 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 199 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
214.Lbswap128_mask: 200.Lbswap128_mask:
215 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 201 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
202.Lbswap_iv_mask:
203 .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
216.L16_mask: 204.L16_mask:
217 .byte 16, 16, 16, 16 205 .byte 16, 16, 16, 16
218.L32_mask: 206.L32_mask:
@@ -223,35 +211,42 @@
223.text 211.text
224 212
225.align 16 213.align 16
226.global __cast5_enc_blk_16way 214.type __cast5_enc_blk16,@function;
227.type __cast5_enc_blk_16way,@function;
228 215
229__cast5_enc_blk_16way: 216__cast5_enc_blk16:
230 /* input: 217 /* input:
231 * %rdi: ctx, CTX 218 * %rdi: ctx, CTX
232 * %rsi: dst 219 * RL1: blocks 1 and 2
233 * %rdx: src 220 * RR1: blocks 3 and 4
234 * %rcx: bool, if true: xor output 221 * RL2: blocks 5 and 6
222 * RR2: blocks 7 and 8
223 * RL3: blocks 9 and 10
224 * RR3: blocks 11 and 12
225 * RL4: blocks 13 and 14
226 * RR4: blocks 15 and 16
227 * output:
228 * RL1: encrypted blocks 1 and 2
229 * RR1: encrypted blocks 3 and 4
230 * RL2: encrypted blocks 5 and 6
231 * RR2: encrypted blocks 7 and 8
232 * RL3: encrypted blocks 9 and 10
233 * RR3: encrypted blocks 11 and 12
234 * RL4: encrypted blocks 13 and 14
235 * RR4: encrypted blocks 15 and 16
235 */ 236 */
236 237
237 pushq %rbp; 238 pushq %rbp;
238 pushq %rbx; 239 pushq %rbx;
239 pushq %rcx;
240 240
241 vmovdqa .Lbswap_mask, RKM; 241 vmovdqa .Lbswap_mask, RKM;
242 vmovd .Lfirst_mask, R1ST; 242 vmovd .Lfirst_mask, R1ST;
243 vmovd .L32_mask, R32; 243 vmovd .L32_mask, R32;
244 enc_preload_rkr(); 244 enc_preload_rkr();
245 245
246 leaq 1*(2*4*4)(%rdx), %rax; 246 inpack_blocks(RL1, RR1, RTMP, RX, RKM);
247 inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM); 247 inpack_blocks(RL2, RR2, RTMP, RX, RKM);
248 inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM); 248 inpack_blocks(RL3, RR3, RTMP, RX, RKM);
249 leaq 2*(2*4*4)(%rdx), %rax; 249 inpack_blocks(RL4, RR4, RTMP, RX, RKM);
250 inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
251 leaq 3*(2*4*4)(%rdx), %rax;
252 inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
253
254 movq %rsi, %r11;
255 250
256 round(RL, RR, 0, 1); 251 round(RL, RR, 0, 1);
257 round(RR, RL, 1, 2); 252 round(RR, RL, 1, 2);
@@ -276,44 +271,41 @@ __cast5_enc_blk_16way:
276 round(RR, RL, 15, 1); 271 round(RR, RL, 15, 1);
277 272
278__skip_enc: 273__skip_enc:
279 popq %rcx;
280 popq %rbx; 274 popq %rbx;
281 popq %rbp; 275 popq %rbp;
282 276
283 vmovdqa .Lbswap_mask, RKM; 277 vmovdqa .Lbswap_mask, RKM;
284 leaq 1*(2*4*4)(%r11), %rax;
285 278
286 testb %cl, %cl; 279 outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
287 jnz __enc_xor16; 280 outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
288 281 outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
289 outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM); 282 outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
290 outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
291 leaq 2*(2*4*4)(%r11), %rax;
292 outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
293 leaq 3*(2*4*4)(%r11), %rax;
294 outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
295
296 ret;
297
298__enc_xor16:
299 outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
300 outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
301 leaq 2*(2*4*4)(%r11), %rax;
302 outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
303 leaq 3*(2*4*4)(%r11), %rax;
304 outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
305 283
306 ret; 284 ret;
307 285
308.align 16 286.align 16
309.global cast5_dec_blk_16way 287.type __cast5_dec_blk16,@function;
310.type cast5_dec_blk_16way,@function;
311 288
312cast5_dec_blk_16way: 289__cast5_dec_blk16:
313 /* input: 290 /* input:
314 * %rdi: ctx, CTX 291 * %rdi: ctx, CTX
315 * %rsi: dst 292 * RL1: encrypted blocks 1 and 2
316 * %rdx: src 293 * RR1: encrypted blocks 3 and 4
294 * RL2: encrypted blocks 5 and 6
295 * RR2: encrypted blocks 7 and 8
296 * RL3: encrypted blocks 9 and 10
297 * RR3: encrypted blocks 11 and 12
298 * RL4: encrypted blocks 13 and 14
299 * RR4: encrypted blocks 15 and 16
300 * output:
301 * RL1: decrypted blocks 1 and 2
302 * RR1: decrypted blocks 3 and 4
303 * RL2: decrypted blocks 5 and 6
304 * RR2: decrypted blocks 7 and 8
305 * RL3: decrypted blocks 9 and 10
306 * RR3: decrypted blocks 11 and 12
307 * RL4: decrypted blocks 13 and 14
308 * RR4: decrypted blocks 15 and 16
317 */ 309 */
318 310
319 pushq %rbp; 311 pushq %rbp;
@@ -324,15 +316,10 @@ cast5_dec_blk_16way:
324 vmovd .L32_mask, R32; 316 vmovd .L32_mask, R32;
325 dec_preload_rkr(); 317 dec_preload_rkr();
326 318
327 leaq 1*(2*4*4)(%rdx), %rax; 319 inpack_blocks(RL1, RR1, RTMP, RX, RKM);
328 inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM); 320 inpack_blocks(RL2, RR2, RTMP, RX, RKM);
329 inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM); 321 inpack_blocks(RL3, RR3, RTMP, RX, RKM);
330 leaq 2*(2*4*4)(%rdx), %rax; 322 inpack_blocks(RL4, RR4, RTMP, RX, RKM);
331 inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
332 leaq 3*(2*4*4)(%rdx), %rax;
333 inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
334
335 movq %rsi, %r11;
336 323
337 movzbl rr(CTX), %eax; 324 movzbl rr(CTX), %eax;
338 testl %eax, %eax; 325 testl %eax, %eax;
@@ -361,16 +348,211 @@ __dec_tail:
361 popq %rbx; 348 popq %rbx;
362 popq %rbp; 349 popq %rbp;
363 350
364 leaq 1*(2*4*4)(%r11), %rax; 351 outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
365 outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM); 352 outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
366 outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM); 353 outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
367 leaq 2*(2*4*4)(%r11), %rax; 354 outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
368 outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
369 leaq 3*(2*4*4)(%r11), %rax;
370 outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
371 355
372 ret; 356 ret;
373 357
374__skip_dec: 358__skip_dec:
375 vpsrldq $4, RKR, RKR; 359 vpsrldq $4, RKR, RKR;
376 jmp __dec_tail; 360 jmp __dec_tail;
361
362.align 16
363.global cast5_ecb_enc_16way
364.type cast5_ecb_enc_16way,@function;
365
366cast5_ecb_enc_16way:
367 /* input:
368 * %rdi: ctx, CTX
369 * %rsi: dst
370 * %rdx: src
371 */
372
373 movq %rsi, %r11;
374
375 vmovdqu (0*4*4)(%rdx), RL1;
376 vmovdqu (1*4*4)(%rdx), RR1;
377 vmovdqu (2*4*4)(%rdx), RL2;
378 vmovdqu (3*4*4)(%rdx), RR2;
379 vmovdqu (4*4*4)(%rdx), RL3;
380 vmovdqu (5*4*4)(%rdx), RR3;
381 vmovdqu (6*4*4)(%rdx), RL4;
382 vmovdqu (7*4*4)(%rdx), RR4;
383
384 call __cast5_enc_blk16;
385
386 vmovdqu RR1, (0*4*4)(%r11);
387 vmovdqu RL1, (1*4*4)(%r11);
388 vmovdqu RR2, (2*4*4)(%r11);
389 vmovdqu RL2, (3*4*4)(%r11);
390 vmovdqu RR3, (4*4*4)(%r11);
391 vmovdqu RL3, (5*4*4)(%r11);
392 vmovdqu RR4, (6*4*4)(%r11);
393 vmovdqu RL4, (7*4*4)(%r11);
394
395 ret;
396
397.align 16
398.global cast5_ecb_dec_16way
399.type cast5_ecb_dec_16way,@function;
400
401cast5_ecb_dec_16way:
402 /* input:
403 * %rdi: ctx, CTX
404 * %rsi: dst
405 * %rdx: src
406 */
407
408 movq %rsi, %r11;
409
410 vmovdqu (0*4*4)(%rdx), RL1;
411 vmovdqu (1*4*4)(%rdx), RR1;
412 vmovdqu (2*4*4)(%rdx), RL2;
413 vmovdqu (3*4*4)(%rdx), RR2;
414 vmovdqu (4*4*4)(%rdx), RL3;
415 vmovdqu (5*4*4)(%rdx), RR3;
416 vmovdqu (6*4*4)(%rdx), RL4;
417 vmovdqu (7*4*4)(%rdx), RR4;
418
419 call __cast5_dec_blk16;
420
421 vmovdqu RR1, (0*4*4)(%r11);
422 vmovdqu RL1, (1*4*4)(%r11);
423 vmovdqu RR2, (2*4*4)(%r11);
424 vmovdqu RL2, (3*4*4)(%r11);
425 vmovdqu RR3, (4*4*4)(%r11);
426 vmovdqu RL3, (5*4*4)(%r11);
427 vmovdqu RR4, (6*4*4)(%r11);
428 vmovdqu RL4, (7*4*4)(%r11);
429
430 ret;
431
432.align 16
433.global cast5_cbc_dec_16way
434.type cast5_cbc_dec_16way,@function;
435
436cast5_cbc_dec_16way:
437 /* input:
438 * %rdi: ctx, CTX
439 * %rsi: dst
440 * %rdx: src
441 */
442
443 pushq %r12;
444
445 movq %rsi, %r11;
446 movq %rdx, %r12;
447
448 vmovdqu (0*16)(%rdx), RL1;
449 vmovdqu (1*16)(%rdx), RR1;
450 vmovdqu (2*16)(%rdx), RL2;
451 vmovdqu (3*16)(%rdx), RR2;
452 vmovdqu (4*16)(%rdx), RL3;
453 vmovdqu (5*16)(%rdx), RR3;
454 vmovdqu (6*16)(%rdx), RL4;
455 vmovdqu (7*16)(%rdx), RR4;
456
457 call __cast5_dec_blk16;
458
459 /* xor with src */
460 vmovq (%r12), RX;
461 vpshufd $0x4f, RX, RX;
462 vpxor RX, RR1, RR1;
463 vpxor 0*16+8(%r12), RL1, RL1;
464 vpxor 1*16+8(%r12), RR2, RR2;
465 vpxor 2*16+8(%r12), RL2, RL2;
466 vpxor 3*16+8(%r12), RR3, RR3;
467 vpxor 4*16+8(%r12), RL3, RL3;
468 vpxor 5*16+8(%r12), RR4, RR4;
469 vpxor 6*16+8(%r12), RL4, RL4;
470
471 vmovdqu RR1, (0*16)(%r11);
472 vmovdqu RL1, (1*16)(%r11);
473 vmovdqu RR2, (2*16)(%r11);
474 vmovdqu RL2, (3*16)(%r11);
475 vmovdqu RR3, (4*16)(%r11);
476 vmovdqu RL3, (5*16)(%r11);
477 vmovdqu RR4, (6*16)(%r11);
478 vmovdqu RL4, (7*16)(%r11);
479
480 popq %r12;
481
482 ret;
483
484.align 16
485.global cast5_ctr_16way
486.type cast5_ctr_16way,@function;
487
488cast5_ctr_16way:
489 /* input:
490 * %rdi: ctx, CTX
491 * %rsi: dst
492 * %rdx: src
493 * %rcx: iv (big endian, 64bit)
494 */
495
496 pushq %r12;
497
498 movq %rsi, %r11;
499 movq %rdx, %r12;
500
501 vpcmpeqd RTMP, RTMP, RTMP;
502 vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */
503
504 vpcmpeqd RKR, RKR, RKR;
505 vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */
506 vmovdqa .Lbswap_iv_mask, R1ST;
507 vmovdqa .Lbswap128_mask, RKM;
508
509 /* load IV and byteswap */
510 vmovq (%rcx), RX;
511 vpshufb R1ST, RX, RX;
512
513 /* construct IVs */
514 vpsubq RTMP, RX, RX; /* le: IV1, IV0 */
515 vpshufb RKM, RX, RL1; /* be: IV0, IV1 */
516 vpsubq RKR, RX, RX;
517 vpshufb RKM, RX, RR1; /* be: IV2, IV3 */
518 vpsubq RKR, RX, RX;
519 vpshufb RKM, RX, RL2; /* be: IV4, IV5 */
520 vpsubq RKR, RX, RX;
521 vpshufb RKM, RX, RR2; /* be: IV6, IV7 */
522 vpsubq RKR, RX, RX;
523 vpshufb RKM, RX, RL3; /* be: IV8, IV9 */
524 vpsubq RKR, RX, RX;
525 vpshufb RKM, RX, RR3; /* be: IV10, IV11 */
526 vpsubq RKR, RX, RX;
527 vpshufb RKM, RX, RL4; /* be: IV12, IV13 */
528 vpsubq RKR, RX, RX;
529 vpshufb RKM, RX, RR4; /* be: IV14, IV15 */
530
531 /* store last IV */
532 vpsubq RTMP, RX, RX; /* le: IV16, IV14 */
533 vpshufb R1ST, RX, RX; /* be: IV16, IV16 */
534 vmovq RX, (%rcx);
535
536 call __cast5_enc_blk16;
537
538 /* dst = src ^ iv */
539 vpxor (0*16)(%r12), RR1, RR1;
540 vpxor (1*16)(%r12), RL1, RL1;
541 vpxor (2*16)(%r12), RR2, RR2;
542 vpxor (3*16)(%r12), RL2, RL2;
543 vpxor (4*16)(%r12), RR3, RR3;
544 vpxor (5*16)(%r12), RL3, RL3;
545 vpxor (6*16)(%r12), RR4, RR4;
546 vpxor (7*16)(%r12), RL4, RL4;
547 vmovdqu RR1, (0*16)(%r11);
548 vmovdqu RL1, (1*16)(%r11);
549 vmovdqu RR2, (2*16)(%r11);
550 vmovdqu RL2, (3*16)(%r11);
551 vmovdqu RR3, (4*16)(%r11);
552 vmovdqu RL3, (5*16)(%r11);
553 vmovdqu RR4, (6*16)(%r11);
554 vmovdqu RL4, (7*16)(%r11);
555
556 popq %r12;
557
558 ret;
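
cast5_cbc_dec_16way above decrypts 16 blocks and then xors each result with the preceding ciphertext block; only the first block of the batch is left to the caller, which chains it to the IV (or to the ciphertext block just before the batch). A small C sketch of that chaining step for a 64-bit block cipher, with cbc_dec_chain() being an illustrative helper rather than anything in this patch:

/* Apply CBC chaining after a multi-block decrypt: block i is xored
 * with ciphertext block i - 1.  Walking backwards keeps this correct
 * when dst and src alias (in-place decryption).  Block 0 is chained
 * to the IV by the caller; nblocks is assumed to be at least 1. */
static void cbc_dec_chain(u64 *dst, const u64 *src, unsigned int nblocks)
{
	unsigned int i;

	for (i = nblocks - 1; i >= 1; i--)
		dst[i] ^= src[i - 1];
}
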
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index e0ea14f9547f..c6631813dc11 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -37,29 +37,14 @@
37 37
38#define CAST5_PARALLEL_BLOCKS 16 38#define CAST5_PARALLEL_BLOCKS 16
39 39
40asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst, 40asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
41 const u8 *src, bool xor);
42asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
43 const u8 *src); 41 const u8 *src);
44 42asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
45static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst, 43 const u8 *src);
46 const u8 *src) 44asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
47{ 45 const u8 *src);
48 __cast5_enc_blk_16way(ctx, dst, src, false); 46asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
49} 47 __be64 *iv);
50
51static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
52 const u8 *src)
53{
54 __cast5_enc_blk_16way(ctx, dst, src, true);
55}
56
57static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
58 const u8 *src)
59{
60 cast5_dec_blk_16way(ctx, dst, src);
61}
62
63 48
64static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes) 49static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
65{ 50{
@@ -79,8 +64,11 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
79 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 64 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
80 const unsigned int bsize = CAST5_BLOCK_SIZE; 65 const unsigned int bsize = CAST5_BLOCK_SIZE;
81 unsigned int nbytes; 66 unsigned int nbytes;
67 void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
82 int err; 68 int err;
83 69
70 fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
71
84 err = blkcipher_walk_virt(desc, walk); 72 err = blkcipher_walk_virt(desc, walk);
85 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 73 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
86 74
@@ -93,10 +81,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
93 /* Process multi-block batch */ 81 /* Process multi-block batch */
94 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { 82 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
95 do { 83 do {
96 if (enc) 84 fn(ctx, wdst, wsrc);
97 cast5_enc_blk_xway(ctx, wdst, wsrc);
98 else
99 cast5_dec_blk_xway(ctx, wdst, wsrc);
100 85
101 wsrc += bsize * CAST5_PARALLEL_BLOCKS; 86 wsrc += bsize * CAST5_PARALLEL_BLOCKS;
102 wdst += bsize * CAST5_PARALLEL_BLOCKS; 87 wdst += bsize * CAST5_PARALLEL_BLOCKS;
@@ -107,12 +92,11 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
107 goto done; 92 goto done;
108 } 93 }
109 94
95 fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
96
110 /* Handle leftovers */ 97 /* Handle leftovers */
111 do { 98 do {
112 if (enc) 99 fn(ctx, wdst, wsrc);
113 __cast5_encrypt(ctx, wdst, wsrc);
114 else
115 __cast5_decrypt(ctx, wdst, wsrc);
116 100
117 wsrc += bsize; 101 wsrc += bsize;
118 wdst += bsize; 102 wdst += bsize;
@@ -194,9 +178,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
194 unsigned int nbytes = walk->nbytes; 178 unsigned int nbytes = walk->nbytes;
195 u64 *src = (u64 *)walk->src.virt.addr; 179 u64 *src = (u64 *)walk->src.virt.addr;
196 u64 *dst = (u64 *)walk->dst.virt.addr; 180 u64 *dst = (u64 *)walk->dst.virt.addr;
197 u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
198 u64 last_iv; 181 u64 last_iv;
199 int i;
200 182
201 /* Start of the last block. */ 183 /* Start of the last block. */
202 src += nbytes / bsize - 1; 184 src += nbytes / bsize - 1;
@@ -211,13 +193,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
211 src -= CAST5_PARALLEL_BLOCKS - 1; 193 src -= CAST5_PARALLEL_BLOCKS - 1;
212 dst -= CAST5_PARALLEL_BLOCKS - 1; 194 dst -= CAST5_PARALLEL_BLOCKS - 1;
213 195
214 for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) 196 cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
215 ivs[i] = src[i];
216
217 cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
218
219 for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
220 *(dst + (i + 1)) ^= *(ivs + i);
221 197
222 nbytes -= bsize; 198 nbytes -= bsize;
223 if (nbytes < bsize) 199 if (nbytes < bsize)
@@ -298,23 +274,12 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
298 unsigned int nbytes = walk->nbytes; 274 unsigned int nbytes = walk->nbytes;
299 u64 *src = (u64 *)walk->src.virt.addr; 275 u64 *src = (u64 *)walk->src.virt.addr;
300 u64 *dst = (u64 *)walk->dst.virt.addr; 276 u64 *dst = (u64 *)walk->dst.virt.addr;
301 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
302 __be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
303 int i;
304 277
305 /* Process multi-block batch */ 278 /* Process multi-block batch */
306 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { 279 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
307 do { 280 do {
308 /* create ctrblks for parallel encrypt */ 281 cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
309 for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) { 282 (__be64 *)walk->iv);
310 if (dst != src)
311 dst[i] = src[i];
312
313 ctrblocks[i] = cpu_to_be64(ctrblk++);
314 }
315
316 cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
317 (u8 *)ctrblocks);
318 283
319 src += CAST5_PARALLEL_BLOCKS; 284 src += CAST5_PARALLEL_BLOCKS;
320 dst += CAST5_PARALLEL_BLOCKS; 285 dst += CAST5_PARALLEL_BLOCKS;
@@ -327,13 +292,16 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
327 292
328 /* Handle leftovers */ 293 /* Handle leftovers */
329 do { 294 do {
295 u64 ctrblk;
296
330 if (dst != src) 297 if (dst != src)
331 *dst = *src; 298 *dst = *src;
332 299
333 ctrblocks[0] = cpu_to_be64(ctrblk++); 300 ctrblk = *(u64 *)walk->iv;
301 be64_add_cpu((__be64 *)walk->iv, 1);
334 302
335 __cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); 303 __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
336 *dst ^= ctrblocks[0]; 304 *dst ^= ctrblk;
337 305
338 src += 1; 306 src += 1;
339 dst += 1; 307 dst += 1;
@@ -341,7 +309,6 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
341 } while (nbytes >= bsize); 309 } while (nbytes >= bsize);
342 310
343done: 311done:
344 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
345 return nbytes; 312 return nbytes;
346} 313}
347 314
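
With this change the counter expansion moves out of the C loop: cast5_ctr_16way() derives all 16 counter blocks in registers (one vpsubq of the {-1, 0} constant to split the first pair, then repeated vpsubq of {-2, -2}, since each 128-bit register carries two 64-bit counters) and stores the advanced counter back through %rcx, so __ctr_crypt() only touches walk->iv for the sub-16-block tail via be64_add_cpu(). A hedged scalar sketch of the expansion the assembly performs; build_ctr_blocks() is invented for illustration:

#include <linux/types.h>
#include <asm/byteorder.h>

/* Expand the 64-bit big-endian counter in *iv into nblocks big-endian
 * counter blocks and advance *iv past them, mirroring what
 * cast5_ctr_16way() does for 16 blocks at a time in xmm registers. */
static void build_ctr_blocks(__be64 *iv, __be64 *ctrblks,
			     unsigned int nblocks)
{
	u64 ctr = be64_to_cpu(*iv);
	unsigned int i;

	for (i = 0; i < nblocks; i++)
		ctrblks[i] = cpu_to_be64(ctr++);

	*iv = cpu_to_be64(ctr);
}
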
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 218d283772f4..2569d0da841f 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -23,22 +23,24 @@
23 * 23 *
24 */ 24 */
25 25
26#include "glue_helper-asm-avx.S"
27
26.file "cast6-avx-x86_64-asm_64.S" 28.file "cast6-avx-x86_64-asm_64.S"
27 29
28.extern cast6_s1 30.extern cast_s1
29.extern cast6_s2 31.extern cast_s2
30.extern cast6_s3 32.extern cast_s3
31.extern cast6_s4 33.extern cast_s4
32 34
33/* structure of crypto context */ 35/* structure of crypto context */
34#define km 0 36#define km 0
35#define kr (12*4*4) 37#define kr (12*4*4)
36 38
37/* s-boxes */ 39/* s-boxes */
38#define s1 cast6_s1 40#define s1 cast_s1
39#define s2 cast6_s2 41#define s2 cast_s2
40#define s3 cast6_s3 42#define s3 cast_s3
41#define s4 cast6_s4 43#define s4 cast_s4
42 44
43/********************************************************************** 45/**********************************************************************
44 8-way AVX cast6 46 8-way AVX cast6
@@ -205,11 +207,7 @@
205 vpunpcklqdq x3, t2, x2; \ 207 vpunpcklqdq x3, t2, x2; \
206 vpunpckhqdq x3, t2, x3; 208 vpunpckhqdq x3, t2, x3;
207 209
208#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \ 210#define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
209 vmovdqu (0*4*4)(in), x0; \
210 vmovdqu (1*4*4)(in), x1; \
211 vmovdqu (2*4*4)(in), x2; \
212 vmovdqu (3*4*4)(in), x3; \
213 vpshufb rmask, x0, x0; \ 211 vpshufb rmask, x0, x0; \
214 vpshufb rmask, x1, x1; \ 212 vpshufb rmask, x1, x1; \
215 vpshufb rmask, x2, x2; \ 213 vpshufb rmask, x2, x2; \
@@ -217,39 +215,21 @@
217 \ 215 \
218 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) 216 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
219 217
220#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \ 218#define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
221 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ 219 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
222 \ 220 \
223 vpshufb rmask, x0, x0; \ 221 vpshufb rmask, x0, x0; \
224 vpshufb rmask, x1, x1; \ 222 vpshufb rmask, x1, x1; \
225 vpshufb rmask, x2, x2; \ 223 vpshufb rmask, x2, x2; \
226 vpshufb rmask, x3, x3; \ 224 vpshufb rmask, x3, x3;
227 vmovdqu x0, (0*4*4)(out); \
228 vmovdqu x1, (1*4*4)(out); \
229 vmovdqu x2, (2*4*4)(out); \
230 vmovdqu x3, (3*4*4)(out);
231
232#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
233 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
234 \
235 vpshufb rmask, x0, x0; \
236 vpshufb rmask, x1, x1; \
237 vpshufb rmask, x2, x2; \
238 vpshufb rmask, x3, x3; \
239 vpxor (0*4*4)(out), x0, x0; \
240 vmovdqu x0, (0*4*4)(out); \
241 vpxor (1*4*4)(out), x1, x1; \
242 vmovdqu x1, (1*4*4)(out); \
243 vpxor (2*4*4)(out), x2, x2; \
244 vmovdqu x2, (2*4*4)(out); \
245 vpxor (3*4*4)(out), x3, x3; \
246 vmovdqu x3, (3*4*4)(out);
247 225
248.data 226.data
249 227
250.align 16 228.align 16
251.Lbswap_mask: 229.Lbswap_mask:
252 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 230 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
231.Lbswap128_mask:
232 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
253.Lrkr_enc_Q_Q_QBAR_QBAR: 233.Lrkr_enc_Q_Q_QBAR_QBAR:
254 .byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12 234 .byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
255.Lrkr_enc_QBAR_QBAR_QBAR_QBAR: 235.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
@@ -269,31 +249,26 @@
269 249
270.text 250.text
271 251
272.align 16 252.align 8
273.global __cast6_enc_blk_8way 253.type __cast6_enc_blk8,@function;
274.type __cast6_enc_blk_8way,@function;
275 254
276__cast6_enc_blk_8way: 255__cast6_enc_blk8:
277 /* input: 256 /* input:
278 * %rdi: ctx, CTX 257 * %rdi: ctx, CTX
279 * %rsi: dst 258 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
280 * %rdx: src 259 * output:
281 * %rcx: bool, if true: xor output 260 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
282 */ 261 */
283 262
284 pushq %rbp; 263 pushq %rbp;
285 pushq %rbx; 264 pushq %rbx;
286 pushq %rcx;
287 265
288 vmovdqa .Lbswap_mask, RKM; 266 vmovdqa .Lbswap_mask, RKM;
289 vmovd .Lfirst_mask, R1ST; 267 vmovd .Lfirst_mask, R1ST;
290 vmovd .L32_mask, R32; 268 vmovd .L32_mask, R32;
291 269
292 leaq (4*4*4)(%rdx), %rax; 270 inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
293 inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); 271 inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
294 inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
295
296 movq %rsi, %r11;
297 272
298 preload_rkr(0, dummy, none); 273 preload_rkr(0, dummy, none);
299 Q(0); 274 Q(0);
@@ -311,36 +286,25 @@ __cast6_enc_blk_8way:
311 QBAR(10); 286 QBAR(10);
312 QBAR(11); 287 QBAR(11);
313 288
314 popq %rcx;
315 popq %rbx; 289 popq %rbx;
316 popq %rbp; 290 popq %rbp;
317 291
318 vmovdqa .Lbswap_mask, RKM; 292 vmovdqa .Lbswap_mask, RKM;
319 leaq (4*4*4)(%r11), %rax;
320
321 testb %cl, %cl;
322 jnz __enc_xor8;
323
324 outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
325 outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
326
327 ret;
328 293
329__enc_xor8: 294 outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
330 outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); 295 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
331 outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
332 296
333 ret; 297 ret;
334 298
335.align 16 299.align 8
336.global cast6_dec_blk_8way 300.type __cast6_dec_blk8,@function;
337.type cast6_dec_blk_8way,@function;
338 301
339cast6_dec_blk_8way: 302__cast6_dec_blk8:
340 /* input: 303 /* input:
341 * %rdi: ctx, CTX 304 * %rdi: ctx, CTX
342 * %rsi: dst 305 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
343 * %rdx: src 306 * output:
307 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
344 */ 308 */
345 309
346 pushq %rbp; 310 pushq %rbp;
@@ -350,11 +314,8 @@ cast6_dec_blk_8way:
350 vmovd .Lfirst_mask, R1ST; 314 vmovd .Lfirst_mask, R1ST;
351 vmovd .L32_mask, R32; 315 vmovd .L32_mask, R32;
352 316
353 leaq (4*4*4)(%rdx), %rax; 317 inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
354 inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); 318 inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
355 inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
356
357 movq %rsi, %r11;
358 319
359 preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q); 320 preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
360 Q(11); 321 Q(11);
@@ -376,8 +337,103 @@ cast6_dec_blk_8way:
376 popq %rbp; 337 popq %rbp;
377 338
378 vmovdqa .Lbswap_mask, RKM; 339 vmovdqa .Lbswap_mask, RKM;
379 leaq (4*4*4)(%r11), %rax; 340 outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
380 outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); 341 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
381 outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); 342
343 ret;
344
345.align 8
346.global cast6_ecb_enc_8way
347.type cast6_ecb_enc_8way,@function;
348
349cast6_ecb_enc_8way:
350 /* input:
351 * %rdi: ctx, CTX
352 * %rsi: dst
353 * %rdx: src
354 */
355
356 movq %rsi, %r11;
357
358 load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
359
360 call __cast6_enc_blk8;
361
362 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
363
364 ret;
365
366.align 8
367.global cast6_ecb_dec_8way
368.type cast6_ecb_dec_8way,@function;
369
370cast6_ecb_dec_8way:
371 /* input:
372 * %rdi: ctx, CTX
373 * %rsi: dst
374 * %rdx: src
375 */
376
377 movq %rsi, %r11;
378
379 load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
380
381 call __cast6_dec_blk8;
382
383 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
384
385 ret;
386
387.align 8
388.global cast6_cbc_dec_8way
389.type cast6_cbc_dec_8way,@function;
390
391cast6_cbc_dec_8way:
392 /* input:
393 * %rdi: ctx, CTX
394 * %rsi: dst
395 * %rdx: src
396 */
397
398 pushq %r12;
399
400 movq %rsi, %r11;
401 movq %rdx, %r12;
402
403 load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
404
405 call __cast6_dec_blk8;
406
407 store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
408
409 popq %r12;
410
411 ret;
412
413.align 8
414.global cast6_ctr_8way
415.type cast6_ctr_8way,@function;
416
417cast6_ctr_8way:
418 /* input:
419 * %rdi: ctx, CTX
420 * %rsi: dst
421 * %rdx: src
422 * %rcx: iv (little endian, 128bit)
423 */
424
425 pushq %r12;
426
427 movq %rsi, %r11;
428 movq %rdx, %r12;
429
430 load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
431 RD2, RX, RKR, RKM);
432
433 call __cast6_enc_blk8;
434
435 store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
436
437 popq %r12;
382 438
383 ret; 439 ret;
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 15e5f85a5011..92f7ca24790a 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -40,79 +40,34 @@
40 40
41#define CAST6_PARALLEL_BLOCKS 8 41#define CAST6_PARALLEL_BLOCKS 8
42 42
43asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst, 43asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
44 const u8 *src, bool xor); 44 const u8 *src);
45asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst, 45asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
46 const u8 *src); 46 const u8 *src);
47 47
48static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst, 48asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
49 const u8 *src) 49 const u8 *src);
50{ 50asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
51 __cast6_enc_blk_8way(ctx, dst, src, false); 51 le128 *iv);
52}
53
54static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst,
55 const u8 *src)
56{
57 __cast6_enc_blk_8way(ctx, dst, src, true);
58}
59
60static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst,
61 const u8 *src)
62{
63 cast6_dec_blk_8way(ctx, dst, src);
64}
65
66
67static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
68{
69 u128 ivs[CAST6_PARALLEL_BLOCKS - 1];
70 unsigned int j;
71
72 for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
73 ivs[j] = src[j];
74
75 cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
76
77 for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
78 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
79}
80 52
81static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) 53static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
82{ 54{
83 be128 ctrblk; 55 be128 ctrblk;
84 56
85 u128_to_be128(&ctrblk, iv); 57 le128_to_be128(&ctrblk, iv);
86 u128_inc(iv); 58 le128_inc(iv);
87 59
88 __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 60 __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
89 u128_xor(dst, src, (u128 *)&ctrblk); 61 u128_xor(dst, src, (u128 *)&ctrblk);
90} 62}
91 63
92static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
93 u128 *iv)
94{
95 be128 ctrblks[CAST6_PARALLEL_BLOCKS];
96 unsigned int i;
97
98 for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) {
99 if (dst != src)
100 dst[i] = src[i];
101
102 u128_to_be128(&ctrblks[i], iv);
103 u128_inc(iv);
104 }
105
106 cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
107}
108
109static const struct common_glue_ctx cast6_enc = { 64static const struct common_glue_ctx cast6_enc = {
110 .num_funcs = 2, 65 .num_funcs = 2,
111 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, 66 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
112 67
113 .funcs = { { 68 .funcs = { {
114 .num_blocks = CAST6_PARALLEL_BLOCKS, 69 .num_blocks = CAST6_PARALLEL_BLOCKS,
115 .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) } 70 .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
116 }, { 71 }, {
117 .num_blocks = 1, 72 .num_blocks = 1,
118 .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) } 73 .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
@@ -125,7 +80,7 @@ static const struct common_glue_ctx cast6_ctr = {
125 80
126 .funcs = { { 81 .funcs = { {
127 .num_blocks = CAST6_PARALLEL_BLOCKS, 82 .num_blocks = CAST6_PARALLEL_BLOCKS,
128 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) } 83 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
129 }, { 84 }, {
130 .num_blocks = 1, 85 .num_blocks = 1,
131 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) } 86 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
@@ -138,7 +93,7 @@ static const struct common_glue_ctx cast6_dec = {
138 93
139 .funcs = { { 94 .funcs = { {
140 .num_blocks = CAST6_PARALLEL_BLOCKS, 95 .num_blocks = CAST6_PARALLEL_BLOCKS,
141 .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) } 96 .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
142 }, { 97 }, {
143 .num_blocks = 1, 98 .num_blocks = 1,
144 .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) } 99 .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
@@ -151,7 +106,7 @@ static const struct common_glue_ctx cast6_dec_cbc = {
151 106
152 .funcs = { { 107 .funcs = { {
153 .num_blocks = CAST6_PARALLEL_BLOCKS, 108 .num_blocks = CAST6_PARALLEL_BLOCKS,
154 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) } 109 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
155 }, { 110 }, {
156 .num_blocks = 1, 111 .num_blocks = 1,
157 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) } 112 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
@@ -215,7 +170,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
215 ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); 170 ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
216 171
217 if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { 172 if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
218 cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst); 173 cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
219 return; 174 return;
220 } 175 }
221 176
@@ -232,7 +187,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
232 ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); 187 ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
233 188
234 if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { 189 if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
235 cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst); 190 cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
236 return; 191 return;
237 } 192 }
238 193
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel_glue.c
index 493f959261f7..6812ad98355c 100644
--- a/arch/x86/crypto/crc32c-intel.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -32,6 +32,8 @@
32 32
33#include <asm/cpufeature.h> 33#include <asm/cpufeature.h>
34#include <asm/cpu_device_id.h> 34#include <asm/cpu_device_id.h>
35#include <asm/i387.h>
36#include <asm/fpu-internal.h>
35 37
36#define CHKSUM_BLOCK_SIZE 1 38#define CHKSUM_BLOCK_SIZE 1
37#define CHKSUM_DIGEST_SIZE 4 39#define CHKSUM_DIGEST_SIZE 4
@@ -44,6 +46,31 @@
44#define REX_PRE 46#define REX_PRE
45#endif 47#endif
46 48
49#ifdef CONFIG_X86_64
50/*
51 * use carryless multiply version of crc32c when buffer
52 * size is >= 512 (when eager fpu is enabled) or
53 * >= 1024 (when eager fpu is disabled) to account
54 * for fpu state save/restore overhead.
55 */
56#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512
57#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024
58
59asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
60 unsigned int crc_init);
61static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
62#if defined(X86_FEATURE_EAGER_FPU)
63#define set_pcl_breakeven_point() \
64do { \
65 if (!use_eager_fpu()) \
66 crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
67} while (0)
68#else
69#define set_pcl_breakeven_point() \
70 (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
71#endif
72#endif /* CONFIG_X86_64 */
73
47static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) 74static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
48{ 75{
49 while (length--) { 76 while (length--) {
@@ -154,6 +181,52 @@ static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
154 return 0; 181 return 0;
155} 182}
156 183
184#ifdef CONFIG_X86_64
185static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
186 unsigned int len)
187{
188 u32 *crcp = shash_desc_ctx(desc);
189
190 /*
191 * use faster PCL version if datasize is large enough to
192 * overcome kernel fpu state save/restore overhead
193 */
194 if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
195 kernel_fpu_begin();
196 *crcp = crc_pcl(data, len, *crcp);
197 kernel_fpu_end();
198 } else
199 *crcp = crc32c_intel_le_hw(*crcp, data, len);
200 return 0;
201}
202
203static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
204 u8 *out)
205{
206 if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
207 kernel_fpu_begin();
208 *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
209 kernel_fpu_end();
210 } else
211 *(__le32 *)out =
212 ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
213 return 0;
214}
215
216static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
217 unsigned int len, u8 *out)
218{
219 return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
220}
221
222static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
223 unsigned int len, u8 *out)
224{
225 return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
226 out);
227}
228#endif /* CONFIG_X86_64 */
229
157static struct shash_alg alg = { 230static struct shash_alg alg = {
158 .setkey = crc32c_intel_setkey, 231 .setkey = crc32c_intel_setkey,
159 .init = crc32c_intel_init, 232 .init = crc32c_intel_init,
@@ -184,6 +257,14 @@ static int __init crc32c_intel_mod_init(void)
184{ 257{
185 if (!x86_match_cpu(crc32c_cpu_id)) 258 if (!x86_match_cpu(crc32c_cpu_id))
186 return -ENODEV; 259 return -ENODEV;
260#ifdef CONFIG_X86_64
261 if (cpu_has_pclmulqdq) {
262 alg.update = crc32c_pcl_intel_update;
263 alg.finup = crc32c_pcl_intel_finup;
264 alg.digest = crc32c_pcl_intel_digest;
265 set_pcl_breakeven_point();
266 }
267#endif
187 return crypto_register_shash(&alg); 268 return crypto_register_shash(&alg);
188} 269}
189 270
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
new file mode 100644
index 000000000000..93c6d39237ac
--- /dev/null
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -0,0 +1,460 @@
1/*
2 * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
3 *
4 * The white paper on CRC32C calculations with PCLMULQDQ instruction can be
5 * downloaded from:
6 * http://download.intel.com/design/intarch/papers/323405.pdf
7 *
8 * Copyright (C) 2012 Intel Corporation.
9 *
10 * Authors:
11 * Wajdi Feghali <wajdi.k.feghali@intel.com>
12 * James Guilford <james.guilford@intel.com>
13 * David Cote <david.m.cote@intel.com>
14 * Tim Chen <tim.c.chen@linux.intel.com>
15 *
16 * This software is available to you under a choice of one of two
17 * licenses. You may choose to be licensed under the terms of the GNU
18 * General Public License (GPL) Version 2, available from the file
19 * COPYING in the main directory of this source tree, or the
20 * OpenIB.org BSD license below:
21 *
22 * Redistribution and use in source and binary forms, with or
23 * without modification, are permitted provided that the following
24 * conditions are met:
25 *
26 * - Redistributions of source code must retain the above
27 * copyright notice, this list of conditions and the following
28 * disclaimer.
29 *
30 * - Redistributions in binary form must reproduce the above
31 * copyright notice, this list of conditions and the following
32 * disclaimer in the documentation and/or other materials
33 * provided with the distribution.
34 *
35 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
38 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
39 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
40 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
41 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
42 * SOFTWARE.
43 */
44
45## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
46
47.macro LABEL prefix n
48\prefix\n\():
49.endm
50
51.macro JMPTBL_ENTRY i
52.word crc_\i - crc_array
53.endm
54
55.macro JNC_LESS_THAN j
56 jnc less_than_\j
57.endm
58
59# Define threshold where buffers are considered "small" and routed to more
60# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
61# SMALL_SIZE can be no larger than 255.
62
63#define SMALL_SIZE 200
64
65.if (SMALL_SIZE > 255)
 66.error "SMALL_SIZE must be < 256"
67.endif
68
69# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
70
71.global crc_pcl
72crc_pcl:
73#define bufp %rdi
74#define bufp_dw %edi
75#define bufp_w %di
76#define bufp_b %dil
77#define bufptmp %rcx
78#define block_0 %rcx
79#define block_1 %rdx
80#define block_2 %r11
81#define len %rsi
82#define len_dw %esi
83#define len_w %si
84#define len_b %sil
85#define crc_init_arg %rdx
86#define tmp %rbx
87#define crc_init %r8
88#define crc_init_dw %r8d
89#define crc1 %r9
90#define crc2 %r10
91
92 pushq %rbx
93 pushq %rdi
94 pushq %rsi
95
 96	## Move crc_init for Linux to a different register
97 mov crc_init_arg, crc_init
98
99 ################################################################
100 ## 1) ALIGN:
101 ################################################################
102
103 mov bufp, bufptmp # rdi = *buf
104 neg bufp
105 and $7, bufp # calculate the unalignment amount of
106 # the address
107 je proc_block # Skip if aligned
108
109 ## If len is less than 8 and we're unaligned, we need to jump
110 ## to special code to avoid reading beyond the end of the buffer
111 cmp $8, len
112 jae do_align
113 # less_than_8 expects length in upper 3 bits of len_dw
114 # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
115 shl $32-3+1, len_dw
116 jmp less_than_8_post_shl1
117
118do_align:
119 #### Calculate CRC of unaligned bytes of the buffer (if any)
 120	movq (bufptmp), tmp		# load a quadword from the buffer
121 add bufp, bufptmp # align buffer pointer for quadword
122 # processing
123 sub bufp, len # update buffer length
124align_loop:
125 crc32b %bl, crc_init_dw # compute crc32 of 1-byte
126 shr $8, tmp # get next byte
127 dec bufp
128 jne align_loop
129
130proc_block:
131
132 ################################################################
133 ## 2) PROCESS BLOCKS:
134 ################################################################
135
136 ## compute num of bytes to be processed
137 movq len, tmp # save num bytes in tmp
138
139 cmpq $128*24, len
140 jae full_block
141
142continue_block:
143 cmpq $SMALL_SIZE, len
144 jb small
145
146 ## len < 128*24
147 movq $2731, %rax # 2731 = ceil(2^16 / 24)
148 mul len_dw
149 shrq $16, %rax
150
151 ## eax contains floor(bytes / 24) = num 24-byte chunks to do
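	## (e.g. len = 3000: 3000 * 2731 = 8193000, and 8193000 >> 16 = 125
	##  = floor(3000 / 24); the multiply-and-shift stays exact for the
	##  whole len < 128*24 range handled on this path)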
152
153 ## process rax 24-byte chunks (128 >= rax >= 0)
154
155 ## compute end address of each block
156 ## block 0 (base addr + RAX * 8)
157 ## block 1 (base addr + RAX * 16)
158 ## block 2 (base addr + RAX * 24)
159 lea (bufptmp, %rax, 8), block_0
160 lea (block_0, %rax, 8), block_1
161 lea (block_1, %rax, 8), block_2
162
163 xor crc1, crc1
164 xor crc2, crc2
165
166 ## branch into array
167 lea jump_table(%rip), bufp
168 movzxw (bufp, %rax, 2), len
169 offset=crc_array-jump_table
170 lea offset(bufp, len, 1), bufp
171 jmp *bufp
172
173 ################################################################
174 ## 2a) PROCESS FULL BLOCKS:
175 ################################################################
176full_block:
177 movq $128,%rax
178 lea 128*8*2(block_0), block_1
179 lea 128*8*3(block_0), block_2
180 add $128*8*1, block_0
181
182 xor crc1,crc1
183 xor crc2,crc2
184
 185	# Fall through into top of crc array (crc_128)
186
187 ################################################################
188 ## 3) CRC Array:
189 ################################################################
190
191crc_array:
192 i=128
193.rept 128-1
194.altmacro
195LABEL crc_ %i
196.noaltmacro
197 crc32q -i*8(block_0), crc_init
198 crc32q -i*8(block_1), crc1
199 crc32q -i*8(block_2), crc2
200 i=(i-1)
201.endr
202
203.altmacro
204LABEL crc_ %i
205.noaltmacro
206 crc32q -i*8(block_0), crc_init
207 crc32q -i*8(block_1), crc1
208# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
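	# (the final quadword of block_2 is folded in during step 4 below,
	#  together with the combined crc_init/crc1 result)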
209
210 mov block_2, block_0
211
212 ################################################################
213 ## 4) Combine three results:
214 ################################################################
215
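	## crc_init and crc1 cover only block 0 and block 1; the carry-less
	## multiplies by the K_table constants shift those partial CRCs past
	## the data that followed them, so they can be XORed into the last
	## quadword of block 2 and absorbed into crc2 with one final crc32q.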
216 lea (K_table-16)(%rip), bufp # first entry is for idx 1
217 shlq $3, %rax # rax *= 8
218 subq %rax, tmp # tmp -= rax*8
219 shlq $1, %rax
220 subq %rax, tmp # tmp -= rax*16
221 # (total tmp -= rax*24)
222 addq %rax, bufp
223
224 movdqa (bufp), %xmm0 # 2 consts: K1:K2
225
226 movq crc_init, %xmm1 # CRC for block 1
227 pclmulqdq $0x00,%xmm0,%xmm1 # Multiply by K2
228
229 movq crc1, %xmm2 # CRC for block 2
230 pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
231
232 pxor %xmm2,%xmm1
233 movq %xmm1, %rax
234 xor -i*8(block_2), %rax
235 mov crc2, crc_init
236 crc32 %rax, crc_init
237
238################################################################
239## 5) Check for end:
240################################################################
241
242LABEL crc_ 0
243 mov tmp, len
244 cmp $128*24, tmp
245 jae full_block
246 cmp $24, tmp
247 jae continue_block
248
249less_than_24:
250 shl $32-4, len_dw # less_than_16 expects length
251 # in upper 4 bits of len_dw
252 jnc less_than_16
253 crc32q (bufptmp), crc_init
254 crc32q 8(bufptmp), crc_init
255 jz do_return
256 add $16, bufptmp
257 # len is less than 8 if we got here
258 # less_than_8 expects length in upper 3 bits of len_dw
259 # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
260 shl $2, len_dw
261 jmp less_than_8_post_shl1
262
263 #######################################################################
 264	## 6) LESS THAN 256 bytes REMAIN AT THIS POINT (8 bits of len are full)
265 #######################################################################
266small:
267 shl $32-8, len_dw # Prepare len_dw for less_than_256
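	# With the remaining length (< 256) parked in the top 8 bits of len_dw,
	# each "shl $1" below moves the next power-of-two bit into CF (set:
	# process a j-byte chunk) and sets ZF when no smaller chunks remain.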
268 j=256
269.rept 5 # j = {256, 128, 64, 32, 16}
270.altmacro
271LABEL less_than_ %j # less_than_j: Length should be in
272 # upper lg(j) bits of len_dw
273 j=(j/2)
274 shl $1, len_dw # Get next MSB
275 JNC_LESS_THAN %j
276.noaltmacro
277 i=0
278.rept (j/8)
279 crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
280 i=i+8
281.endr
282 jz do_return # Return if remaining length is zero
283 add $j, bufptmp # Advance buf
284.endr
285
286less_than_8: # Length should be stored in
287 # upper 3 bits of len_dw
288 shl $1, len_dw
289less_than_8_post_shl1:
290 jnc less_than_4
291 crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
292 jz do_return # return if remaining data is zero
293 add $4, bufptmp
294less_than_4: # Length should be stored in
295 # upper 2 bits of len_dw
296 shl $1, len_dw
297 jnc less_than_2
298 crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
299 jz do_return # return if remaining data is zero
300 add $2, bufptmp
301less_than_2: # Length should be stored in the MSB
302 # of len_dw
303 shl $1, len_dw
304 jnc less_than_1
305 crc32b (bufptmp), crc_init_dw # CRC of 1 byte
306less_than_1: # Length should be zero
307do_return:
308 movq crc_init, %rax
309 popq %rsi
310 popq %rdi
311 popq %rbx
312 ret
313
314 ################################################################
 315	## jump table: 129 entries x 2 bytes each
316 ################################################################
317.align 4
318jump_table:
319 i=0
320.rept 129
321.altmacro
322JMPTBL_ENTRY %i
323.noaltmacro
324 i=i+1
325.endr
326 ################################################################
327 ## PCLMULQDQ tables
328 ## Table is 128 entries x 2 quad words each
329 ################################################################
330.data
331.align 64
332K_table:
333 .quad 0x14cd00bd6,0x105ec76f0
334 .quad 0x0ba4fc28e,0x14cd00bd6
335 .quad 0x1d82c63da,0x0f20c0dfe
336 .quad 0x09e4addf8,0x0ba4fc28e
337 .quad 0x039d3b296,0x1384aa63a
338 .quad 0x102f9b8a2,0x1d82c63da
339 .quad 0x14237f5e6,0x01c291d04
340 .quad 0x00d3b6092,0x09e4addf8
341 .quad 0x0c96cfdc0,0x0740eef02
342 .quad 0x18266e456,0x039d3b296
343 .quad 0x0daece73e,0x0083a6eec
344 .quad 0x0ab7aff2a,0x102f9b8a2
345 .quad 0x1248ea574,0x1c1733996
346 .quad 0x083348832,0x14237f5e6
347 .quad 0x12c743124,0x02ad91c30
348 .quad 0x0b9e02b86,0x00d3b6092
349 .quad 0x018b33a4e,0x06992cea2
350 .quad 0x1b331e26a,0x0c96cfdc0
351 .quad 0x17d35ba46,0x07e908048
352 .quad 0x1bf2e8b8a,0x18266e456
353 .quad 0x1a3e0968a,0x11ed1f9d8
354 .quad 0x0ce7f39f4,0x0daece73e
355 .quad 0x061d82e56,0x0f1d0f55e
356 .quad 0x0d270f1a2,0x0ab7aff2a
357 .quad 0x1c3f5f66c,0x0a87ab8a8
358 .quad 0x12ed0daac,0x1248ea574
359 .quad 0x065863b64,0x08462d800
360 .quad 0x11eef4f8e,0x083348832
361 .quad 0x1ee54f54c,0x071d111a8
362 .quad 0x0b3e32c28,0x12c743124
363 .quad 0x0064f7f26,0x0ffd852c6
364 .quad 0x0dd7e3b0c,0x0b9e02b86
365 .quad 0x0f285651c,0x0dcb17aa4
366 .quad 0x010746f3c,0x018b33a4e
367 .quad 0x1c24afea4,0x0f37c5aee
368 .quad 0x0271d9844,0x1b331e26a
369 .quad 0x08e766a0c,0x06051d5a2
370 .quad 0x093a5f730,0x17d35ba46
371 .quad 0x06cb08e5c,0x11d5ca20e
372 .quad 0x06b749fb2,0x1bf2e8b8a
373 .quad 0x1167f94f2,0x021f3d99c
374 .quad 0x0cec3662e,0x1a3e0968a
375 .quad 0x19329634a,0x08f158014
376 .quad 0x0e6fc4e6a,0x0ce7f39f4
377 .quad 0x08227bb8a,0x1a5e82106
378 .quad 0x0b0cd4768,0x061d82e56
379 .quad 0x13c2b89c4,0x188815ab2
380 .quad 0x0d7a4825c,0x0d270f1a2
381 .quad 0x10f5ff2ba,0x105405f3e
382 .quad 0x00167d312,0x1c3f5f66c
383 .quad 0x0f6076544,0x0e9adf796
384 .quad 0x026f6a60a,0x12ed0daac
385 .quad 0x1a2adb74e,0x096638b34
386 .quad 0x19d34af3a,0x065863b64
387 .quad 0x049c3cc9c,0x1e50585a0
388 .quad 0x068bce87a,0x11eef4f8e
389 .quad 0x1524fa6c6,0x19f1c69dc
390 .quad 0x16cba8aca,0x1ee54f54c
391 .quad 0x042d98888,0x12913343e
392 .quad 0x1329d9f7e,0x0b3e32c28
393 .quad 0x1b1c69528,0x088f25a3a
394 .quad 0x02178513a,0x0064f7f26
395 .quad 0x0e0ac139e,0x04e36f0b0
396 .quad 0x0170076fa,0x0dd7e3b0c
397 .quad 0x141a1a2e2,0x0bd6f81f8
398 .quad 0x16ad828b4,0x0f285651c
399 .quad 0x041d17b64,0x19425cbba
400 .quad 0x1fae1cc66,0x010746f3c
401 .quad 0x1a75b4b00,0x18db37e8a
402 .quad 0x0f872e54c,0x1c24afea4
403 .quad 0x01e41e9fc,0x04c144932
404 .quad 0x086d8e4d2,0x0271d9844
405 .quad 0x160f7af7a,0x052148f02
406 .quad 0x05bb8f1bc,0x08e766a0c
407 .quad 0x0a90fd27a,0x0a3c6f37a
408 .quad 0x0b3af077a,0x093a5f730
409 .quad 0x04984d782,0x1d22c238e
410 .quad 0x0ca6ef3ac,0x06cb08e5c
411 .quad 0x0234e0b26,0x063ded06a
412 .quad 0x1d88abd4a,0x06b749fb2
413 .quad 0x04597456a,0x04d56973c
414 .quad 0x0e9e28eb4,0x1167f94f2
415 .quad 0x07b3ff57a,0x19385bf2e
416 .quad 0x0c9c8b782,0x0cec3662e
417 .quad 0x13a9cba9e,0x0e417f38a
418 .quad 0x093e106a4,0x19329634a
419 .quad 0x167001a9c,0x14e727980
420 .quad 0x1ddffc5d4,0x0e6fc4e6a
421 .quad 0x00df04680,0x0d104b8fc
422 .quad 0x02342001e,0x08227bb8a
423 .quad 0x00a2a8d7e,0x05b397730
424 .quad 0x168763fa6,0x0b0cd4768
425 .quad 0x1ed5a407a,0x0e78eb416
426 .quad 0x0d2c3ed1a,0x13c2b89c4
427 .quad 0x0995a5724,0x1641378f0
428 .quad 0x19b1afbc4,0x0d7a4825c
429 .quad 0x109ffedc0,0x08d96551c
430 .quad 0x0f2271e60,0x10f5ff2ba
431 .quad 0x00b0bf8ca,0x00bf80dd2
432 .quad 0x123888b7a,0x00167d312
433 .quad 0x1e888f7dc,0x18dcddd1c
434 .quad 0x002ee03b2,0x0f6076544
435 .quad 0x183e8d8fe,0x06a45d2b2
436 .quad 0x133d7a042,0x026f6a60a
437 .quad 0x116b0f50c,0x1dd3e10e8
438 .quad 0x05fabe670,0x1a2adb74e
439 .quad 0x130004488,0x0de87806c
440 .quad 0x000bcf5f6,0x19d34af3a
441 .quad 0x18f0c7078,0x014338754
442 .quad 0x017f27698,0x049c3cc9c
443 .quad 0x058ca5f00,0x15e3e77ee
444 .quad 0x1af900c24,0x068bce87a
445 .quad 0x0b5cfca28,0x0dd07448e
446 .quad 0x0ded288f8,0x1524fa6c6
447 .quad 0x059f229bc,0x1d8048348
448 .quad 0x06d390dec,0x16cba8aca
449 .quad 0x037170390,0x0a3e3e02c
450 .quad 0x06353c1cc,0x042d98888
451 .quad 0x0c4584f5c,0x0d73c7bea
452 .quad 0x1f16a3418,0x1329d9f7e
453 .quad 0x0531377e2,0x185137662
454 .quad 0x1d8d9ca7c,0x1b1c69528
455 .quad 0x0b25b29f2,0x18a08b5bc
456 .quad 0x19fb2a8b0,0x02178513a
457 .quad 0x1a08fe6ac,0x1da758ae0
458 .quad 0x045cddf4e,0x0e0ac139e
459 .quad 0x1a91647f2,0x169cf9eb0
460 .quad 0x1a0f717c4,0x0170076fa
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
new file mode 100644
index 000000000000..f7b6ea2ddfdb
--- /dev/null
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -0,0 +1,91 @@
1/*
2 * Shared glue code for 128bit block ciphers, AVX assembler macros
3 *
4 * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 */
17
18#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
19 vmovdqu (0*16)(src), x0; \
20 vmovdqu (1*16)(src), x1; \
21 vmovdqu (2*16)(src), x2; \
22 vmovdqu (3*16)(src), x3; \
23 vmovdqu (4*16)(src), x4; \
24 vmovdqu (5*16)(src), x5; \
25 vmovdqu (6*16)(src), x6; \
26 vmovdqu (7*16)(src), x7;
27
28#define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
29 vmovdqu x0, (0*16)(dst); \
30 vmovdqu x1, (1*16)(dst); \
31 vmovdqu x2, (2*16)(dst); \
32 vmovdqu x3, (3*16)(dst); \
33 vmovdqu x4, (4*16)(dst); \
34 vmovdqu x5, (5*16)(dst); \
35 vmovdqu x6, (6*16)(dst); \
36 vmovdqu x7, (7*16)(dst);
37
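/*
 * CBC decryption store: XOR decrypted blocks x1..x7 with the previous
 * ciphertext blocks still at (src), then store all eight; x0 is written
 * as-is, its XOR with the IV is left to the caller.
 */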
38#define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
39 vpxor (0*16)(src), x1, x1; \
40 vpxor (1*16)(src), x2, x2; \
41 vpxor (2*16)(src), x3, x3; \
42 vpxor (3*16)(src), x4, x4; \
43 vpxor (4*16)(src), x5, x5; \
44 vpxor (5*16)(src), x6, x6; \
45 vpxor (6*16)(src), x7, x7; \
46 store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
47
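/*
 * Add 1 to the 128-bit little-endian value in x. minus_one is expected to
 * hold {low: -1, high: 0}: the vpsubq increments the low qword, while the
 * vpcmpeqq/vpslldq pair detects that the low qword was all-ones before the
 * increment and carries the overflow into the high qword.
 */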
48#define inc_le128(x, minus_one, tmp) \
49 vpcmpeqq minus_one, x, tmp; \
50 vpsubq minus_one, x, x; \
51 vpslldq $8, tmp, tmp; \
52 vpsubq tmp, x, x;
53
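/*
 * Expand the little-endian 128-bit counter at (iv) into eight consecutive
 * counter blocks in x0..x7 (byte-swapped to big endian via the bswap mask)
 * and write the counter, advanced by 8, back to (iv).
 */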
54#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
55 vpcmpeqd t0, t0, t0; \
56 vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
57 vmovdqa bswap, t1; \
58 \
59 /* load IV and byteswap */ \
60 vmovdqu (iv), x7; \
61 vpshufb t1, x7, x0; \
62 \
63 /* construct IVs */ \
64 inc_le128(x7, t0, t2); \
65 vpshufb t1, x7, x1; \
66 inc_le128(x7, t0, t2); \
67 vpshufb t1, x7, x2; \
68 inc_le128(x7, t0, t2); \
69 vpshufb t1, x7, x3; \
70 inc_le128(x7, t0, t2); \
71 vpshufb t1, x7, x4; \
72 inc_le128(x7, t0, t2); \
73 vpshufb t1, x7, x5; \
74 inc_le128(x7, t0, t2); \
75 vpshufb t1, x7, x6; \
76 inc_le128(x7, t0, t2); \
77 vmovdqa x7, t2; \
78 vpshufb t1, x7, x7; \
79 inc_le128(t2, t0, t1); \
80 vmovdqu t2, (iv);
81
82#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
83 vpxor (0*16)(src), x0, x0; \
84 vpxor (1*16)(src), x1, x1; \
85 vpxor (2*16)(src), x2, x2; \
86 vpxor (3*16)(src), x3, x3; \
87 vpxor (4*16)(src), x4, x4; \
88 vpxor (5*16)(src), x5, x5; \
89 vpxor (6*16)(src), x6, x6; \
90 vpxor (7*16)(src), x7, x7; \
91 store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 30b3927bd733..22ce4f683e55 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -221,16 +221,16 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
221 u8 *src = (u8 *)walk->src.virt.addr; 221 u8 *src = (u8 *)walk->src.virt.addr;
222 u8 *dst = (u8 *)walk->dst.virt.addr; 222 u8 *dst = (u8 *)walk->dst.virt.addr;
223 unsigned int nbytes = walk->nbytes; 223 unsigned int nbytes = walk->nbytes;
224 u128 ctrblk; 224 le128 ctrblk;
225 u128 tmp; 225 u128 tmp;
226 226
227 be128_to_u128(&ctrblk, (be128 *)walk->iv); 227 be128_to_le128(&ctrblk, (be128 *)walk->iv);
228 228
229 memcpy(&tmp, src, nbytes); 229 memcpy(&tmp, src, nbytes);
230 fn_ctr(ctx, &tmp, &tmp, &ctrblk); 230 fn_ctr(ctx, &tmp, &tmp, &ctrblk);
231 memcpy(dst, &tmp, nbytes); 231 memcpy(dst, &tmp, nbytes);
232 232
233 u128_to_be128((be128 *)walk->iv, &ctrblk); 233 le128_to_be128((be128 *)walk->iv, &ctrblk);
234} 234}
235EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit); 235EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
236 236
@@ -243,11 +243,11 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
243 unsigned int nbytes = walk->nbytes; 243 unsigned int nbytes = walk->nbytes;
244 u128 *src = (u128 *)walk->src.virt.addr; 244 u128 *src = (u128 *)walk->src.virt.addr;
245 u128 *dst = (u128 *)walk->dst.virt.addr; 245 u128 *dst = (u128 *)walk->dst.virt.addr;
246 u128 ctrblk; 246 le128 ctrblk;
247 unsigned int num_blocks, func_bytes; 247 unsigned int num_blocks, func_bytes;
248 unsigned int i; 248 unsigned int i;
249 249
250 be128_to_u128(&ctrblk, (be128 *)walk->iv); 250 be128_to_le128(&ctrblk, (be128 *)walk->iv);
251 251
252 /* Process multi-block batch */ 252 /* Process multi-block batch */
253 for (i = 0; i < gctx->num_funcs; i++) { 253 for (i = 0; i < gctx->num_funcs; i++) {
@@ -269,7 +269,7 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
269 } 269 }
270 270
271done: 271done:
272 u128_to_be128((be128 *)walk->iv, &ctrblk); 272 le128_to_be128((be128 *)walk->iv, &ctrblk);
273 return nbytes; 273 return nbytes;
274} 274}
275 275
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 504106bf04a2..02b0e9fe997c 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -24,7 +24,16 @@
24 * 24 *
25 */ 25 */
26 26
27#include "glue_helper-asm-avx.S"
28
27.file "serpent-avx-x86_64-asm_64.S" 29.file "serpent-avx-x86_64-asm_64.S"
30
31.data
32.align 16
33
34.Lbswap128_mask:
35 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
36
28.text 37.text
29 38
30#define CTX %rdi 39#define CTX %rdi
@@ -550,51 +559,27 @@
550 vpunpcklqdq x3, t2, x2; \ 559 vpunpcklqdq x3, t2, x2; \
551 vpunpckhqdq x3, t2, x3; 560 vpunpckhqdq x3, t2, x3;
552 561
553#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ 562#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \
554 vmovdqu (0*4*4)(in), x0; \
555 vmovdqu (1*4*4)(in), x1; \
556 vmovdqu (2*4*4)(in), x2; \
557 vmovdqu (3*4*4)(in), x3; \
558 \
559 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) 563 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
560 564
561#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ 565#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
562 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ 566 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
563 \
564 vmovdqu x0, (0*4*4)(out); \
565 vmovdqu x1, (1*4*4)(out); \
566 vmovdqu x2, (2*4*4)(out); \
567 vmovdqu x3, (3*4*4)(out);
568
569#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
570 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
571 \
572 vpxor (0*4*4)(out), x0, x0; \
573 vmovdqu x0, (0*4*4)(out); \
574 vpxor (1*4*4)(out), x1, x1; \
575 vmovdqu x1, (1*4*4)(out); \
576 vpxor (2*4*4)(out), x2, x2; \
577 vmovdqu x2, (2*4*4)(out); \
578 vpxor (3*4*4)(out), x3, x3; \
579 vmovdqu x3, (3*4*4)(out);
580 567
581.align 8 568.align 8
582.global __serpent_enc_blk_8way_avx 569.type __serpent_enc_blk8_avx,@function;
583.type __serpent_enc_blk_8way_avx,@function;
584 570
585__serpent_enc_blk_8way_avx: 571__serpent_enc_blk8_avx:
586 /* input: 572 /* input:
587 * %rdi: ctx, CTX 573 * %rdi: ctx, CTX
588 * %rsi: dst 574 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
589 * %rdx: src 575 * output:
590 * %rcx: bool, if true: xor output 576 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
591 */ 577 */
592 578
593 vpcmpeqd RNOT, RNOT, RNOT; 579 vpcmpeqd RNOT, RNOT, RNOT;
594 580
595 leaq (4*4*4)(%rdx), %rax; 581 read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
596 read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); 582 read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
597 read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
598 583
599 K2(RA, RB, RC, RD, RE, 0); 584 K2(RA, RB, RC, RD, RE, 0);
600 S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); 585 S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
@@ -630,38 +615,26 @@ __serpent_enc_blk_8way_avx:
630 S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); 615 S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
631 S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); 616 S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
632 617
633 leaq (4*4*4)(%rsi), %rax; 618 write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
634 619 write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
635 testb %cl, %cl;
636 jnz __enc_xor8;
637
638 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
639 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
640
641 ret;
642
643__enc_xor8:
644 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
645 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
646 620
647 ret; 621 ret;
648 622
649.align 8 623.align 8
650.global serpent_dec_blk_8way_avx 624.type __serpent_dec_blk8_avx,@function;
651.type serpent_dec_blk_8way_avx,@function;
652 625
653serpent_dec_blk_8way_avx: 626__serpent_dec_blk8_avx:
654 /* input: 627 /* input:
655 * %rdi: ctx, CTX 628 * %rdi: ctx, CTX
656 * %rsi: dst 629 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
657 * %rdx: src 630 * output:
631 * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: decrypted blocks
658 */ 632 */
659 633
660 vpcmpeqd RNOT, RNOT, RNOT; 634 vpcmpeqd RNOT, RNOT, RNOT;
661 635
662 leaq (4*4*4)(%rdx), %rax; 636 read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
663 read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); 637 read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
664 read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
665 638
666 K2(RA, RB, RC, RD, RE, 32); 639 K2(RA, RB, RC, RD, RE, 32);
667 SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); 640 SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
@@ -697,8 +670,85 @@ serpent_dec_blk_8way_avx:
697 SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); 670 SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
698 S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); 671 S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
699 672
700 leaq (4*4*4)(%rsi), %rax; 673 write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
701 write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); 674 write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
702 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); 675
676 ret;
677
678.align 8
679.global serpent_ecb_enc_8way_avx
680.type serpent_ecb_enc_8way_avx,@function;
681
682serpent_ecb_enc_8way_avx:
683 /* input:
684 * %rdi: ctx, CTX
685 * %rsi: dst
686 * %rdx: src
687 */
688
689 load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
690
691 call __serpent_enc_blk8_avx;
692
693 store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
694
695 ret;
696
697.align 8
698.global serpent_ecb_dec_8way_avx
699.type serpent_ecb_dec_8way_avx,@function;
700
701serpent_ecb_dec_8way_avx:
702 /* input:
703 * %rdi: ctx, CTX
704 * %rsi: dst
705 * %rdx: src
706 */
707
708 load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
709
710 call __serpent_dec_blk8_avx;
711
712 store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
713
714 ret;
715
716.align 8
717.global serpent_cbc_dec_8way_avx
718.type serpent_cbc_dec_8way_avx,@function;
719
720serpent_cbc_dec_8way_avx:
721 /* input:
722 * %rdi: ctx, CTX
723 * %rsi: dst
724 * %rdx: src
725 */
726
727 load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
728
729 call __serpent_dec_blk8_avx;
730
731 store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
732
733 ret;
734
735.align 8
736.global serpent_ctr_8way_avx
737.type serpent_ctr_8way_avx,@function;
738
739serpent_ctr_8way_avx:
740 /* input:
741 * %rdi: ctx, CTX
742 * %rsi: dst
743 * %rdx: src
744 * %rcx: iv (little endian, 128bit)
745 */
746
747 load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
748 RD2, RK0, RK1, RK2);
749
750 call __serpent_enc_blk8_avx;
751
752 store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
703 753
704 ret; 754 ret;
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 3f543a04cf1e..52abaaf28e7f 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -42,55 +42,24 @@
42#include <asm/crypto/ablk_helper.h> 42#include <asm/crypto/ablk_helper.h>
43#include <asm/crypto/glue_helper.h> 43#include <asm/crypto/glue_helper.h>
44 44
45static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) 45static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
46{
47 u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
48 unsigned int j;
49
50 for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
51 ivs[j] = src[j];
52
53 serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
54
55 for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
56 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
57}
58
59static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
60{ 46{
61 be128 ctrblk; 47 be128 ctrblk;
62 48
63 u128_to_be128(&ctrblk, iv); 49 le128_to_be128(&ctrblk, iv);
64 u128_inc(iv); 50 le128_inc(iv);
65 51
66 __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 52 __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
67 u128_xor(dst, src, (u128 *)&ctrblk); 53 u128_xor(dst, src, (u128 *)&ctrblk);
68} 54}
69 55
70static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
71 u128 *iv)
72{
73 be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
74 unsigned int i;
75
76 for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
77 if (dst != src)
78 dst[i] = src[i];
79
80 u128_to_be128(&ctrblks[i], iv);
81 u128_inc(iv);
82 }
83
84 serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
85}
86
87static const struct common_glue_ctx serpent_enc = { 56static const struct common_glue_ctx serpent_enc = {
88 .num_funcs = 2, 57 .num_funcs = 2,
89 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, 58 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
90 59
91 .funcs = { { 60 .funcs = { {
92 .num_blocks = SERPENT_PARALLEL_BLOCKS, 61 .num_blocks = SERPENT_PARALLEL_BLOCKS,
93 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } 62 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
94 }, { 63 }, {
95 .num_blocks = 1, 64 .num_blocks = 1,
96 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } 65 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
@@ -103,7 +72,7 @@ static const struct common_glue_ctx serpent_ctr = {
103 72
104 .funcs = { { 73 .funcs = { {
105 .num_blocks = SERPENT_PARALLEL_BLOCKS, 74 .num_blocks = SERPENT_PARALLEL_BLOCKS,
106 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } 75 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
107 }, { 76 }, {
108 .num_blocks = 1, 77 .num_blocks = 1,
109 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } 78 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
@@ -116,7 +85,7 @@ static const struct common_glue_ctx serpent_dec = {
116 85
117 .funcs = { { 86 .funcs = { {
118 .num_blocks = SERPENT_PARALLEL_BLOCKS, 87 .num_blocks = SERPENT_PARALLEL_BLOCKS,
119 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } 88 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
120 }, { 89 }, {
121 .num_blocks = 1, 90 .num_blocks = 1,
122 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } 91 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
@@ -129,7 +98,7 @@ static const struct common_glue_ctx serpent_dec_cbc = {
129 98
130 .funcs = { { 99 .funcs = { {
131 .num_blocks = SERPENT_PARALLEL_BLOCKS, 100 .num_blocks = SERPENT_PARALLEL_BLOCKS,
132 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } 101 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
133 }, { 102 }, {
134 .num_blocks = 1, 103 .num_blocks = 1,
135 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } 104 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
@@ -193,7 +162,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
193 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); 162 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
194 163
195 if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { 164 if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
196 serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); 165 serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
197 return; 166 return;
198 } 167 }
199 168
@@ -210,7 +179,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
210 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); 179 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
211 180
212 if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { 181 if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
213 serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); 182 serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
214 return; 183 return;
215 } 184 }
216 185
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 9107a9908c41..97a356ece24d 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -59,19 +59,19 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
59 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); 59 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
60} 60}
61 61
62static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) 62static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
63{ 63{
64 be128 ctrblk; 64 be128 ctrblk;
65 65
66 u128_to_be128(&ctrblk, iv); 66 le128_to_be128(&ctrblk, iv);
67 u128_inc(iv); 67 le128_inc(iv);
68 68
69 __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 69 __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
70 u128_xor(dst, src, (u128 *)&ctrblk); 70 u128_xor(dst, src, (u128 *)&ctrblk);
71} 71}
72 72
73static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, 73static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
74 u128 *iv) 74 le128 *iv)
75{ 75{
76 be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; 76 be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
77 unsigned int i; 77 unsigned int i;
@@ -80,8 +80,8 @@ static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
80 if (dst != src) 80 if (dst != src)
81 dst[i] = src[i]; 81 dst[i] = src[i];
82 82
83 u128_to_be128(&ctrblks[i], iv); 83 le128_to_be128(&ctrblks[i], iv);
84 u128_inc(iv); 84 le128_inc(iv);
85 } 85 }
86 86
87 serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); 87 serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 1585abb13dde..ebac16bfa830 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -23,7 +23,16 @@
23 * 23 *
24 */ 24 */
25 25
26#include "glue_helper-asm-avx.S"
27
26.file "twofish-avx-x86_64-asm_64.S" 28.file "twofish-avx-x86_64-asm_64.S"
29
30.data
31.align 16
32
33.Lbswap128_mask:
34 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
35
27.text 36.text
28 37
29/* structure of crypto context */ 38/* structure of crypto context */
@@ -217,69 +226,45 @@
217 vpunpcklqdq x3, t2, x2; \ 226 vpunpcklqdq x3, t2, x2; \
218 vpunpckhqdq x3, t2, x3; 227 vpunpckhqdq x3, t2, x3;
219 228
220#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \ 229#define inpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \
221 vpxor (0*4*4)(in), wkey, x0; \ 230 vpxor x0, wkey, x0; \
222 vpxor (1*4*4)(in), wkey, x1; \ 231 vpxor x1, wkey, x1; \
223 vpxor (2*4*4)(in), wkey, x2; \ 232 vpxor x2, wkey, x2; \
224 vpxor (3*4*4)(in), wkey, x3; \ 233 vpxor x3, wkey, x3; \
225 \ 234 \
226 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) 235 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
227 236
228#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ 237#define outunpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \
229 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
230 \
231 vpxor x0, wkey, x0; \
232 vmovdqu x0, (0*4*4)(out); \
233 vpxor x1, wkey, x1; \
234 vmovdqu x1, (1*4*4)(out); \
235 vpxor x2, wkey, x2; \
236 vmovdqu x2, (2*4*4)(out); \
237 vpxor x3, wkey, x3; \
238 vmovdqu x3, (3*4*4)(out);
239
240#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
241 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ 238 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
242 \ 239 \
243 vpxor x0, wkey, x0; \ 240 vpxor x0, wkey, x0; \
244 vpxor (0*4*4)(out), x0, x0; \ 241 vpxor x1, wkey, x1; \
245 vmovdqu x0, (0*4*4)(out); \ 242 vpxor x2, wkey, x2; \
246 vpxor x1, wkey, x1; \ 243 vpxor x3, wkey, x3;
247 vpxor (1*4*4)(out), x1, x1; \
248 vmovdqu x1, (1*4*4)(out); \
249 vpxor x2, wkey, x2; \
250 vpxor (2*4*4)(out), x2, x2; \
251 vmovdqu x2, (2*4*4)(out); \
252 vpxor x3, wkey, x3; \
253 vpxor (3*4*4)(out), x3, x3; \
254 vmovdqu x3, (3*4*4)(out);
255 244
256.align 8 245.align 8
257.global __twofish_enc_blk_8way 246.type __twofish_enc_blk8,@function;
258.type __twofish_enc_blk_8way,@function;
259 247
260__twofish_enc_blk_8way: 248__twofish_enc_blk8:
261 /* input: 249 /* input:
262 * %rdi: ctx, CTX 250 * %rdi: ctx, CTX
263 * %rsi: dst 251 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
264 * %rdx: src 252 * output:
265 * %rcx: bool, if true: xor output 253 * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
266 */ 254 */
267 255
256 vmovdqu w(CTX), RK1;
257
268 pushq %rbp; 258 pushq %rbp;
269 pushq %rbx; 259 pushq %rbx;
270 pushq %rcx; 260 pushq %rcx;
271 261
272 vmovdqu w(CTX), RK1; 262 inpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
273
274 leaq (4*4*4)(%rdx), %rax;
275 inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
276 preload_rgi(RA1); 263 preload_rgi(RA1);
277 rotate_1l(RD1); 264 rotate_1l(RD1);
278 inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); 265 inpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
279 rotate_1l(RD2); 266 rotate_1l(RD2);
280 267
281 movq %rsi, %r11;
282
283 encrypt_cycle(0); 268 encrypt_cycle(0);
284 encrypt_cycle(1); 269 encrypt_cycle(1);
285 encrypt_cycle(2); 270 encrypt_cycle(2);
@@ -295,47 +280,33 @@ __twofish_enc_blk_8way:
295 popq %rbx; 280 popq %rbx;
296 popq %rbp; 281 popq %rbp;
297 282
298 leaq (4*4*4)(%r11), %rax; 283 outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
299 284 outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
300 testb %cl, %cl;
301 jnz __enc_xor8;
302
303 outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
304 outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
305
306 ret;
307
308__enc_xor8:
309 outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
310 outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
311 285
312 ret; 286 ret;
313 287
314.align 8 288.align 8
315.global twofish_dec_blk_8way 289.type __twofish_dec_blk8,@function;
316.type twofish_dec_blk_8way,@function;
317 290
318twofish_dec_blk_8way: 291__twofish_dec_blk8:
319 /* input: 292 /* input:
320 * %rdi: ctx, CTX 293 * %rdi: ctx, CTX
321 * %rsi: dst 294 * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
322 * %rdx: src 295 * output:
296 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
323 */ 297 */
324 298
299 vmovdqu (w+4*4)(CTX), RK1;
300
325 pushq %rbp; 301 pushq %rbp;
326 pushq %rbx; 302 pushq %rbx;
327 303
328 vmovdqu (w+4*4)(CTX), RK1; 304 inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
329
330 leaq (4*4*4)(%rdx), %rax;
331 inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
332 preload_rgi(RC1); 305 preload_rgi(RC1);
333 rotate_1l(RA1); 306 rotate_1l(RA1);
334 inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); 307 inpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
335 rotate_1l(RA2); 308 rotate_1l(RA2);
336 309
337 movq %rsi, %r11;
338
339 decrypt_cycle(7); 310 decrypt_cycle(7);
340 decrypt_cycle(6); 311 decrypt_cycle(6);
341 decrypt_cycle(5); 312 decrypt_cycle(5);
@@ -350,8 +321,103 @@ twofish_dec_blk_8way:
350 popq %rbx; 321 popq %rbx;
351 popq %rbp; 322 popq %rbp;
352 323
353 leaq (4*4*4)(%r11), %rax; 324 outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
354 outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); 325 outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
355 outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); 326
327 ret;
328
329.align 8
330.global twofish_ecb_enc_8way
331.type twofish_ecb_enc_8way,@function;
332
333twofish_ecb_enc_8way:
334 /* input:
335 * %rdi: ctx, CTX
336 * %rsi: dst
337 * %rdx: src
338 */
339
340 movq %rsi, %r11;
341
342 load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
343
344 call __twofish_enc_blk8;
345
346 store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
347
348 ret;
349
350.align 8
351.global twofish_ecb_dec_8way
352.type twofish_ecb_dec_8way,@function;
353
354twofish_ecb_dec_8way:
355 /* input:
356 * %rdi: ctx, CTX
357 * %rsi: dst
358 * %rdx: src
359 */
360
361 movq %rsi, %r11;
362
363 load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
364
365 call __twofish_dec_blk8;
366
367 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
368
369 ret;
370
371.align 8
372.global twofish_cbc_dec_8way
373.type twofish_cbc_dec_8way,@function;
374
375twofish_cbc_dec_8way:
376 /* input:
377 * %rdi: ctx, CTX
378 * %rsi: dst
379 * %rdx: src
380 */
381
382 pushq %r12;
383
384 movq %rsi, %r11;
385 movq %rdx, %r12;
386
387 load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
388
389 call __twofish_dec_blk8;
390
391 store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
392
393 popq %r12;
394
395 ret;
396
397.align 8
398.global twofish_ctr_8way
399.type twofish_ctr_8way,@function;
400
401twofish_ctr_8way:
402 /* input:
403 * %rdi: ctx, CTX
404 * %rsi: dst
405 * %rdx: src
406 * %rcx: iv (little endian, 128bit)
407 */
408
409 pushq %r12;
410
411 movq %rsi, %r11;
412 movq %rdx, %r12;
413
414 load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
415 RD2, RX0, RX1, RY0);
416
417 call __twofish_enc_blk8;
418
419 store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
420
421 popq %r12;
356 422
357 ret; 423 ret;
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index e7708b5442e0..94ac91d26e47 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -45,66 +45,23 @@
45 45
46#define TWOFISH_PARALLEL_BLOCKS 8 46#define TWOFISH_PARALLEL_BLOCKS 8
47 47
48static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
49 const u8 *src)
50{
51 __twofish_enc_blk_3way(ctx, dst, src, false);
52}
53
54/* 8-way parallel cipher functions */ 48/* 8-way parallel cipher functions */
55asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, 49asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
56 const u8 *src, bool xor); 50 const u8 *src);
57asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst, 51asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
58 const u8 *src); 52 const u8 *src);
59 53
60static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst, 54asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
61 const u8 *src) 55 const u8 *src);
62{ 56asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
63 __twofish_enc_blk_8way(ctx, dst, src, false); 57 const u8 *src, le128 *iv);
64}
65
66static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst,
67 const u8 *src)
68{
69 __twofish_enc_blk_8way(ctx, dst, src, true);
70}
71 58
72static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, 59static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
73 const u8 *src) 60 const u8 *src)
74{ 61{
75 twofish_dec_blk_8way(ctx, dst, src); 62 __twofish_enc_blk_3way(ctx, dst, src, false);
76}
77
78static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
79{
80 u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1];
81 unsigned int j;
82
83 for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
84 ivs[j] = src[j];
85
86 twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
87
88 for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
89 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
90} 63}
91 64
92static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
93 u128 *iv)
94{
95 be128 ctrblks[TWOFISH_PARALLEL_BLOCKS];
96 unsigned int i;
97
98 for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) {
99 if (dst != src)
100 dst[i] = src[i];
101
102 u128_to_be128(&ctrblks[i], iv);
103 u128_inc(iv);
104 }
105
106 twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
107}
108 65
109static const struct common_glue_ctx twofish_enc = { 66static const struct common_glue_ctx twofish_enc = {
110 .num_funcs = 3, 67 .num_funcs = 3,
@@ -112,7 +69,7 @@ static const struct common_glue_ctx twofish_enc = {
112 69
113 .funcs = { { 70 .funcs = { {
114 .num_blocks = TWOFISH_PARALLEL_BLOCKS, 71 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
115 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) } 72 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
116 }, { 73 }, {
117 .num_blocks = 3, 74 .num_blocks = 3,
118 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } 75 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
@@ -128,7 +85,7 @@ static const struct common_glue_ctx twofish_ctr = {
128 85
129 .funcs = { { 86 .funcs = { {
130 .num_blocks = TWOFISH_PARALLEL_BLOCKS, 87 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
131 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) } 88 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
132 }, { 89 }, {
133 .num_blocks = 3, 90 .num_blocks = 3,
134 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } 91 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
@@ -144,7 +101,7 @@ static const struct common_glue_ctx twofish_dec = {
144 101
145 .funcs = { { 102 .funcs = { {
146 .num_blocks = TWOFISH_PARALLEL_BLOCKS, 103 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
147 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) } 104 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
148 }, { 105 }, {
149 .num_blocks = 3, 106 .num_blocks = 3,
150 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } 107 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
@@ -160,7 +117,7 @@ static const struct common_glue_ctx twofish_dec_cbc = {
160 117
161 .funcs = { { 118 .funcs = { {
162 .num_blocks = TWOFISH_PARALLEL_BLOCKS, 119 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
163 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) } 120 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
164 }, { 121 }, {
165 .num_blocks = 3, 122 .num_blocks = 3,
166 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } 123 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
@@ -227,7 +184,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
227 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); 184 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
228 185
229 if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { 186 if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
230 twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst); 187 twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
231 return; 188 return;
232 } 189 }
233 190
@@ -249,7 +206,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
249 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); 206 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
250 207
251 if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { 208 if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
252 twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst); 209 twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
253 return; 210 return;
254 } 211 }
255 212
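The glue tables above register the 8-way functions next to the existing 3-way and 1-way ones, keyed by num_blocks. A rough sketch of the fallback idea, assuming the struct layout from asm/crypto/glue_helper.h and a 16-byte block size; this is not the actual glue_helper implementation, just the dispatch shape it relies on (the tables always end with a 1-block entry, so the walk terminates):

	/* Sketch of widest-first dispatch over a common_glue_ctx table. */
	static void ecb_walk_sketch(const struct common_glue_ctx *gctx,
				    void *ctx, u8 *dst, const u8 *src,
				    unsigned int nblocks)
	{
		const unsigned int bsize = 16;	/* twofish block size */
		unsigned int i;

		while (nblocks) {
			for (i = 0; i < gctx->num_funcs; i++) {
				unsigned int n = gctx->funcs[i].num_blocks;

				if (nblocks >= n) {
					gctx->funcs[i].fn_u.ecb(ctx, dst, src);
					src += n * bsize;
					dst += n * bsize;
					nblocks -= n;
					break;
				}
			}
		}
	}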
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index aa3eb358b7e8..13e63b3e1dfb 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -62,15 +62,15 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
62} 62}
63EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); 63EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
64 64
65void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) 65void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
66{ 66{
67 be128 ctrblk; 67 be128 ctrblk;
68 68
69 if (dst != src) 69 if (dst != src)
70 *dst = *src; 70 *dst = *src;
71 71
72 u128_to_be128(&ctrblk, iv); 72 le128_to_be128(&ctrblk, iv);
73 u128_inc(iv); 73 le128_inc(iv);
74 74
75 twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 75 twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
76 u128_xor(dst, dst, (u128 *)&ctrblk); 76 u128_xor(dst, dst, (u128 *)&ctrblk);
@@ -78,7 +78,7 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
78EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); 78EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
79 79
80void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, 80void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
81 u128 *iv) 81 le128 *iv)
82{ 82{
83 be128 ctrblks[3]; 83 be128 ctrblks[3];
84 84
@@ -88,12 +88,12 @@ void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
88 dst[2] = src[2]; 88 dst[2] = src[2];
89 } 89 }
90 90
91 u128_to_be128(&ctrblks[0], iv); 91 le128_to_be128(&ctrblks[0], iv);
92 u128_inc(iv); 92 le128_inc(iv);
93 u128_to_be128(&ctrblks[1], iv); 93 le128_to_be128(&ctrblks[1], iv);
94 u128_inc(iv); 94 le128_inc(iv);
95 u128_to_be128(&ctrblks[2], iv); 95 le128_to_be128(&ctrblks[2], iv);
96 u128_inc(iv); 96 le128_inc(iv);
97 97
98 twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); 98 twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
99} 99}
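The hunk above switches the CTR helpers from u128 to le128: the counter is kept little-endian in the glue layer and converted to a big-endian block only when it is fed to the cipher. A worked example of one step, assuming the le128/be128 types from crypto/b128ops.h and the le128_to_be128()/le128_inc() helpers added to glue_helper.h later in this patch; the values are illustrative:

	static void ctr_iv_example(void)
	{
		le128 iv;
		be128 ctr;

		iv.a = cpu_to_le64(0x0123456789abcdefULL);	/* high 64 bits */
		iv.b = cpu_to_le64(0xffffffffffffffffULL);	/* low 64 bits */

		le128_to_be128(&ctr, &iv);	/* big-endian counter block for the cipher */
		le128_inc(&iv);			/* low qword wraps, carry into high qword */

		/* now: iv.b == 0, iv.a == cpu_to_le64(0x0123456789abcdf0ULL) */
	}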
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 07b3a68d2d29..a703af19c281 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -35,7 +35,7 @@
35#undef WARN_OLD 35#undef WARN_OLD
36#undef CORE_DUMP /* definitely broken */ 36#undef CORE_DUMP /* definitely broken */
37 37
38static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); 38static int load_aout_binary(struct linux_binprm *);
39static int load_aout_library(struct file *); 39static int load_aout_library(struct file *);
40 40
41#ifdef CORE_DUMP 41#ifdef CORE_DUMP
@@ -260,9 +260,10 @@ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
260 * These are the functions used to load a.out style executables and shared 260 * These are the functions used to load a.out style executables and shared
261 * libraries. There is no binary dependent code anywhere else. 261 * libraries. There is no binary dependent code anywhere else.
262 */ 262 */
263static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) 263static int load_aout_binary(struct linux_binprm *bprm)
264{ 264{
265 unsigned long error, fd_offset, rlim; 265 unsigned long error, fd_offset, rlim;
266 struct pt_regs *regs = current_pt_regs();
266 struct exec ex; 267 struct exec ex;
267 int retval; 268 int retval;
268 269
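The a.out change above follows the tree-wide pattern of dropping the pt_regs argument from binfmt loaders and fetching the current task's registers directly. A minimal sketch of the pattern with a hypothetical handler (the entry point and stack values are placeholders, not from this patch):

	static int load_example_binary(struct linux_binprm *bprm)
	{
		struct pt_regs *regs = current_pt_regs();
		unsigned long ex_entry = 0, ex_stack = 0;	/* hypothetical */

		/* ... set up the new image, compute ex_entry/ex_stack ... */

		start_thread(regs, ex_entry, ex_stack);
		return 0;
	}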
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 076745fc8045..32e6f05ddaaa 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -467,11 +467,16 @@ GLOBAL(\label)
467 PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx 467 PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
468 PTREGSCALL stub32_execve, compat_sys_execve, %rcx 468 PTREGSCALL stub32_execve, compat_sys_execve, %rcx
469 PTREGSCALL stub32_fork, sys_fork, %rdi 469 PTREGSCALL stub32_fork, sys_fork, %rdi
470 PTREGSCALL stub32_clone, sys32_clone, %rdx
471 PTREGSCALL stub32_vfork, sys_vfork, %rdi 470 PTREGSCALL stub32_vfork, sys_vfork, %rdi
472 PTREGSCALL stub32_iopl, sys_iopl, %rsi 471 PTREGSCALL stub32_iopl, sys_iopl, %rsi
473 472
474 ALIGN 473 ALIGN
474GLOBAL(stub32_clone)
475 leaq sys_clone(%rip),%rax
476 mov %r8, %rcx
477 jmp ia32_ptregs_common
478
479 ALIGN
475ia32_ptregs_common: 480ia32_ptregs_common:
476 popq %r11 481 popq %r11
477 CFI_ENDPROC 482 CFI_ENDPROC
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 86d68d1c8806..d0b689ba7be2 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -385,17 +385,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
385 return ret; 385 return ret;
386} 386}
387 387
388asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
389 struct pt_regs *regs)
390{
391 void __user *parent_tid = (void __user *)regs->dx;
392 void __user *child_tid = (void __user *)regs->di;
393
394 if (!newsp)
395 newsp = regs->sp;
396 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
397}
398
399/* 388/*
400 * Some system calls that need sign extended arguments. This could be 389 * Some system calls that need sign extended arguments. This could be
401 * done by a generic wrapper. 390 * done by a generic wrapper.
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 79fd8a3418f9..7f669853317a 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -1,30 +1,4 @@
1include include/asm-generic/Kbuild.asm
2 1
3header-y += boot.h
4header-y += bootparam.h
5header-y += debugreg.h
6header-y += e820.h
7header-y += hw_breakpoint.h
8header-y += hyperv.h
9header-y += ist.h
10header-y += ldt.h
11header-y += mce.h
12header-y += msr-index.h
13header-y += msr.h
14header-y += mtrr.h
15header-y += perf_regs.h
16header-y += posix_types_32.h
17header-y += posix_types_64.h
18header-y += posix_types_x32.h
19header-y += prctl.h
20header-y += processor-flags.h
21header-y += ptrace-abi.h
22header-y += sigcontext32.h
23header-y += svm.h
24header-y += ucontext.h
25header-y += vm86.h
26header-y += vmx.h
27header-y += vsyscall.h
28 2
29genhdr-y += unistd_32.h 3genhdr-y += unistd_32.h
30genhdr-y += unistd_64.h 4genhdr-y += unistd_64.h
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b6c3b821acf6..722aa3b04624 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -172,23 +172,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
172 */ 172 */
173static inline int atomic_add_return(int i, atomic_t *v) 173static inline int atomic_add_return(int i, atomic_t *v)
174{ 174{
175#ifdef CONFIG_M386
176 int __i;
177 unsigned long flags;
178 if (unlikely(boot_cpu_data.x86 <= 3))
179 goto no_xadd;
180#endif
181 /* Modern 486+ processor */
182 return i + xadd(&v->counter, i); 175 return i + xadd(&v->counter, i);
183
184#ifdef CONFIG_M386
185no_xadd: /* Legacy 386 processor */
186 raw_local_irq_save(flags);
187 __i = atomic_read(v);
188 atomic_set(v, i + __i);
189 raw_local_irq_restore(flags);
190 return i + __i;
191#endif
192} 176}
193 177
194/** 178/**
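With the CONFIG_M386 fallback gone, atomic_add_return() is just the xadd path: fetch-and-add returns the value before the addition, so adding the increment back yields the post-add value. A userspace analogue of that identity, as a sketch in C11 (not kernel code):

	#include <stdatomic.h>

	static int add_return_sketch(atomic_int *v, int i)
	{
		/* atomic_fetch_add() returns the old value; old + i is the new value */
		return atomic_fetch_add(v, i) + i;
	}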
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index b13fe63bdc59..4fa687a47a62 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -1,14 +1,9 @@
1#ifndef _ASM_X86_BOOT_H 1#ifndef _ASM_X86_BOOT_H
2#define _ASM_X86_BOOT_H 2#define _ASM_X86_BOOT_H
3 3
4/* Internal svga startup constants */
5#define NORMAL_VGA 0xffff /* 80x25 mode */
6#define EXTENDED_VGA 0xfffe /* 80x50 mode */
7#define ASK_VGA 0xfffd /* ask for it at bootup */
8
9#ifdef __KERNEL__
10 4
11#include <asm/pgtable_types.h> 5#include <asm/pgtable_types.h>
6#include <uapi/asm/boot.h>
12 7
13/* Physical address where kernel should be loaded. */ 8/* Physical address where kernel should be loaded. */
14#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ 9#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
@@ -42,6 +37,4 @@
42#define BOOT_STACK_SIZE 0x1000 37#define BOOT_STACK_SIZE 0x1000
43#endif 38#endif
44 39
45#endif /* __KERNEL__ */
46
47#endif /* _ASM_X86_BOOT_H */ 40#endif /* _ASM_X86_BOOT_H */
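boot.h is the first of many headers in this series that follow the UAPI disintegration pattern: the user-visible constants move under uapi/, the kernel-side header includes the uapi one, and the __KERNEL__ guards become redundant. A sketch of the pattern for a hypothetical example.h (not a file in this patch):

	/* arch/x86/include/uapi/asm/example.h -- exported to userspace */
	#ifndef _UAPI_ASM_X86_EXAMPLE_H
	#define _UAPI_ASM_X86_EXAMPLE_H
	#define EXAMPLE_ABI_CONSTANT 42		/* user-visible ABI bits only */
	#endif

	/* arch/x86/include/asm/example.h -- kernel-internal */
	#ifndef _ASM_X86_EXAMPLE_H
	#define _ASM_X86_EXAMPLE_H
	#include <uapi/asm/example.h>
	extern int example_kernel_only_symbol;	/* no __KERNEL__ guard needed */
	#endif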
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index 0bdbbb3b9ce7..16a57f4ed64d 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -8,6 +8,7 @@
8#define VCLOCK_NONE 0 /* No vDSO clock available. */ 8#define VCLOCK_NONE 0 /* No vDSO clock available. */
9#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ 9#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
10#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ 10#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
11#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
11 12
12struct arch_clocksource_data { 13struct arch_clocksource_data {
13 int vclock_mode; 14 int vclock_mode;
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 53f4b219336b..f8bf2eecab86 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -34,9 +34,7 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
34 : "memory"); 34 : "memory");
35} 35}
36 36
37#ifdef CONFIG_X86_CMPXCHG
38#define __HAVE_ARCH_CMPXCHG 1 37#define __HAVE_ARCH_CMPXCHG 1
39#endif
40 38
41#ifdef CONFIG_X86_CMPXCHG64 39#ifdef CONFIG_X86_CMPXCHG64
42#define cmpxchg64(ptr, o, n) \ 40#define cmpxchg64(ptr, o, n) \
@@ -73,59 +71,6 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
73 return prev; 71 return prev;
74} 72}
75 73
76#ifndef CONFIG_X86_CMPXCHG
77/*
78 * Building a kernel capable running on 80386. It may be necessary to
79 * simulate the cmpxchg on the 80386 CPU. For that purpose we define
80 * a function for each of the sizes we support.
81 */
82
83extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
84extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
85extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
86
87static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
88 unsigned long new, int size)
89{
90 switch (size) {
91 case 1:
92 return cmpxchg_386_u8(ptr, old, new);
93 case 2:
94 return cmpxchg_386_u16(ptr, old, new);
95 case 4:
96 return cmpxchg_386_u32(ptr, old, new);
97 }
98 return old;
99}
100
101#define cmpxchg(ptr, o, n) \
102({ \
103 __typeof__(*(ptr)) __ret; \
104 if (likely(boot_cpu_data.x86 > 3)) \
105 __ret = (__typeof__(*(ptr)))__cmpxchg((ptr), \
106 (unsigned long)(o), (unsigned long)(n), \
107 sizeof(*(ptr))); \
108 else \
109 __ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \
110 (unsigned long)(o), (unsigned long)(n), \
111 sizeof(*(ptr))); \
112 __ret; \
113})
114#define cmpxchg_local(ptr, o, n) \
115({ \
116 __typeof__(*(ptr)) __ret; \
117 if (likely(boot_cpu_data.x86 > 3)) \
118 __ret = (__typeof__(*(ptr)))__cmpxchg_local((ptr), \
119 (unsigned long)(o), (unsigned long)(n), \
120 sizeof(*(ptr))); \
121 else \
122 __ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \
123 (unsigned long)(o), (unsigned long)(n), \
124 sizeof(*(ptr))); \
125 __ret; \
126})
127#endif
128
129#ifndef CONFIG_X86_CMPXCHG64 74#ifndef CONFIG_X86_CMPXCHG64
130/* 75/*
131 * Building a kernel capable running on 80386 and 80486. It may be necessary 76 * Building a kernel capable running on 80386 and 80486. It may be necessary
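With the CONFIG_X86_CMPXCHG software fallback removed above, cmpxchg() always resolves to the hardware instruction. A userspace analogue of what the operation boils down to, sketched with a GCC builtin rather than the kernel macro:

	static unsigned long cmpxchg_sketch(volatile unsigned long *ptr,
					    unsigned long old, unsigned long new)
	{
		/* returns the prior value of *ptr; the store happens only if it equaled old */
		return __sync_val_compare_and_swap(ptr, old, new);
	}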
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/context_tracking.h
index d1ac07a23979..1616562683e9 100644
--- a/arch/x86/include/asm/rcu.h
+++ b/arch/x86/include/asm/context_tracking.h
@@ -1,27 +1,26 @@
1#ifndef _ASM_X86_RCU_H 1#ifndef _ASM_X86_CONTEXT_TRACKING_H
2#define _ASM_X86_RCU_H 2#define _ASM_X86_CONTEXT_TRACKING_H
3 3
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5 5#include <linux/context_tracking.h>
6#include <linux/rcupdate.h>
7#include <asm/ptrace.h> 6#include <asm/ptrace.h>
8 7
9static inline void exception_enter(struct pt_regs *regs) 8static inline void exception_enter(struct pt_regs *regs)
10{ 9{
11 rcu_user_exit(); 10 user_exit();
12} 11}
13 12
14static inline void exception_exit(struct pt_regs *regs) 13static inline void exception_exit(struct pt_regs *regs)
15{ 14{
16#ifdef CONFIG_RCU_USER_QS 15#ifdef CONFIG_CONTEXT_TRACKING
17 if (user_mode(regs)) 16 if (user_mode(regs))
18 rcu_user_enter(); 17 user_enter();
19#endif 18#endif
20} 19}
21 20
22#else /* __ASSEMBLY__ */ 21#else /* __ASSEMBLY__ */
23 22
24#ifdef CONFIG_RCU_USER_QS 23#ifdef CONFIG_CONTEXT_TRACKING
25# define SCHEDULE_USER call schedule_user 24# define SCHEDULE_USER call schedule_user
26#else 25#else
27# define SCHEDULE_USER call schedule 26# define SCHEDULE_USER call schedule
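The renamed header keeps the same bracketing helpers, now built on the context-tracking user_exit()/user_enter() calls. A sketch of the intended call pattern with a hypothetical trap handler (not a handler from this patch):

	static void example_trap_handler(struct pt_regs *regs)
	{
		exception_enter(regs);	/* user_exit(): we are now in kernel context */

		/* ... handle the trap ... */

		exception_exit(regs);	/* user_enter() again, but only if regs
					 * say the trap came from user mode */
	}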
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 4564c8e28a33..5f9a1243190e 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -28,6 +28,10 @@ struct x86_cpu {
28#ifdef CONFIG_HOTPLUG_CPU 28#ifdef CONFIG_HOTPLUG_CPU
29extern int arch_register_cpu(int num); 29extern int arch_register_cpu(int num);
30extern void arch_unregister_cpu(int); 30extern void arch_unregister_cpu(int);
31extern void __cpuinit start_cpu0(void);
32#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
33extern int _debug_hotplug_cpu(int cpu, int action);
34#endif
31#endif 35#endif
32 36
33DECLARE_PER_CPU(int, cpu_state); 37DECLARE_PER_CPU(int, cpu_state);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8c297aa53eef..2d9075e863a0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -202,6 +202,7 @@
202 202
203/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 203/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
204#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ 204#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
205#define X86_FEATURE_TSC_ADJUST (9*32+ 1) /* TSC adjustment MSR 0x3b */
205#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ 206#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
206#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */ 207#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
207#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ 208#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
@@ -311,12 +312,7 @@ extern const char * const x86_power_flags[32];
311#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) 312#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
312#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) 313#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
313#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) 314#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
314 315#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT)
315#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
316# define cpu_has_invlpg 1
317#else
318# define cpu_has_invlpg (boot_cpu_data.x86 > 3)
319#endif
320 316
321#ifdef CONFIG_X86_64 317#ifdef CONFIG_X86_64
322 318
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
new file mode 100644
index 000000000000..98038add801e
--- /dev/null
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -0,0 +1,82 @@
1#ifndef ASM_X86_CAMELLIA_H
2#define ASM_X86_CAMELLIA_H
3
4#include <linux/kernel.h>
5#include <linux/crypto.h>
6
7#define CAMELLIA_MIN_KEY_SIZE 16
8#define CAMELLIA_MAX_KEY_SIZE 32
9#define CAMELLIA_BLOCK_SIZE 16
10#define CAMELLIA_TABLE_BYTE_LEN 272
11#define CAMELLIA_PARALLEL_BLOCKS 2
12
13struct camellia_ctx {
14 u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
15 u32 key_length;
16};
17
18struct camellia_lrw_ctx {
19 struct lrw_table_ctx lrw_table;
20 struct camellia_ctx camellia_ctx;
21};
22
23struct camellia_xts_ctx {
24 struct camellia_ctx tweak_ctx;
25 struct camellia_ctx crypt_ctx;
26};
27
28extern int __camellia_setkey(struct camellia_ctx *cctx,
29 const unsigned char *key,
30 unsigned int key_len, u32 *flags);
31
32extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
33 unsigned int keylen);
34extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm);
35
36extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
37 unsigned int keylen);
38
39/* regular block cipher functions */
40asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
41 const u8 *src, bool xor);
42asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
43 const u8 *src);
44
45/* 2-way parallel cipher functions */
46asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
47 const u8 *src, bool xor);
48asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
49 const u8 *src);
50
51static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
52 const u8 *src)
53{
54 __camellia_enc_blk(ctx, dst, src, false);
55}
56
57static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
58 const u8 *src)
59{
60 __camellia_enc_blk(ctx, dst, src, true);
61}
62
63static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
64 const u8 *src)
65{
66 __camellia_enc_blk_2way(ctx, dst, src, false);
67}
68
69static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
70 const u8 *src)
71{
72 __camellia_enc_blk_2way(ctx, dst, src, true);
73}
74
75/* glue helpers */
76extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
77extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
78 le128 *iv);
79extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
80 le128 *iv);
81
82#endif /* ASM_X86_CAMELLIA_H */
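The new header wraps the bool-xor asm entry points in thin inlines. A sketch of how the _xor variant is typically used for one CTR block, assuming it follows the same convention as the other glue code in this series (encrypt src and xor the result into dst) and assuming the le128 helpers from asm/crypto/glue_helper.h; the real per-block code lives in the camellia glue, not here:

	static void camellia_ctr_one_block_sketch(struct camellia_ctx *ctx,
						  u8 *dst, const u8 *src,
						  le128 *iv)
	{
		be128 ctrblk;

		if (dst != src)
			memcpy(dst, src, CAMELLIA_BLOCK_SIZE);	/* dst = plaintext */

		le128_to_be128(&ctrblk, iv);	/* counter block for the cipher */
		le128_inc(iv);

		/* encrypt the counter block and xor the keystream into dst */
		camellia_enc_blk_xor(ctx, dst, (const u8 *)&ctrblk);
	}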
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 3e408bddc96f..e2d65b061d27 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -13,7 +13,7 @@
13typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); 13typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
14typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); 14typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
15typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, 15typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
16 u128 *iv); 16 le128 *iv);
17 17
18#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) 18#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
19#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) 19#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
@@ -71,23 +71,29 @@ static inline void glue_fpu_end(bool fpu_enabled)
71 kernel_fpu_end(); 71 kernel_fpu_end();
72} 72}
73 73
74static inline void u128_to_be128(be128 *dst, const u128 *src) 74static inline void le128_to_be128(be128 *dst, const le128 *src)
75{ 75{
76 dst->a = cpu_to_be64(src->a); 76 dst->a = cpu_to_be64(le64_to_cpu(src->a));
77 dst->b = cpu_to_be64(src->b); 77 dst->b = cpu_to_be64(le64_to_cpu(src->b));
78} 78}
79 79
80static inline void be128_to_u128(u128 *dst, const be128 *src) 80static inline void be128_to_le128(le128 *dst, const be128 *src)
81{ 81{
82 dst->a = be64_to_cpu(src->a); 82 dst->a = cpu_to_le64(be64_to_cpu(src->a));
83 dst->b = be64_to_cpu(src->b); 83 dst->b = cpu_to_le64(be64_to_cpu(src->b));
84} 84}
85 85
86static inline void u128_inc(u128 *i) 86static inline void le128_inc(le128 *i)
87{ 87{
88 i->b++; 88 u64 a = le64_to_cpu(i->a);
89 if (!i->b) 89 u64 b = le64_to_cpu(i->b);
90 i->a++; 90
91 b++;
92 if (!b)
93 a++;
94
95 i->a = cpu_to_le64(a);
96 i->b = cpu_to_le64(b);
91} 97}
92 98
93extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, 99extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index 432deedd2945..0da1d3e2a55c 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -6,27 +6,14 @@
6 6
7#define SERPENT_PARALLEL_BLOCKS 8 7#define SERPENT_PARALLEL_BLOCKS 8
8 8
9asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, 9asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
10 const u8 *src, bool xor); 10 const u8 *src);
11asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, 11asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
12 const u8 *src); 12 const u8 *src);
13 13
14static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, 14asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
15 const u8 *src) 15 const u8 *src);
16{ 16asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
17 __serpent_enc_blk_8way_avx(ctx, dst, src, false); 17 const u8 *src, le128 *iv);
18}
19
20static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
21 const u8 *src)
22{
23 __serpent_enc_blk_8way_avx(ctx, dst, src, true);
24}
25
26static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
27 const u8 *src)
28{
29 serpent_dec_blk_8way_avx(ctx, dst, src);
30}
31 18
32#endif 19#endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index 9d2c514bd5f9..878c51ceebb5 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -31,9 +31,9 @@ asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
31/* helpers from twofish_x86_64-3way module */ 31/* helpers from twofish_x86_64-3way module */
32extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); 32extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
33extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, 33extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
34 u128 *iv); 34 le128 *iv);
35extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, 35extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
36 u128 *iv); 36 le128 *iv);
37 37
38extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, 38extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
39 unsigned int keylen); 39 unsigned int keylen);
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 2d91580bf228..4b528a970bd4 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -2,83 +2,8 @@
2#define _ASM_X86_DEBUGREG_H 2#define _ASM_X86_DEBUGREG_H
3 3
4 4
5/* Indicate the register numbers for a number of the specific
6 debug registers. Registers 0-3 contain the addresses we wish to trap on */
7#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
8#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
9
10#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
11#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
12
13/* Define a few things for the status register. We can use this to determine
14 which debugging register was responsible for the trap. The other bits
15 are either reserved or not of interest to us. */
16
17/* Define reserved bits in DR6 which are always set to 1 */
18#define DR6_RESERVED (0xFFFF0FF0)
19
20#define DR_TRAP0 (0x1) /* db0 */
21#define DR_TRAP1 (0x2) /* db1 */
22#define DR_TRAP2 (0x4) /* db2 */
23#define DR_TRAP3 (0x8) /* db3 */
24#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
25
26#define DR_STEP (0x4000) /* single-step */
27#define DR_SWITCH (0x8000) /* task switch */
28
29/* Now define a bunch of things for manipulating the control register.
30 The top two bytes of the control register consist of 4 fields of 4
31 bits - each field corresponds to one of the four debug registers,
32 and indicates what types of access we trap on, and how large the data
33 field is that we are looking at */
34
35#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
36#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
37
38#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
39#define DR_RW_WRITE (0x1)
40#define DR_RW_READ (0x3)
41
42#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
43#define DR_LEN_2 (0x4)
44#define DR_LEN_4 (0xC)
45#define DR_LEN_8 (0x8)
46
47/* The low byte to the control register determine which registers are
48 enabled. There are 4 fields of two bits. One bit is "local", meaning
49 that the processor will reset the bit after a task switch and the other
50 is global meaning that we have to explicitly reset the bit. With linux,
51 you can use either one, since we explicitly zero the register when we enter
52 kernel mode. */
53
54#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
55#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
56#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
57#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
58#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
59
60#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
61#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
62
63/* The second byte to the control register has a few special things.
64 We can slow the instruction pipeline for instructions coming via the
65 gdt or the ldt if we want to. I am not sure why this is an advantage */
66
67#ifdef __i386__
68#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
69#else
70#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
71#endif
72
73#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
74#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
75
76/*
77 * HW breakpoint additions
78 */
79#ifdef __KERNEL__
80
81#include <linux/bug.h> 5#include <linux/bug.h>
6#include <uapi/asm/debugreg.h>
82 7
83DECLARE_PER_CPU(unsigned long, cpu_dr7); 8DECLARE_PER_CPU(unsigned long, cpu_dr7);
84 9
@@ -190,6 +115,4 @@ static inline void debug_stack_usage_dec(void) { }
190#endif /* X86_64 */ 115#endif /* X86_64 */
191 116
192 117
193#endif /* __KERNEL__ */
194
195#endif /* _ASM_X86_DEBUGREG_H */ 118#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 93e1c55f14ab..03dd72957d2f 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -2,9 +2,6 @@
2#define _ASM_X86_DEVICE_H 2#define _ASM_X86_DEVICE_H
3 3
4struct dev_archdata { 4struct dev_archdata {
5#ifdef CONFIG_ACPI
6 void *acpi_handle;
7#endif
8#ifdef CONFIG_X86_DEV_DMA_OPS 5#ifdef CONFIG_X86_DEV_DMA_OPS
9 struct dma_map_ops *dma_ops; 6 struct dma_map_ops *dma_ops;
10#endif 7#endif
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 37782566af24..cccd07fa5e3a 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -1,81 +1,14 @@
1#ifndef _ASM_X86_E820_H 1#ifndef _ASM_X86_E820_H
2#define _ASM_X86_E820_H 2#define _ASM_X86_E820_H
3#define E820MAP 0x2d0 /* our map */
4#define E820MAX 128 /* number of entries in E820MAP */
5 3
6/*
7 * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
8 * constrained space in the zeropage. If we have more nodes than
9 * that, and if we've booted off EFI firmware, then the EFI tables
10 * passed us from the EFI firmware can list more nodes. Size our
11 * internal memory map tables to have room for these additional
12 * nodes, based on up to three entries per node for which the
13 * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
14 * plus E820MAX, allowing space for the possible duplicate E820
15 * entries that might need room in the same arrays, prior to the
16 * call to sanitize_e820_map() to remove duplicates. The allowance
17 * of three memory map entries per node is "enough" entries for
18 * the initial hardware platform motivating this mechanism to make
19 * use of additional EFI map entries. Future platforms may want
20 * to allow more than three entries per node or otherwise refine
21 * this size.
22 */
23
24/*
25 * Odd: 'make headers_check' complains about numa.h if I try
26 * to collapse the next two #ifdef lines to a single line:
27 * #if defined(__KERNEL__) && defined(CONFIG_EFI)
28 */
29#ifdef __KERNEL__
30#ifdef CONFIG_EFI 4#ifdef CONFIG_EFI
31#include <linux/numa.h> 5#include <linux/numa.h>
32#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) 6#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
33#else /* ! CONFIG_EFI */ 7#else /* ! CONFIG_EFI */
34#define E820_X_MAX E820MAX 8#define E820_X_MAX E820MAX
35#endif 9#endif
36#else /* ! __KERNEL__ */ 10#include <uapi/asm/e820.h>
37#define E820_X_MAX E820MAX
38#endif
39
40#define E820NR 0x1e8 /* # entries in E820MAP */
41
42#define E820_RAM 1
43#define E820_RESERVED 2
44#define E820_ACPI 3
45#define E820_NVS 4
46#define E820_UNUSABLE 5
47
48/*
49 * reserved RAM used by kernel itself
50 * if CONFIG_INTEL_TXT is enabled, memory of this type will be
51 * included in the S3 integrity calculation and so should not include
52 * any memory that BIOS might alter over the S3 transition
53 */
54#define E820_RESERVED_KERN 128
55
56#ifndef __ASSEMBLY__ 11#ifndef __ASSEMBLY__
57#include <linux/types.h>
58struct e820entry {
59 __u64 addr; /* start of memory segment */
60 __u64 size; /* size of memory segment */
61 __u32 type; /* type of memory segment */
62} __attribute__((packed));
63
64struct e820map {
65 __u32 nr_map;
66 struct e820entry map[E820_X_MAX];
67};
68
69#define ISA_START_ADDRESS 0xa0000
70#define ISA_END_ADDRESS 0x100000
71
72#define BIOS_BEGIN 0x000a0000
73#define BIOS_END 0x00100000
74
75#define BIOS_ROM_BASE 0xffe00000
76#define BIOS_ROM_END 0xffffffff
77
78#ifdef __KERNEL__
79/* see comment in arch/x86/kernel/e820.c */ 12/* see comment in arch/x86/kernel/e820.c */
80extern struct e820map e820; 13extern struct e820map e820;
81extern struct e820map e820_saved; 14extern struct e820map e820_saved;
@@ -137,13 +70,8 @@ static inline bool is_ISA_range(u64 s, u64 e)
137 return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS; 70 return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS;
138} 71}
139 72
140#endif /* __KERNEL__ */
141#endif /* __ASSEMBLY__ */ 73#endif /* __ASSEMBLY__ */
142
143#ifdef __KERNEL__
144#include <linux/ioport.h> 74#include <linux/ioport.h>
145 75
146#define HIGH_MEMORY (1024*1024) 76#define HIGH_MEMORY (1024*1024)
147#endif /* __KERNEL__ */
148
149#endif /* _ASM_X86_E820_H */ 77#endif /* _ASM_X86_E820_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 5939f44fe0c0..9c999c1674fa 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -354,12 +354,10 @@ static inline int mmap_is_ia32(void)
354 return 0; 354 return 0;
355} 355}
356 356
357/* The first two values are special, do not change. See align_addr() */ 357/* Do not change the values. See get_align_mask() */
358enum align_flags { 358enum align_flags {
359 ALIGN_VA_32 = BIT(0), 359 ALIGN_VA_32 = BIT(0),
360 ALIGN_VA_64 = BIT(1), 360 ALIGN_VA_64 = BIT(1),
361 ALIGN_VDSO = BIT(2),
362 ALIGN_TOPDOWN = BIT(3),
363}; 361};
364 362
365struct va_alignment { 363struct va_alignment {
@@ -368,5 +366,5 @@ struct va_alignment {
368} ____cacheline_aligned; 366} ____cacheline_aligned;
369 367
370extern struct va_alignment va_align; 368extern struct va_alignment va_align;
371extern unsigned long align_addr(unsigned long, struct file *, enum align_flags); 369extern unsigned long align_vdso_addr(unsigned long);
372#endif /* _ASM_X86_ELF_H */ 370#endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 4da3c0c4c974..a09c28571064 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -19,6 +19,7 @@
19#include <asm/acpi.h> 19#include <asm/acpi.h>
20#include <asm/apicdef.h> 20#include <asm/apicdef.h>
21#include <asm/page.h> 21#include <asm/page.h>
22#include <asm/pvclock.h>
22#ifdef CONFIG_X86_32 23#ifdef CONFIG_X86_32
23#include <linux/threads.h> 24#include <linux/threads.h>
24#include <asm/kmap_types.h> 25#include <asm/kmap_types.h>
@@ -81,6 +82,10 @@ enum fixed_addresses {
81 VVAR_PAGE, 82 VVAR_PAGE,
82 VSYSCALL_HPET, 83 VSYSCALL_HPET,
83#endif 84#endif
85#ifdef CONFIG_PARAVIRT_CLOCK
86 PVCLOCK_FIXMAP_BEGIN,
87 PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
88#endif
84 FIX_DBGP_BASE, 89 FIX_DBGP_BASE,
85 FIX_EARLYCON_MEM_BASE, 90 FIX_EARLYCON_MEM_BASE,
86#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT 91#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index f373046e63ec..be27ba1e947a 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -55,12 +55,6 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
55 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) 55 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
56 return -EFAULT; 56 return -EFAULT;
57 57
58#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
59 /* Real i386 machines can only support FUTEX_OP_SET */
60 if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3)
61 return -ENOSYS;
62#endif
63
64 pagefault_disable(); 58 pagefault_disable();
65 59
66 switch (op) { 60 switch (op) {
@@ -118,12 +112,6 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
118{ 112{
119 int ret = 0; 113 int ret = 0;
120 114
121#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
122 /* Real i386 machines have no cmpxchg instruction */
123 if (boot_cpu_data.x86 == 3)
124 return -ENOSYS;
125#endif
126
127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) 115 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
128 return -EFAULT; 116 return -EFAULT;
129 117
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 824ca07860d0..ef1c4d2d41ec 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -1,7 +1,8 @@
1#ifndef _I386_HW_BREAKPOINT_H 1#ifndef _I386_HW_BREAKPOINT_H
2#define _I386_HW_BREAKPOINT_H 2#define _I386_HW_BREAKPOINT_H
3 3
4#ifdef __KERNEL__ 4#include <uapi/asm/hw_breakpoint.h>
5
5#define __ARCH_HW_BREAKPOINT_H 6#define __ARCH_HW_BREAKPOINT_H
6 7
7/* 8/*
@@ -71,6 +72,4 @@ extern int arch_bp_generic_fields(int x86_len, int x86_type,
71 72
72extern struct pmu perf_ops_bp; 73extern struct pmu perf_ops_bp;
73 74
74#endif /* __KERNEL__ */
75#endif /* _I386_HW_BREAKPOINT_H */ 75#endif /* _I386_HW_BREAKPOINT_H */
76
diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h
index 7e5dff1de0e9..c9803f1a2033 100644
--- a/arch/x86/include/asm/ist.h
+++ b/arch/x86/include/asm/ist.h
@@ -1,6 +1,3 @@
1#ifndef _ASM_X86_IST_H
2#define _ASM_X86_IST_H
3
4/* 1/*
5 * Include file for the interface to IST BIOS 2 * Include file for the interface to IST BIOS
6 * Copyright 2002 Andy Grover <andrew.grover@intel.com> 3 * Copyright 2002 Andy Grover <andrew.grover@intel.com>
@@ -15,20 +12,12 @@
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details. 13 * General Public License for more details.
17 */ 14 */
15#ifndef _ASM_X86_IST_H
16#define _ASM_X86_IST_H
18 17
18#include <uapi/asm/ist.h>
19 19
20#include <linux/types.h>
21
22struct ist_info {
23 __u32 signature;
24 __u32 command;
25 __u32 event;
26 __u32 perf_level;
27};
28
29#ifdef __KERNEL__
30 20
31extern struct ist_info ist_info; 21extern struct ist_info ist_info;
32 22
33#endif /* __KERNEL__ */
34#endif /* _ASM_X86_IST_H */ 23#endif /* _ASM_X86_IST_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 317ff1703d0b..6080d2694bad 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -163,6 +163,9 @@ struct kimage_arch {
163}; 163};
164#endif 164#endif
165 165
166typedef void crash_vmclear_fn(void);
167extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
168
166#endif /* __ASSEMBLY__ */ 169#endif /* __ASSEMBLY__ */
167 170
168#endif /* _ASM_X86_KEXEC_H */ 171#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/include/asm/kvm_guest.h b/arch/x86/include/asm/kvm_guest.h
new file mode 100644
index 000000000000..a92b1763c419
--- /dev/null
+++ b/arch/x86/include/asm/kvm_guest.h
@@ -0,0 +1,6 @@
1#ifndef _ASM_X86_KVM_GUEST_H
2#define _ASM_X86_KVM_GUEST_H
3
4int kvm_setup_vsyscall_timeinfo(void);
5
6#endif /* _ASM_X86_KVM_GUEST_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b2e11f452435..dc87b65e9c3a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
22#include <linux/kvm_para.h> 22#include <linux/kvm_para.h>
23#include <linux/kvm_types.h> 23#include <linux/kvm_types.h>
24#include <linux/perf_event.h> 24#include <linux/perf_event.h>
25#include <linux/pvclock_gtod.h>
26#include <linux/clocksource.h>
25 27
26#include <asm/pvclock-abi.h> 28#include <asm/pvclock-abi.h>
27#include <asm/desc.h> 29#include <asm/desc.h>
@@ -442,6 +444,7 @@ struct kvm_vcpu_arch {
442 s8 virtual_tsc_shift; 444 s8 virtual_tsc_shift;
443 u32 virtual_tsc_mult; 445 u32 virtual_tsc_mult;
444 u32 virtual_tsc_khz; 446 u32 virtual_tsc_khz;
447 s64 ia32_tsc_adjust_msr;
445 448
446 atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ 449 atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
447 unsigned nmi_pending; /* NMI queued after currently running handler */ 450 unsigned nmi_pending; /* NMI queued after currently running handler */
@@ -559,6 +562,12 @@ struct kvm_arch {
559 u64 cur_tsc_write; 562 u64 cur_tsc_write;
560 u64 cur_tsc_offset; 563 u64 cur_tsc_offset;
561 u8 cur_tsc_generation; 564 u8 cur_tsc_generation;
565 int nr_vcpus_matched_tsc;
566
567 spinlock_t pvclock_gtod_sync_lock;
568 bool use_master_clock;
569 u64 master_kernel_ns;
570 cycle_t master_cycle_now;
562 571
563 struct kvm_xen_hvm_config xen_hvm_config; 572 struct kvm_xen_hvm_config xen_hvm_config;
564 573
@@ -612,6 +621,12 @@ struct kvm_vcpu_stat {
612 621
613struct x86_instruction_info; 622struct x86_instruction_info;
614 623
624struct msr_data {
625 bool host_initiated;
626 u32 index;
627 u64 data;
628};
629
615struct kvm_x86_ops { 630struct kvm_x86_ops {
616 int (*cpu_has_kvm_support)(void); /* __init */ 631 int (*cpu_has_kvm_support)(void); /* __init */
617 int (*disabled_by_bios)(void); /* __init */ 632 int (*disabled_by_bios)(void); /* __init */
@@ -634,7 +649,7 @@ struct kvm_x86_ops {
634 649
635 void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); 650 void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
636 int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); 651 int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
637 int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 652 int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
638 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); 653 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
639 void (*get_segment)(struct kvm_vcpu *vcpu, 654 void (*get_segment)(struct kvm_vcpu *vcpu,
640 struct kvm_segment *var, int seg); 655 struct kvm_segment *var, int seg);
@@ -697,10 +712,11 @@ struct kvm_x86_ops {
697 bool (*has_wbinvd_exit)(void); 712 bool (*has_wbinvd_exit)(void);
698 713
699 void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); 714 void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
715 u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
700 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); 716 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
701 717
702 u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); 718 u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
703 u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu); 719 u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
704 720
705 void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); 721 void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
706 722
@@ -785,7 +801,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
785 801
786void kvm_enable_efer_bits(u64); 802void kvm_enable_efer_bits(u64);
787int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); 803int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
788int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 804int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
789 805
790struct x86_emulate_ctxt; 806struct x86_emulate_ctxt;
791 807
@@ -812,7 +828,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
812int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); 828int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
813 829
814int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); 830int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
815int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); 831int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
816 832
817unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); 833unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
818void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); 834void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
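The set_msr path now takes a struct msr_data instead of a bare index/value pair, so callers can say whether the write came from the host or from the guest. A sketch of a hypothetical call site using the fields shown above:

	static int example_guest_wrmsr(struct kvm_vcpu *vcpu, u32 index, u64 value)
	{
		struct msr_data msr = {
			.host_initiated	= false,	/* this write came from the guest */
			.index		= index,
			.data		= value,
		};

		return kvm_set_msr(vcpu, &msr);
	}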
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index eb3e9d85e1f1..5ed1f16187be 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -1,103 +1,8 @@
1#ifndef _ASM_X86_KVM_PARA_H 1#ifndef _ASM_X86_KVM_PARA_H
2#define _ASM_X86_KVM_PARA_H 2#define _ASM_X86_KVM_PARA_H
3 3
4#include <linux/types.h>
5#include <asm/hyperv.h>
6
7/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
8 * should be used to determine that a VM is running under KVM.
9 */
10#define KVM_CPUID_SIGNATURE 0x40000000
11
12/* This CPUID returns a feature bitmap in eax. Before enabling a particular
13 * paravirtualization, the appropriate feature bit should be checked.
14 */
15#define KVM_CPUID_FEATURES 0x40000001
16#define KVM_FEATURE_CLOCKSOURCE 0
17#define KVM_FEATURE_NOP_IO_DELAY 1
18#define KVM_FEATURE_MMU_OP 2
19/* This indicates that the new set of kvmclock msrs
20 * are available. The use of 0x11 and 0x12 is deprecated
21 */
22#define KVM_FEATURE_CLOCKSOURCE2 3
23#define KVM_FEATURE_ASYNC_PF 4
24#define KVM_FEATURE_STEAL_TIME 5
25#define KVM_FEATURE_PV_EOI 6
26
27/* The last 8 bits are used to indicate how to interpret the flags field
28 * in pvclock structure. If no bits are set, all flags are ignored.
29 */
30#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24
31
32#define MSR_KVM_WALL_CLOCK 0x11
33#define MSR_KVM_SYSTEM_TIME 0x12
34
35#define KVM_MSR_ENABLED 1
36/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
37#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
38#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
39#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
40#define MSR_KVM_STEAL_TIME 0x4b564d03
41#define MSR_KVM_PV_EOI_EN 0x4b564d04
42
43struct kvm_steal_time {
44 __u64 steal;
45 __u32 version;
46 __u32 flags;
47 __u32 pad[12];
48};
49
50#define KVM_STEAL_ALIGNMENT_BITS 5
51#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
52#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
53
54#define KVM_MAX_MMU_OP_BATCH 32
55
56#define KVM_ASYNC_PF_ENABLED (1 << 0)
57#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
58
59/* Operations for KVM_HC_MMU_OP */
60#define KVM_MMU_OP_WRITE_PTE 1
61#define KVM_MMU_OP_FLUSH_TLB 2
62#define KVM_MMU_OP_RELEASE_PT 3
63
64/* Payload for KVM_HC_MMU_OP */
65struct kvm_mmu_op_header {
66 __u32 op;
67 __u32 pad;
68};
69
70struct kvm_mmu_op_write_pte {
71 struct kvm_mmu_op_header header;
72 __u64 pte_phys;
73 __u64 pte_val;
74};
75
76struct kvm_mmu_op_flush_tlb {
77 struct kvm_mmu_op_header header;
78};
79
80struct kvm_mmu_op_release_pt {
81 struct kvm_mmu_op_header header;
82 __u64 pt_phys;
83};
84
85#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
86#define KVM_PV_REASON_PAGE_READY 2
87
88struct kvm_vcpu_pv_apf_data {
89 __u32 reason;
90 __u8 pad[60];
91 __u32 enabled;
92};
93
94#define KVM_PV_EOI_BIT 0
95#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
96#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
97#define KVM_PV_EOI_DISABLED 0x0
98
99#ifdef __KERNEL__
100#include <asm/processor.h> 4#include <asm/processor.h>
5#include <uapi/asm/kvm_para.h>
101 6
102extern void kvmclock_init(void); 7extern void kvmclock_init(void);
103extern int kvm_register_clock(char *txt); 8extern int kvm_register_clock(char *txt);
@@ -228,6 +133,4 @@ static inline void kvm_disable_steal_time(void)
228} 133}
229#endif 134#endif
230 135
231#endif /* __KERNEL__ */
232
233#endif /* _ASM_X86_KVM_PARA_H */ 136#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index c8bed0da434a..2d89e3980cbd 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -124,27 +124,11 @@ static inline int local_add_negative(long i, local_t *l)
124 */ 124 */
125static inline long local_add_return(long i, local_t *l) 125static inline long local_add_return(long i, local_t *l)
126{ 126{
127 long __i; 127 long __i = i;
128#ifdef CONFIG_M386
129 unsigned long flags;
130 if (unlikely(boot_cpu_data.x86 <= 3))
131 goto no_xadd;
132#endif
133 /* Modern 486+ processor */
134 __i = i;
135 asm volatile(_ASM_XADD "%0, %1;" 128 asm volatile(_ASM_XADD "%0, %1;"
136 : "+r" (i), "+m" (l->a.counter) 129 : "+r" (i), "+m" (l->a.counter)
137 : : "memory"); 130 : : "memory");
138 return i + __i; 131 return i + __i;
139
140#ifdef CONFIG_M386
141no_xadd: /* Legacy 386 processor */
142 local_irq_save(flags);
143 __i = local_read(l);
144 local_set(l, i + __i);
145 local_irq_restore(flags);
146 return i + __i;
147#endif
148} 132}
149 133
150static inline long local_sub_return(long i, local_t *l) 134static inline long local_sub_return(long i, local_t *l)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 54d73b1f00a0..ecdfee60ee4a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -1,124 +1,25 @@
1#ifndef _ASM_X86_MCE_H 1#ifndef _ASM_X86_MCE_H
2#define _ASM_X86_MCE_H 2#define _ASM_X86_MCE_H
3 3
4#include <linux/types.h> 4#include <uapi/asm/mce.h>
5#include <asm/ioctls.h> 5
6 6
7/* 7struct mca_config {
8 * Machine Check support for x86 8 bool dont_log_ce;
9 */ 9 bool cmci_disabled;
10 10 bool ignore_ce;
11/* MCG_CAP register defines */ 11 bool disabled;
12#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ 12 bool ser;
13#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ 13 bool bios_cmci_threshold;
14#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ 14 u8 banks;
15#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ 15 s8 bootlog;
16#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ 16 int tolerant;
17#define MCG_EXT_CNT_SHIFT 16 17 int monarch_timeout;
18#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) 18 int panic_timeout;
19#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ 19 u32 rip_msr;
20
21/* MCG_STATUS register defines */
22#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
23#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
24#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
25
26/* MCi_STATUS register defines */
27#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
28#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
29#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
30#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
31#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
32#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
33#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
34#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
35#define MCI_STATUS_AR (1ULL<<55) /* Action required */
36#define MCACOD 0xffff /* MCA Error Code */
37
38/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
39#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
40#define MCACOD_SCRUBMSK 0xfff0
41#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
42#define MCACOD_DATA 0x0134 /* Data Load */
43#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
44
45/* MCi_MISC register defines */
46#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
47#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
48#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
49#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
50#define MCI_MISC_ADDR_PHYS 2 /* physical address */
51#define MCI_MISC_ADDR_MEM 3 /* memory address */
52#define MCI_MISC_ADDR_GENERIC 7 /* generic */
53
54/* CTL2 register defines */
55#define MCI_CTL2_CMCI_EN (1ULL << 30)
56#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
57
58#define MCJ_CTX_MASK 3
59#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
60#define MCJ_CTX_RANDOM 0 /* inject context: random */
61#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
62#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
63#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
64#define MCJ_EXCEPTION 0x8 /* raise as exception */
65#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
66
67/* Fields are zero when not available */
68struct mce {
69 __u64 status;
70 __u64 misc;
71 __u64 addr;
72 __u64 mcgstatus;
73 __u64 ip;
74 __u64 tsc; /* cpu time stamp counter */
75 __u64 time; /* wall time_t when error was detected */
76 __u8 cpuvendor; /* cpu vendor as encoded in system.h */
77 __u8 inject_flags; /* software inject flags */
78 __u16 pad;
79 __u32 cpuid; /* CPUID 1 EAX */
80 __u8 cs; /* code segment */
81 __u8 bank; /* machine check bank */
82 __u8 cpu; /* cpu number; obsolete; use extcpu now */
83 __u8 finished; /* entry is valid */
84 __u32 extcpu; /* linux cpu number that detected the error */
85 __u32 socketid; /* CPU socket ID */
86 __u32 apicid; /* CPU initial apic ID */
87 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
88};
89
90/*
91 * This structure contains all data related to the MCE log. Also
92 * carries a signature to make it easier to find from external
93 * debugging tools. Each entry is only valid when its finished flag
94 * is set.
95 */
96
97#define MCE_LOG_LEN 32
98
99struct mce_log {
100 char signature[12]; /* "MACHINECHECK" */
101 unsigned len; /* = MCE_LOG_LEN */
102 unsigned next;
103 unsigned flags;
104 unsigned recordlen; /* length of struct mce */
105 struct mce entry[MCE_LOG_LEN];
106}; 20};
107 21
108#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ 22extern struct mca_config mca_cfg;
109
110#define MCE_LOG_SIGNATURE "MACHINECHECK"
111
112#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
113#define MCE_GET_LOG_LEN _IOR('M', 2, int)
114#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
115
116/* Software defined banks */
117#define MCE_EXTENDED_BANK 128
118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
119#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
120
121#ifdef __KERNEL__
122extern void mce_register_decode_chain(struct notifier_block *nb); 23extern void mce_register_decode_chain(struct notifier_block *nb);
123extern void mce_unregister_decode_chain(struct notifier_block *nb); 24extern void mce_unregister_decode_chain(struct notifier_block *nb);
124 25
@@ -126,7 +27,6 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
126#include <linux/init.h> 27#include <linux/init.h>
127#include <linux/atomic.h> 28#include <linux/atomic.h>
128 29
129extern int mce_disabled;
130extern int mce_p5_enabled; 30extern int mce_p5_enabled;
131 31
132#ifdef CONFIG_X86_MCE 32#ifdef CONFIG_X86_MCE
@@ -159,9 +59,6 @@ DECLARE_PER_CPU(struct device *, mce_device);
159#define MAX_NR_BANKS 32 59#define MAX_NR_BANKS 32
160 60
161#ifdef CONFIG_X86_MCE_INTEL 61#ifdef CONFIG_X86_MCE_INTEL
162extern int mce_cmci_disabled;
163extern int mce_ignore_ce;
164extern int mce_bios_cmci_threshold;
165void mce_intel_feature_init(struct cpuinfo_x86 *c); 62void mce_intel_feature_init(struct cpuinfo_x86 *c);
166void cmci_clear(void); 63void cmci_clear(void);
167void cmci_reenable(void); 64void cmci_reenable(void);
@@ -247,5 +144,4 @@ struct cper_sec_mem_err;
247extern void apei_mce_report_mem_error(int corrected, 144extern void apei_mce_report_mem_error(int corrected,
248 struct cper_sec_mem_err *mem_err); 145 struct cper_sec_mem_err *mem_err);
249 146
250#endif /* __KERNEL__ */
251#endif /* _ASM_X86_MCE_H */ 147#endif /* _ASM_X86_MCE_H */
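
The MCE_GET_RECORD_LEN/MCE_GET_LOG_LEN/MCE_GETCLEAR_FLAGS ioctls kept by this header are the ones consumed through the machine-check character device. A minimal user-space sketch, assuming the exported <asm/mce.h> and the usual /dev/mcelog node (neither is part of this patch):

    /* Hedged sketch: query the MCE character device with the ioctls
     * defined above. Requires privileges to open /dev/mcelog. */
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <asm/mce.h>

    int main(void)
    {
            int fd = open("/dev/mcelog", O_RDONLY);
            int reclen = 0, loglen = 0;

            if (fd < 0)
                    return 1;
            ioctl(fd, MCE_GET_RECORD_LEN, &reclen); /* sizeof(struct mce) */
            ioctl(fd, MCE_GET_LOG_LEN, &loglen);    /* MCE_LOG_LEN slots */
            printf("record %d bytes, %d log slots\n", reclen, loglen);
            close(fd);
            return 0;
    }
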
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 9eae7752ae9b..e3b7819caeef 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,8 +5,6 @@
5 5
6#ifdef CONFIG_X86_64 6#ifdef CONFIG_X86_64
7/* X86_64 does not define MODULE_PROC_FAMILY */ 7/* X86_64 does not define MODULE_PROC_FAMILY */
8#elif defined CONFIG_M386
9#define MODULE_PROC_FAMILY "386 "
10#elif defined CONFIG_M486 8#elif defined CONFIG_M486
11#define MODULE_PROC_FAMILY "486 " 9#define MODULE_PROC_FAMILY "486 "
12#elif defined CONFIG_M586 10#elif defined CONFIG_M586
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 813ed103f45e..9264802e2824 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -1,18 +1,10 @@
1#ifndef _ASM_X86_MSR_H 1#ifndef _ASM_X86_MSR_H
2#define _ASM_X86_MSR_H 2#define _ASM_X86_MSR_H
3 3
4#include <asm/msr-index.h> 4#include <uapi/asm/msr.h>
5 5
6#ifndef __ASSEMBLY__ 6#ifndef __ASSEMBLY__
7 7
8#include <linux/types.h>
9#include <linux/ioctl.h>
10
11#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
12#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
13
14#ifdef __KERNEL__
15
16#include <asm/asm.h> 8#include <asm/asm.h>
17#include <asm/errno.h> 9#include <asm/errno.h>
18#include <asm/cpumask.h> 10#include <asm/cpumask.h>
@@ -271,6 +263,5 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
271 return wrmsr_safe_regs(regs); 263 return wrmsr_safe_regs(regs);
272} 264}
273#endif /* CONFIG_SMP */ 265#endif /* CONFIG_SMP */
274#endif /* __KERNEL__ */
275#endif /* __ASSEMBLY__ */ 266#endif /* __ASSEMBLY__ */
276#endif /* _ASM_X86_MSR_H */ 267#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7e3f17f92c66..e235582f9930 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -23,97 +23,8 @@
23#ifndef _ASM_X86_MTRR_H 23#ifndef _ASM_X86_MTRR_H
24#define _ASM_X86_MTRR_H 24#define _ASM_X86_MTRR_H
25 25
26#include <linux/types.h> 26#include <uapi/asm/mtrr.h>
27#include <linux/ioctl.h>
28#include <linux/errno.h>
29 27
30#define MTRR_IOCTL_BASE 'M'
31
32/* Warning: this structure has a different order from i386
33 on x86-64. The 32bit emulation code takes care of that.
34 But you need to use this for 64bit, otherwise your X server
35 will break. */
36
37#ifdef __i386__
38struct mtrr_sentry {
39 unsigned long base; /* Base address */
40 unsigned int size; /* Size of region */
41 unsigned int type; /* Type of region */
42};
43
44struct mtrr_gentry {
45 unsigned int regnum; /* Register number */
46 unsigned long base; /* Base address */
47 unsigned int size; /* Size of region */
48 unsigned int type; /* Type of region */
49};
50
51#else /* __i386__ */
52
53struct mtrr_sentry {
54 __u64 base; /* Base address */
55 __u32 size; /* Size of region */
56 __u32 type; /* Type of region */
57};
58
59struct mtrr_gentry {
60 __u64 base; /* Base address */
61 __u32 size; /* Size of region */
62 __u32 regnum; /* Register number */
63 __u32 type; /* Type of region */
64 __u32 _pad; /* Unused */
65};
66
67#endif /* !__i386__ */
68
69struct mtrr_var_range {
70 __u32 base_lo;
71 __u32 base_hi;
72 __u32 mask_lo;
73 __u32 mask_hi;
74};
75
76/* In the Intel processor's MTRR interface, the MTRR type is always held in
77 an 8 bit field: */
78typedef __u8 mtrr_type;
79
80#define MTRR_NUM_FIXED_RANGES 88
81#define MTRR_MAX_VAR_RANGES 256
82
83struct mtrr_state_type {
84 struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
85 mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
86 unsigned char enabled;
87 unsigned char have_fixed;
88 mtrr_type def_type;
89};
90
91#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
92#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
93
94/* These are the various ioctls */
95#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
96#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
97#define MTRRIOC_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry)
98#define MTRRIOC_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry)
99#define MTRRIOC_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry)
100#define MTRRIOC_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry)
101#define MTRRIOC_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry)
102#define MTRRIOC_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry)
103#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
104#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry)
105
106/* These are the region types */
107#define MTRR_TYPE_UNCACHABLE 0
108#define MTRR_TYPE_WRCOMB 1
109/*#define MTRR_TYPE_ 2*/
110/*#define MTRR_TYPE_ 3*/
111#define MTRR_TYPE_WRTHROUGH 4
112#define MTRR_TYPE_WRPROT 5
113#define MTRR_TYPE_WRBACK 6
114#define MTRR_NUM_TYPES 7
115
116#ifdef __KERNEL__
117 28
118/* The following functions are for use by other drivers */ 29/* The following functions are for use by other drivers */
119# ifdef CONFIG_MTRR 30# ifdef CONFIG_MTRR
@@ -208,6 +119,4 @@ struct mtrr_gentry32 {
208 _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32) 119 _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32)
209#endif /* CONFIG_COMPAT */ 120#endif /* CONFIG_COMPAT */
210 121
211#endif /* __KERNEL__ */
212
213#endif /* _ASM_X86_MTRR_H */ 122#endif /* _ASM_X86_MTRR_H */
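
struct mtrr_gentry and the MTRRIOC_* ioctls that moved to the uapi header are driven through /proc/mtrr from user space. A minimal sketch, assuming the exported <asm/mtrr.h>; not part of this patch:

    /* Hedged sketch: walk the variable MTRR ranges via /proc/mtrr.
     * Registers past the last populated one report size == 0. */
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <asm/mtrr.h>

    int main(void)
    {
            struct mtrr_gentry g;
            int fd = open("/proc/mtrr", O_RDONLY);

            if (fd < 0)
                    return 1;
            for (g.regnum = 0; ioctl(fd, MTRRIOC_GET_ENTRY, &g) == 0; g.regnum++)
                    if (g.size)
                            printf("reg %u: base %llx size %x type %u\n",
                                   g.regnum, (unsigned long long)g.base,
                                   g.size, g.type);
            close(fd);
            return 0;
    }
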
diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h
new file mode 100644
index 000000000000..1c6f7f6212c1
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -0,0 +1,19 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-specific header file
7 *
8 * Copyright (C) 2012 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 */
13
14#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
15#define _ASM_X86_NUMACHIP_NUMACHIP_H
16
17extern int __init pci_numachip_init(void);
18
19#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a0facf3908d7..5edd1742cfd0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -528,7 +528,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
528 PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); 528 PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
529} 529}
530 530
531#ifdef CONFIG_TRANSPARENT_HUGEPAGE
532static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, 531static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
533 pmd_t *pmdp, pmd_t pmd) 532 pmd_t *pmdp, pmd_t pmd)
534{ 533{
@@ -539,7 +538,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
539 PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp, 538 PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
540 native_pmd_val(pmd)); 539 native_pmd_val(pmd));
541} 540}
542#endif
543 541
544static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) 542static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
545{ 543{
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 6e41b9343928..dba7805176bf 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -171,4 +171,16 @@ cpumask_of_pcibus(const struct pci_bus *bus)
171} 171}
172#endif 172#endif
173 173
174struct pci_setup_rom {
175 struct setup_data data;
176 uint16_t vendor;
177 uint16_t devid;
178 uint64_t pcilen;
179 unsigned long segment;
180 unsigned long bus;
181 unsigned long device;
182 unsigned long function;
183 uint8_t romdata[0];
184};
185
174#endif /* _ASM_X86_PCI_H */ 186#endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1104afaba52b..0da5200ee79d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -406,7 +406,6 @@ do { \
406#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) 406#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
407#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) 407#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
408 408
409#ifndef CONFIG_M386
410#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 409#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
411#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 410#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
412#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 411#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
@@ -421,8 +420,6 @@ do { \
421#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 420#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
422#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 421#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
423 422
424#endif /* !CONFIG_M386 */
425
426#ifdef CONFIG_X86_CMPXCHG64 423#ifdef CONFIG_X86_CMPXCHG64
427#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ 424#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
428({ \ 425({ \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a1f780d45f76..5199db2923d3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -404,7 +404,14 @@ static inline int pte_same(pte_t a, pte_t b)
404 404
405static inline int pte_present(pte_t a) 405static inline int pte_present(pte_t a)
406{ 406{
407 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); 407 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
408 _PAGE_NUMA);
409}
410
411#define pte_accessible pte_accessible
412static inline int pte_accessible(pte_t a)
413{
414 return pte_flags(a) & _PAGE_PRESENT;
408} 415}
409 416
410static inline int pte_hidden(pte_t pte) 417static inline int pte_hidden(pte_t pte)
@@ -420,7 +427,8 @@ static inline int pmd_present(pmd_t pmd)
420 * the _PAGE_PSE flag will remain set at all times while the 427 * the _PAGE_PSE flag will remain set at all times while the
421 * _PAGE_PRESENT bit is clear). 428 * _PAGE_PRESENT bit is clear).
422 */ 429 */
423 return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); 430 return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
431 _PAGE_NUMA);
424} 432}
425 433
426static inline int pmd_none(pmd_t pmd) 434static inline int pmd_none(pmd_t pmd)
@@ -479,6 +487,11 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
479 487
480static inline int pmd_bad(pmd_t pmd) 488static inline int pmd_bad(pmd_t pmd)
481{ 489{
490#ifdef CONFIG_NUMA_BALANCING
491 /* pmd_numa check */
492 if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
493 return 0;
494#endif
482 return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; 495 return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
483} 496}
484 497
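
The pte_present()/pmd_bad() changes above rely on the new _PAGE_NUMA bit (aliased to _PAGE_PROTNONE in pgtable_types.h below): a NUMA-hinting entry keeps pte_present() true while failing pte_accessible(). A sketch of the test this implies, mirroring the pmd_numa check added to pmd_bad(); not part of this hunk:

    /* Hedged sketch: a NUMA-hinting entry has _PAGE_NUMA set and
     * _PAGE_PRESENT clear, so pte_present() still reports true via
     * _PAGE_NUMA while pte_accessible() and pmd_bad() special-case it. */
    static inline int pte_numa_sketch(pte_t pte)
    {
            return (pte_flags(pte) & (_PAGE_NUMA | _PAGE_PRESENT))
                    == _PAGE_NUMA;
    }
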
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index ec8a1fc9505d..3c32db8c539d 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -64,6 +64,26 @@
64#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) 64#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
65#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) 65#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
66 66
67/*
68 * _PAGE_NUMA indicates that this page will trigger a numa hinting
69 * minor page fault to gather numa placement statistics (see
70 * pte_numa()). The bit picked (8) is within the range between
71 * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
72 * require changes to the swp entry format because that bit is always
73 * zero when the pte is not present.
74 *
 75 * The bit picked must always be zero both when the pmd is present
 76 * and when it is not present, so that we don't lose information when
 77 * we set it while atomically clearing the present bit.
78 *
79 * Because we shared the same bit (8) with _PAGE_PROTNONE this can be
80 * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
81 * couldn't reach, like handle_mm_fault() (see access_error in
82 * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
83 * handle_mm_fault() to be invoked).
84 */
85#define _PAGE_NUMA _PAGE_PROTNONE
86
67#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ 87#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
68 _PAGE_ACCESSED | _PAGE_DIRTY) 88 _PAGE_ACCESSED | _PAGE_DIRTY)
69#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ 89#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h
index bad3665c25fc..f565f6dd59d4 100644
--- a/arch/x86/include/asm/posix_types.h
+++ b/arch/x86/include/asm/posix_types.h
@@ -1,15 +1,5 @@
1#ifdef __KERNEL__
2# ifdef CONFIG_X86_32 1# ifdef CONFIG_X86_32
3# include <asm/posix_types_32.h> 2# include <asm/posix_types_32.h>
4# else 3# else
5# include <asm/posix_types_64.h> 4# include <asm/posix_types_64.h>
6# endif 5# endif
7#else
8# ifdef __i386__
9# include <asm/posix_types_32.h>
10# elif defined(__ILP32__)
11# include <asm/posix_types_x32.h>
12# else
13# include <asm/posix_types_64.h>
14# endif
15#endif
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 680cf09ed100..39fb618e2211 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -1,106 +1,11 @@
1#ifndef _ASM_X86_PROCESSOR_FLAGS_H 1#ifndef _ASM_X86_PROCESSOR_FLAGS_H
2#define _ASM_X86_PROCESSOR_FLAGS_H 2#define _ASM_X86_PROCESSOR_FLAGS_H
3/* Various flags defined: can be included from assembler. */
4 3
5/* 4#include <uapi/asm/processor-flags.h>
6 * EFLAGS bits
7 */
8#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
9#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
10#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
11#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
12#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
13#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
14#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
15#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
16#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
17#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
18#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
19#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
20#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
21#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
22#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
23#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
24#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
25#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
26 5
27/*
28 * Basic CPU control in CR0
29 */
30#define X86_CR0_PE 0x00000001 /* Protection Enable */
31#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */
32#define X86_CR0_EM 0x00000004 /* Emulation */
33#define X86_CR0_TS 0x00000008 /* Task Switched */
34#define X86_CR0_ET 0x00000010 /* Extension Type */
35#define X86_CR0_NE 0x00000020 /* Numeric Error */
36#define X86_CR0_WP 0x00010000 /* Write Protect */
37#define X86_CR0_AM 0x00040000 /* Alignment Mask */
38#define X86_CR0_NW 0x20000000 /* Not Write-through */
39#define X86_CR0_CD 0x40000000 /* Cache Disable */
40#define X86_CR0_PG 0x80000000 /* Paging */
41
42/*
43 * Paging options in CR3
44 */
45#define X86_CR3_PWT 0x00000008 /* Page Write Through */
46#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
47#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
48
49/*
50 * Intel CPU features in CR4
51 */
52#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */
53#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */
54#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */
55#define X86_CR4_DE 0x00000008 /* enable debugging extensions */
56#define X86_CR4_PSE 0x00000010 /* enable page size extensions */
57#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */
58#define X86_CR4_MCE 0x00000040 /* Machine check enable */
59#define X86_CR4_PGE 0x00000080 /* enable global pages */
60#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */
61#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
62#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
63#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
64#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
65#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
66#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
67#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
68#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */
69
70/*
71 * x86-64 Task Priority Register, CR8
72 */
73#define X86_CR8_TPR 0x0000000F /* task priority register */
74
75/*
76 * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
77 */
78
79/*
80 * NSC/Cyrix CPU configuration register indexes
81 */
82#define CX86_PCR0 0x20
83#define CX86_GCR 0xb8
84#define CX86_CCR0 0xc0
85#define CX86_CCR1 0xc1
86#define CX86_CCR2 0xc2
87#define CX86_CCR3 0xc3
88#define CX86_CCR4 0xe8
89#define CX86_CCR5 0xe9
90#define CX86_CCR6 0xea
91#define CX86_CCR7 0xeb
92#define CX86_PCR1 0xf0
93#define CX86_DIR0 0xfe
94#define CX86_DIR1 0xff
95#define CX86_ARR_BASE 0xc4
96#define CX86_RCR_BASE 0xdc
97
98#ifdef __KERNEL__
99#ifdef CONFIG_VM86 6#ifdef CONFIG_VM86
100#define X86_VM_MASK X86_EFLAGS_VM 7#define X86_VM_MASK X86_EFLAGS_VM
101#else 8#else
102#define X86_VM_MASK 0 /* No VM86 support */ 9#define X86_VM_MASK 0 /* No VM86 support */
103#endif 10#endif
104#endif
105
106#endif /* _ASM_X86_PROCESSOR_FLAGS_H */ 11#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
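
Only the kernel-side X86_VM_MASK wrapper stays in this header after the split. A hedged sketch of how it is typically consulted (compare v8086_mode()); the helper name is hypothetical:

    /* Hedged sketch: test the saved EFLAGS for vm86 mode. */
    static inline int in_vm86_sketch(struct pt_regs *regs)
    {
            return !!(regs->flags & X86_VM_MASK);
    }
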
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ad1fc8511674..888184b2fc85 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -178,8 +178,6 @@ static inline int hlt_works(int cpu)
178 178
179extern void cpu_detect(struct cpuinfo_x86 *c); 179extern void cpu_detect(struct cpuinfo_x86 *c);
180 180
181extern struct pt_regs *idle_regs(struct pt_regs *);
182
183extern void early_cpu_init(void); 181extern void early_cpu_init(void);
184extern void identify_boot_cpu(void); 182extern void identify_boot_cpu(void);
185extern void identify_secondary_cpu(struct cpuinfo_x86 *); 183extern void identify_secondary_cpu(struct cpuinfo_x86 *);
@@ -187,7 +185,7 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
187void print_cpu_msr(struct cpuinfo_x86 *); 185void print_cpu_msr(struct cpuinfo_x86 *);
188extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); 186extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
189extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 187extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
190extern unsigned short num_cache_leaves; 188extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
191 189
192extern void detect_extended_topology(struct cpuinfo_x86 *c); 190extern void detect_extended_topology(struct cpuinfo_x86 *c);
193extern void detect_ht(struct cpuinfo_x86 *c); 191extern void detect_ht(struct cpuinfo_x86 *c);
@@ -672,18 +670,29 @@ static inline void sync_core(void)
672{ 670{
673 int tmp; 671 int tmp;
674 672
675#if defined(CONFIG_M386) || defined(CONFIG_M486) 673#ifdef CONFIG_M486
676 if (boot_cpu_data.x86 < 5) 674 /*
677 /* There is no speculative execution. 675 * Do a CPUID if available, otherwise do a jump. The jump
678 * jmp is a barrier to prefetching. */ 676 * can conveniently enough be the jump around CPUID.
679 asm volatile("jmp 1f\n1:\n" ::: "memory"); 677 */
680 else 678 asm volatile("cmpl %2,%1\n\t"
679 "jl 1f\n\t"
680 "cpuid\n"
681 "1:"
682 : "=a" (tmp)
683 : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
684 : "ebx", "ecx", "edx", "memory");
685#else
686 /*
687 * CPUID is a barrier to speculative execution.
688 * Prefetched instructions are automatically
689 * invalidated when modified.
690 */
691 asm volatile("cpuid"
692 : "=a" (tmp)
693 : "0" (1)
694 : "ebx", "ecx", "edx", "memory");
681#endif 695#endif
682 /* cpuid is a barrier to speculative execution.
683 * Prefetched instructions are automatically
684 * invalidated when modified. */
685 asm volatile("cpuid" : "=a" (tmp) : "0" (1)
686 : "ebx", "ecx", "edx", "memory");
687} 696}
688 697
689static inline void __monitor(const void *eax, unsigned long ecx, 698static inline void __monitor(const void *eax, unsigned long ecx,
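
The rewritten sync_core() relies on CPUID unconditionally, with a runtime cpuid_level check only under CONFIG_M486. A stand-alone user-space sketch of the same serializing idiom, assuming GCC inline asm; not part of this patch:

    /* Hedged illustration: CPUID drains the pipeline and discards
     * prefetched instructions, which is what sync_core() depends on. */
    #include <stdio.h>

    static inline void cpuid_barrier(void)
    {
            unsigned int eax = 1, ebx, ecx, edx;

            asm volatile("cpuid"
                         : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                         : "0" (eax)
                         : "memory");
    }

    int main(void)
    {
            cpuid_barrier();
            puts("instruction stream serialized");
            return 0;
    }
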
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 19f16ebaf4fa..03ca442d8f0d 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -1,44 +1,12 @@
1#ifndef _ASM_X86_PTRACE_H 1#ifndef _ASM_X86_PTRACE_H
2#define _ASM_X86_PTRACE_H 2#define _ASM_X86_PTRACE_H
3 3
4#include <linux/compiler.h> /* For __user */
5#include <asm/ptrace-abi.h>
6#include <asm/processor-flags.h>
7
8#ifdef __KERNEL__
9#include <asm/segment.h> 4#include <asm/segment.h>
10#include <asm/page_types.h> 5#include <asm/page_types.h>
11#endif 6#include <uapi/asm/ptrace.h>
12 7
13#ifndef __ASSEMBLY__ 8#ifndef __ASSEMBLY__
14
15#ifdef __i386__ 9#ifdef __i386__
16/* this struct defines the way the registers are stored on the
17 stack during a system call. */
18
19#ifndef __KERNEL__
20
21struct pt_regs {
22 long ebx;
23 long ecx;
24 long edx;
25 long esi;
26 long edi;
27 long ebp;
28 long eax;
29 int xds;
30 int xes;
31 int xfs;
32 int xgs;
33 long orig_eax;
34 long eip;
35 int xcs;
36 long eflags;
37 long esp;
38 int xss;
39};
40
41#else /* __KERNEL__ */
42 10
43struct pt_regs { 11struct pt_regs {
44 unsigned long bx; 12 unsigned long bx;
@@ -60,42 +28,8 @@ struct pt_regs {
60 unsigned long ss; 28 unsigned long ss;
61}; 29};
62 30
63#endif /* __KERNEL__ */
64
65#else /* __i386__ */ 31#else /* __i386__ */
66 32
67#ifndef __KERNEL__
68
69struct pt_regs {
70 unsigned long r15;
71 unsigned long r14;
72 unsigned long r13;
73 unsigned long r12;
74 unsigned long rbp;
75 unsigned long rbx;
76/* arguments: non interrupts/non tracing syscalls only save up to here*/
77 unsigned long r11;
78 unsigned long r10;
79 unsigned long r9;
80 unsigned long r8;
81 unsigned long rax;
82 unsigned long rcx;
83 unsigned long rdx;
84 unsigned long rsi;
85 unsigned long rdi;
86 unsigned long orig_rax;
87/* end of arguments */
88/* cpu exception frame or undefined */
89 unsigned long rip;
90 unsigned long cs;
91 unsigned long eflags;
92 unsigned long rsp;
93 unsigned long ss;
94/* top of stack page */
95};
96
97#else /* __KERNEL__ */
98
99struct pt_regs { 33struct pt_regs {
100 unsigned long r15; 34 unsigned long r15;
101 unsigned long r14; 35 unsigned long r14;
@@ -124,12 +58,8 @@ struct pt_regs {
124/* top of stack page */ 58/* top of stack page */
125}; 59};
126 60
127#endif /* __KERNEL__ */
128#endif /* !__i386__ */ 61#endif /* !__i386__ */
129 62
130
131#ifdef __KERNEL__
132
133#include <linux/init.h> 63#include <linux/init.h>
134#ifdef CONFIG_PARAVIRT 64#ifdef CONFIG_PARAVIRT
135#include <asm/paravirt_types.h> 65#include <asm/paravirt_types.h>
@@ -239,6 +169,15 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
239{ 169{
240 if (unlikely(offset > MAX_REG_OFFSET)) 170 if (unlikely(offset > MAX_REG_OFFSET))
241 return 0; 171 return 0;
172#ifdef CONFIG_X86_32
173 /*
174 * Traps from the kernel do not save sp and ss.
175 * Use the helper function to retrieve sp.
176 */
177 if (offset == offsetof(struct pt_regs, sp) &&
178 regs->cs == __KERNEL_CS)
179 return kernel_stack_pointer(regs);
180#endif
242 return *(unsigned long *)((unsigned long)regs + offset); 181 return *(unsigned long *)((unsigned long)regs + offset);
243} 182}
244 183
@@ -292,8 +231,5 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
292extern int do_set_thread_area(struct task_struct *p, int idx, 231extern int do_set_thread_area(struct task_struct *p, int idx,
293 struct user_desc __user *info, int can_allocate); 232 struct user_desc __user *info, int can_allocate);
294 233
295#endif /* __KERNEL__ */
296
297#endif /* !__ASSEMBLY__ */ 234#endif /* !__ASSEMBLY__ */
298
299#endif /* _ASM_X86_PTRACE_H */ 235#endif /* _ASM_X86_PTRACE_H */
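
The new CONFIG_X86_32 branch makes offsetof(struct pt_regs, sp) usable even for traps taken in kernel mode. A hedged sketch of a kprobes/ftrace-style caller; fetch_stack_pointer() is a hypothetical name, not part of this patch:

    /* Hedged sketch: read a register by its byte offset into pt_regs.
     * On 32-bit kernel traps this now falls back to
     * kernel_stack_pointer() instead of reading a stale sp slot. */
    static unsigned long fetch_stack_pointer(struct pt_regs *regs)
    {
            return regs_get_register(regs, offsetof(struct pt_regs, sp));
    }
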
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index c59cc97fe6c1..109a9dd5d454 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -6,6 +6,7 @@
6 6
7/* some helper functions for xen and kvm pv clock sources */ 7/* some helper functions for xen and kvm pv clock sources */
8cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); 8cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
9u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
9void pvclock_set_flags(u8 flags); 10void pvclock_set_flags(u8 flags);
10unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); 11unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
11void pvclock_read_wallclock(struct pvclock_wall_clock *wall, 12void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
@@ -56,4 +57,50 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
56 return product; 57 return product;
57} 58}
58 59
60static __always_inline
61u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
62{
63 u64 delta = __native_read_tsc() - src->tsc_timestamp;
64 return pvclock_scale_delta(delta, src->tsc_to_system_mul,
65 src->tsc_shift);
66}
67
68static __always_inline
69unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
70 cycle_t *cycles, u8 *flags)
71{
72 unsigned version;
73 cycle_t ret, offset;
74 u8 ret_flags;
75
76 version = src->version;
77 /* Note: emulated platforms which do not advertise SSE2 support
78 * result in kvmclock not using the necessary RDTSC barriers.
79 * Without barriers, it is possible that RDTSC instruction reads from
80 * the time stamp counter outside rdtsc_barrier protected section
81 * below, resulting in violation of monotonicity.
82 */
83 rdtsc_barrier();
84 offset = pvclock_get_nsec_offset(src);
85 ret = src->system_time + offset;
86 ret_flags = src->flags;
87 rdtsc_barrier();
88
89 *cycles = ret;
90 *flags = ret_flags;
91 return version;
92}
93
94struct pvclock_vsyscall_time_info {
95 struct pvclock_vcpu_time_info pvti;
96 u32 migrate_count;
97} __attribute__((__aligned__(SMP_CACHE_BYTES)));
98
99#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
100#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
101
102int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
103 int size);
104struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
105
59#endif /* _ASM_X86_PVCLOCK_H */ 106#endif /* _ASM_X86_PVCLOCK_H */
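
__pvclock_read_cycles() hands back the version it sampled; the caller is expected to retry, seqlock-style, until the version is even and unchanged. A minimal sketch of that loop; not part of this patch:

    /* Hedged sketch of the retry loop wrapped around the new helper. */
    static cycle_t pvclock_read_sketch(struct pvclock_vcpu_time_info *src)
    {
            unsigned version;
            cycle_t cycles;
            u8 flags;

            do {
                    version = __pvclock_read_cycles(src, &cycles, &flags);
            } while ((src->version & 1) || version != src->version);

            return cycles;
    }
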
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index d0f19f9fb846..b7bf3505e1ec 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -1,7 +1,8 @@
1#ifndef _ASM_X86_SETUP_H 1#ifndef _ASM_X86_SETUP_H
2#define _ASM_X86_SETUP_H 2#define _ASM_X86_SETUP_H
3 3
4#ifdef __KERNEL__ 4#include <uapi/asm/setup.h>
5
5 6
6#define COMMAND_LINE_SIZE 2048 7#define COMMAND_LINE_SIZE 2048
7 8
@@ -123,6 +124,4 @@ void __init x86_64_start_reservations(char *real_mode_data);
123 .size .brk.name,.-1b; \ 124 .size .brk.name,.-1b; \
124 .popsection 125 .popsection
125#endif /* __ASSEMBLY__ */ 126#endif /* __ASSEMBLY__ */
126#endif /* __KERNEL__ */
127
128#endif /* _ASM_X86_SETUP_H */ 127#endif /* _ASM_X86_SETUP_H */
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 5ca71c065eef..9dfce4e0417d 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -1,104 +1,9 @@
1#ifndef _ASM_X86_SIGCONTEXT_H 1#ifndef _ASM_X86_SIGCONTEXT_H
2#define _ASM_X86_SIGCONTEXT_H 2#define _ASM_X86_SIGCONTEXT_H
3 3
4#include <linux/compiler.h> 4#include <uapi/asm/sigcontext.h>
5#include <linux/types.h>
6
7#define FP_XSTATE_MAGIC1 0x46505853U
8#define FP_XSTATE_MAGIC2 0x46505845U
9#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
10
11/*
12 * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
13 * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
 14 * are used to extend the fpstate pointer in the sigcontext, which now
15 * includes the extended state information along with fpstate information.
16 *
17 * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
18 * area and FP_XSTATE_MAGIC2 at the end of memory layout
19 * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
20 * extended state information in the memory layout pointed by the fpstate
21 * pointer in sigcontext.
22 */
23struct _fpx_sw_bytes {
24 __u32 magic1; /* FP_XSTATE_MAGIC1 */
25 __u32 extended_size; /* total size of the layout referred by
26 * fpstate pointer in the sigcontext.
27 */
28 __u64 xstate_bv;
29 /* feature bit mask (including fp/sse/extended
30 * state) that is present in the memory
31 * layout.
32 */
33 __u32 xstate_size; /* actual xsave state size, based on the
34 * features saved in the layout.
35 * 'extended_size' will be greater than
36 * 'xstate_size'.
37 */
38 __u32 padding[7]; /* for future use. */
39};
40 5
41#ifdef __i386__ 6#ifdef __i386__
42/*
43 * As documented in the iBCS2 standard..
44 *
45 * The first part of "struct _fpstate" is just the normal i387
46 * hardware setup, the extra "status" word is used to save the
47 * coprocessor status word before entering the handler.
48 *
49 * Pentium III FXSR, SSE support
50 * Gareth Hughes <gareth@valinux.com>, May 2000
51 *
52 * The FPU state data structure has had to grow to accommodate the
53 * extended FPU state required by the Streaming SIMD Extensions.
54 * There is no documented standard to accomplish this at the moment.
55 */
56struct _fpreg {
57 unsigned short significand[4];
58 unsigned short exponent;
59};
60
61struct _fpxreg {
62 unsigned short significand[4];
63 unsigned short exponent;
64 unsigned short padding[3];
65};
66
67struct _xmmreg {
68 unsigned long element[4];
69};
70
71struct _fpstate {
72 /* Regular FPU environment */
73 unsigned long cw;
74 unsigned long sw;
75 unsigned long tag;
76 unsigned long ipoff;
77 unsigned long cssel;
78 unsigned long dataoff;
79 unsigned long datasel;
80 struct _fpreg _st[8];
81 unsigned short status;
82 unsigned short magic; /* 0xffff = regular FPU data only */
83
84 /* FXSR FPU environment */
85 unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */
86 unsigned long mxcsr;
87 unsigned long reserved;
88 struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
89 struct _xmmreg _xmm[8];
90 unsigned long padding1[44];
91
92 union {
93 unsigned long padding2[12];
94 struct _fpx_sw_bytes sw_reserved; /* represents the extended
95 * state info */
96 };
97};
98
99#define X86_FXSR_MAGIC 0x0000
100
101#ifdef __KERNEL__
102struct sigcontext { 7struct sigcontext {
103 unsigned short gs, __gsh; 8 unsigned short gs, __gsh;
104 unsigned short fs, __fsh; 9 unsigned short fs, __fsh;
@@ -131,62 +36,7 @@ struct sigcontext {
131 unsigned long oldmask; 36 unsigned long oldmask;
132 unsigned long cr2; 37 unsigned long cr2;
133}; 38};
134#else /* __KERNEL__ */
135/*
136 * User-space might still rely on the old definition:
137 */
138struct sigcontext {
139 unsigned short gs, __gsh;
140 unsigned short fs, __fsh;
141 unsigned short es, __esh;
142 unsigned short ds, __dsh;
143 unsigned long edi;
144 unsigned long esi;
145 unsigned long ebp;
146 unsigned long esp;
147 unsigned long ebx;
148 unsigned long edx;
149 unsigned long ecx;
150 unsigned long eax;
151 unsigned long trapno;
152 unsigned long err;
153 unsigned long eip;
154 unsigned short cs, __csh;
155 unsigned long eflags;
156 unsigned long esp_at_signal;
157 unsigned short ss, __ssh;
158 struct _fpstate __user *fpstate;
159 unsigned long oldmask;
160 unsigned long cr2;
161};
162#endif /* !__KERNEL__ */
163
164#else /* __i386__ */ 39#else /* __i386__ */
165
166/* FXSAVE frame */
167/* Note: reserved1/2 may someday contain valuable data. Always save/restore
168 them when you change signal frames. */
169struct _fpstate {
170 __u16 cwd;
171 __u16 swd;
172 __u16 twd; /* Note this is not the same as the
173 32bit/x87/FSAVE twd */
174 __u16 fop;
175 __u64 rip;
176 __u64 rdp;
177 __u32 mxcsr;
178 __u32 mxcsr_mask;
179 __u32 st_space[32]; /* 8*16 bytes for each FP-reg */
180 __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */
181 __u32 reserved2[12];
182 union {
183 __u32 reserved3[12];
184 struct _fpx_sw_bytes sw_reserved; /* represents the extended
185 * state information */
186 };
187};
188
189#ifdef __KERNEL__
190struct sigcontext { 40struct sigcontext {
191 unsigned long r8; 41 unsigned long r8;
192 unsigned long r9; 42 unsigned long r9;
@@ -225,69 +75,5 @@ struct sigcontext {
225 void __user *fpstate; /* zero when no FPU/extended context */ 75 void __user *fpstate; /* zero when no FPU/extended context */
226 unsigned long reserved1[8]; 76 unsigned long reserved1[8];
227}; 77};
228#else /* __KERNEL__ */
229/*
230 * User-space might still rely on the old definition:
231 */
232struct sigcontext {
233 __u64 r8;
234 __u64 r9;
235 __u64 r10;
236 __u64 r11;
237 __u64 r12;
238 __u64 r13;
239 __u64 r14;
240 __u64 r15;
241 __u64 rdi;
242 __u64 rsi;
243 __u64 rbp;
244 __u64 rbx;
245 __u64 rdx;
246 __u64 rax;
247 __u64 rcx;
248 __u64 rsp;
249 __u64 rip;
250 __u64 eflags; /* RFLAGS */
251 __u16 cs;
252 __u16 gs;
253 __u16 fs;
254 __u16 __pad0;
255 __u64 err;
256 __u64 trapno;
257 __u64 oldmask;
258 __u64 cr2;
259 struct _fpstate __user *fpstate; /* zero when no FPU context */
260#ifdef __ILP32__
261 __u32 __fpstate_pad;
262#endif
263 __u64 reserved1[8];
264};
265#endif /* !__KERNEL__ */
266
267#endif /* !__i386__ */ 78#endif /* !__i386__ */
268
269struct _xsave_hdr {
270 __u64 xstate_bv;
271 __u64 reserved1[2];
272 __u64 reserved2[5];
273};
274
275struct _ymmh_state {
276 /* 16 * 16 bytes for each YMMH-reg */
277 __u32 ymmh_space[64];
278};
279
280/*
281 * Extended state pointed by the fpstate pointer in the sigcontext.
282 * In addition to the fpstate, information encoded in the xstate_hdr
283 * indicates the presence of other extended state information
284 * supported by the processor and OS.
285 */
286struct _xstate {
287 struct _fpstate fpstate;
288 struct _xsave_hdr xstate_hdr;
289 struct _ymmh_state ymmh;
290 /* new processor state extensions go here */
291};
292
293#endif /* _ASM_X86_SIGCONTEXT_H */ 79#endif /* _ASM_X86_SIGCONTEXT_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 323973f4abf1..216bf364a7e7 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -2,14 +2,6 @@
2#define _ASM_X86_SIGNAL_H 2#define _ASM_X86_SIGNAL_H
3 3
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5#include <linux/types.h>
6#include <linux/time.h>
7#include <linux/compiler.h>
8
9/* Avoid too many header ordering problems. */
10struct siginfo;
11
12#ifdef __KERNEL__
13#include <linux/linkage.h> 5#include <linux/linkage.h>
14 6
15/* Most things should be clean enough to redefine this at will, if care 7/* Most things should be clean enough to redefine this at will, if care
@@ -35,102 +27,11 @@ typedef struct {
35typedef sigset_t compat_sigset_t; 27typedef sigset_t compat_sigset_t;
36#endif 28#endif
37 29
38#else
39/* Here we must cater to libcs that poke about in kernel headers. */
40
41#define NSIG 32
42typedef unsigned long sigset_t;
43
44#endif /* __KERNEL__ */
45#endif /* __ASSEMBLY__ */ 30#endif /* __ASSEMBLY__ */
46 31#include <uapi/asm/signal.h>
47#define SIGHUP 1
48#define SIGINT 2
49#define SIGQUIT 3
50#define SIGILL 4
51#define SIGTRAP 5
52#define SIGABRT 6
53#define SIGIOT 6
54#define SIGBUS 7
55#define SIGFPE 8
56#define SIGKILL 9
57#define SIGUSR1 10
58#define SIGSEGV 11
59#define SIGUSR2 12
60#define SIGPIPE 13
61#define SIGALRM 14
62#define SIGTERM 15
63#define SIGSTKFLT 16
64#define SIGCHLD 17
65#define SIGCONT 18
66#define SIGSTOP 19
67#define SIGTSTP 20
68#define SIGTTIN 21
69#define SIGTTOU 22
70#define SIGURG 23
71#define SIGXCPU 24
72#define SIGXFSZ 25
73#define SIGVTALRM 26
74#define SIGPROF 27
75#define SIGWINCH 28
76#define SIGIO 29
77#define SIGPOLL SIGIO
78/*
79#define SIGLOST 29
80*/
81#define SIGPWR 30
82#define SIGSYS 31
83#define SIGUNUSED 31
84
85/* These should not be considered constants from userland. */
86#define SIGRTMIN 32
87#define SIGRTMAX _NSIG
88
89/*
90 * SA_FLAGS values:
91 *
92 * SA_ONSTACK indicates that a registered stack_t will be used.
93 * SA_RESTART flag to get restarting signals (which were the default long ago)
94 * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
95 * SA_RESETHAND clears the handler when the signal is delivered.
96 * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
97 * SA_NODEFER prevents the current signal from being masked in the handler.
98 *
99 * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
100 * Unix names RESETHAND and NODEFER respectively.
101 */
102#define SA_NOCLDSTOP 0x00000001u
103#define SA_NOCLDWAIT 0x00000002u
104#define SA_SIGINFO 0x00000004u
105#define SA_ONSTACK 0x08000000u
106#define SA_RESTART 0x10000000u
107#define SA_NODEFER 0x40000000u
108#define SA_RESETHAND 0x80000000u
109
110#define SA_NOMASK SA_NODEFER
111#define SA_ONESHOT SA_RESETHAND
112
113#define SA_RESTORER 0x04000000
114
115/*
116 * sigaltstack controls
117 */
118#define SS_ONSTACK 1
119#define SS_DISABLE 2
120
121#define MINSIGSTKSZ 2048
122#define SIGSTKSZ 8192
123
124#include <asm-generic/signal-defs.h>
125
126#ifndef __ASSEMBLY__ 32#ifndef __ASSEMBLY__
127
128# ifdef __KERNEL__
129extern void do_notify_resume(struct pt_regs *, void *, __u32); 33extern void do_notify_resume(struct pt_regs *, void *, __u32);
130# endif /* __KERNEL__ */
131
132#ifdef __i386__ 34#ifdef __i386__
133# ifdef __KERNEL__
134struct old_sigaction { 35struct old_sigaction {
135 __sighandler_t sa_handler; 36 __sighandler_t sa_handler;
136 old_sigset_t sa_mask; 37 old_sigset_t sa_mask;
@@ -149,45 +50,8 @@ struct k_sigaction {
149 struct sigaction sa; 50 struct sigaction sa;
150}; 51};
151 52
152# else /* __KERNEL__ */
153/* Here we must cater to libcs that poke about in kernel headers. */
154
155struct sigaction {
156 union {
157 __sighandler_t _sa_handler;
158 void (*_sa_sigaction)(int, struct siginfo *, void *);
159 } _u;
160 sigset_t sa_mask;
161 unsigned long sa_flags;
162 void (*sa_restorer)(void);
163};
164
165#define sa_handler _u._sa_handler
166#define sa_sigaction _u._sa_sigaction
167
168# endif /* ! __KERNEL__ */
169#else /* __i386__ */ 53#else /* __i386__ */
170
171struct sigaction {
172 __sighandler_t sa_handler;
173 unsigned long sa_flags;
174 __sigrestore_t sa_restorer;
175 sigset_t sa_mask; /* mask last for extensibility */
176};
177
178struct k_sigaction {
179 struct sigaction sa;
180};
181
182#endif /* !__i386__ */ 54#endif /* !__i386__ */
183
184typedef struct sigaltstack {
185 void __user *ss_sp;
186 int ss_flags;
187 size_t ss_size;
188} stack_t;
189
190#ifdef __KERNEL__
191#include <asm/sigcontext.h> 55#include <asm/sigcontext.h>
192 56
193#ifdef __i386__ 57#ifdef __i386__
@@ -260,9 +124,5 @@ struct pt_regs;
260 124
261#endif /* !__i386__ */ 125#endif /* !__i386__ */
262 126
263#define ptrace_signal_deliver(regs, cookie) do { } while (0)
264
265#endif /* __KERNEL__ */
266#endif /* __ASSEMBLY__ */ 127#endif /* __ASSEMBLY__ */
267
268#endif /* _ASM_X86_SIGNAL_H */ 128#endif /* _ASM_X86_SIGNAL_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4f19a1526037..b073aaea747c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -166,6 +166,7 @@ void native_send_call_func_ipi(const struct cpumask *mask);
166void native_send_call_func_single_ipi(int cpu); 166void native_send_call_func_single_ipi(int cpu);
167void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); 167void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
168 168
169void smp_store_boot_cpu_info(void);
169void smp_store_cpu_info(int id); 170void smp_store_cpu_info(int id);
170#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) 171#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
171 172
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cdf5674dd23a..6136d99f537b 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -1,134 +1,8 @@
1#ifndef __SVM_H 1#ifndef __SVM_H
2#define __SVM_H 2#define __SVM_H
3 3
4#define SVM_EXIT_READ_CR0 0x000 4#include <uapi/asm/svm.h>
5#define SVM_EXIT_READ_CR3 0x003 5
6#define SVM_EXIT_READ_CR4 0x004
7#define SVM_EXIT_READ_CR8 0x008
8#define SVM_EXIT_WRITE_CR0 0x010
9#define SVM_EXIT_WRITE_CR3 0x013
10#define SVM_EXIT_WRITE_CR4 0x014
11#define SVM_EXIT_WRITE_CR8 0x018
12#define SVM_EXIT_READ_DR0 0x020
13#define SVM_EXIT_READ_DR1 0x021
14#define SVM_EXIT_READ_DR2 0x022
15#define SVM_EXIT_READ_DR3 0x023
16#define SVM_EXIT_READ_DR4 0x024
17#define SVM_EXIT_READ_DR5 0x025
18#define SVM_EXIT_READ_DR6 0x026
19#define SVM_EXIT_READ_DR7 0x027
20#define SVM_EXIT_WRITE_DR0 0x030
21#define SVM_EXIT_WRITE_DR1 0x031
22#define SVM_EXIT_WRITE_DR2 0x032
23#define SVM_EXIT_WRITE_DR3 0x033
24#define SVM_EXIT_WRITE_DR4 0x034
25#define SVM_EXIT_WRITE_DR5 0x035
26#define SVM_EXIT_WRITE_DR6 0x036
27#define SVM_EXIT_WRITE_DR7 0x037
28#define SVM_EXIT_EXCP_BASE 0x040
29#define SVM_EXIT_INTR 0x060
30#define SVM_EXIT_NMI 0x061
31#define SVM_EXIT_SMI 0x062
32#define SVM_EXIT_INIT 0x063
33#define SVM_EXIT_VINTR 0x064
34#define SVM_EXIT_CR0_SEL_WRITE 0x065
35#define SVM_EXIT_IDTR_READ 0x066
36#define SVM_EXIT_GDTR_READ 0x067
37#define SVM_EXIT_LDTR_READ 0x068
38#define SVM_EXIT_TR_READ 0x069
39#define SVM_EXIT_IDTR_WRITE 0x06a
40#define SVM_EXIT_GDTR_WRITE 0x06b
41#define SVM_EXIT_LDTR_WRITE 0x06c
42#define SVM_EXIT_TR_WRITE 0x06d
43#define SVM_EXIT_RDTSC 0x06e
44#define SVM_EXIT_RDPMC 0x06f
45#define SVM_EXIT_PUSHF 0x070
46#define SVM_EXIT_POPF 0x071
47#define SVM_EXIT_CPUID 0x072
48#define SVM_EXIT_RSM 0x073
49#define SVM_EXIT_IRET 0x074
50#define SVM_EXIT_SWINT 0x075
51#define SVM_EXIT_INVD 0x076
52#define SVM_EXIT_PAUSE 0x077
53#define SVM_EXIT_HLT 0x078
54#define SVM_EXIT_INVLPG 0x079
55#define SVM_EXIT_INVLPGA 0x07a
56#define SVM_EXIT_IOIO 0x07b
57#define SVM_EXIT_MSR 0x07c
58#define SVM_EXIT_TASK_SWITCH 0x07d
59#define SVM_EXIT_FERR_FREEZE 0x07e
60#define SVM_EXIT_SHUTDOWN 0x07f
61#define SVM_EXIT_VMRUN 0x080
62#define SVM_EXIT_VMMCALL 0x081
63#define SVM_EXIT_VMLOAD 0x082
64#define SVM_EXIT_VMSAVE 0x083
65#define SVM_EXIT_STGI 0x084
66#define SVM_EXIT_CLGI 0x085
67#define SVM_EXIT_SKINIT 0x086
68#define SVM_EXIT_RDTSCP 0x087
69#define SVM_EXIT_ICEBP 0x088
70#define SVM_EXIT_WBINVD 0x089
71#define SVM_EXIT_MONITOR 0x08a
72#define SVM_EXIT_MWAIT 0x08b
73#define SVM_EXIT_MWAIT_COND 0x08c
74#define SVM_EXIT_XSETBV 0x08d
75#define SVM_EXIT_NPF 0x400
76
77#define SVM_EXIT_ERR -1
78
79#define SVM_EXIT_REASONS \
80 { SVM_EXIT_READ_CR0, "read_cr0" }, \
81 { SVM_EXIT_READ_CR3, "read_cr3" }, \
82 { SVM_EXIT_READ_CR4, "read_cr4" }, \
83 { SVM_EXIT_READ_CR8, "read_cr8" }, \
84 { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
85 { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
86 { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
87 { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
88 { SVM_EXIT_READ_DR0, "read_dr0" }, \
89 { SVM_EXIT_READ_DR1, "read_dr1" }, \
90 { SVM_EXIT_READ_DR2, "read_dr2" }, \
91 { SVM_EXIT_READ_DR3, "read_dr3" }, \
92 { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
93 { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
94 { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
95 { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
96 { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
97 { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
98 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
99 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
100 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
101 { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
102 { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
103 { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
104 { SVM_EXIT_INTR, "interrupt" }, \
105 { SVM_EXIT_NMI, "nmi" }, \
106 { SVM_EXIT_SMI, "smi" }, \
107 { SVM_EXIT_INIT, "init" }, \
108 { SVM_EXIT_VINTR, "vintr" }, \
109 { SVM_EXIT_CPUID, "cpuid" }, \
110 { SVM_EXIT_INVD, "invd" }, \
111 { SVM_EXIT_HLT, "hlt" }, \
112 { SVM_EXIT_INVLPG, "invlpg" }, \
113 { SVM_EXIT_INVLPGA, "invlpga" }, \
114 { SVM_EXIT_IOIO, "io" }, \
115 { SVM_EXIT_MSR, "msr" }, \
116 { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
117 { SVM_EXIT_SHUTDOWN, "shutdown" }, \
118 { SVM_EXIT_VMRUN, "vmrun" }, \
119 { SVM_EXIT_VMMCALL, "hypercall" }, \
120 { SVM_EXIT_VMLOAD, "vmload" }, \
121 { SVM_EXIT_VMSAVE, "vmsave" }, \
122 { SVM_EXIT_STGI, "stgi" }, \
123 { SVM_EXIT_CLGI, "clgi" }, \
124 { SVM_EXIT_SKINIT, "skinit" }, \
125 { SVM_EXIT_WBINVD, "wbinvd" }, \
126 { SVM_EXIT_MONITOR, "monitor" }, \
127 { SVM_EXIT_MWAIT, "mwait" }, \
128 { SVM_EXIT_XSETBV, "xsetbv" }, \
129 { SVM_EXIT_NPF, "npf" }
130
131#ifdef __KERNEL__
132 6
133enum { 7enum {
134 INTERCEPT_INTR, 8 INTERCEPT_INTR,
@@ -403,5 +277,3 @@ struct __attribute__ ((__packed__)) vmcb {
403#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf" 277#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
404 278
405#endif 279#endif
406
407#endif
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index a9a8cf3da49d..c76fae4d90be 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -54,8 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
54asmlinkage long sys32_personality(unsigned long); 54asmlinkage long sys32_personality(unsigned long);
55asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); 55asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
56 56
57asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
58
59long sys32_lseek(unsigned int, int, unsigned int); 57long sys32_lseek(unsigned int, int, unsigned int);
60long sys32_kill(int, int); 58long sys32_kill(int, int);
61long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); 59long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 2be0b880417e..2f8374718aa3 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -20,15 +20,6 @@
20asmlinkage long sys_ioperm(unsigned long, unsigned long, int); 20asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
21long sys_iopl(unsigned int, struct pt_regs *); 21long sys_iopl(unsigned int, struct pt_regs *);
22 22
23/* kernel/process.c */
24int sys_fork(struct pt_regs *);
25int sys_vfork(struct pt_regs *);
26long sys_execve(const char __user *,
27 const char __user *const __user *,
28 const char __user *const __user *);
29long sys_clone(unsigned long, unsigned long, void __user *,
30 void __user *, struct pt_regs *);
31
32/* kernel/ldt.c */ 23/* kernel/ldt.c */
33asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); 24asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
34 25
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 74a44333545a..0fee48e279cc 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -56,10 +56,7 @@ static inline void __flush_tlb_all(void)
56 56
57static inline void __flush_tlb_one(unsigned long addr) 57static inline void __flush_tlb_one(unsigned long addr)
58{ 58{
59 if (cpu_has_invlpg)
60 __flush_tlb_single(addr); 59 __flush_tlb_single(addr);
61 else
62 __flush_tlb();
63} 60}
64 61
65#define TLB_FLUSH_ALL -1UL 62#define TLB_FLUSH_ALL -1UL
diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h
new file mode 100644
index 000000000000..beab86cc282d
--- /dev/null
+++ b/arch/x86/include/asm/trace_clock.h
@@ -0,0 +1,20 @@
1#ifndef _ASM_X86_TRACE_CLOCK_H
2#define _ASM_X86_TRACE_CLOCK_H
3
4#include <linux/compiler.h>
5#include <linux/types.h>
6
7#ifdef CONFIG_X86_TSC
8
9extern u64 notrace trace_clock_x86_tsc(void);
10
11# define ARCH_TRACE_CLOCKS \
12 { trace_clock_x86_tsc, "x86-tsc", .in_ns = 0 },
13
14#else /* !CONFIG_X86_TSC */
15
16#define ARCH_TRACE_CLOCKS
17
18#endif
19
20#endif /* _ASM_X86_TRACE_CLOCK_H */
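
The clock is registered with .in_ns = 0, i.e. timestamps taken with it are raw TSC counts rather than nanoseconds. A user-space illustration of what such raw counts look like, assuming a compiler that provides __rdtsc() via <x86intrin.h>; not part of this patch:

    /* Hedged illustration: raw TSC deltas, not nanoseconds. */
    #include <stdio.h>
    #include <stdint.h>
    #include <x86intrin.h>

    int main(void)
    {
            uint64_t t0 = __rdtsc();
            uint64_t t1 = __rdtsc();

            printf("delta: %llu TSC counts (not ns)\n",
                   (unsigned long long)(t1 - t0));
            return 0;
    }
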
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 7ccf8d131535..1709801d18ec 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -237,8 +237,6 @@ extern void __put_user_2(void);
237extern void __put_user_4(void); 237extern void __put_user_4(void);
238extern void __put_user_8(void); 238extern void __put_user_8(void);
239 239
240#ifdef CONFIG_X86_WP_WORKS_OK
241
242/** 240/**
243 * put_user: - Write a simple value into user space. 241 * put_user: - Write a simple value into user space.
244 * @x: Value to copy to user space. 242 * @x: Value to copy to user space.
@@ -326,29 +324,6 @@ do { \
326 } \ 324 } \
327} while (0) 325} while (0)
328 326
329#else
330
331#define __put_user_size(x, ptr, size, retval, errret) \
332do { \
333 __typeof__(*(ptr))__pus_tmp = x; \
334 retval = 0; \
335 \
336 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
337 retval = errret; \
338} while (0)
339
340#define put_user(x, ptr) \
341({ \
342 int __ret_pu; \
343 __typeof__(*(ptr))__pus_tmp = x; \
344 __ret_pu = 0; \
345 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \
346 sizeof(*(ptr))) != 0)) \
347 __ret_pu = -EFAULT; \
348 __ret_pu; \
349})
350#endif
351
352#ifdef CONFIG_X86_32 327#ifdef CONFIG_X86_32
353#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() 328#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad()
354#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() 329#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad()
@@ -543,29 +518,12 @@ struct __large_struct { unsigned long buf[100]; };
543 (x) = (__force __typeof__(*(ptr)))__gue_val; \ 518 (x) = (__force __typeof__(*(ptr)))__gue_val; \
544} while (0) 519} while (0)
545 520
546#ifdef CONFIG_X86_WP_WORKS_OK
547
548#define put_user_try uaccess_try 521#define put_user_try uaccess_try
549#define put_user_catch(err) uaccess_catch(err) 522#define put_user_catch(err) uaccess_catch(err)
550 523
551#define put_user_ex(x, ptr) \ 524#define put_user_ex(x, ptr) \
552 __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) 525 __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
553 526
554#else /* !CONFIG_X86_WP_WORKS_OK */
555
556#define put_user_try do { \
557 int __uaccess_err = 0;
558
559#define put_user_catch(err) \
560 (err) |= __uaccess_err; \
561} while (0)
562
563#define put_user_ex(x, ptr) do { \
564 __uaccess_err |= __put_user(x, ptr); \
565} while (0)
566
567#endif /* CONFIG_X86_WP_WORKS_OK */
568
569extern unsigned long 527extern unsigned long
570copy_from_user_nmi(void *to, const void __user *from, unsigned long n); 528copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
571extern __must_check long 529extern __must_check long
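
With the !CONFIG_X86_WP_WORKS_OK fallback gone, put_user_try/put_user_ex/put_user_catch have a single, exception-table based implementation. A hedged sketch of the usage pattern (as in the signal frame setup code); struct sketch_frame and its fields are hypothetical:

    /* Hedged sketch: batch several user stores, collecting one error. */
    static int fill_frame_sketch(struct sketch_frame __user *frame, int sig)
    {
            int err = 0;

            put_user_try {
                    put_user_ex(sig, &frame->sig);
                    put_user_ex(0, &frame->extra);
            } put_user_catch(err);

            return err;
    }
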
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 16f3fc6ebf2e..1003e69a40d9 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -1,10 +1,8 @@
1#ifndef _ASM_X86_UNISTD_H 1#ifndef _ASM_X86_UNISTD_H
2#define _ASM_X86_UNISTD_H 1 2#define _ASM_X86_UNISTD_H 1
3 3
4/* x32 syscall flag bit */ 4#include <uapi/asm/unistd.h>
5#define __X32_SYSCALL_BIT 0x40000000
6 5
7#ifdef __KERNEL__
8 6
9# ifdef CONFIG_X86_X32_ABI 7# ifdef CONFIG_X86_X32_ABI
10# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) 8# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
@@ -51,6 +49,9 @@
51# define __ARCH_WANT_SYS_UTIME 49# define __ARCH_WANT_SYS_UTIME
52# define __ARCH_WANT_SYS_WAITPID 50# define __ARCH_WANT_SYS_WAITPID
53# define __ARCH_WANT_SYS_EXECVE 51# define __ARCH_WANT_SYS_EXECVE
52# define __ARCH_WANT_SYS_FORK
53# define __ARCH_WANT_SYS_VFORK
54# define __ARCH_WANT_SYS_CLONE
54 55
55/* 56/*
56 * "Conditional" syscalls 57 * "Conditional" syscalls
@@ -60,14 +61,4 @@
60 */ 61 */
61# define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") 62# define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
62 63
63#else
64# ifdef __i386__
65# include <asm/unistd_32.h>
66# elif defined(__ILP32__)
67# include <asm/unistd_x32.h>
68# else
69# include <asm/unistd_64.h>
70# endif
71#endif
72
73#endif /* _ASM_X86_UNISTD_H */ 64#endif /* _ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h
index f9303602fbc0..1d8de3f3feca 100644
--- a/arch/x86/include/asm/vm86.h
+++ b/arch/x86/include/asm/vm86.h
@@ -1,133 +1,9 @@
1#ifndef _ASM_X86_VM86_H 1#ifndef _ASM_X86_VM86_H
2#define _ASM_X86_VM86_H 2#define _ASM_X86_VM86_H
3 3
4/*
5 * I'm guessing at the VIF/VIP flag usage, but hope that this is how
6 * the Pentium uses them. Linux will return from vm86 mode when both
 7 * VIF and VIP are set.
8 *
9 * On a Pentium, we could probably optimize the virtual flags directly
10 * in the eflags register instead of doing it "by hand" in vflags...
11 *
12 * Linus
13 */
14
15#include <asm/processor-flags.h>
16
17#define BIOSSEG 0x0f000
18
19#define CPU_086 0
20#define CPU_186 1
21#define CPU_286 2
22#define CPU_386 3
23#define CPU_486 4
24#define CPU_586 5
25
26/*
27 * Return values for the 'vm86()' system call
28 */
29#define VM86_TYPE(retval) ((retval) & 0xff)
30#define VM86_ARG(retval) ((retval) >> 8)
31
32#define VM86_SIGNAL 0 /* return due to signal */
33#define VM86_UNKNOWN 1 /* unhandled GP fault
34 - IO-instruction or similar */
35#define VM86_INTx 2 /* int3/int x instruction (ARG = x) */
36#define VM86_STI 3 /* sti/popf/iret instruction enabled
37 virtual interrupts */
38
39/*
40 * Additional return values when invoking new vm86()
41 */
42#define VM86_PICRETURN 4 /* return due to pending PIC request */
43#define VM86_TRAP 6 /* return due to DOS-debugger request */
44
45/*
46 * function codes when invoking new vm86()
47 */
48#define VM86_PLUS_INSTALL_CHECK 0
49#define VM86_ENTER 1
50#define VM86_ENTER_NO_BYPASS 2
51#define VM86_REQUEST_IRQ 3
52#define VM86_FREE_IRQ 4
53#define VM86_GET_IRQ_BITS 5
54#define VM86_GET_AND_RESET_IRQ 6
55
56/*
57 * This is the stack-layout seen by the user space program when we have
58 * done a translation of "SAVE_ALL" from vm86 mode. The real kernel layout
59 * is 'kernel_vm86_regs' (see below).
60 */
61
62struct vm86_regs {
63/*
64 * normal regs, with special meaning for the segment descriptors..
65 */
66 long ebx;
67 long ecx;
68 long edx;
69 long esi;
70 long edi;
71 long ebp;
72 long eax;
73 long __null_ds;
74 long __null_es;
75 long __null_fs;
76 long __null_gs;
77 long orig_eax;
78 long eip;
79 unsigned short cs, __csh;
80 long eflags;
81 long esp;
82 unsigned short ss, __ssh;
83/*
84 * these are specific to v86 mode:
85 */
86 unsigned short es, __esh;
87 unsigned short ds, __dsh;
88 unsigned short fs, __fsh;
89 unsigned short gs, __gsh;
90};
91
92struct revectored_struct {
93 unsigned long __map[8]; /* 256 bits */
94};
95
96struct vm86_struct {
97 struct vm86_regs regs;
98 unsigned long flags;
99 unsigned long screen_bitmap;
100 unsigned long cpu_type;
101 struct revectored_struct int_revectored;
102 struct revectored_struct int21_revectored;
103};
104
105/*
106 * flags masks
107 */
108#define VM86_SCREEN_BITMAP 0x0001
109
110struct vm86plus_info_struct {
111 unsigned long force_return_for_pic:1;
112 unsigned long vm86dbg_active:1; /* for debugger */
113 unsigned long vm86dbg_TFpendig:1; /* for debugger */
114 unsigned long unused:28;
115 unsigned long is_vm86pus:1; /* for vm86 internal use */
116 unsigned char vm86dbg_intxxtab[32]; /* for debugger */
117};
118struct vm86plus_struct {
119 struct vm86_regs regs;
120 unsigned long flags;
121 unsigned long screen_bitmap;
122 unsigned long cpu_type;
123 struct revectored_struct int_revectored;
124 struct revectored_struct int21_revectored;
125 struct vm86plus_info_struct vm86plus;
126};
127
128#ifdef __KERNEL__
129 4
130#include <asm/ptrace.h> 5#include <asm/ptrace.h>
6#include <uapi/asm/vm86.h>
131 7
132/* 8/*
133 * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 9 * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86
@@ -203,6 +79,4 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
203 79
204 80#endif /* CONFIG_VM86 */
205 81
206#endif /* __KERNEL__ */
207
208 82#endif /* _ASM_X86_VM86_H */
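For context only (this is not part of the patch): user-space vm86 monitors decode the value returned by the vm86() call with the VM86_TYPE()/VM86_ARG() macros kept in this header. A minimal sketch, assuming 'ret' is the raw return value handed back by the syscall wrapper:

#include <stdio.h>
#include <asm/vm86.h>

/* Hypothetical helper: classify why vm86 mode was left. */
static void handle_vm86_return(int ret)
{
	switch (VM86_TYPE(ret)) {
	case VM86_SIGNAL:
		printf("returned due to a pending signal\n");
		break;
	case VM86_UNKNOWN:
		printf("unhandled GP fault (I/O instruction or similar)\n");
		break;
	case VM86_INTx:
		printf("software interrupt int 0x%02x\n", VM86_ARG(ret));
		break;
	case VM86_STI:
		printf("virtual interrupts re-enabled by sti/popf/iret\n");
		break;
	case VM86_PICRETURN:
		printf("pending PIC request\n");
		break;
	case VM86_TRAP:
		printf("DOS-debugger trap, argument %d\n", VM86_ARG(ret));
		break;
	}
}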
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 36ec21c36d68..235b49fa554b 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -1,6 +1,3 @@
1#ifndef VMX_H
2#define VMX_H
3
4 1/*
5 2 * vmx.h: VMX Architecture related definitions
6 3 * Copyright (c) 2004, Intel Corporation.
@@ -24,90 +21,12 @@
24 21 * Yaniv Kamay <yaniv@qumranet.com>
25 22 *
26 23 */
24#ifndef VMX_H
25#define VMX_H
27 26
28#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
29
30#define EXIT_REASON_EXCEPTION_NMI 0
31#define EXIT_REASON_EXTERNAL_INTERRUPT 1
32#define EXIT_REASON_TRIPLE_FAULT 2
33
34#define EXIT_REASON_PENDING_INTERRUPT 7
35#define EXIT_REASON_NMI_WINDOW 8
36#define EXIT_REASON_TASK_SWITCH 9
37#define EXIT_REASON_CPUID 10
38#define EXIT_REASON_HLT 12
39#define EXIT_REASON_INVD 13
40#define EXIT_REASON_INVLPG 14
41#define EXIT_REASON_RDPMC 15
42#define EXIT_REASON_RDTSC 16
43#define EXIT_REASON_VMCALL 18
44#define EXIT_REASON_VMCLEAR 19
45#define EXIT_REASON_VMLAUNCH 20
46#define EXIT_REASON_VMPTRLD 21
47#define EXIT_REASON_VMPTRST 22
48#define EXIT_REASON_VMREAD 23
49#define EXIT_REASON_VMRESUME 24
50#define EXIT_REASON_VMWRITE 25
51#define EXIT_REASON_VMOFF 26
52#define EXIT_REASON_VMON 27
53#define EXIT_REASON_CR_ACCESS 28
54#define EXIT_REASON_DR_ACCESS 29
55#define EXIT_REASON_IO_INSTRUCTION 30
56#define EXIT_REASON_MSR_READ 31
57#define EXIT_REASON_MSR_WRITE 32
58#define EXIT_REASON_INVALID_STATE 33
59#define EXIT_REASON_MWAIT_INSTRUCTION 36
60#define EXIT_REASON_MONITOR_INSTRUCTION 39
61#define EXIT_REASON_PAUSE_INSTRUCTION 40
62#define EXIT_REASON_MCE_DURING_VMENTRY 41
63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
64#define EXIT_REASON_APIC_ACCESS 44
65#define EXIT_REASON_EPT_VIOLATION 48
66#define EXIT_REASON_EPT_MISCONFIG 49
67#define EXIT_REASON_WBINVD 54
68#define EXIT_REASON_XSETBV 55
69#define EXIT_REASON_INVPCID 58
70
71#define VMX_EXIT_REASONS \
72 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
73 { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
74 { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
75 { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
76 { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
77 { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
78 { EXIT_REASON_CPUID, "CPUID" }, \
79 { EXIT_REASON_HLT, "HLT" }, \
80 { EXIT_REASON_INVLPG, "INVLPG" }, \
81 { EXIT_REASON_RDPMC, "RDPMC" }, \
82 { EXIT_REASON_RDTSC, "RDTSC" }, \
83 { EXIT_REASON_VMCALL, "VMCALL" }, \
84 { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
85 { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
86 { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
87 { EXIT_REASON_VMPTRST, "VMPTRST" }, \
88 { EXIT_REASON_VMREAD, "VMREAD" }, \
89 { EXIT_REASON_VMRESUME, "VMRESUME" }, \
90 { EXIT_REASON_VMWRITE, "VMWRITE" }, \
91 { EXIT_REASON_VMOFF, "VMOFF" }, \
92 { EXIT_REASON_VMON, "VMON" }, \
93 { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
94 { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
95 { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
96 { EXIT_REASON_MSR_READ, "MSR_READ" }, \
97 { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
98 { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
99 { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
100 { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
101 { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
102 { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
103 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
104 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
105 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
106 { EXIT_REASON_WBINVD, "WBINVD" }
107
108#ifdef __KERNEL__
109 27
110 28#include <linux/types.h>
29#include <uapi/asm/vmx.h>
111 30
112 31/*
113 32 * Definitions of Primary Processor-Based VM-Execution Controls.
@@ -445,8 +364,7 @@ enum vmcs_field {
445 364#define VMX_EPTP_WB_BIT (1ull << 14)
446 365#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
447 366#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
448 367#define VMX_EPT_AD_BIT (1ull << 21)
449#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
450 368#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
451 369#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
452 370
@@ -527,5 +445,3 @@ enum vm_instruction_error_number {
527 445};
528 446
529 447#endif
530
531#endif
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index eaea1d31f753..2a46ca720afc 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -1,20 +1,8 @@
1 1#ifndef _ASM_X86_VSYSCALL_H
2 2#define _ASM_X86_VSYSCALL_H
3 3
4enum vsyscall_num {
5 __NR_vgettimeofday,
6 __NR_vtime,
7 __NR_vgetcpu,
8};
9
10#define VSYSCALL_START (-10UL << 20)
11#define VSYSCALL_SIZE 1024
12#define VSYSCALL_END (-2UL << 20)
13#define VSYSCALL_MAPPED_PAGES 1
14#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
15
16#ifdef __KERNEL__
17 4#include <linux/seqlock.h>
5#include <uapi/asm/vsyscall.h>
18 6
19 7#define VGETCPU_RDTSCP 1
20 8#define VGETCPU_LSL 2
@@ -33,6 +21,24 @@ extern void map_vsyscall(void);
33 21 */
34 22extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
35 23
36#endif /* __KERNEL__ */
 24#ifdef CONFIG_X86_64
25
26#define VGETCPU_CPU_MASK 0xfff
27
28static inline unsigned int __getcpu(void)
29{
30 unsigned int p;
31
32 if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
33 /* Load per CPU data from RDTSCP */
34 native_read_tscp(&p);
35 } else {
36 /* Load per CPU data from GDT */
37 asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
38 }
39
40 return p;
41}
42#endif /* CONFIG_X86_64 */
37 43
38 44#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 54d52ff1304a..fd9cb7695b5f 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -63,6 +63,7 @@ DEFINE_GUEST_HANDLE(void);
63 63DEFINE_GUEST_HANDLE(uint64_t);
64 64DEFINE_GUEST_HANDLE(uint32_t);
65 65DEFINE_GUEST_HANDLE(xen_pfn_t);
66DEFINE_GUEST_HANDLE(xen_ulong_t);
66 67#endif
67 68
68 69#ifndef HYPERVISOR_VIRT_START
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index 83b6e9a0dce4..09409c44f9a5 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -4,3 +4,61 @@ include include/uapi/asm-generic/Kbuild.asm
4 4genhdr-y += unistd_32.h
5 5genhdr-y += unistd_64.h
6 6genhdr-y += unistd_x32.h
7header-y += a.out.h
8header-y += auxvec.h
9header-y += bitsperlong.h
10header-y += boot.h
11header-y += bootparam.h
12header-y += byteorder.h
13header-y += debugreg.h
14header-y += e820.h
15header-y += errno.h
16header-y += fcntl.h
17header-y += hw_breakpoint.h
18header-y += hyperv.h
19header-y += ioctl.h
20header-y += ioctls.h
21header-y += ipcbuf.h
22header-y += ist.h
23header-y += kvm.h
24header-y += kvm_para.h
25header-y += ldt.h
26header-y += mce.h
27header-y += mman.h
28header-y += msgbuf.h
29header-y += msr-index.h
30header-y += msr.h
31header-y += mtrr.h
32header-y += param.h
33header-y += perf_regs.h
34header-y += poll.h
35header-y += posix_types.h
36header-y += posix_types_32.h
37header-y += posix_types_64.h
38header-y += posix_types_x32.h
39header-y += prctl.h
40header-y += processor-flags.h
41header-y += ptrace-abi.h
42header-y += ptrace.h
43header-y += resource.h
44header-y += sembuf.h
45header-y += setup.h
46header-y += shmbuf.h
47header-y += sigcontext.h
48header-y += sigcontext32.h
49header-y += siginfo.h
50header-y += signal.h
51header-y += socket.h
52header-y += sockios.h
53header-y += stat.h
54header-y += statfs.h
55header-y += svm.h
56header-y += swab.h
57header-y += termbits.h
58header-y += termios.h
59header-y += types.h
60header-y += ucontext.h
61header-y += unistd.h
62header-y += vm86.h
63header-y += vmx.h
64header-y += vsyscall.h
diff --git a/arch/x86/include/asm/a.out.h b/arch/x86/include/uapi/asm/a.out.h
index 4684f97a5bbd..4684f97a5bbd 100644
--- a/arch/x86/include/asm/a.out.h
+++ b/arch/x86/include/uapi/asm/a.out.h
diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h
index 77203ac352de..77203ac352de 100644
--- a/arch/x86/include/asm/auxvec.h
+++ b/arch/x86/include/uapi/asm/auxvec.h
diff --git a/arch/x86/include/asm/bitsperlong.h b/arch/x86/include/uapi/asm/bitsperlong.h
index b0ae1c4dc791..b0ae1c4dc791 100644
--- a/arch/x86/include/asm/bitsperlong.h
+++ b/arch/x86/include/uapi/asm/bitsperlong.h
diff --git a/arch/x86/include/uapi/asm/boot.h b/arch/x86/include/uapi/asm/boot.h
new file mode 100644
index 000000000000..94292c4c8122
--- /dev/null
+++ b/arch/x86/include/uapi/asm/boot.h
@@ -0,0 +1,10 @@
1#ifndef _UAPI_ASM_X86_BOOT_H
2#define _UAPI_ASM_X86_BOOT_H
3
4/* Internal svga startup constants */
5#define NORMAL_VGA 0xffff /* 80x25 mode */
6#define EXTENDED_VGA 0xfffe /* 80x50 mode */
7#define ASK_VGA 0xfffd /* ask for it at bootup */
8
9
10#endif /* _UAPI_ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 2ad874cb661c..92862cd90201 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -13,6 +13,7 @@
13 13#define SETUP_NONE 0
14 14#define SETUP_E820_EXT 1
15 15#define SETUP_DTB 2
16#define SETUP_PCI 3
16 17
17 18/* extensible setup data list node */
18 19struct setup_data {
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/uapi/asm/byteorder.h
index b13a7a88f3eb..b13a7a88f3eb 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/uapi/asm/byteorder.h
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
new file mode 100644
index 000000000000..3c0874dd9861
--- /dev/null
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -0,0 +1,80 @@
1#ifndef _UAPI_ASM_X86_DEBUGREG_H
2#define _UAPI_ASM_X86_DEBUGREG_H
3
4
5/* Indicate the register numbers for a number of the specific
6 debug registers. Registers 0-3 contain the addresses we wish to trap on */
7#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
8#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
9
10#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
11#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
12
13/* Define a few things for the status register. We can use this to determine
14 which debugging register was responsible for the trap. The other bits
15 are either reserved or not of interest to us. */
16
17/* Define reserved bits in DR6 which are always set to 1 */
18#define DR6_RESERVED (0xFFFF0FF0)
19
20#define DR_TRAP0 (0x1) /* db0 */
21#define DR_TRAP1 (0x2) /* db1 */
22#define DR_TRAP2 (0x4) /* db2 */
23#define DR_TRAP3 (0x8) /* db3 */
24#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
25
26#define DR_STEP (0x4000) /* single-step */
27#define DR_SWITCH (0x8000) /* task switch */
28
29/* Now define a bunch of things for manipulating the control register.
30 The top two bytes of the control register consist of 4 fields of 4
31 bits - each field corresponds to one of the four debug registers,
32 and indicates what types of access we trap on, and how large the data
33 field is that we are looking at */
34
35#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
36#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
37
38#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
39#define DR_RW_WRITE (0x1)
40#define DR_RW_READ (0x3)
41
42#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
43#define DR_LEN_2 (0x4)
44#define DR_LEN_4 (0xC)
45#define DR_LEN_8 (0x8)
46
47/* The low byte to the control register determine which registers are
48 enabled. There are 4 fields of two bits. One bit is "local", meaning
49 that the processor will reset the bit after a task switch and the other
50 is global meaning that we have to explicitly reset the bit. With linux,
51 you can use either one, since we explicitly zero the register when we enter
52 kernel mode. */
53
54#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
55#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
56#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
57#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
58#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
59
60#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
61#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
62
63/* The second byte to the control register has a few special things.
64 We can slow the instruction pipeline for instructions coming via the
65 gdt or the ldt if we want to. I am not sure why this is an advantage */
66
67#ifdef __i386__
68#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
69#else
70#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
71#endif
72
73#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
74#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
75
76/*
77 * HW breakpoint additions
78 */
79
80#endif /* _UAPI_ASM_X86_DEBUGREG_H */
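As an aside to the patch, the DR7 layout described in the comments above can be assembled directly from these macros. The sketch below builds a 4-byte write watchpoint in slot 0 and arms it through ptrace() on a stopped tracee; the set_watchpoint() helper and the use of u_debugreg[] offsets are assumptions about the caller's environment, not something this header mandates.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <stddef.h>
#include <asm/debugreg.h>

/* Build a DR7 value that arms breakpoint slot 0 as a 4-byte write watchpoint. */
static unsigned long dr7_write_watch_slot0(void)
{
	unsigned long dr7 = 0;

	/* Access type and length live in the per-slot 4-bit field starting at bit 16. */
	dr7 |= (unsigned long)(DR_RW_WRITE | DR_LEN_4)
		<< (DR_CONTROL_SHIFT + 0 * DR_CONTROL_SIZE);
	/* Local enable bit for slot 0 in the low byte (Linux clears DR7 on entry anyway). */
	dr7 |= DR_LOCAL_ENABLE << (0 * DR_ENABLE_SIZE);
	return dr7;
}

/* Hypothetical helper: point slot 0 of a stopped tracee at 'addr'. */
static int set_watchpoint(pid_t pid, unsigned long addr)
{
	if (ptrace(PTRACE_POKEUSER, pid,
		   offsetof(struct user, u_debugreg[0]), addr) == -1)
		return -1;
	return ptrace(PTRACE_POKEUSER, pid,
		      offsetof(struct user, u_debugreg[7]),
		      dr7_write_watch_slot0());
}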
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
new file mode 100644
index 000000000000..bbae02470701
--- /dev/null
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -0,0 +1,75 @@
1#ifndef _UAPI_ASM_X86_E820_H
2#define _UAPI_ASM_X86_E820_H
3#define E820MAP 0x2d0 /* our map */
4#define E820MAX 128 /* number of entries in E820MAP */
5
6/*
7 * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
8 * constrained space in the zeropage. If we have more nodes than
9 * that, and if we've booted off EFI firmware, then the EFI tables
10 * passed us from the EFI firmware can list more nodes. Size our
11 * internal memory map tables to have room for these additional
12 * nodes, based on up to three entries per node for which the
13 * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
14 * plus E820MAX, allowing space for the possible duplicate E820
15 * entries that might need room in the same arrays, prior to the
16 * call to sanitize_e820_map() to remove duplicates. The allowance
17 * of three memory map entries per node is "enough" entries for
18 * the initial hardware platform motivating this mechanism to make
19 * use of additional EFI map entries. Future platforms may want
20 * to allow more than three entries per node or otherwise refine
21 * this size.
22 */
23
24/*
25 * Odd: 'make headers_check' complains about numa.h if I try
26 * to collapse the next two #ifdef lines to a single line:
27 * #if defined(__KERNEL__) && defined(CONFIG_EFI)
28 */
29#ifndef __KERNEL__
30#define E820_X_MAX E820MAX
31#endif
32
33#define E820NR 0x1e8 /* # entries in E820MAP */
34
35#define E820_RAM 1
36#define E820_RESERVED 2
37#define E820_ACPI 3
38#define E820_NVS 4
39#define E820_UNUSABLE 5
40
41
42/*
43 * reserved RAM used by kernel itself
44 * if CONFIG_INTEL_TXT is enabled, memory of this type will be
45 * included in the S3 integrity calculation and so should not include
46 * any memory that BIOS might alter over the S3 transition
47 */
48#define E820_RESERVED_KERN 128
49
50#ifndef __ASSEMBLY__
51#include <linux/types.h>
52struct e820entry {
53 __u64 addr; /* start of memory segment */
54 __u64 size; /* size of memory segment */
55 __u32 type; /* type of memory segment */
56} __attribute__((packed));
57
58struct e820map {
59 __u32 nr_map;
60 struct e820entry map[E820_X_MAX];
61};
62
63#define ISA_START_ADDRESS 0xa0000
64#define ISA_END_ADDRESS 0x100000
65
66#define BIOS_BEGIN 0x000a0000
67#define BIOS_END 0x00100000
68
69#define BIOS_ROM_BASE 0xffe00000
70#define BIOS_ROM_END 0xffffffff
71
72#endif /* __ASSEMBLY__ */
73
74
75#endif /* _UAPI_ASM_X86_E820_H */
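Outside the patch itself, a quick illustration of the structures above: a dump helper for an e820 map could look like the following sketch. It assumes the exported header is installed and that the caller already obtained a populated struct e820map (from boot_params, a firmware dump, etc.); in user space E820_X_MAX falls back to E820MAX as defined above.

#include <stdio.h>
#include <asm/e820.h>

static const char *e820_type_name(__u32 type)
{
	switch (type) {
	case E820_RAM:		return "RAM";
	case E820_RESERVED:	return "reserved";
	case E820_ACPI:		return "ACPI data";
	case E820_NVS:		return "ACPI NVS";
	case E820_UNUSABLE:	return "unusable";
	default:		return "unknown";
	}
}

static void dump_e820(const struct e820map *map)
{
	__u32 i;

	for (i = 0; i < map->nr_map; i++)
		printf("%016llx - %016llx  %s\n",
		       (unsigned long long)map->map[i].addr,
		       (unsigned long long)(map->map[i].addr + map->map[i].size),
		       e820_type_name(map->map[i].type));
}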
diff --git a/arch/x86/include/asm/errno.h b/arch/x86/include/uapi/asm/errno.h
index 4c82b503d92f..4c82b503d92f 100644
--- a/arch/x86/include/asm/errno.h
+++ b/arch/x86/include/uapi/asm/errno.h
diff --git a/arch/x86/include/asm/fcntl.h b/arch/x86/include/uapi/asm/fcntl.h
index 46ab12db5739..46ab12db5739 100644
--- a/arch/x86/include/asm/fcntl.h
+++ b/arch/x86/include/uapi/asm/fcntl.h
diff --git a/arch/x86/include/uapi/asm/hw_breakpoint.h b/arch/x86/include/uapi/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..79a9626b5500
--- /dev/null
+++ b/arch/x86/include/uapi/asm/hw_breakpoint.h
@@ -0,0 +1 @@
/* */
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b80420bcd09d..b80420bcd09d 100644
--- a/arch/x86/include/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
diff --git a/arch/x86/include/asm/ioctl.h b/arch/x86/include/uapi/asm/ioctl.h
index b279fe06dfe5..b279fe06dfe5 100644
--- a/arch/x86/include/asm/ioctl.h
+++ b/arch/x86/include/uapi/asm/ioctl.h
diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/uapi/asm/ioctls.h
index ec34c760665e..ec34c760665e 100644
--- a/arch/x86/include/asm/ioctls.h
+++ b/arch/x86/include/uapi/asm/ioctls.h
diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/uapi/asm/ipcbuf.h
index 84c7e51cb6d0..84c7e51cb6d0 100644
--- a/arch/x86/include/asm/ipcbuf.h
+++ b/arch/x86/include/uapi/asm/ipcbuf.h
diff --git a/arch/x86/include/uapi/asm/ist.h b/arch/x86/include/uapi/asm/ist.h
new file mode 100644
index 000000000000..bad9f5ea4070
--- /dev/null
+++ b/arch/x86/include/uapi/asm/ist.h
@@ -0,0 +1,29 @@
1/*
2 * Include file for the interface to IST BIOS
3 * Copyright 2002 Andy Grover <andrew.grover@intel.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2, or (at your option) any
8 * later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 */
15#ifndef _UAPI_ASM_X86_IST_H
16#define _UAPI_ASM_X86_IST_H
17
18
19
20#include <linux/types.h>
21
22struct ist_info {
23 __u32 signature;
24 __u32 command;
25 __u32 event;
26 __u32 perf_level;
27};
28
29#endif /* _UAPI_ASM_X86_IST_H */
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a65ec29e6ffb..a65ec29e6ffb 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
new file mode 100644
index 000000000000..06fdbd987e97
--- /dev/null
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -0,0 +1,100 @@
1#ifndef _UAPI_ASM_X86_KVM_PARA_H
2#define _UAPI_ASM_X86_KVM_PARA_H
3
4#include <linux/types.h>
5#include <asm/hyperv.h>
6
7/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
8 * should be used to determine that a VM is running under KVM.
9 */
10#define KVM_CPUID_SIGNATURE 0x40000000
11
12/* This CPUID returns a feature bitmap in eax. Before enabling a particular
13 * paravirtualization, the appropriate feature bit should be checked.
14 */
15#define KVM_CPUID_FEATURES 0x40000001
16#define KVM_FEATURE_CLOCKSOURCE 0
17#define KVM_FEATURE_NOP_IO_DELAY 1
18#define KVM_FEATURE_MMU_OP 2
19/* This indicates that the new set of kvmclock msrs
20 * are available. The use of 0x11 and 0x12 is deprecated
21 */
22#define KVM_FEATURE_CLOCKSOURCE2 3
23#define KVM_FEATURE_ASYNC_PF 4
24#define KVM_FEATURE_STEAL_TIME 5
25#define KVM_FEATURE_PV_EOI 6
26
27/* The last 8 bits are used to indicate how to interpret the flags field
28 * in pvclock structure. If no bits are set, all flags are ignored.
29 */
30#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24
31
32#define MSR_KVM_WALL_CLOCK 0x11
33#define MSR_KVM_SYSTEM_TIME 0x12
34
35#define KVM_MSR_ENABLED 1
36/* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */
37#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
38#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
39#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
40#define MSR_KVM_STEAL_TIME 0x4b564d03
41#define MSR_KVM_PV_EOI_EN 0x4b564d04
42
43struct kvm_steal_time {
44 __u64 steal;
45 __u32 version;
46 __u32 flags;
47 __u32 pad[12];
48};
49
50#define KVM_STEAL_ALIGNMENT_BITS 5
51#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
52#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
53
54#define KVM_MAX_MMU_OP_BATCH 32
55
56#define KVM_ASYNC_PF_ENABLED (1 << 0)
57#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
58
59/* Operations for KVM_HC_MMU_OP */
60#define KVM_MMU_OP_WRITE_PTE 1
61#define KVM_MMU_OP_FLUSH_TLB 2
62#define KVM_MMU_OP_RELEASE_PT 3
63
64/* Payload for KVM_HC_MMU_OP */
65struct kvm_mmu_op_header {
66 __u32 op;
67 __u32 pad;
68};
69
70struct kvm_mmu_op_write_pte {
71 struct kvm_mmu_op_header header;
72 __u64 pte_phys;
73 __u64 pte_val;
74};
75
76struct kvm_mmu_op_flush_tlb {
77 struct kvm_mmu_op_header header;
78};
79
80struct kvm_mmu_op_release_pt {
81 struct kvm_mmu_op_header header;
82 __u64 pt_phys;
83};
84
85#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
86#define KVM_PV_REASON_PAGE_READY 2
87
88struct kvm_vcpu_pv_apf_data {
89 __u32 reason;
90 __u8 pad[60];
91 __u32 enabled;
92};
93
94#define KVM_PV_EOI_BIT 0
95#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
96#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
97#define KVM_PV_EOI_DISABLED 0x0
98
99
100#endif /* _UAPI_ASM_X86_KVM_PARA_H */
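For illustration (not part of the patch): a guest can probe for KVM by issuing the KVM_CPUID_SIGNATURE leaf and comparing the 'KVMKVMKVM' signature, then reading the feature bitmap from KVM_CPUID_FEATURES. A sketch using the __cpuid() macro from GCC/Clang's <cpuid.h>, assuming the exported <asm/kvm_para.h> is installed:

#include <stdio.h>
#include <string.h>
#include <cpuid.h>
#include <asm/kvm_para.h>

static int running_on_kvm(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[13];

	__cpuid(KVM_CPUID_SIGNATURE, eax, ebx, ecx, edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';
	return strcmp(sig, "KVMKVMKVM") == 0;
}

int main(void)
{
	if (running_on_kvm()) {
		unsigned int eax, ebx, ecx, edx;

		__cpuid(KVM_CPUID_FEATURES, eax, ebx, ecx, edx);
		printf("KVM feature bitmap: %#x (kvmclock2: %s)\n", eax,
		       (eax & (1u << KVM_FEATURE_CLOCKSOURCE2)) ? "yes" : "no");
	}
	return 0;
}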
diff --git a/arch/x86/include/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index 46727eb37bfe..46727eb37bfe 100644
--- a/arch/x86/include/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
new file mode 100644
index 000000000000..58c829871c31
--- /dev/null
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -0,0 +1,121 @@
1#ifndef _UAPI_ASM_X86_MCE_H
2#define _UAPI_ASM_X86_MCE_H
3
4#include <linux/types.h>
5#include <asm/ioctls.h>
6
7/*
8 * Machine Check support for x86
9 */
10
11/* MCG_CAP register defines */
12#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
13#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
14#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
15#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
16#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
17#define MCG_EXT_CNT_SHIFT 16
18#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
19#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
20
21/* MCG_STATUS register defines */
22#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
23#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
24#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
25
26/* MCi_STATUS register defines */
27#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
28#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
29#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
30#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
31#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
32#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
33#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
34#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
35#define MCI_STATUS_AR (1ULL<<55) /* Action required */
36#define MCACOD 0xffff /* MCA Error Code */
37
38/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
39#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
40#define MCACOD_SCRUBMSK 0xfff0
41#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
42#define MCACOD_DATA 0x0134 /* Data Load */
43#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
44
45/* MCi_MISC register defines */
46#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
47#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
48#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
49#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
50#define MCI_MISC_ADDR_PHYS 2 /* physical address */
51#define MCI_MISC_ADDR_MEM 3 /* memory address */
52#define MCI_MISC_ADDR_GENERIC 7 /* generic */
53
54/* CTL2 register defines */
55#define MCI_CTL2_CMCI_EN (1ULL << 30)
56#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
57
58#define MCJ_CTX_MASK 3
59#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
60#define MCJ_CTX_RANDOM 0 /* inject context: random */
61#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
62#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
63#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
64#define MCJ_EXCEPTION 0x8 /* raise as exception */
65#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
66
67/* Fields are zero when not available */
68struct mce {
69 __u64 status;
70 __u64 misc;
71 __u64 addr;
72 __u64 mcgstatus;
73 __u64 ip;
74 __u64 tsc; /* cpu time stamp counter */
75 __u64 time; /* wall time_t when error was detected */
76 __u8 cpuvendor; /* cpu vendor as encoded in system.h */
77 __u8 inject_flags; /* software inject flags */
78 __u16 pad;
79 __u32 cpuid; /* CPUID 1 EAX */
80 __u8 cs; /* code segment */
81 __u8 bank; /* machine check bank */
82 __u8 cpu; /* cpu number; obsolete; use extcpu now */
83 __u8 finished; /* entry is valid */
84 __u32 extcpu; /* linux cpu number that detected the error */
85 __u32 socketid; /* CPU socket ID */
86 __u32 apicid; /* CPU initial apic ID */
87 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
88};
89
90/*
91 * This structure contains all data related to the MCE log. Also
92 * carries a signature to make it easier to find from external
93 * debugging tools. Each entry is only valid when its finished flag
94 * is set.
95 */
96
97#define MCE_LOG_LEN 32
98
99struct mce_log {
100 char signature[12]; /* "MACHINECHECK" */
101 unsigned len; /* = MCE_LOG_LEN */
102 unsigned next;
103 unsigned flags;
104 unsigned recordlen; /* length of struct mce */
105 struct mce entry[MCE_LOG_LEN];
106};
107
108#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
109
110#define MCE_LOG_SIGNATURE "MACHINECHECK"
111
112#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
113#define MCE_GET_LOG_LEN _IOR('M', 2, int)
114#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
115
116/* Software defined banks */
117#define MCE_EXTENDED_BANK 128
118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
119#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
120
121#endif /* _UAPI_ASM_X86_MCE_H */
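To show how these definitions are consumed (again, outside the patch): a user-space logger in the spirit of mcelog reads binary struct mce records from the /dev/mcelog character device and uses MCE_GET_RECORD_LEN to guard against layout drift. The read-in-full-log-sized-chunks behaviour is an assumption about the character device here, so treat this only as a hedged sketch.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/mce.h>

int main(void)
{
	struct mce buf[MCE_LOG_LEN];
	int fd, recordlen = 0;
	ssize_t n, i;

	fd = open("/dev/mcelog", O_RDONLY);
	if (fd < 0)
		return 1;

	/* The kernel reports its own sizeof(struct mce); bail out on mismatch. */
	if (ioctl(fd, MCE_GET_RECORD_LEN, &recordlen) < 0 ||
	    recordlen != (int)sizeof(struct mce)) {
		close(fd);
		return 1;
	}

	n = read(fd, buf, sizeof(buf));
	for (i = 0; n > 0 && i < n / (ssize_t)sizeof(struct mce); i++) {
		if (!buf[i].finished)
			continue;	/* entry not yet valid */
		printf("CPU %u bank %u status %#llx addr %#llx\n",
		       buf[i].extcpu, (unsigned int)buf[i].bank,
		       (unsigned long long)buf[i].status,
		       (unsigned long long)buf[i].addr);
	}
	close(fd);
	return 0;
}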
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 593e51d4643f..513b05f15bb4 100644
--- a/arch/x86/include/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -3,6 +3,9 @@
3 3
4 4#define MAP_32BIT 0x40 /* only give out 32bit addresses */
5 5
6#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
7#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
8
6 9#include <asm-generic/mman.h>
7 10
8 11#endif /* _ASM_X86_MMAN_H */
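Not part of the patch: the new MAP_HUGE_2MB/MAP_HUGE_1GB selectors are meant to be or-ed into mmap() flags together with MAP_HUGETLB to pick an explicit huge page size. A sketch, assuming a kernel with 2 MB huge pages configured; the fallback define mirrors the value added above in case the installed libc headers do not carry it yet.

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGE_2MB			/* older userspace headers may lack it */
#define MAP_HUGE_SHIFT	26
#define MAP_HUGE_2MB	(21 << MAP_HUGE_SHIFT)
#endif

int main(void)
{
	size_t len = 2 * 1024 * 1024;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB,
		       -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB | MAP_HUGE_2MB)");
		return 1;
	}
	munmap(p, len);
	return 0;
}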
diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index 809134c644a6..809134c644a6 100644
--- a/arch/x86/include/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 7f0edceb7563..433a59fb1a74 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -35,11 +35,14 @@
35 35#define MSR_IA32_PERFCTR0 0x000000c1
36 36#define MSR_IA32_PERFCTR1 0x000000c2
37 37#define MSR_FSB_FREQ 0x000000cd
38#define MSR_NHM_PLATFORM_INFO 0x000000ce
38 39
39 40#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
40 41#define NHM_C3_AUTO_DEMOTE (1UL << 25)
41 42#define NHM_C1_AUTO_DEMOTE (1UL << 26)
42 43#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
44#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
45#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
43 46
44 47#define MSR_MTRRcap 0x000000fe
45 48#define MSR_IA32_BBL_CR_CTL 0x00000119
@@ -55,6 +58,8 @@
55 58
56 59#define MSR_OFFCORE_RSP_0 0x000001a6
57 60#define MSR_OFFCORE_RSP_1 0x000001a7
61#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
62#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae
58 63
59 64#define MSR_LBR_SELECT 0x000001c8
60 65#define MSR_LBR_TOS 0x000001c9
@@ -103,6 +108,38 @@
103 108#define MSR_IA32_MC0_ADDR 0x00000402
104 109#define MSR_IA32_MC0_MISC 0x00000403
105 110
111/* C-state Residency Counters */
112#define MSR_PKG_C3_RESIDENCY 0x000003f8
113#define MSR_PKG_C6_RESIDENCY 0x000003f9
114#define MSR_PKG_C7_RESIDENCY 0x000003fa
115#define MSR_CORE_C3_RESIDENCY 0x000003fc
116#define MSR_CORE_C6_RESIDENCY 0x000003fd
117#define MSR_CORE_C7_RESIDENCY 0x000003fe
118#define MSR_PKG_C2_RESIDENCY 0x0000060d
119
120/* Run Time Average Power Limiting (RAPL) Interface */
121
122#define MSR_RAPL_POWER_UNIT 0x00000606
123
124#define MSR_PKG_POWER_LIMIT 0x00000610
125#define MSR_PKG_ENERGY_STATUS 0x00000611
126#define MSR_PKG_PERF_STATUS 0x00000613
127#define MSR_PKG_POWER_INFO 0x00000614
128
129#define MSR_DRAM_POWER_LIMIT 0x00000618
130#define MSR_DRAM_ENERGY_STATUS 0x00000619
131#define MSR_DRAM_PERF_STATUS 0x0000061b
132#define MSR_DRAM_POWER_INFO 0x0000061c
133
134#define MSR_PP0_POWER_LIMIT 0x00000638
135#define MSR_PP0_ENERGY_STATUS 0x00000639
136#define MSR_PP0_POLICY 0x0000063a
137#define MSR_PP0_PERF_STATUS 0x0000063b
138
139#define MSR_PP1_POWER_LIMIT 0x00000640
140#define MSR_PP1_ENERGY_STATUS 0x00000641
141#define MSR_PP1_POLICY 0x00000642
142
106 143#define MSR_AMD64_MC0_MASK 0xc0010044
107 144
108 145#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
@@ -236,6 +273,7 @@
236 273#define MSR_IA32_EBL_CR_POWERON 0x0000002a
237 274#define MSR_EBC_FREQUENCY_ID 0x0000002c
238 275#define MSR_IA32_FEATURE_CONTROL 0x0000003a
276#define MSR_IA32_TSC_ADJUST 0x0000003b
239 277
240 278#define FEATURE_CONTROL_LOCKED (1<<0)
241 279#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
@@ -337,6 +375,8 @@
337 375#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38)
338 376#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39)
339 377
378#define MSR_IA32_TSC_DEADLINE 0x000006E0
379
340 380/* P4/Xeon+ specific */
341 381#define MSR_IA32_MCG_EAX 0x00000180
342 382#define MSR_IA32_MCG_EBX 0x00000181
diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h
new file mode 100644
index 000000000000..155e51048fa4
--- /dev/null
+++ b/arch/x86/include/uapi/asm/msr.h
@@ -0,0 +1,15 @@
1#ifndef _UAPI_ASM_X86_MSR_H
2#define _UAPI_ASM_X86_MSR_H
3
4#include <asm/msr-index.h>
5
6#ifndef __ASSEMBLY__
7
8#include <linux/types.h>
9#include <linux/ioctl.h>
10
11#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
12#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
13
14#endif /* __ASSEMBLY__ */
15#endif /* _UAPI_ASM_X86_MSR_H */
diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h
new file mode 100644
index 000000000000..d0acb658c8f4
--- /dev/null
+++ b/arch/x86/include/uapi/asm/mtrr.h
@@ -0,0 +1,117 @@
1/* Generic MTRR (Memory Type Range Register) ioctls.
2
3 Copyright (C) 1997-1999 Richard Gooch
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
19 Richard Gooch may be reached by email at rgooch@atnf.csiro.au
20 The postal address is:
21 Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
22*/
23#ifndef _UAPI_ASM_X86_MTRR_H
24#define _UAPI_ASM_X86_MTRR_H
25
26#include <linux/types.h>
27#include <linux/ioctl.h>
28#include <linux/errno.h>
29
30#define MTRR_IOCTL_BASE 'M'
31
32/* Warning: this structure has a different order from i386
33 on x86-64. The 32bit emulation code takes care of that.
34 But you need to use this for 64bit, otherwise your X server
35 will break. */
36
37#ifdef __i386__
38struct mtrr_sentry {
39 unsigned long base; /* Base address */
40 unsigned int size; /* Size of region */
41 unsigned int type; /* Type of region */
42};
43
44struct mtrr_gentry {
45 unsigned int regnum; /* Register number */
46 unsigned long base; /* Base address */
47 unsigned int size; /* Size of region */
48 unsigned int type; /* Type of region */
49};
50
51#else /* __i386__ */
52
53struct mtrr_sentry {
54 __u64 base; /* Base address */
55 __u32 size; /* Size of region */
56 __u32 type; /* Type of region */
57};
58
59struct mtrr_gentry {
60 __u64 base; /* Base address */
61 __u32 size; /* Size of region */
62 __u32 regnum; /* Register number */
63 __u32 type; /* Type of region */
64 __u32 _pad; /* Unused */
65};
66
67#endif /* !__i386__ */
68
69struct mtrr_var_range {
70 __u32 base_lo;
71 __u32 base_hi;
72 __u32 mask_lo;
73 __u32 mask_hi;
74};
75
76/* In the Intel processor's MTRR interface, the MTRR type is always held in
77 an 8 bit field: */
78typedef __u8 mtrr_type;
79
80#define MTRR_NUM_FIXED_RANGES 88
81#define MTRR_MAX_VAR_RANGES 256
82
83struct mtrr_state_type {
84 struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
85 mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
86 unsigned char enabled;
87 unsigned char have_fixed;
88 mtrr_type def_type;
89};
90
91#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
92#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
93
94/* These are the various ioctls */
95#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
96#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
97#define MTRRIOC_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry)
98#define MTRRIOC_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry)
99#define MTRRIOC_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry)
100#define MTRRIOC_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry)
101#define MTRRIOC_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry)
102#define MTRRIOC_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry)
103#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
104#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry)
105
106/* These are the region types */
107#define MTRR_TYPE_UNCACHABLE 0
108#define MTRR_TYPE_WRCOMB 1
109/*#define MTRR_TYPE_ 2*/
110/*#define MTRR_TYPE_ 3*/
111#define MTRR_TYPE_WRTHROUGH 4
112#define MTRR_TYPE_WRPROT 5
113#define MTRR_TYPE_WRBACK 6
114#define MTRR_NUM_TYPES 7
115
116
117#endif /* _UAPI_ASM_X86_MTRR_H */
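As an aside to the patch: these ioctls are issued against /proc/mtrr, for example to enumerate the variable ranges (a register that exists but is disabled reports size 0). A minimal sketch in the spirit of the sample programs in Documentation/x86/mtrr.txt, assuming the exported header is installed:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/mtrr.h>

int main(void)
{
	struct mtrr_gentry g;
	int fd = open("/proc/mtrr", O_RDONLY);

	if (fd < 0)
		return 1;

	/* Walk register numbers until the kernel rejects the index. */
	for (g.regnum = 0; ioctl(fd, MTRRIOC_GET_ENTRY, &g) == 0; g.regnum++) {
		if (!g.size)
			continue;	/* register present but disabled */
		printf("reg %u: base %#llx size %#lx type %u\n",
		       (unsigned int)g.regnum,
		       (unsigned long long)g.base,
		       (unsigned long)g.size, (unsigned int)g.type);
	}
	close(fd);
	return 0;
}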
diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/uapi/asm/param.h
index 965d45427975..965d45427975 100644
--- a/arch/x86/include/asm/param.h
+++ b/arch/x86/include/uapi/asm/param.h
diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index 3f2207bfd17b..3f2207bfd17b 100644
--- a/arch/x86/include/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
diff --git a/arch/x86/include/asm/poll.h b/arch/x86/include/uapi/asm/poll.h
index c98509d3149e..c98509d3149e 100644
--- a/arch/x86/include/asm/poll.h
+++ b/arch/x86/include/uapi/asm/poll.h
diff --git a/arch/x86/include/uapi/asm/posix_types.h b/arch/x86/include/uapi/asm/posix_types.h
new file mode 100644
index 000000000000..85506b383627
--- /dev/null
+++ b/arch/x86/include/uapi/asm/posix_types.h
@@ -0,0 +1,9 @@
1#ifndef __KERNEL__
2# ifdef __i386__
3# include <asm/posix_types_32.h>
4# elif defined(__ILP32__)
5# include <asm/posix_types_x32.h>
6# else
7# include <asm/posix_types_64.h>
8# endif
9#endif
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/uapi/asm/posix_types_32.h
index 8e525059e7d8..8e525059e7d8 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/uapi/asm/posix_types_32.h
diff --git a/arch/x86/include/asm/posix_types_64.h b/arch/x86/include/uapi/asm/posix_types_64.h
index cba0c1ead162..cba0c1ead162 100644
--- a/arch/x86/include/asm/posix_types_64.h
+++ b/arch/x86/include/uapi/asm/posix_types_64.h
diff --git a/arch/x86/include/asm/posix_types_x32.h b/arch/x86/include/uapi/asm/posix_types_x32.h
index 85f9bdafa93c..85f9bdafa93c 100644
--- a/arch/x86/include/asm/posix_types_x32.h
+++ b/arch/x86/include/uapi/asm/posix_types_x32.h
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032fae09..3ac5032fae09 100644
--- a/arch/x86/include/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
new file mode 100644
index 000000000000..54991a746043
--- /dev/null
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -0,0 +1,99 @@
1#ifndef _UAPI_ASM_X86_PROCESSOR_FLAGS_H
2#define _UAPI_ASM_X86_PROCESSOR_FLAGS_H
3/* Various flags defined: can be included from assembler. */
4
5/*
6 * EFLAGS bits
7 */
8#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
9#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
10#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
11#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
12#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
13#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
14#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
15#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
16#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
17#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
18#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
19#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
20#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
21#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
22#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
23#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
24#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
25#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
26
27/*
28 * Basic CPU control in CR0
29 */
30#define X86_CR0_PE 0x00000001 /* Protection Enable */
31#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */
32#define X86_CR0_EM 0x00000004 /* Emulation */
33#define X86_CR0_TS 0x00000008 /* Task Switched */
34#define X86_CR0_ET 0x00000010 /* Extension Type */
35#define X86_CR0_NE 0x00000020 /* Numeric Error */
36#define X86_CR0_WP 0x00010000 /* Write Protect */
37#define X86_CR0_AM 0x00040000 /* Alignment Mask */
38#define X86_CR0_NW 0x20000000 /* Not Write-through */
39#define X86_CR0_CD 0x40000000 /* Cache Disable */
40#define X86_CR0_PG 0x80000000 /* Paging */
41
42/*
43 * Paging options in CR3
44 */
45#define X86_CR3_PWT 0x00000008 /* Page Write Through */
46#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
47#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
48
49/*
50 * Intel CPU features in CR4
51 */
52#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */
53#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */
54#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */
55#define X86_CR4_DE 0x00000008 /* enable debugging extensions */
56#define X86_CR4_PSE 0x00000010 /* enable page size extensions */
57#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */
58#define X86_CR4_MCE 0x00000040 /* Machine check enable */
59#define X86_CR4_PGE 0x00000080 /* enable global pages */
60#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */
61#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
62#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
63#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
64#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
65#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
66#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
67#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
68#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */
69
70/*
71 * x86-64 Task Priority Register, CR8
72 */
73#define X86_CR8_TPR 0x0000000F /* task priority register */
74
75/*
76 * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
77 */
78
79/*
80 * NSC/Cyrix CPU configuration register indexes
81 */
82#define CX86_PCR0 0x20
83#define CX86_GCR 0xb8
84#define CX86_CCR0 0xc0
85#define CX86_CCR1 0xc1
86#define CX86_CCR2 0xc2
87#define CX86_CCR3 0xc3
88#define CX86_CCR4 0xe8
89#define CX86_CCR5 0xe9
90#define CX86_CCR6 0xea
91#define CX86_CCR7 0xeb
92#define CX86_PCR1 0xf0
93#define CX86_DIR0 0xfe
94#define CX86_DIR1 0xff
95#define CX86_ARR_BASE 0xc4
96#define CX86_RCR_BASE 0xdc
97
98
99#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
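Not part of the patch: the X86_EFLAGS_* masks are convenient for decoding a saved flags word, for instance one captured via ptrace() or pulled out of a signal frame. A small sketch:

#include <stdio.h>
#include <asm/processor-flags.h>

static void print_eflags(unsigned long flags)
{
	printf("eflags=%#lx [%s%s%s%s%s%s ] iopl=%lu\n", flags,
	       (flags & X86_EFLAGS_CF) ? " CF" : "",
	       (flags & X86_EFLAGS_ZF) ? " ZF" : "",
	       (flags & X86_EFLAGS_SF) ? " SF" : "",
	       (flags & X86_EFLAGS_OF) ? " OF" : "",
	       (flags & X86_EFLAGS_IF) ? " IF" : "",
	       (flags & X86_EFLAGS_VM) ? " VM" : "",
	       (flags & X86_EFLAGS_IOPL) >> 12);	/* IOPL occupies bits 12-13 */
}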
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a88851..7b0a55a88851 100644
--- a/arch/x86/include/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
new file mode 100644
index 000000000000..ac4b9aa4d999
--- /dev/null
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -0,0 +1,78 @@
1#ifndef _UAPI_ASM_X86_PTRACE_H
2#define _UAPI_ASM_X86_PTRACE_H
3
4#include <linux/compiler.h> /* For __user */
5#include <asm/ptrace-abi.h>
6#include <asm/processor-flags.h>
7
8
9#ifndef __ASSEMBLY__
10
11#ifdef __i386__
12/* this struct defines the way the registers are stored on the
13 stack during a system call. */
14
15#ifndef __KERNEL__
16
17struct pt_regs {
18 long ebx;
19 long ecx;
20 long edx;
21 long esi;
22 long edi;
23 long ebp;
24 long eax;
25 int xds;
26 int xes;
27 int xfs;
28 int xgs;
29 long orig_eax;
30 long eip;
31 int xcs;
32 long eflags;
33 long esp;
34 int xss;
35};
36
37#endif /* __KERNEL__ */
38
39#else /* __i386__ */
40
41#ifndef __KERNEL__
42
43struct pt_regs {
44 unsigned long r15;
45 unsigned long r14;
46 unsigned long r13;
47 unsigned long r12;
48 unsigned long rbp;
49 unsigned long rbx;
50/* arguments: non interrupts/non tracing syscalls only save up to here*/
51 unsigned long r11;
52 unsigned long r10;
53 unsigned long r9;
54 unsigned long r8;
55 unsigned long rax;
56 unsigned long rcx;
57 unsigned long rdx;
58 unsigned long rsi;
59 unsigned long rdi;
60 unsigned long orig_rax;
61/* end of arguments */
62/* cpu exception frame or undefined */
63 unsigned long rip;
64 unsigned long cs;
65 unsigned long eflags;
66 unsigned long rsp;
67 unsigned long ss;
68/* top of stack page */
69};
70
71#endif /* __KERNEL__ */
72#endif /* !__i386__ */
73
74
75
76#endif /* !__ASSEMBLY__ */
77
78#endif /* _UAPI_ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/resource.h b/arch/x86/include/uapi/asm/resource.h
index 04bc4db8921b..04bc4db8921b 100644
--- a/arch/x86/include/asm/resource.h
+++ b/arch/x86/include/uapi/asm/resource.h
diff --git a/arch/x86/include/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index ee50c801f7b7..ee50c801f7b7 100644
--- a/arch/x86/include/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
diff --git a/arch/x86/include/uapi/asm/setup.h b/arch/x86/include/uapi/asm/setup.h
new file mode 100644
index 000000000000..79a9626b5500
--- /dev/null
+++ b/arch/x86/include/uapi/asm/setup.h
@@ -0,0 +1 @@
/* */
diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index 83c05fc2de38..83c05fc2de38 100644
--- a/arch/x86/include/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
new file mode 100644
index 000000000000..d8b9f9081e86
--- /dev/null
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -0,0 +1,221 @@
1#ifndef _UAPI_ASM_X86_SIGCONTEXT_H
2#define _UAPI_ASM_X86_SIGCONTEXT_H
3
4#include <linux/compiler.h>
5#include <linux/types.h>
6
7#define FP_XSTATE_MAGIC1 0x46505853U
8#define FP_XSTATE_MAGIC2 0x46505845U
9#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
10
11/*
12 * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
13 * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
14 * are used to extended the fpstate pointer in the sigcontext, which now
15 * includes the extended state information along with fpstate information.
16 *
17 * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
18 * area and FP_XSTATE_MAGIC2 at the end of memory layout
19 * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
20 * extended state information in the memory layout pointed by the fpstate
21 * pointer in sigcontext.
22 */
23struct _fpx_sw_bytes {
24 __u32 magic1; /* FP_XSTATE_MAGIC1 */
25 __u32 extended_size; /* total size of the layout referred by
26 * fpstate pointer in the sigcontext.
27 */
28 __u64 xstate_bv;
29 /* feature bit mask (including fp/sse/extended
30 * state) that is present in the memory
31 * layout.
32 */
33 __u32 xstate_size; /* actual xsave state size, based on the
34 * features saved in the layout.
35 * 'extended_size' will be greater than
36 * 'xstate_size'.
37 */
38 __u32 padding[7]; /* for future use. */
39};
40
41#ifdef __i386__
42/*
43 * As documented in the iBCS2 standard..
44 *
45 * The first part of "struct _fpstate" is just the normal i387
46 * hardware setup, the extra "status" word is used to save the
47 * coprocessor status word before entering the handler.
48 *
49 * Pentium III FXSR, SSE support
50 * Gareth Hughes <gareth@valinux.com>, May 2000
51 *
52 * The FPU state data structure has had to grow to accommodate the
53 * extended FPU state required by the Streaming SIMD Extensions.
54 * There is no documented standard to accomplish this at the moment.
55 */
56struct _fpreg {
57 unsigned short significand[4];
58 unsigned short exponent;
59};
60
61struct _fpxreg {
62 unsigned short significand[4];
63 unsigned short exponent;
64 unsigned short padding[3];
65};
66
67struct _xmmreg {
68 unsigned long element[4];
69};
70
71struct _fpstate {
72 /* Regular FPU environment */
73 unsigned long cw;
74 unsigned long sw;
75 unsigned long tag;
76 unsigned long ipoff;
77 unsigned long cssel;
78 unsigned long dataoff;
79 unsigned long datasel;
80 struct _fpreg _st[8];
81 unsigned short status;
82 unsigned short magic; /* 0xffff = regular FPU data only */
83
84 /* FXSR FPU environment */
85 unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */
86 unsigned long mxcsr;
87 unsigned long reserved;
88 struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
89 struct _xmmreg _xmm[8];
90 unsigned long padding1[44];
91
92 union {
93 unsigned long padding2[12];
94 struct _fpx_sw_bytes sw_reserved; /* represents the extended
95 * state info */
96 };
97};
98
99#define X86_FXSR_MAGIC 0x0000
100
101#ifndef __KERNEL__
102/*
103 * User-space might still rely on the old definition:
104 */
105struct sigcontext {
106 unsigned short gs, __gsh;
107 unsigned short fs, __fsh;
108 unsigned short es, __esh;
109 unsigned short ds, __dsh;
110 unsigned long edi;
111 unsigned long esi;
112 unsigned long ebp;
113 unsigned long esp;
114 unsigned long ebx;
115 unsigned long edx;
116 unsigned long ecx;
117 unsigned long eax;
118 unsigned long trapno;
119 unsigned long err;
120 unsigned long eip;
121 unsigned short cs, __csh;
122 unsigned long eflags;
123 unsigned long esp_at_signal;
124 unsigned short ss, __ssh;
125 struct _fpstate __user *fpstate;
126 unsigned long oldmask;
127 unsigned long cr2;
128};
129#endif /* !__KERNEL__ */
130
131#else /* __i386__ */
132
133/* FXSAVE frame */
134/* Note: reserved1/2 may someday contain valuable data. Always save/restore
135 them when you change signal frames. */
136struct _fpstate {
137 __u16 cwd;
138 __u16 swd;
139 __u16 twd; /* Note this is not the same as the
140 32bit/x87/FSAVE twd */
141 __u16 fop;
142 __u64 rip;
143 __u64 rdp;
144 __u32 mxcsr;
145 __u32 mxcsr_mask;
146 __u32 st_space[32]; /* 8*16 bytes for each FP-reg */
147 __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */
148 __u32 reserved2[12];
149 union {
150 __u32 reserved3[12];
151 struct _fpx_sw_bytes sw_reserved; /* represents the extended
152 * state information */
153 };
154};
155
156#ifndef __KERNEL__
157/*
158 * User-space might still rely on the old definition:
159 */
160struct sigcontext {
161 __u64 r8;
162 __u64 r9;
163 __u64 r10;
164 __u64 r11;
165 __u64 r12;
166 __u64 r13;
167 __u64 r14;
168 __u64 r15;
169 __u64 rdi;
170 __u64 rsi;
171 __u64 rbp;
172 __u64 rbx;
173 __u64 rdx;
174 __u64 rax;
175 __u64 rcx;
176 __u64 rsp;
177 __u64 rip;
178 __u64 eflags; /* RFLAGS */
179 __u16 cs;
180 __u16 gs;
181 __u16 fs;
182 __u16 __pad0;
183 __u64 err;
184 __u64 trapno;
185 __u64 oldmask;
186 __u64 cr2;
187 struct _fpstate __user *fpstate; /* zero when no FPU context */
188#ifdef __ILP32__
189 __u32 __fpstate_pad;
190#endif
191 __u64 reserved1[8];
192};
193#endif /* !__KERNEL__ */
194
195#endif /* !__i386__ */
196
197struct _xsave_hdr {
198 __u64 xstate_bv;
199 __u64 reserved1[2];
200 __u64 reserved2[5];
201};
202
203struct _ymmh_state {
204 /* 16 * 16 bytes for each YMMH-reg */
205 __u32 ymmh_space[64];
206};
207
208/*
209 * Extended state pointed by the fpstate pointer in the sigcontext.
210 * In addition to the fpstate, information encoded in the xstate_hdr
211 * indicates the presence of other extended state information
212 * supported by the processor and OS.
213 */
214struct _xstate {
215 struct _fpstate fpstate;
216 struct _xsave_hdr xstate_hdr;
217 struct _ymmh_state ymmh;
218 /* new processor state extensions go here */
219};
220
221#endif /* _UAPI_ASM_X86_SIGCONTEXT_H */
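Not part of the patch: as the comment on struct _fpx_sw_bytes explains, user space can tell whether the fpstate area carries extended (xsave) state by checking the FP_XSTATE_MAGIC1 marker in sw_reserved. A tiny sketch, assuming the caller already has the struct _fpstate pointer from its sigcontext:

#include <asm/sigcontext.h>

/* Returns nonzero if 'fp' is followed by the extended xstate layout. */
static int fpstate_has_xstate(const struct _fpstate *fp)
{
	return fp && fp->sw_reserved.magic1 == FP_XSTATE_MAGIC1;
}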
diff --git a/arch/x86/include/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h
index ad1478c4ae12..ad1478c4ae12 100644
--- a/arch/x86/include/asm/sigcontext32.h
+++ b/arch/x86/include/uapi/asm/sigcontext32.h
diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/uapi/asm/siginfo.h
index 34c47b3341c0..34c47b3341c0 100644
--- a/arch/x86/include/asm/siginfo.h
+++ b/arch/x86/include/uapi/asm/siginfo.h
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
new file mode 100644
index 000000000000..0818f9a8e889
--- /dev/null
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -0,0 +1,145 @@
1#ifndef _UAPI_ASM_X86_SIGNAL_H
2#define _UAPI_ASM_X86_SIGNAL_H
3
4#ifndef __ASSEMBLY__
5#include <linux/types.h>
6#include <linux/time.h>
7#include <linux/compiler.h>
8
9/* Avoid too many header ordering problems. */
10struct siginfo;
11
12#ifndef __KERNEL__
13/* Here we must cater to libcs that poke about in kernel headers. */
14
15#define NSIG 32
16typedef unsigned long sigset_t;
17
18#endif /* __KERNEL__ */
19#endif /* __ASSEMBLY__ */
20
21
22#define SIGHUP 1
23#define SIGINT 2
24#define SIGQUIT 3
25#define SIGILL 4
26#define SIGTRAP 5
27#define SIGABRT 6
28#define SIGIOT 6
29#define SIGBUS 7
30#define SIGFPE 8
31#define SIGKILL 9
32#define SIGUSR1 10
33#define SIGSEGV 11
34#define SIGUSR2 12
35#define SIGPIPE 13
36#define SIGALRM 14
37#define SIGTERM 15
38#define SIGSTKFLT 16
39#define SIGCHLD 17
40#define SIGCONT 18
41#define SIGSTOP 19
42#define SIGTSTP 20
43#define SIGTTIN 21
44#define SIGTTOU 22
45#define SIGURG 23
46#define SIGXCPU 24
47#define SIGXFSZ 25
48#define SIGVTALRM 26
49#define SIGPROF 27
50#define SIGWINCH 28
51#define SIGIO 29
52#define SIGPOLL SIGIO
53/*
54#define SIGLOST 29
55*/
56#define SIGPWR 30
57#define SIGSYS 31
58#define SIGUNUSED 31
59
60/* These should not be considered constants from userland. */
61#define SIGRTMIN 32
62#define SIGRTMAX _NSIG
63
64/*
65 * SA_FLAGS values:
66 *
67 * SA_ONSTACK indicates that a registered stack_t will be used.
68 * SA_RESTART flag to get restarting signals (which were the default long ago)
69 * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
70 * SA_RESETHAND clears the handler when the signal is delivered.
71 * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
72 * SA_NODEFER prevents the current signal from being masked in the handler.
73 *
74 * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
75 * Unix names RESETHAND and NODEFER respectively.
76 */
77#define SA_NOCLDSTOP 0x00000001u
78#define SA_NOCLDWAIT 0x00000002u
79#define SA_SIGINFO 0x00000004u
80#define SA_ONSTACK 0x08000000u
81#define SA_RESTART 0x10000000u
82#define SA_NODEFER 0x40000000u
83#define SA_RESETHAND 0x80000000u
84
85#define SA_NOMASK SA_NODEFER
86#define SA_ONESHOT SA_RESETHAND
87
88#define SA_RESTORER 0x04000000
89
90/*
91 * sigaltstack controls
92 */
93#define SS_ONSTACK 1
94#define SS_DISABLE 2
95
96#define MINSIGSTKSZ 2048
97#define SIGSTKSZ 8192
98
99#include <asm-generic/signal-defs.h>
100
101#ifndef __ASSEMBLY__
102
103
104#ifdef __i386__
105# ifndef __KERNEL__
106/* Here we must cater to libcs that poke about in kernel headers. */
107
108struct sigaction {
109 union {
110 __sighandler_t _sa_handler;
111 void (*_sa_sigaction)(int, struct siginfo *, void *);
112 } _u;
113 sigset_t sa_mask;
114 unsigned long sa_flags;
115 void (*sa_restorer)(void);
116};
117
118#define sa_handler _u._sa_handler
119#define sa_sigaction _u._sa_sigaction
120
121# endif /* ! __KERNEL__ */
122#else /* __i386__ */
123
124struct sigaction {
125 __sighandler_t sa_handler;
126 unsigned long sa_flags;
127 __sigrestore_t sa_restorer;
128 sigset_t sa_mask; /* mask last for extensibility */
129};
130
131struct k_sigaction {
132 struct sigaction sa;
133};
134
135#endif /* !__i386__ */
136
137typedef struct sigaltstack {
138 void __user *ss_sp;
139 int ss_flags;
140 size_t ss_size;
141} stack_t;
142
143#endif /* __ASSEMBLY__ */
144
145#endif /* _UAPI_ASM_X86_SIGNAL_H */
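Not part of the patch: the struct sigaction above is the raw kernel ABI layout; portable programs go through the C library's sigaction(2) wrapper and its own struct sigaction instead, along these lines:

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void on_segv(int sig, siginfo_t *si, void *uctx)
{
	(void)sig; (void)uctx;
	/* async-signal-safety ignored for brevity */
	fprintf(stderr, "SIGSEGV at %p\n", si->si_addr);
	_exit(1);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = on_segv;
	sa.sa_flags = SA_SIGINFO | SA_RESTART;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, NULL);
	raise(SIGSEGV);
	return 0;
}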
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/uapi/asm/socket.h
index 6b71384b9d8b..6b71384b9d8b 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/uapi/asm/socket.h
diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h
index def6d4746ee7..def6d4746ee7 100644
--- a/arch/x86/include/asm/sockios.h
+++ b/arch/x86/include/uapi/asm/sockios.h
diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/uapi/asm/stat.h
index 7b3ddc348585..7b3ddc348585 100644
--- a/arch/x86/include/asm/stat.h
+++ b/arch/x86/include/uapi/asm/stat.h
diff --git a/arch/x86/include/asm/statfs.h b/arch/x86/include/uapi/asm/statfs.h
index 2d0adbf99a8e..2d0adbf99a8e 100644
--- a/arch/x86/include/asm/statfs.h
+++ b/arch/x86/include/uapi/asm/statfs.h
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
new file mode 100644
index 000000000000..b5d7640abc5d
--- /dev/null
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -0,0 +1,132 @@
1#ifndef _UAPI__SVM_H
2#define _UAPI__SVM_H
3
4#define SVM_EXIT_READ_CR0 0x000
5#define SVM_EXIT_READ_CR3 0x003
6#define SVM_EXIT_READ_CR4 0x004
7#define SVM_EXIT_READ_CR8 0x008
8#define SVM_EXIT_WRITE_CR0 0x010
9#define SVM_EXIT_WRITE_CR3 0x013
10#define SVM_EXIT_WRITE_CR4 0x014
11#define SVM_EXIT_WRITE_CR8 0x018
12#define SVM_EXIT_READ_DR0 0x020
13#define SVM_EXIT_READ_DR1 0x021
14#define SVM_EXIT_READ_DR2 0x022
15#define SVM_EXIT_READ_DR3 0x023
16#define SVM_EXIT_READ_DR4 0x024
17#define SVM_EXIT_READ_DR5 0x025
18#define SVM_EXIT_READ_DR6 0x026
19#define SVM_EXIT_READ_DR7 0x027
20#define SVM_EXIT_WRITE_DR0 0x030
21#define SVM_EXIT_WRITE_DR1 0x031
22#define SVM_EXIT_WRITE_DR2 0x032
23#define SVM_EXIT_WRITE_DR3 0x033
24#define SVM_EXIT_WRITE_DR4 0x034
25#define SVM_EXIT_WRITE_DR5 0x035
26#define SVM_EXIT_WRITE_DR6 0x036
27#define SVM_EXIT_WRITE_DR7 0x037
28#define SVM_EXIT_EXCP_BASE 0x040
29#define SVM_EXIT_INTR 0x060
30#define SVM_EXIT_NMI 0x061
31#define SVM_EXIT_SMI 0x062
32#define SVM_EXIT_INIT 0x063
33#define SVM_EXIT_VINTR 0x064
34#define SVM_EXIT_CR0_SEL_WRITE 0x065
35#define SVM_EXIT_IDTR_READ 0x066
36#define SVM_EXIT_GDTR_READ 0x067
37#define SVM_EXIT_LDTR_READ 0x068
38#define SVM_EXIT_TR_READ 0x069
39#define SVM_EXIT_IDTR_WRITE 0x06a
40#define SVM_EXIT_GDTR_WRITE 0x06b
41#define SVM_EXIT_LDTR_WRITE 0x06c
42#define SVM_EXIT_TR_WRITE 0x06d
43#define SVM_EXIT_RDTSC 0x06e
44#define SVM_EXIT_RDPMC 0x06f
45#define SVM_EXIT_PUSHF 0x070
46#define SVM_EXIT_POPF 0x071
47#define SVM_EXIT_CPUID 0x072
48#define SVM_EXIT_RSM 0x073
49#define SVM_EXIT_IRET 0x074
50#define SVM_EXIT_SWINT 0x075
51#define SVM_EXIT_INVD 0x076
52#define SVM_EXIT_PAUSE 0x077
53#define SVM_EXIT_HLT 0x078
54#define SVM_EXIT_INVLPG 0x079
55#define SVM_EXIT_INVLPGA 0x07a
56#define SVM_EXIT_IOIO 0x07b
57#define SVM_EXIT_MSR 0x07c
58#define SVM_EXIT_TASK_SWITCH 0x07d
59#define SVM_EXIT_FERR_FREEZE 0x07e
60#define SVM_EXIT_SHUTDOWN 0x07f
61#define SVM_EXIT_VMRUN 0x080
62#define SVM_EXIT_VMMCALL 0x081
63#define SVM_EXIT_VMLOAD 0x082
64#define SVM_EXIT_VMSAVE 0x083
65#define SVM_EXIT_STGI 0x084
66#define SVM_EXIT_CLGI 0x085
67#define SVM_EXIT_SKINIT 0x086
68#define SVM_EXIT_RDTSCP 0x087
69#define SVM_EXIT_ICEBP 0x088
70#define SVM_EXIT_WBINVD 0x089
71#define SVM_EXIT_MONITOR 0x08a
72#define SVM_EXIT_MWAIT 0x08b
73#define SVM_EXIT_MWAIT_COND 0x08c
74#define SVM_EXIT_XSETBV 0x08d
75#define SVM_EXIT_NPF 0x400
76
77#define SVM_EXIT_ERR -1
78
79#define SVM_EXIT_REASONS \
80 { SVM_EXIT_READ_CR0, "read_cr0" }, \
81 { SVM_EXIT_READ_CR3, "read_cr3" }, \
82 { SVM_EXIT_READ_CR4, "read_cr4" }, \
83 { SVM_EXIT_READ_CR8, "read_cr8" }, \
84 { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
85 { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
86 { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
87 { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
88 { SVM_EXIT_READ_DR0, "read_dr0" }, \
89 { SVM_EXIT_READ_DR1, "read_dr1" }, \
90 { SVM_EXIT_READ_DR2, "read_dr2" }, \
91 { SVM_EXIT_READ_DR3, "read_dr3" }, \
92 { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
93 { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
94 { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
95 { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
96 { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
97 { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
98 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
99 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
100 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
101 { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
102 { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
103 { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
104 { SVM_EXIT_INTR, "interrupt" }, \
105 { SVM_EXIT_NMI, "nmi" }, \
106 { SVM_EXIT_SMI, "smi" }, \
107 { SVM_EXIT_INIT, "init" }, \
108 { SVM_EXIT_VINTR, "vintr" }, \
109 { SVM_EXIT_CPUID, "cpuid" }, \
110 { SVM_EXIT_INVD, "invd" }, \
111 { SVM_EXIT_HLT, "hlt" }, \
112 { SVM_EXIT_INVLPG, "invlpg" }, \
113 { SVM_EXIT_INVLPGA, "invlpga" }, \
114 { SVM_EXIT_IOIO, "io" }, \
115 { SVM_EXIT_MSR, "msr" }, \
116 { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
117 { SVM_EXIT_SHUTDOWN, "shutdown" }, \
118 { SVM_EXIT_VMRUN, "vmrun" }, \
119 { SVM_EXIT_VMMCALL, "hypercall" }, \
120 { SVM_EXIT_VMLOAD, "vmload" }, \
121 { SVM_EXIT_VMSAVE, "vmsave" }, \
122 { SVM_EXIT_STGI, "stgi" }, \
123 { SVM_EXIT_CLGI, "clgi" }, \
124 { SVM_EXIT_SKINIT, "skinit" }, \
125 { SVM_EXIT_WBINVD, "wbinvd" }, \
126 { SVM_EXIT_MONITOR, "monitor" }, \
127 { SVM_EXIT_MWAIT, "mwait" }, \
128 { SVM_EXIT_XSETBV, "xsetbv" }, \
129 { SVM_EXIT_NPF, "npf" }
130
131
132#endif /* _UAPI__SVM_H */
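
The SVM_EXIT_REASONS table above pairs each exit code with a printable name; in-tree it feeds __print_symbolic() in the KVM tracepoints. The sketch below (not part of the patch, and assuming kernel context where the DB_VECTOR/PF_VECTOR etc. exception-vector macros used by the table are visible) shows one way such a { value, name } table can be consumed.

/*
 * Not part of the patch: illustrative consumer of SVM_EXIT_REASONS.
 * Assumes the exception vector macros referenced by the table are in
 * scope (kernel context).
 */
struct svm_exit_entry {
	unsigned long code;
	const char *name;
};

static const struct svm_exit_entry svm_exit_names[] = { SVM_EXIT_REASONS };

static const char *svm_exit_name(unsigned long exit_code)
{
	unsigned int i;

	for (i = 0; i < sizeof(svm_exit_names) / sizeof(svm_exit_names[0]); i++)
		if (svm_exit_names[i].code == exit_code)
			return svm_exit_names[i].name;
	return "unknown";	/* e.g. SVM_EXIT_ERR or an unlisted exit */
}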
diff --git a/arch/x86/include/asm/swab.h b/arch/x86/include/uapi/asm/swab.h
index 557cd9f00661..7f235c7105c1 100644
--- a/arch/x86/include/asm/swab.h
+++ b/arch/x86/include/uapi/asm/swab.h
@@ -6,22 +6,7 @@
 
 static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
 {
-#ifdef __i386__
-# ifdef CONFIG_X86_BSWAP
-	asm("bswap %0" : "=r" (val) : "0" (val));
-# else
-	asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
-	    "rorl $16,%0\n\t" /* swap words */
-	    "xchgb %b0,%h0" /* swap higher bytes */
-	    : "=q" (val)
-	    : "0" (val));
-# endif
-
-#else /* __i386__ */
-	asm("bswapl %0"
-	    : "=r" (val)
-	    : "0" (val));
-#endif
+	asm("bswapl %0" : "=r" (val) : "0" (val));
 	return val;
 }
 #define __arch_swab32 __arch_swab32
@@ -37,22 +22,12 @@ static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
 		__u64 u;
 	} v;
 	v.u = val;
-# ifdef CONFIG_X86_BSWAP
 	asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
 	    : "=r" (v.s.a), "=r" (v.s.b)
 	    : "0" (v.s.a), "1" (v.s.b));
-# else
-	v.s.a = __arch_swab32(v.s.a);
-	v.s.b = __arch_swab32(v.s.b);
-	asm("xchgl %0,%1"
-	    : "=r" (v.s.a), "=r" (v.s.b)
-	    : "0" (v.s.a), "1" (v.s.b));
-# endif
 	return v.u;
 #else /* __i386__ */
-	asm("bswapq %0"
-	    : "=r" (val)
-	    : "0" (val));
+	asm("bswapq %0" : "=r" (val) : "0" (val));
 	return val;
 #endif
 }
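
For reference, a portable C sketch (not part of the patch) of the byte-order transformation the BSWAP-based __arch_swab32() above performs:

/* Not part of the patch: plain-C equivalent of a 32-bit byte swap. */
#include <stdint.h>
#include <stdio.h>

static uint32_t swab32_c(uint32_t x)
{
	return ((x & 0x000000ffu) << 24) |
	       ((x & 0x0000ff00u) <<  8) |
	       ((x & 0x00ff0000u) >>  8) |
	       ((x & 0xff000000u) >> 24);
}

int main(void)
{
	/* 0x12345678 -> 0x78563412 */
	printf("%#x -> %#x\n", 0x12345678u, swab32_c(0x12345678u));
	return 0;
}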
diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/uapi/asm/termbits.h
index 3935b106de79..3935b106de79 100644
--- a/arch/x86/include/asm/termbits.h
+++ b/arch/x86/include/uapi/asm/termbits.h
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/uapi/asm/termios.h
index 280d78a9d966..280d78a9d966 100644
--- a/arch/x86/include/asm/termios.h
+++ b/arch/x86/include/uapi/asm/termios.h
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/uapi/asm/types.h
index 8e8c23fef08c..8e8c23fef08c 100644
--- a/arch/x86/include/asm/types.h
+++ b/arch/x86/include/uapi/asm/types.h
diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8017f2..b7c29c8017f2 100644
--- a/arch/x86/include/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h
new file mode 100644
index 000000000000..a26df0d75cd0
--- /dev/null
+++ b/arch/x86/include/uapi/asm/unistd.h
@@ -0,0 +1,17 @@
1#ifndef _UAPI_ASM_X86_UNISTD_H
2#define _UAPI_ASM_X86_UNISTD_H
3
4/* x32 syscall flag bit */
5#define __X32_SYSCALL_BIT 0x40000000
6
7#ifndef __KERNEL__
8# ifdef __i386__
9# include <asm/unistd_32.h>
10# elif defined(__ILP32__)
11# include <asm/unistd_x32.h>
12# else
13# include <asm/unistd_64.h>
14# endif
15#endif
16
17#endif /* _UAPI_ASM_X86_UNISTD_H */
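
A short illustrative sketch (not part of the patch) of how the x32 flag bit above partitions a syscall number: an x32 program issues numbers with bit 30 set, so the kernel can share the 64-bit syscall entry while indexing the x32 table with the low bits.

/* Not part of the patch: decoding a hypothetical x32 syscall number. */
#include <stdio.h>

#define __X32_SYSCALL_BIT 0x40000000

int main(void)
{
	unsigned long nr = 1UL | __X32_SYSCALL_BIT;	/* hypothetical x32 number */
	unsigned long idx = nr & ~(unsigned long)__X32_SYSCALL_BIT;

	printf("raw nr = %#lx, table index = %lu, is_x32 = %d\n",
	       nr, idx, (nr & __X32_SYSCALL_BIT) != 0);
	return 0;
}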
diff --git a/arch/x86/include/uapi/asm/vm86.h b/arch/x86/include/uapi/asm/vm86.h
new file mode 100644
index 000000000000..e0b243e9d859
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vm86.h
@@ -0,0 +1,129 @@
1#ifndef _UAPI_ASM_X86_VM86_H
2#define _UAPI_ASM_X86_VM86_H
3
4/*
5 * I'm guessing at the VIF/VIP flag usage, but hope that this is how
6 * the Pentium uses them. Linux will return from vm86 mode when both
7 * VIF and VIP is set.
8 *
9 * On a Pentium, we could probably optimize the virtual flags directly
10 * in the eflags register instead of doing it "by hand" in vflags...
11 *
12 * Linus
13 */
14
15#include <asm/processor-flags.h>
16
17#define BIOSSEG 0x0f000
18
19#define CPU_086 0
20#define CPU_186 1
21#define CPU_286 2
22#define CPU_386 3
23#define CPU_486 4
24#define CPU_586 5
25
26/*
27 * Return values for the 'vm86()' system call
28 */
29#define VM86_TYPE(retval) ((retval) & 0xff)
30#define VM86_ARG(retval) ((retval) >> 8)
31
32#define VM86_SIGNAL 0 /* return due to signal */
33#define VM86_UNKNOWN 1 /* unhandled GP fault
34 - IO-instruction or similar */
35#define VM86_INTx 2 /* int3/int x instruction (ARG = x) */
36#define VM86_STI 3 /* sti/popf/iret instruction enabled
37 virtual interrupts */
38
39/*
40 * Additional return values when invoking new vm86()
41 */
42#define VM86_PICRETURN 4 /* return due to pending PIC request */
43#define VM86_TRAP 6 /* return due to DOS-debugger request */
44
45/*
46 * function codes when invoking new vm86()
47 */
48#define VM86_PLUS_INSTALL_CHECK 0
49#define VM86_ENTER 1
50#define VM86_ENTER_NO_BYPASS 2
51#define VM86_REQUEST_IRQ 3
52#define VM86_FREE_IRQ 4
53#define VM86_GET_IRQ_BITS 5
54#define VM86_GET_AND_RESET_IRQ 6
55
56/*
57 * This is the stack-layout seen by the user space program when we have
58 * done a translation of "SAVE_ALL" from vm86 mode. The real kernel layout
59 * is 'kernel_vm86_regs' (see below).
60 */
61
62struct vm86_regs {
63/*
64 * normal regs, with special meaning for the segment descriptors..
65 */
66 long ebx;
67 long ecx;
68 long edx;
69 long esi;
70 long edi;
71 long ebp;
72 long eax;
73 long __null_ds;
74 long __null_es;
75 long __null_fs;
76 long __null_gs;
77 long orig_eax;
78 long eip;
79 unsigned short cs, __csh;
80 long eflags;
81 long esp;
82 unsigned short ss, __ssh;
83/*
84 * these are specific to v86 mode:
85 */
86 unsigned short es, __esh;
87 unsigned short ds, __dsh;
88 unsigned short fs, __fsh;
89 unsigned short gs, __gsh;
90};
91
92struct revectored_struct {
93 unsigned long __map[8]; /* 256 bits */
94};
95
96struct vm86_struct {
97 struct vm86_regs regs;
98 unsigned long flags;
99 unsigned long screen_bitmap;
100 unsigned long cpu_type;
101 struct revectored_struct int_revectored;
102 struct revectored_struct int21_revectored;
103};
104
105/*
106 * flags masks
107 */
108#define VM86_SCREEN_BITMAP 0x0001
109
110struct vm86plus_info_struct {
111 unsigned long force_return_for_pic:1;
112 unsigned long vm86dbg_active:1; /* for debugger */
113 unsigned long vm86dbg_TFpendig:1; /* for debugger */
114 unsigned long unused:28;
115 unsigned long is_vm86pus:1; /* for vm86 internal use */
116 unsigned char vm86dbg_intxxtab[32]; /* for debugger */
117};
118struct vm86plus_struct {
119 struct vm86_regs regs;
120 unsigned long flags;
121 unsigned long screen_bitmap;
122 unsigned long cpu_type;
123 struct revectored_struct int_revectored;
124 struct revectored_struct int21_revectored;
125 struct vm86plus_info_struct vm86plus;
126};
127
128
129#endif /* _UAPI_ASM_X86_VM86_H */
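
The VM86_TYPE()/VM86_ARG() accessors above split the vm86() return value into a reason code and an argument. A hedged userspace sketch (not part of the patch) of how an emulator might dispatch on them; the local fallback defines only mirror the header so the snippet stands alone.

/* Not part of the patch: dispatching on a vm86() return value. */
#include <stdio.h>

#ifndef VM86_TYPE		/* normally provided by <asm/vm86.h> above */
#define VM86_TYPE(retval)	((retval) & 0xff)
#define VM86_ARG(retval)	((retval) >> 8)
#define VM86_SIGNAL	0
#define VM86_UNKNOWN	1
#define VM86_INTx	2
#define VM86_STI	3
#endif

static void handle_vm86_return(int retval)
{
	switch (VM86_TYPE(retval)) {
	case VM86_INTx:		/* software interrupt; vector in the upper bits */
		printf("INT 0x%02x\n", VM86_ARG(retval));
		break;
	case VM86_SIGNAL:	/* returned because a signal is pending */
	case VM86_UNKNOWN:	/* unhandled #GP fault: emulator decodes it */
	case VM86_STI:		/* virtual interrupts just became enabled */
	default:
		break;
	}
}

int main(void)
{
	handle_vm86_return((0x21 << 8) | VM86_INTx);	/* simulate "int 0x21" */
	return 0;
}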
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
new file mode 100644
index 000000000000..979d03bce135
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -0,0 +1,109 @@
1/*
2 * vmx.h: VMX Architecture related definitions
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * A few random additions are:
19 * Copyright (C) 2006 Qumranet
20 * Avi Kivity <avi@qumranet.com>
21 * Yaniv Kamay <yaniv@qumranet.com>
22 *
23 */
24#ifndef _UAPIVMX_H
25#define _UAPIVMX_H
26
27
28#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
29
30#define EXIT_REASON_EXCEPTION_NMI 0
31#define EXIT_REASON_EXTERNAL_INTERRUPT 1
32#define EXIT_REASON_TRIPLE_FAULT 2
33
34#define EXIT_REASON_PENDING_INTERRUPT 7
35#define EXIT_REASON_NMI_WINDOW 8
36#define EXIT_REASON_TASK_SWITCH 9
37#define EXIT_REASON_CPUID 10
38#define EXIT_REASON_HLT 12
39#define EXIT_REASON_INVD 13
40#define EXIT_REASON_INVLPG 14
41#define EXIT_REASON_RDPMC 15
42#define EXIT_REASON_RDTSC 16
43#define EXIT_REASON_VMCALL 18
44#define EXIT_REASON_VMCLEAR 19
45#define EXIT_REASON_VMLAUNCH 20
46#define EXIT_REASON_VMPTRLD 21
47#define EXIT_REASON_VMPTRST 22
48#define EXIT_REASON_VMREAD 23
49#define EXIT_REASON_VMRESUME 24
50#define EXIT_REASON_VMWRITE 25
51#define EXIT_REASON_VMOFF 26
52#define EXIT_REASON_VMON 27
53#define EXIT_REASON_CR_ACCESS 28
54#define EXIT_REASON_DR_ACCESS 29
55#define EXIT_REASON_IO_INSTRUCTION 30
56#define EXIT_REASON_MSR_READ 31
57#define EXIT_REASON_MSR_WRITE 32
58#define EXIT_REASON_INVALID_STATE 33
59#define EXIT_REASON_MWAIT_INSTRUCTION 36
60#define EXIT_REASON_MONITOR_INSTRUCTION 39
61#define EXIT_REASON_PAUSE_INSTRUCTION 40
62#define EXIT_REASON_MCE_DURING_VMENTRY 41
63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
64#define EXIT_REASON_APIC_ACCESS 44
65#define EXIT_REASON_EPT_VIOLATION 48
66#define EXIT_REASON_EPT_MISCONFIG 49
67#define EXIT_REASON_WBINVD 54
68#define EXIT_REASON_XSETBV 55
69#define EXIT_REASON_INVPCID 58
70
71#define VMX_EXIT_REASONS \
72 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
73 { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
74 { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
75 { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
76 { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
77 { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
78 { EXIT_REASON_CPUID, "CPUID" }, \
79 { EXIT_REASON_HLT, "HLT" }, \
80 { EXIT_REASON_INVLPG, "INVLPG" }, \
81 { EXIT_REASON_RDPMC, "RDPMC" }, \
82 { EXIT_REASON_RDTSC, "RDTSC" }, \
83 { EXIT_REASON_VMCALL, "VMCALL" }, \
84 { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
85 { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
86 { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
87 { EXIT_REASON_VMPTRST, "VMPTRST" }, \
88 { EXIT_REASON_VMREAD, "VMREAD" }, \
89 { EXIT_REASON_VMRESUME, "VMRESUME" }, \
90 { EXIT_REASON_VMWRITE, "VMWRITE" }, \
91 { EXIT_REASON_VMOFF, "VMOFF" }, \
92 { EXIT_REASON_VMON, "VMON" }, \
93 { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
94 { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
95 { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
96 { EXIT_REASON_MSR_READ, "MSR_READ" }, \
97 { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
98 { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
99 { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
100 { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
101 { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
102 { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
103 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
104 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
105 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
106 { EXIT_REASON_WBINVD, "WBINVD" }
107
108
109#endif /* _UAPIVMX_H */
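
Bit 31 of the exit-reason word is reserved for failed VM entries. A small sketch (not part of the patch) of the usual split between that flag and the basic exit reason that gets matched against the EXIT_REASON_* values above.

/*
 * Not part of the patch: splitting a VMX exit reason.  Assumes the
 * VMX_EXIT_REASONS_FAILED_VMENTRY definition from the header above.
 */
static inline int vmexit_failed_vmentry(unsigned int exit_reason)
{
	return (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) != 0;
}

static inline unsigned int vmexit_basic_reason(unsigned int exit_reason)
{
	return exit_reason & 0xffff;	/* low bits: basic exit reason */
}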
diff --git a/arch/x86/include/uapi/asm/vsyscall.h b/arch/x86/include/uapi/asm/vsyscall.h
new file mode 100644
index 000000000000..85dc1b3825ab
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vsyscall.h
@@ -0,0 +1,17 @@
1#ifndef _UAPI_ASM_X86_VSYSCALL_H
2#define _UAPI_ASM_X86_VSYSCALL_H
3
4enum vsyscall_num {
5 __NR_vgettimeofday,
6 __NR_vtime,
7 __NR_vgetcpu,
8};
9
10#define VSYSCALL_START (-10UL << 20)
11#define VSYSCALL_SIZE 1024
12#define VSYSCALL_END (-2UL << 20)
13#define VSYSCALL_MAPPED_PAGES 1
14#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
15
16
17#endif /* _UAPI_ASM_X86_VSYSCALL_H */
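
A quick worked check (not part of the patch) of the VSYSCALL_ADDR() arithmetic above; it is only meaningful on a 64-bit build, where -10UL << 20 wraps to 0xffffffffff600000 and each legacy vsyscall slot sits VSYSCALL_SIZE (1024) bytes further on.

/* Not part of the patch: evaluating the vsyscall slot addresses (64-bit). */
#include <stdio.h>

#define VSYSCALL_START (-10UL << 20)
#define VSYSCALL_SIZE 1024
#define VSYSCALL_ADDR(nr) (VSYSCALL_START + VSYSCALL_SIZE * (nr))

int main(void)
{
	printf("vgettimeofday: %#lx\n", VSYSCALL_ADDR(0)); /* 0xffffffffff600000 */
	printf("vtime:         %#lx\n", VSYSCALL_ADDR(1)); /* 0xffffffffff600400 */
	printf("vgetcpu:       %#lx\n", VSYSCALL_ADDR(2)); /* 0xffffffffff600800 */
	return 0;
}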
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 91ce48f05f9f..34e923a53762 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -9,7 +9,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
9ifdef CONFIG_FUNCTION_TRACER 9ifdef CONFIG_FUNCTION_TRACER
10# Do not profile debug and lowlevel utilities 10# Do not profile debug and lowlevel utilities
11CFLAGS_REMOVE_tsc.o = -pg 11CFLAGS_REMOVE_tsc.o = -pg
12CFLAGS_REMOVE_rtc.o = -pg
13CFLAGS_REMOVE_paravirt-spinlocks.o = -pg 12CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
14CFLAGS_REMOVE_pvclock.o = -pg 13CFLAGS_REMOVE_pvclock.o = -pg
15CFLAGS_REMOVE_kvmclock.o = -pg 14CFLAGS_REMOVE_kvmclock.o = -pg
@@ -62,6 +61,7 @@ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
62obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 61obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
63obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 62obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
64obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o 63obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
64obj-$(CONFIG_X86_TSC) += trace_clock.o
65obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 65obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
66obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 66obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e651f7a589ac..bacf4b0d91f4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -574,6 +574,12 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
574 574
575 return irq; 575 return irq;
576} 576}
577EXPORT_SYMBOL_GPL(acpi_register_gsi);
578
579void acpi_unregister_gsi(u32 gsi)
580{
581}
582EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
577 583
578void __init acpi_set_irq_model_pic(void) 584void __init acpi_set_irq_model_pic(void)
579{ 585{
@@ -1700,3 +1706,9 @@ int __acpi_release_global_lock(unsigned int *lock)
1700 } while (unlikely (val != old)); 1706 } while (unlikely (val != old));
1701 return old & 0x1; 1707 return old & 0x1;
1702} 1708}
1709
1710void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
1711{
1712 e820_add_region(addr, size, E820_ACPI);
1713 update_e820();
1714}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 11676cf65aee..d5e0d717005a 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,8 @@ static int __init acpi_sleep_setup(char *str)
101#endif 101#endif
102 if (strncmp(str, "nonvs", 5) == 0) 102 if (strncmp(str, "nonvs", 5) == 0)
103 acpi_nvs_nosave(); 103 acpi_nvs_nosave();
104 if (strncmp(str, "nonvs_s3", 8) == 0)
105 acpi_nvs_nosave_s3();
104 if (strncmp(str, "old_ordering", 12) == 0) 106 if (strncmp(str, "old_ordering", 12) == 0)
105 acpi_old_suspend_ordering(); 107 acpi_old_suspend_ordering();
106 str = strchr(str, ','); 108 str = strchr(str, ',');
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b17416e72fbd..b994cc84aa7e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -90,21 +90,6 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
90 */ 90 */
91DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); 91DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
92 92
93/*
94 * Knob to control our willingness to enable the local APIC.
95 *
96 * +1=force-enable
97 */
98static int force_enable_local_apic __initdata;
99/*
100 * APIC command line parameters
101 */
102static int __init parse_lapic(char *arg)
103{
104 force_enable_local_apic = 1;
105 return 0;
106}
107early_param("lapic", parse_lapic);
108/* Local APIC was disabled by the BIOS and enabled by the kernel */ 93/* Local APIC was disabled by the BIOS and enabled by the kernel */
109static int enabled_via_apicbase; 94static int enabled_via_apicbase;
110 95
@@ -133,6 +118,25 @@ static inline void imcr_apic_to_pic(void)
133} 118}
134#endif 119#endif
135 120
121/*
122 * Knob to control our willingness to enable the local APIC.
123 *
124 * +1=force-enable
125 */
126static int force_enable_local_apic __initdata;
127/*
128 * APIC command line parameters
129 */
130static int __init parse_lapic(char *arg)
131{
132 if (config_enabled(CONFIG_X86_32) && !arg)
133 force_enable_local_apic = 1;
134 else if (!strncmp(arg, "notscdeadline", 13))
135 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
136 return 0;
137}
138early_param("lapic", parse_lapic);
139
136#ifdef CONFIG_X86_64 140#ifdef CONFIG_X86_64
137static int apic_calibrate_pmtmr __initdata; 141static int apic_calibrate_pmtmr __initdata;
138static __init int setup_apicpmtimer(char *s) 142static __init int setup_apicpmtimer(char *s)
@@ -315,6 +319,7 @@ int lapic_get_maxlvt(void)
315 319
316/* Clock divisor */ 320/* Clock divisor */
317#define APIC_DIVISOR 16 321#define APIC_DIVISOR 16
322#define TSC_DIVISOR 32
318 323
319/* 324/*
320 * This function sets up the local APIC timer, with a timeout of 325 * This function sets up the local APIC timer, with a timeout of
@@ -333,6 +338,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
333 lvtt_value = LOCAL_TIMER_VECTOR; 338 lvtt_value = LOCAL_TIMER_VECTOR;
334 if (!oneshot) 339 if (!oneshot)
335 lvtt_value |= APIC_LVT_TIMER_PERIODIC; 340 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
341 else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
342 lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
343
336 if (!lapic_is_integrated()) 344 if (!lapic_is_integrated())
337 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); 345 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
338 346
@@ -341,6 +349,11 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
341 349
342 apic_write(APIC_LVTT, lvtt_value); 350 apic_write(APIC_LVTT, lvtt_value);
343 351
352 if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
353 printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
354 return;
355 }
356
344 /* 357 /*
345 * Divide PICLK by 16 358 * Divide PICLK by 16
346 */ 359 */
@@ -453,6 +466,16 @@ static int lapic_next_event(unsigned long delta,
453 return 0; 466 return 0;
454} 467}
455 468
469static int lapic_next_deadline(unsigned long delta,
470 struct clock_event_device *evt)
471{
472 u64 tsc;
473
474 rdtscll(tsc);
475 wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
476 return 0;
477}
478
456/* 479/*
457 * Setup the lapic timer in periodic or oneshot mode 480 * Setup the lapic timer in periodic or oneshot mode
458 */ 481 */
@@ -533,7 +556,15 @@ static void __cpuinit setup_APIC_timer(void)
533 memcpy(levt, &lapic_clockevent, sizeof(*levt)); 556 memcpy(levt, &lapic_clockevent, sizeof(*levt));
534 levt->cpumask = cpumask_of(smp_processor_id()); 557 levt->cpumask = cpumask_of(smp_processor_id());
535 558
536 clockevents_register_device(levt); 559 if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
560 levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
561 CLOCK_EVT_FEAT_DUMMY);
562 levt->set_next_event = lapic_next_deadline;
563 clockevents_config_and_register(levt,
564 (tsc_khz / TSC_DIVISOR) * 1000,
565 0xF, ~0UL);
566 } else
567 clockevents_register_device(levt);
537} 568}
538 569
539/* 570/*
@@ -661,7 +692,9 @@ static int __init calibrate_APIC_clock(void)
661 * in the clockevent structure and return. 692 * in the clockevent structure and return.
662 */ 693 */
663 694
664 if (lapic_timer_frequency) { 695 if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
696 return 0;
697 } else if (lapic_timer_frequency) {
665 apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", 698 apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
666 lapic_timer_frequency); 699 lapic_timer_frequency);
667 lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, 700 lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
@@ -674,6 +707,9 @@ static int __init calibrate_APIC_clock(void)
674 return 0; 707 return 0;
675 } 708 }
676 709
710 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
711 "calibrating APIC timer ...\n");
712
677 local_irq_disable(); 713 local_irq_disable();
678 714
679 /* Replace the global interrupt handler */ 715 /* Replace the global interrupt handler */
@@ -811,9 +847,6 @@ void __init setup_boot_APIC_clock(void)
811 return; 847 return;
812 } 848 }
813 849
814 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
815 "calibrating APIC timer ...\n");
816
817 if (calibrate_APIC_clock()) { 850 if (calibrate_APIC_clock()) {
818 /* No broadcast on UP ! */ 851 /* No broadcast on UP ! */
819 if (num_possible_cpus() > 1) 852 if (num_possible_cpus() > 1)
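
The TSC-deadline path added above registers the clockevent at (tsc_khz / TSC_DIVISOR) * 1000 Hz, so a delta of N event ticks corresponds to N * TSC_DIVISOR TSC cycles, which is exactly what lapic_next_deadline() adds to the current TSC before writing MSR_IA32_TSC_DEADLINE. A small arithmetic sketch (not part of the patch, using a hypothetical 3 GHz TSC):

/* Not part of the patch: unit conversion behind lapic_next_deadline(). */
#include <stdio.h>

#define TSC_DIVISOR 32

int main(void)
{
	unsigned long tsc_khz = 3000000UL;			/* assumed 3 GHz TSC */
	unsigned long evt_hz = (tsc_khz / TSC_DIVISOR) * 1000;	/* 93,750,000 Hz */
	unsigned long delta = evt_hz / 1000;			/* ticks for 1 ms */

	printf("clockevent rate: %lu Hz\n", evt_hz);
	printf("1 ms delta: %lu ticks -> %lu TSC cycles\n",
	       delta, delta * TSC_DIVISOR);	/* 93750 * 32 = 3,000,000 = 1 ms */
	return 0;
}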
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index a65829ac2b9a..9c2aa89a11cb 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -22,6 +22,7 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/delay.h> 23#include <linux/delay.h>
24 24
25#include <asm/numachip/numachip.h>
25#include <asm/numachip/numachip_csr.h> 26#include <asm/numachip/numachip_csr.h>
26#include <asm/smp.h> 27#include <asm/smp.h>
27#include <asm/apic.h> 28#include <asm/apic.h>
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
179 return 0; 180 return 0;
180 181
181 x86_cpuinit.fixup_cpu_id = fixup_cpu_id; 182 x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
183 x86_init.pci.arch_init = pci_numachip_init;
182 184
183 map_csrs(); 185 map_csrs();
184 186
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1817fa911024..b739d398bb29 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -234,11 +234,11 @@ int __init arch_early_irq_init(void)
234 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); 234 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
235 /* 235 /*
236 * For legacy IRQ's, start with assigning irq0 to irq15 to 236 * For legacy IRQ's, start with assigning irq0 to irq15 to
237 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. 237 * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
238 */ 238 */
239 if (i < legacy_pic->nr_legacy_irqs) { 239 if (i < legacy_pic->nr_legacy_irqs) {
240 cfg[i].vector = IRQ0_VECTOR + i; 240 cfg[i].vector = IRQ0_VECTOR + i;
241 cpumask_set_cpu(0, cfg[i].domain); 241 cpumask_setall(cfg[i].domain);
242 } 242 }
243 } 243 }
244 244
@@ -1141,7 +1141,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1141 * allocation for the members that are not used anymore. 1141 * allocation for the members that are not used anymore.
1142 */ 1142 */
1143 cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); 1143 cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
1144 cfg->move_in_progress = 1; 1144 cfg->move_in_progress =
1145 cpumask_intersects(cfg->old_domain, cpu_online_mask);
1145 cpumask_and(cfg->domain, cfg->domain, tmp_mask); 1146 cpumask_and(cfg->domain, cfg->domain, tmp_mask);
1146 break; 1147 break;
1147 } 1148 }
@@ -1172,8 +1173,9 @@ next:
1172 current_vector = vector; 1173 current_vector = vector;
1173 current_offset = offset; 1174 current_offset = offset;
1174 if (cfg->vector) { 1175 if (cfg->vector) {
1175 cfg->move_in_progress = 1;
1176 cpumask_copy(cfg->old_domain, cfg->domain); 1176 cpumask_copy(cfg->old_domain, cfg->domain);
1177 cfg->move_in_progress =
1178 cpumask_intersects(cfg->old_domain, cpu_online_mask);
1177 } 1179 }
1178 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) 1180 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1179 per_cpu(vector_irq, new_cpu)[vector] = irq; 1181 per_cpu(vector_irq, new_cpu)[vector] = irq;
@@ -1241,12 +1243,6 @@ void __setup_vector_irq(int cpu)
1241 cfg = irq_get_chip_data(irq); 1243 cfg = irq_get_chip_data(irq);
1242 if (!cfg) 1244 if (!cfg)
1243 continue; 1245 continue;
1244 /*
1245 * If it is a legacy IRQ handled by the legacy PIC, this cpu
1246 * will be part of the irq_cfg's domain.
1247 */
1248 if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
1249 cpumask_set_cpu(cpu, cfg->domain);
1250 1246
1251 if (!cpumask_test_cpu(cpu, cfg->domain)) 1247 if (!cpumask_test_cpu(cpu, cfg->domain))
1252 continue; 1248 continue;
@@ -1356,16 +1352,6 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
1356 if (!IO_APIC_IRQ(irq)) 1352 if (!IO_APIC_IRQ(irq))
1357 return; 1353 return;
1358 1354
1359 /*
1360 * For legacy irqs, cfg->domain starts with cpu 0. Now that IO-APIC
1361 * can handle this irq and the apic driver is finialized at this point,
1362 * update the cfg->domain.
1363 */
1364 if (irq < legacy_pic->nr_legacy_irqs &&
1365 cpumask_equal(cfg->domain, cpumask_of(0)))
1366 apic->vector_allocation_domain(0, cfg->domain,
1367 apic->target_cpus());
1368
1369 if (assign_irq_vector(irq, cfg, apic->target_cpus())) 1355 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1370 return; 1356 return;
1371 1357
@@ -2199,9 +2185,11 @@ static int ioapic_retrigger_irq(struct irq_data *data)
2199{ 2185{
2200 struct irq_cfg *cfg = data->chip_data; 2186 struct irq_cfg *cfg = data->chip_data;
2201 unsigned long flags; 2187 unsigned long flags;
2188 int cpu;
2202 2189
2203 raw_spin_lock_irqsave(&vector_lock, flags); 2190 raw_spin_lock_irqsave(&vector_lock, flags);
2204 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); 2191 cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
2192 apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
2205 raw_spin_unlock_irqrestore(&vector_lock, flags); 2193 raw_spin_unlock_irqrestore(&vector_lock, flags);
2206 2194
2207 return 1; 2195 return 1;
@@ -3317,8 +3305,9 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3317 int ret; 3305 int ret;
3318 3306
3319 if (irq_remapping_enabled) { 3307 if (irq_remapping_enabled) {
3320 if (!setup_hpet_msi_remapped(irq, id)) 3308 ret = setup_hpet_msi_remapped(irq, id);
3321 return -1; 3309 if (ret)
3310 return ret;
3322 } 3311 }
3323 3312
3324 ret = msi_compose_msg(NULL, irq, &msg, id); 3313 ret = msi_compose_msg(NULL, irq, &msg, id);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1b7d1656a042..15239fffd6fe 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -304,7 +304,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
304 int cpu = smp_processor_id(); 304 int cpu = smp_processor_id();
305 305
306 /* get information required for multi-node processors */ 306 /* get information required for multi-node processors */
307 if (cpu_has(c, X86_FEATURE_TOPOEXT)) { 307 if (cpu_has_topoext) {
308 u32 eax, ebx, ecx, edx; 308 u32 eax, ebx, ecx, edx;
309 309
310 cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); 310 cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -657,12 +657,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
657 detect_ht(c); 657 detect_ht(c);
658#endif 658#endif
659 659
660 if (c->extended_cpuid_level >= 0x80000006) { 660 init_amd_cacheinfo(c);
661 if (cpuid_edx(0x80000006) & 0xf000)
662 num_cache_leaves = 4;
663 else
664 num_cache_leaves = 3;
665 }
666 661
667 if (c->x86 >= 0xf) 662 if (c->x86 >= 0xf)
668 set_cpu_cap(c, X86_FEATURE_K8); 663 set_cpu_cap(c, X86_FEATURE_K8);
@@ -753,9 +748,6 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
753 748
754static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) 749static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
755{ 750{
756 if (!cpu_has_invlpg)
757 return;
758
759 tlb_flushall_shift = 5; 751 tlb_flushall_shift = 5;
760 752
761 if (c->x86 <= 0x11) 753 if (c->x86 <= 0x11)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d0e910da16c5..92dfec986a48 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -107,53 +107,17 @@ static void __init check_hlt(void)
107} 107}
108 108
109/* 109/*
110 * Most 386 processors have a bug where a POPAD can lock the
111 * machine even from user space.
112 */
113
114static void __init check_popad(void)
115{
116#ifndef CONFIG_X86_POPAD_OK
117 int res, inp = (int) &res;
118
119 pr_info("Checking for popad bug... ");
120 __asm__ __volatile__(
121 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
122 : "=&a" (res)
123 : "d" (inp)
124 : "ecx", "edi");
125 /*
126 * If this fails, it means that any user program may lock the
127 * CPU hard. Too bad.
128 */
129 if (res != 12345678)
130 pr_cont("Buggy\n");
131 else
132 pr_cont("OK\n");
133#endif
134}
135
136/*
137 * Check whether we are able to run this kernel safely on SMP. 110 * Check whether we are able to run this kernel safely on SMP.
138 * 111 *
139 * - In order to run on a i386, we need to be compiled for i386 112 * - i386 is no longer supported.
140 * (for due to lack of "invlpg" and working WP on a i386)
141 * - In order to run on anything without a TSC, we need to be 113 * - In order to run on anything without a TSC, we need to be
142 * compiled for a i486. 114 * compiled for a i486.
143 */ 115 */
144 116
145static void __init check_config(void) 117static void __init check_config(void)
146{ 118{
147/* 119 if (boot_cpu_data.x86 < 4)
148 * We'd better not be a i386 if we're configured to use some
149 * i486+ only features! (WP works in supervisor mode and the
150 * new "invlpg" and "bswap" instructions)
151 */
152#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
153 defined(CONFIG_X86_BSWAP)
154 if (boot_cpu_data.x86 == 3)
155 panic("Kernel requires i486+ for 'invlpg' and other features"); 120 panic("Kernel requires i486+ for 'invlpg' and other features");
156#endif
157} 121}
158 122
159 123
@@ -166,7 +130,6 @@ void __init check_bugs(void)
166#endif 130#endif
167 check_config(); 131 check_config();
168 check_hlt(); 132 check_hlt();
169 check_popad();
170 init_utsname()->machine[1] = 133 init_utsname()->machine[1] =
171 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); 134 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
172 alternative_instructions(); 135 alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7505f7b13e71..9c3ab43a6954 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1173,15 +1173,6 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1173DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); 1173DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
1174#endif 1174#endif
1175 1175
1176/* Make sure %fs and %gs are initialized properly in idle threads */
1177struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
1178{
1179 memset(regs, 0, sizeof(struct pt_regs));
1180 regs->fs = __KERNEL_PERCPU;
1181 regs->gs = __KERNEL_STACK_CANARY;
1182
1183 return regs;
1184}
1185#endif /* CONFIG_X86_64 */ 1176#endif /* CONFIG_X86_64 */
1186 1177
1187/* 1178/*
@@ -1237,7 +1228,7 @@ void __cpuinit cpu_init(void)
1237 oist = &per_cpu(orig_ist, cpu); 1228 oist = &per_cpu(orig_ist, cpu);
1238 1229
1239#ifdef CONFIG_NUMA 1230#ifdef CONFIG_NUMA
1240 if (cpu != 0 && this_cpu_read(numa_node) == 0 && 1231 if (this_cpu_read(numa_node) == 0 &&
1241 early_cpu_to_node(cpu) != NUMA_NO_NODE) 1232 early_cpu_to_node(cpu) != NUMA_NO_NODE)
1242 set_numa_node(early_cpu_to_node(cpu)); 1233 set_numa_node(early_cpu_to_node(cpu));
1243#endif 1234#endif
@@ -1269,8 +1260,7 @@ void __cpuinit cpu_init(void)
1269 barrier(); 1260 barrier();
1270 1261
1271 x86_configure_nx(); 1262 x86_configure_nx();
1272 if (cpu != 0) 1263 enable_x2apic();
1273 enable_x2apic();
1274 1264
1275 /* 1265 /*
1276 * set up and load the per-CPU TSS 1266 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 198e019a531a..fcaabd0432c5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -612,10 +612,6 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
612 612
613static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) 613static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
614{ 614{
615 if (!cpu_has_invlpg) {
616 tlb_flushall_shift = -1;
617 return;
618 }
619 switch ((c->x86 << 8) + c->x86_model) { 615 switch ((c->x86 << 8) + c->x86_model) {
620 case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 616 case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
621 case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 617 case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 93c5451bdd52..fe9edec6698a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -538,7 +538,11 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
538 unsigned edx; 538 unsigned edx;
539 539
540 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 540 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
541 amd_cpuid4(index, &eax, &ebx, &ecx); 541 if (cpu_has_topoext)
542 cpuid_count(0x8000001d, index, &eax.full,
543 &ebx.full, &ecx.full, &edx);
544 else
545 amd_cpuid4(index, &eax, &ebx, &ecx);
542 amd_init_l3_cache(this_leaf, index); 546 amd_init_l3_cache(this_leaf, index);
543 } else { 547 } else {
544 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 548 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
@@ -557,21 +561,39 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
557 return 0; 561 return 0;
558} 562}
559 563
560static int __cpuinit find_num_cache_leaves(void) 564static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
561{ 565{
562 unsigned int eax, ebx, ecx, edx; 566 unsigned int eax, ebx, ecx, edx, op;
563 union _cpuid4_leaf_eax cache_eax; 567 union _cpuid4_leaf_eax cache_eax;
564 int i = -1; 568 int i = -1;
565 569
570 if (c->x86_vendor == X86_VENDOR_AMD)
571 op = 0x8000001d;
572 else
573 op = 4;
574
566 do { 575 do {
567 ++i; 576 ++i;
568 /* Do cpuid(4) loop to find out num_cache_leaves */ 577 /* Do cpuid(op) loop to find out num_cache_leaves */
569 cpuid_count(4, i, &eax, &ebx, &ecx, &edx); 578 cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
570 cache_eax.full = eax; 579 cache_eax.full = eax;
571 } while (cache_eax.split.type != CACHE_TYPE_NULL); 580 } while (cache_eax.split.type != CACHE_TYPE_NULL);
572 return i; 581 return i;
573} 582}
574 583
584void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
585{
586
587 if (cpu_has_topoext) {
588 num_cache_leaves = find_num_cache_leaves(c);
589 } else if (c->extended_cpuid_level >= 0x80000006) {
590 if (cpuid_edx(0x80000006) & 0xf000)
591 num_cache_leaves = 4;
592 else
593 num_cache_leaves = 3;
594 }
595}
596
575unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) 597unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
576{ 598{
577 /* Cache sizes */ 599 /* Cache sizes */
@@ -588,7 +610,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
588 610
589 if (is_initialized == 0) { 611 if (is_initialized == 0) {
590 /* Init num_cache_leaves from boot CPU */ 612 /* Init num_cache_leaves from boot CPU */
591 num_cache_leaves = find_num_cache_leaves(); 613 num_cache_leaves = find_num_cache_leaves(c);
592 is_initialized++; 614 is_initialized++;
593 } 615 }
594 616
@@ -728,37 +750,50 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
728static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) 750static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
729{ 751{
730 struct _cpuid4_info *this_leaf; 752 struct _cpuid4_info *this_leaf;
731 int ret, i, sibling; 753 int i, sibling;
732 struct cpuinfo_x86 *c = &cpu_data(cpu);
733 754
734 ret = 0; 755 if (cpu_has_topoext) {
735 if (index == 3) { 756 unsigned int apicid, nshared, first, last;
736 ret = 1; 757
737 for_each_cpu(i, cpu_llc_shared_mask(cpu)) { 758 if (!per_cpu(ici_cpuid4_info, cpu))
759 return 0;
760
761 this_leaf = CPUID4_INFO_IDX(cpu, index);
762 nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
763 apicid = cpu_data(cpu).apicid;
764 first = apicid - (apicid % nshared);
765 last = first + nshared - 1;
766
767 for_each_online_cpu(i) {
768 apicid = cpu_data(i).apicid;
769 if ((apicid < first) || (apicid > last))
770 continue;
738 if (!per_cpu(ici_cpuid4_info, i)) 771 if (!per_cpu(ici_cpuid4_info, i))
739 continue; 772 continue;
740 this_leaf = CPUID4_INFO_IDX(i, index); 773 this_leaf = CPUID4_INFO_IDX(i, index);
741 for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { 774
742 if (!cpu_online(sibling)) 775 for_each_online_cpu(sibling) {
776 apicid = cpu_data(sibling).apicid;
777 if ((apicid < first) || (apicid > last))
743 continue; 778 continue;
744 set_bit(sibling, this_leaf->shared_cpu_map); 779 set_bit(sibling, this_leaf->shared_cpu_map);
745 } 780 }
746 } 781 }
747 } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) { 782 } else if (index == 3) {
748 ret = 1; 783 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
749 for_each_cpu(i, cpu_sibling_mask(cpu)) {
750 if (!per_cpu(ici_cpuid4_info, i)) 784 if (!per_cpu(ici_cpuid4_info, i))
751 continue; 785 continue;
752 this_leaf = CPUID4_INFO_IDX(i, index); 786 this_leaf = CPUID4_INFO_IDX(i, index);
753 for_each_cpu(sibling, cpu_sibling_mask(cpu)) { 787 for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
754 if (!cpu_online(sibling)) 788 if (!cpu_online(sibling))
755 continue; 789 continue;
756 set_bit(sibling, this_leaf->shared_cpu_map); 790 set_bit(sibling, this_leaf->shared_cpu_map);
757 } 791 }
758 } 792 }
759 } 793 } else
794 return 0;
760 795
761 return ret; 796 return 1;
762} 797}
763 798
764static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 799static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
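
cache_shared_amd_cpu_map_setup() above derives the cache-sharing group from APIC IDs when TOPOEXT is available: with nshared threads sharing the cache, the group of a CPU spans from apicid - (apicid % nshared) to first + nshared - 1. A worked example (not part of the patch, hypothetical topology):

/* Not part of the patch: the APIC-ID window computed for shared caches. */
#include <stdio.h>

int main(void)
{
	unsigned int apicid = 5, nshared = 4;	/* hypothetical topology */
	unsigned int first = apicid - (apicid % nshared);
	unsigned int last = first + nshared - 1;

	/* APIC IDs 4..7 share this cache instance with APIC ID 5. */
	printf("first = %u, last = %u\n", first, last);
	return 0;
}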
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 6a05c1d327a9..5b7d4fa5d3b7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -24,8 +24,6 @@ struct mce_bank {
24int mce_severity(struct mce *a, int tolerant, char **msg); 24int mce_severity(struct mce *a, int tolerant, char **msg);
25struct dentry *mce_get_debugfs_dir(void); 25struct dentry *mce_get_debugfs_dir(void);
26 26
27extern int mce_ser;
28
29extern struct mce_bank *mce_banks; 27extern struct mce_bank *mce_banks;
30 28
31#ifdef CONFIG_X86_MCE_INTEL 29#ifdef CONFIG_X86_MCE_INTEL
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 13017626f9a8..beb1f1689e52 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -193,9 +193,9 @@ int mce_severity(struct mce *m, int tolerant, char **msg)
193 continue; 193 continue;
194 if ((m->mcgstatus & s->mcgmask) != s->mcgres) 194 if ((m->mcgstatus & s->mcgmask) != s->mcgres)
195 continue; 195 continue;
196 if (s->ser == SER_REQUIRED && !mce_ser) 196 if (s->ser == SER_REQUIRED && !mca_cfg.ser)
197 continue; 197 continue;
198 if (s->ser == NO_SER && mce_ser) 198 if (s->ser == NO_SER && mca_cfg.ser)
199 continue; 199 continue;
200 if (s->context && ctx != s->context) 200 if (s->context && ctx != s->context)
201 continue; 201 continue;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 46cbf8689692..80dbda84f1c3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -58,34 +58,26 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
58#define CREATE_TRACE_POINTS 58#define CREATE_TRACE_POINTS
59#include <trace/events/mce.h> 59#include <trace/events/mce.h>
60 60
61int mce_disabled __read_mostly;
62
63#define SPINUNIT 100 /* 100ns */ 61#define SPINUNIT 100 /* 100ns */
64 62
65atomic_t mce_entry; 63atomic_t mce_entry;
66 64
67DEFINE_PER_CPU(unsigned, mce_exception_count); 65DEFINE_PER_CPU(unsigned, mce_exception_count);
68 66
69/* 67struct mce_bank *mce_banks __read_mostly;
70 * Tolerant levels: 68
71 * 0: always panic on uncorrected errors, log corrected errors 69struct mca_config mca_cfg __read_mostly = {
72 * 1: panic or SIGBUS on uncorrected errors, log corrected errors 70 .bootlog = -1,
73 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors 71 /*
74 * 3: never panic or SIGBUS, log all errors (for testing only) 72 * Tolerant levels:
75 */ 73 * 0: always panic on uncorrected errors, log corrected errors
76static int tolerant __read_mostly = 1; 74 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
77static int banks __read_mostly; 75 * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
78static int rip_msr __read_mostly; 76 * 3: never panic or SIGBUS, log all errors (for testing only)
79static int mce_bootlog __read_mostly = -1; 77 */
80static int monarch_timeout __read_mostly = -1; 78 .tolerant = 1,
81static int mce_panic_timeout __read_mostly; 79 .monarch_timeout = -1
82static int mce_dont_log_ce __read_mostly; 80};
83int mce_cmci_disabled __read_mostly;
84int mce_ignore_ce __read_mostly;
85int mce_ser __read_mostly;
86int mce_bios_cmci_threshold __read_mostly;
87
88struct mce_bank *mce_banks __read_mostly;
89 81
90/* User mode helper program triggered by machine check event */ 82/* User mode helper program triggered by machine check event */
91static unsigned long mce_need_notify; 83static unsigned long mce_need_notify;
@@ -302,7 +294,7 @@ static void wait_for_panic(void)
302 while (timeout-- > 0) 294 while (timeout-- > 0)
303 udelay(1); 295 udelay(1);
304 if (panic_timeout == 0) 296 if (panic_timeout == 0)
305 panic_timeout = mce_panic_timeout; 297 panic_timeout = mca_cfg.panic_timeout;
306 panic("Panicing machine check CPU died"); 298 panic("Panicing machine check CPU died");
307} 299}
308 300
@@ -360,7 +352,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
360 pr_emerg(HW_ERR "Machine check: %s\n", exp); 352 pr_emerg(HW_ERR "Machine check: %s\n", exp);
361 if (!fake_panic) { 353 if (!fake_panic) {
362 if (panic_timeout == 0) 354 if (panic_timeout == 0)
363 panic_timeout = mce_panic_timeout; 355 panic_timeout = mca_cfg.panic_timeout;
364 panic(msg); 356 panic(msg);
365 } else 357 } else
366 pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); 358 pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
@@ -372,7 +364,7 @@ static int msr_to_offset(u32 msr)
372{ 364{
373 unsigned bank = __this_cpu_read(injectm.bank); 365 unsigned bank = __this_cpu_read(injectm.bank);
374 366
375 if (msr == rip_msr) 367 if (msr == mca_cfg.rip_msr)
376 return offsetof(struct mce, ip); 368 return offsetof(struct mce, ip);
377 if (msr == MSR_IA32_MCx_STATUS(bank)) 369 if (msr == MSR_IA32_MCx_STATUS(bank))
378 return offsetof(struct mce, status); 370 return offsetof(struct mce, status);
@@ -451,8 +443,8 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
451 m->cs |= 3; 443 m->cs |= 3;
452 } 444 }
453 /* Use accurate RIP reporting if available. */ 445 /* Use accurate RIP reporting if available. */
454 if (rip_msr) 446 if (mca_cfg.rip_msr)
455 m->ip = mce_rdmsrl(rip_msr); 447 m->ip = mce_rdmsrl(mca_cfg.rip_msr);
456 } 448 }
457} 449}
458 450
@@ -513,7 +505,7 @@ static int mce_ring_add(unsigned long pfn)
513 505
514int mce_available(struct cpuinfo_x86 *c) 506int mce_available(struct cpuinfo_x86 *c)
515{ 507{
516 if (mce_disabled) 508 if (mca_cfg.disabled)
517 return 0; 509 return 0;
518 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); 510 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
519} 511}
@@ -565,7 +557,7 @@ static void mce_read_aux(struct mce *m, int i)
565 /* 557 /*
566 * Mask the reported address by the reported granularity. 558 * Mask the reported address by the reported granularity.
567 */ 559 */
568 if (mce_ser && (m->status & MCI_STATUS_MISCV)) { 560 if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
569 u8 shift = MCI_MISC_ADDR_LSB(m->misc); 561 u8 shift = MCI_MISC_ADDR_LSB(m->misc);
570 m->addr >>= shift; 562 m->addr >>= shift;
571 m->addr <<= shift; 563 m->addr <<= shift;
@@ -599,7 +591,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
599 591
600 mce_gather_info(&m, NULL); 592 mce_gather_info(&m, NULL);
601 593
602 for (i = 0; i < banks; i++) { 594 for (i = 0; i < mca_cfg.banks; i++) {
603 if (!mce_banks[i].ctl || !test_bit(i, *b)) 595 if (!mce_banks[i].ctl || !test_bit(i, *b))
604 continue; 596 continue;
605 597
@@ -620,7 +612,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
620 * TBD do the same check for MCI_STATUS_EN here? 612 * TBD do the same check for MCI_STATUS_EN here?
621 */ 613 */
622 if (!(flags & MCP_UC) && 614 if (!(flags & MCP_UC) &&
623 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) 615 (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
624 continue; 616 continue;
625 617
626 mce_read_aux(&m, i); 618 mce_read_aux(&m, i);
@@ -631,7 +623,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
631 * Don't get the IP here because it's unlikely to 623 * Don't get the IP here because it's unlikely to
632 * have anything to do with the actual error location. 624 * have anything to do with the actual error location.
633 */ 625 */
634 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) 626 if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
635 mce_log(&m); 627 mce_log(&m);
636 628
637 /* 629 /*
@@ -658,14 +650,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
658{ 650{
659 int i, ret = 0; 651 int i, ret = 0;
660 652
661 for (i = 0; i < banks; i++) { 653 for (i = 0; i < mca_cfg.banks; i++) {
662 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 654 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
663 if (m->status & MCI_STATUS_VAL) { 655 if (m->status & MCI_STATUS_VAL) {
664 __set_bit(i, validp); 656 __set_bit(i, validp);
665 if (quirk_no_way_out) 657 if (quirk_no_way_out)
666 quirk_no_way_out(i, m, regs); 658 quirk_no_way_out(i, m, regs);
667 } 659 }
668 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) 660 if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
669 ret = 1; 661 ret = 1;
670 } 662 }
671 return ret; 663 return ret;
@@ -696,11 +688,11 @@ static int mce_timed_out(u64 *t)
696 rmb(); 688 rmb();
697 if (atomic_read(&mce_paniced)) 689 if (atomic_read(&mce_paniced))
698 wait_for_panic(); 690 wait_for_panic();
699 if (!monarch_timeout) 691 if (!mca_cfg.monarch_timeout)
700 goto out; 692 goto out;
701 if ((s64)*t < SPINUNIT) { 693 if ((s64)*t < SPINUNIT) {
702 /* CHECKME: Make panic default for 1 too? */ 694 /* CHECKME: Make panic default for 1 too? */
703 if (tolerant < 1) 695 if (mca_cfg.tolerant < 1)
704 mce_panic("Timeout synchronizing machine check over CPUs", 696 mce_panic("Timeout synchronizing machine check over CPUs",
705 NULL, NULL); 697 NULL, NULL);
706 cpu_missing = 1; 698 cpu_missing = 1;
@@ -750,7 +742,8 @@ static void mce_reign(void)
750 * Grade the severity of the errors of all the CPUs. 742 * Grade the severity of the errors of all the CPUs.
751 */ 743 */
752 for_each_possible_cpu(cpu) { 744 for_each_possible_cpu(cpu) {
753 int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, 745 int severity = mce_severity(&per_cpu(mces_seen, cpu),
746 mca_cfg.tolerant,
754 &nmsg); 747 &nmsg);
755 if (severity > global_worst) { 748 if (severity > global_worst) {
756 msg = nmsg; 749 msg = nmsg;
@@ -764,7 +757,7 @@ static void mce_reign(void)
764 * This dumps all the mces in the log buffer and stops the 757 * This dumps all the mces in the log buffer and stops the
765 * other CPUs. 758 * other CPUs.
766 */ 759 */
767 if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) 760 if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
768 mce_panic("Fatal Machine check", m, msg); 761 mce_panic("Fatal Machine check", m, msg);
769 762
770 /* 763 /*
@@ -777,7 +770,7 @@ static void mce_reign(void)
777 * No machine check event found. Must be some external 770 * No machine check event found. Must be some external
778 * source or one CPU is hung. Panic. 771 * source or one CPU is hung. Panic.
779 */ 772 */
780 if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) 773 if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
781 mce_panic("Machine check from unknown source", NULL, NULL); 774 mce_panic("Machine check from unknown source", NULL, NULL);
782 775
783 /* 776 /*
@@ -801,7 +794,7 @@ static int mce_start(int *no_way_out)
801{ 794{
802 int order; 795 int order;
803 int cpus = num_online_cpus(); 796 int cpus = num_online_cpus();
804 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 797 u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
805 798
806 if (!timeout) 799 if (!timeout)
807 return -1; 800 return -1;
@@ -865,7 +858,7 @@ static int mce_start(int *no_way_out)
865static int mce_end(int order) 858static int mce_end(int order)
866{ 859{
867 int ret = -1; 860 int ret = -1;
868 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 861 u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
869 862
870 if (!timeout) 863 if (!timeout)
871 goto reset; 864 goto reset;
@@ -946,7 +939,7 @@ static void mce_clear_state(unsigned long *toclear)
946{ 939{
947 int i; 940 int i;
948 941
949 for (i = 0; i < banks; i++) { 942 for (i = 0; i < mca_cfg.banks; i++) {
950 if (test_bit(i, toclear)) 943 if (test_bit(i, toclear))
951 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 944 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
952 } 945 }
@@ -1011,6 +1004,7 @@ static void mce_clear_info(struct mce_info *mi)
1011 */ 1004 */
1012void do_machine_check(struct pt_regs *regs, long error_code) 1005void do_machine_check(struct pt_regs *regs, long error_code)
1013{ 1006{
1007 struct mca_config *cfg = &mca_cfg;
1014 struct mce m, *final; 1008 struct mce m, *final;
1015 int i; 1009 int i;
1016 int worst = 0; 1010 int worst = 0;
@@ -1022,7 +1016,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1022 int order; 1016 int order;
1023 /* 1017 /*
1024 * If no_way_out gets set, there is no safe way to recover from this 1018 * If no_way_out gets set, there is no safe way to recover from this
1025 * MCE. If tolerant is cranked up, we'll try anyway. 1019 * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
1026 */ 1020 */
1027 int no_way_out = 0; 1021 int no_way_out = 0;
1028 /* 1022 /*
@@ -1038,7 +1032,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1038 1032
1039 this_cpu_inc(mce_exception_count); 1033 this_cpu_inc(mce_exception_count);
1040 1034
1041 if (!banks) 1035 if (!cfg->banks)
1042 goto out; 1036 goto out;
1043 1037
1044 mce_gather_info(&m, regs); 1038 mce_gather_info(&m, regs);
@@ -1065,7 +1059,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1065 * because the first one to see it will clear it. 1059 * because the first one to see it will clear it.
1066 */ 1060 */
1067 order = mce_start(&no_way_out); 1061 order = mce_start(&no_way_out);
1068 for (i = 0; i < banks; i++) { 1062 for (i = 0; i < cfg->banks; i++) {
1069 __clear_bit(i, toclear); 1063 __clear_bit(i, toclear);
1070 if (!test_bit(i, valid_banks)) 1064 if (!test_bit(i, valid_banks))
1071 continue; 1065 continue;
@@ -1084,7 +1078,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1084 * Non uncorrected or non signaled errors are handled by 1078 * Non uncorrected or non signaled errors are handled by
1085 * machine_check_poll. Leave them alone, unless this panics. 1079 * machine_check_poll. Leave them alone, unless this panics.
1086 */ 1080 */
1087 if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && 1081 if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
1088 !no_way_out) 1082 !no_way_out)
1089 continue; 1083 continue;
1090 1084
@@ -1093,7 +1087,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1093 */ 1087 */
1094 add_taint(TAINT_MACHINE_CHECK); 1088 add_taint(TAINT_MACHINE_CHECK);
1095 1089
1096 severity = mce_severity(&m, tolerant, NULL); 1090 severity = mce_severity(&m, cfg->tolerant, NULL);
1097 1091
1098 /* 1092 /*
1099 * When machine check was for corrected handler don't touch, 1093 * When machine check was for corrected handler don't touch,
@@ -1117,7 +1111,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1117 * When the ring overflows we just ignore the AO error. 1111 * When the ring overflows we just ignore the AO error.
1118 * RED-PEN add some logging mechanism when 1112 * RED-PEN add some logging mechanism when
1119 * usable_address or mce_add_ring fails. 1113 * usable_address or mce_add_ring fails.
1120 * RED-PEN don't ignore overflow for tolerant == 0 1114 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
1121 */ 1115 */
1122 if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) 1116 if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
1123 mce_ring_add(m.addr >> PAGE_SHIFT); 1117 mce_ring_add(m.addr >> PAGE_SHIFT);
@@ -1149,7 +1143,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1149 * issues we try to recover, or limit damage to the current 1143 * issues we try to recover, or limit damage to the current
1150 * process. 1144 * process.
1151 */ 1145 */
1152 if (tolerant < 3) { 1146 if (cfg->tolerant < 3) {
1153 if (no_way_out) 1147 if (no_way_out)
1154 mce_panic("Fatal machine check on current CPU", &m, msg); 1148 mce_panic("Fatal machine check on current CPU", &m, msg);
1155 if (worst == MCE_AR_SEVERITY) { 1149 if (worst == MCE_AR_SEVERITY) {
@@ -1377,11 +1371,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
1377static int __cpuinit __mcheck_cpu_mce_banks_init(void) 1371static int __cpuinit __mcheck_cpu_mce_banks_init(void)
1378{ 1372{
1379 int i; 1373 int i;
1374 u8 num_banks = mca_cfg.banks;
1380 1375
1381 mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); 1376 mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
1382 if (!mce_banks) 1377 if (!mce_banks)
1383 return -ENOMEM; 1378 return -ENOMEM;
1384 for (i = 0; i < banks; i++) { 1379
1380 for (i = 0; i < num_banks; i++) {
1385 struct mce_bank *b = &mce_banks[i]; 1381 struct mce_bank *b = &mce_banks[i];
1386 1382
1387 b->ctl = -1ULL; 1383 b->ctl = -1ULL;
@@ -1401,7 +1397,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
1401 rdmsrl(MSR_IA32_MCG_CAP, cap); 1397 rdmsrl(MSR_IA32_MCG_CAP, cap);
1402 1398
1403 b = cap & MCG_BANKCNT_MASK; 1399 b = cap & MCG_BANKCNT_MASK;
1404 if (!banks) 1400 if (!mca_cfg.banks)
1405 pr_info("CPU supports %d MCE banks\n", b); 1401 pr_info("CPU supports %d MCE banks\n", b);
1406 1402
1407 if (b > MAX_NR_BANKS) { 1403 if (b > MAX_NR_BANKS) {
@@ -1411,8 +1407,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
1411 } 1407 }
1412 1408
1413 /* Don't support asymmetric configurations today */ 1409 /* Don't support asymmetric configurations today */
1414 WARN_ON(banks != 0 && b != banks); 1410 WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
1415 banks = b; 1411 mca_cfg.banks = b;
1412
1416 if (!mce_banks) { 1413 if (!mce_banks) {
1417 int err = __mcheck_cpu_mce_banks_init(); 1414 int err = __mcheck_cpu_mce_banks_init();
1418 1415
@@ -1422,25 +1419,29 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
1422 1419
1423 /* Use accurate RIP reporting if available. */ 1420 /* Use accurate RIP reporting if available. */
1424 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) 1421 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
1425 rip_msr = MSR_IA32_MCG_EIP; 1422 mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
1426 1423
1427 if (cap & MCG_SER_P) 1424 if (cap & MCG_SER_P)
1428 mce_ser = 1; 1425 mca_cfg.ser = true;
1429 1426
1430 return 0; 1427 return 0;
1431} 1428}
1432 1429
1433static void __mcheck_cpu_init_generic(void) 1430static void __mcheck_cpu_init_generic(void)
1434{ 1431{
1432 enum mcp_flags m_fl = 0;
1435 mce_banks_t all_banks; 1433 mce_banks_t all_banks;
1436 u64 cap; 1434 u64 cap;
1437 int i; 1435 int i;
1438 1436
1437 if (!mca_cfg.bootlog)
1438 m_fl = MCP_DONTLOG;
1439
1439 /* 1440 /*
1440 * Log the machine checks left over from the previous reset. 1441 * Log the machine checks left over from the previous reset.
1441 */ 1442 */
1442 bitmap_fill(all_banks, MAX_NR_BANKS); 1443 bitmap_fill(all_banks, MAX_NR_BANKS);
1443 machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); 1444 machine_check_poll(MCP_UC | m_fl, &all_banks);
1444 1445
1445 set_in_cr4(X86_CR4_MCE); 1446 set_in_cr4(X86_CR4_MCE);
1446 1447
@@ -1448,7 +1449,7 @@ static void __mcheck_cpu_init_generic(void)
1448 if (cap & MCG_CTL_P) 1449 if (cap & MCG_CTL_P)
1449 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 1450 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
1450 1451
1451 for (i = 0; i < banks; i++) { 1452 for (i = 0; i < mca_cfg.banks; i++) {
1452 struct mce_bank *b = &mce_banks[i]; 1453 struct mce_bank *b = &mce_banks[i];
1453 1454
1454 if (!b->init) 1455 if (!b->init)
@@ -1489,6 +1490,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
1489/* Add per CPU specific workarounds here */ 1490/* Add per CPU specific workarounds here */
1490static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) 1491static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1491{ 1492{
1493 struct mca_config *cfg = &mca_cfg;
1494
1492 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1495 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1493 pr_info("unknown CPU type - not enabling MCE support\n"); 1496 pr_info("unknown CPU type - not enabling MCE support\n");
1494 return -EOPNOTSUPP; 1497 return -EOPNOTSUPP;
@@ -1496,7 +1499,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1496 1499
1497 /* This should be disabled by the BIOS, but isn't always */ 1500 /* This should be disabled by the BIOS, but isn't always */
1498 if (c->x86_vendor == X86_VENDOR_AMD) { 1501 if (c->x86_vendor == X86_VENDOR_AMD) {
1499 if (c->x86 == 15 && banks > 4) { 1502 if (c->x86 == 15 && cfg->banks > 4) {
1500 /* 1503 /*
1501 * disable GART TBL walk error reporting, which 1504 * disable GART TBL walk error reporting, which
1502 * trips off incorrectly with the IOMMU & 3ware 1505 * trips off incorrectly with the IOMMU & 3ware
@@ -1504,18 +1507,18 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1504 */ 1507 */
1505 clear_bit(10, (unsigned long *)&mce_banks[4].ctl); 1508 clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
1506 } 1509 }
1507 if (c->x86 <= 17 && mce_bootlog < 0) { 1510 if (c->x86 <= 17 && cfg->bootlog < 0) {
1508 /* 1511 /*
1509 * Lots of broken BIOS around that don't clear them 1512 * Lots of broken BIOS around that don't clear them
1510 * by default and leave crap in there. Don't log: 1513 * by default and leave crap in there. Don't log:
1511 */ 1514 */
1512 mce_bootlog = 0; 1515 cfg->bootlog = 0;
1513 } 1516 }
1514 /* 1517 /*
1515 * Various K7s with broken bank 0 around. Always disable 1518 * Various K7s with broken bank 0 around. Always disable
1516 * by default. 1519 * by default.
1517 */ 1520 */
1518 if (c->x86 == 6 && banks > 0) 1521 if (c->x86 == 6 && cfg->banks > 0)
1519 mce_banks[0].ctl = 0; 1522 mce_banks[0].ctl = 0;
1520 1523
1521 /* 1524 /*
@@ -1566,7 +1569,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1566 * valid event later, merely don't write CTL0. 1569 * valid event later, merely don't write CTL0.
1567 */ 1570 */
1568 1571
1569 if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) 1572 if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
1570 mce_banks[0].init = 0; 1573 mce_banks[0].init = 0;
1571 1574
1572 /* 1575 /*
@@ -1574,23 +1577,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1574 * synchronization with a one second timeout. 1577 * synchronization with a one second timeout.
1575 */ 1578 */
1576 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && 1579 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
1577 monarch_timeout < 0) 1580 cfg->monarch_timeout < 0)
1578 monarch_timeout = USEC_PER_SEC; 1581 cfg->monarch_timeout = USEC_PER_SEC;
1579 1582
1580 /* 1583 /*
1581 * There are also broken BIOSes on some Pentium M and 1584 * There are also broken BIOSes on some Pentium M and
1582 * earlier systems: 1585 * earlier systems:
1583 */ 1586 */
1584 if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) 1587 if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
1585 mce_bootlog = 0; 1588 cfg->bootlog = 0;
1586 1589
1587 if (c->x86 == 6 && c->x86_model == 45) 1590 if (c->x86 == 6 && c->x86_model == 45)
1588 quirk_no_way_out = quirk_sandybridge_ifu; 1591 quirk_no_way_out = quirk_sandybridge_ifu;
1589 } 1592 }
1590 if (monarch_timeout < 0) 1593 if (cfg->monarch_timeout < 0)
1591 monarch_timeout = 0; 1594 cfg->monarch_timeout = 0;
1592 if (mce_bootlog != 0) 1595 if (cfg->bootlog != 0)
1593 mce_panic_timeout = 30; 1596 cfg->panic_timeout = 30;
1594 1597
1595 return 0; 1598 return 0;
1596} 1599}
@@ -1635,7 +1638,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
1635 1638
1636 __this_cpu_write(mce_next_interval, iv); 1639 __this_cpu_write(mce_next_interval, iv);
1637 1640
1638 if (mce_ignore_ce || !iv) 1641 if (mca_cfg.ignore_ce || !iv)
1639 return; 1642 return;
1640 1643
1641 t->expires = round_jiffies(jiffies + iv); 1644 t->expires = round_jiffies(jiffies + iv);
@@ -1668,7 +1671,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
1668 */ 1671 */
1669void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) 1672void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
1670{ 1673{
1671 if (mce_disabled) 1674 if (mca_cfg.disabled)
1672 return; 1675 return;
1673 1676
1674 if (__mcheck_cpu_ancient_init(c)) 1677 if (__mcheck_cpu_ancient_init(c))
@@ -1678,7 +1681,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
1678 return; 1681 return;
1679 1682
1680 if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { 1683 if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
1681 mce_disabled = 1; 1684 mca_cfg.disabled = true;
1682 return; 1685 return;
1683 } 1686 }
1684 1687
@@ -1951,6 +1954,8 @@ static struct miscdevice mce_chrdev_device = {
1951 */ 1954 */
1952static int __init mcheck_enable(char *str) 1955static int __init mcheck_enable(char *str)
1953{ 1956{
1957 struct mca_config *cfg = &mca_cfg;
1958
1954 if (*str == 0) { 1959 if (*str == 0) {
1955 enable_p5_mce(); 1960 enable_p5_mce();
1956 return 1; 1961 return 1;
@@ -1958,22 +1963,22 @@ static int __init mcheck_enable(char *str)
1958 if (*str == '=') 1963 if (*str == '=')
1959 str++; 1964 str++;
1960 if (!strcmp(str, "off")) 1965 if (!strcmp(str, "off"))
1961 mce_disabled = 1; 1966 cfg->disabled = true;
1962 else if (!strcmp(str, "no_cmci")) 1967 else if (!strcmp(str, "no_cmci"))
1963 mce_cmci_disabled = 1; 1968 cfg->cmci_disabled = true;
1964 else if (!strcmp(str, "dont_log_ce")) 1969 else if (!strcmp(str, "dont_log_ce"))
1965 mce_dont_log_ce = 1; 1970 cfg->dont_log_ce = true;
1966 else if (!strcmp(str, "ignore_ce")) 1971 else if (!strcmp(str, "ignore_ce"))
1967 mce_ignore_ce = 1; 1972 cfg->ignore_ce = true;
1968 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) 1973 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1969 mce_bootlog = (str[0] == 'b'); 1974 cfg->bootlog = (str[0] == 'b');
1970 else if (!strcmp(str, "bios_cmci_threshold")) 1975 else if (!strcmp(str, "bios_cmci_threshold"))
1971 mce_bios_cmci_threshold = 1; 1976 cfg->bios_cmci_threshold = true;
1972 else if (isdigit(str[0])) { 1977 else if (isdigit(str[0])) {
1973 get_option(&str, &tolerant); 1978 get_option(&str, &(cfg->tolerant));
1974 if (*str == ',') { 1979 if (*str == ',') {
1975 ++str; 1980 ++str;
1976 get_option(&str, &monarch_timeout); 1981 get_option(&str, &(cfg->monarch_timeout));
1977 } 1982 }
1978 } else { 1983 } else {
1979 pr_info("mce argument %s ignored. Please use /sys\n", str); 1984 pr_info("mce argument %s ignored. Please use /sys\n", str);
@@ -2002,7 +2007,7 @@ static int mce_disable_error_reporting(void)
2002{ 2007{
2003 int i; 2008 int i;
2004 2009
2005 for (i = 0; i < banks; i++) { 2010 for (i = 0; i < mca_cfg.banks; i++) {
2006 struct mce_bank *b = &mce_banks[i]; 2011 struct mce_bank *b = &mce_banks[i];
2007 2012
2008 if (b->init) 2013 if (b->init)
@@ -2142,15 +2147,15 @@ static ssize_t set_ignore_ce(struct device *s,
2142 if (strict_strtoull(buf, 0, &new) < 0) 2147 if (strict_strtoull(buf, 0, &new) < 0)
2143 return -EINVAL; 2148 return -EINVAL;
2144 2149
2145 if (mce_ignore_ce ^ !!new) { 2150 if (mca_cfg.ignore_ce ^ !!new) {
2146 if (new) { 2151 if (new) {
2147 /* disable ce features */ 2152 /* disable ce features */
2148 mce_timer_delete_all(); 2153 mce_timer_delete_all();
2149 on_each_cpu(mce_disable_cmci, NULL, 1); 2154 on_each_cpu(mce_disable_cmci, NULL, 1);
2150 mce_ignore_ce = 1; 2155 mca_cfg.ignore_ce = true;
2151 } else { 2156 } else {
2152 /* enable ce features */ 2157 /* enable ce features */
2153 mce_ignore_ce = 0; 2158 mca_cfg.ignore_ce = false;
2154 on_each_cpu(mce_enable_ce, (void *)1, 1); 2159 on_each_cpu(mce_enable_ce, (void *)1, 1);
2155 } 2160 }
2156 } 2161 }
@@ -2166,14 +2171,14 @@ static ssize_t set_cmci_disabled(struct device *s,
2166 if (strict_strtoull(buf, 0, &new) < 0) 2171 if (strict_strtoull(buf, 0, &new) < 0)
2167 return -EINVAL; 2172 return -EINVAL;
2168 2173
2169 if (mce_cmci_disabled ^ !!new) { 2174 if (mca_cfg.cmci_disabled ^ !!new) {
2170 if (new) { 2175 if (new) {
2171 /* disable cmci */ 2176 /* disable cmci */
2172 on_each_cpu(mce_disable_cmci, NULL, 1); 2177 on_each_cpu(mce_disable_cmci, NULL, 1);
2173 mce_cmci_disabled = 1; 2178 mca_cfg.cmci_disabled = true;
2174 } else { 2179 } else {
2175 /* enable cmci */ 2180 /* enable cmci */
2176 mce_cmci_disabled = 0; 2181 mca_cfg.cmci_disabled = false;
2177 on_each_cpu(mce_enable_ce, NULL, 1); 2182 on_each_cpu(mce_enable_ce, NULL, 1);
2178 } 2183 }
2179 } 2184 }
@@ -2190,9 +2195,9 @@ static ssize_t store_int_with_restart(struct device *s,
2190} 2195}
2191 2196
2192static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); 2197static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
2193static DEVICE_INT_ATTR(tolerant, 0644, tolerant); 2198static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
2194static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); 2199static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
2195static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); 2200static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
2196 2201
2197static struct dev_ext_attribute dev_attr_check_interval = { 2202static struct dev_ext_attribute dev_attr_check_interval = {
2198 __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), 2203 __ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
@@ -2200,13 +2205,13 @@ static struct dev_ext_attribute dev_attr_check_interval = {
2200}; 2205};
2201 2206
2202static struct dev_ext_attribute dev_attr_ignore_ce = { 2207static struct dev_ext_attribute dev_attr_ignore_ce = {
2203 __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), 2208 __ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
2204 &mce_ignore_ce 2209 &mca_cfg.ignore_ce
2205}; 2210};
2206 2211
2207static struct dev_ext_attribute dev_attr_cmci_disabled = { 2212static struct dev_ext_attribute dev_attr_cmci_disabled = {
2208 __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), 2213 __ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
2209 &mce_cmci_disabled 2214 &mca_cfg.cmci_disabled
2210}; 2215};
2211 2216
2212static struct device_attribute *mce_device_attrs[] = { 2217static struct device_attribute *mce_device_attrs[] = {
@@ -2253,7 +2258,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
2253 if (err) 2258 if (err)
2254 goto error; 2259 goto error;
2255 } 2260 }
2256 for (j = 0; j < banks; j++) { 2261 for (j = 0; j < mca_cfg.banks; j++) {
2257 err = device_create_file(dev, &mce_banks[j].attr); 2262 err = device_create_file(dev, &mce_banks[j].attr);
2258 if (err) 2263 if (err)
2259 goto error2; 2264 goto error2;
@@ -2285,7 +2290,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
2285 for (i = 0; mce_device_attrs[i]; i++) 2290 for (i = 0; mce_device_attrs[i]; i++)
2286 device_remove_file(dev, mce_device_attrs[i]); 2291 device_remove_file(dev, mce_device_attrs[i]);
2287 2292
2288 for (i = 0; i < banks; i++) 2293 for (i = 0; i < mca_cfg.banks; i++)
2289 device_remove_file(dev, &mce_banks[i].attr); 2294 device_remove_file(dev, &mce_banks[i].attr);
2290 2295
2291 device_unregister(dev); 2296 device_unregister(dev);
@@ -2304,7 +2309,7 @@ static void __cpuinit mce_disable_cpu(void *h)
2304 2309
2305 if (!(action & CPU_TASKS_FROZEN)) 2310 if (!(action & CPU_TASKS_FROZEN))
2306 cmci_clear(); 2311 cmci_clear();
2307 for (i = 0; i < banks; i++) { 2312 for (i = 0; i < mca_cfg.banks; i++) {
2308 struct mce_bank *b = &mce_banks[i]; 2313 struct mce_bank *b = &mce_banks[i];
2309 2314
2310 if (b->init) 2315 if (b->init)
@@ -2322,7 +2327,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
2322 2327
2323 if (!(action & CPU_TASKS_FROZEN)) 2328 if (!(action & CPU_TASKS_FROZEN))
2324 cmci_reenable(); 2329 cmci_reenable();
2325 for (i = 0; i < banks; i++) { 2330 for (i = 0; i < mca_cfg.banks; i++) {
2326 struct mce_bank *b = &mce_banks[i]; 2331 struct mce_bank *b = &mce_banks[i];
2327 2332
2328 if (b->init) 2333 if (b->init)
@@ -2375,7 +2380,7 @@ static __init void mce_init_banks(void)
2375{ 2380{
2376 int i; 2381 int i;
2377 2382
2378 for (i = 0; i < banks; i++) { 2383 for (i = 0; i < mca_cfg.banks; i++) {
2379 struct mce_bank *b = &mce_banks[i]; 2384 struct mce_bank *b = &mce_banks[i];
2380 struct device_attribute *a = &b->attr; 2385 struct device_attribute *a = &b->attr;
2381 2386
@@ -2426,7 +2431,7 @@ device_initcall_sync(mcheck_init_device);
2426 */ 2431 */
2427static int __init mcheck_disable(char *str) 2432static int __init mcheck_disable(char *str)
2428{ 2433{
2429 mce_disabled = 1; 2434 mca_cfg.disabled = true;
2430 return 1; 2435 return 1;
2431} 2436}
2432__setup("nomce", mcheck_disable); 2437__setup("nomce", mcheck_disable);
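The mce.c hunks above retire the scattered file-scope tunables (banks, tolerant, monarch_timeout, mce_bootlog, mce_ser, mce_disabled, ...) in favour of fields on a single mca_cfg object. A rough sketch of what such a config structure looks like, reconstructed from the field accesses in the hunks; the types, attributes and layout here are assumptions for illustration, not the exact kernel definition:

/* Illustrative only: one struct gathers the MCE boot tunables used above. */
struct mca_config {
	bool	disabled;		/* "mce=off"				*/
	bool	ser;			/* MCG_SER_P: software error recovery	*/
	bool	cmci_disabled;		/* "mce=no_cmci"			*/
	bool	ignore_ce;		/* "mce=ignore_ce"			*/
	bool	dont_log_ce;		/* "mce=dont_log_ce"			*/
	bool	bios_cmci_threshold;	/* "mce=bios_cmci_threshold"		*/
	u8	banks;			/* bank count from MCG_CAP		*/
	int	tolerant;		/* first numeric "mce=" argument	*/
	int	monarch_timeout;	/* second numeric "mce=" argument	*/
	int	panic_timeout;
	int	bootlog;		/* "mce=bootlog" / "mce=nobootlog"	*/
	int	rip_msr;		/* MSR used for accurate RIP reporting	*/
};

static struct mca_config mca_cfg __read_mostly;

Because every user now goes through mca_cfg (or a local struct mca_config *cfg = &mca_cfg), the DEVICE_*_ATTR definitions above can point straight into the structure instead of at separate globals.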
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 4f9a3cbfc4a3..402c454fbff0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -53,7 +53,7 @@ static int cmci_supported(int *banks)
53{ 53{
54 u64 cap; 54 u64 cap;
55 55
56 if (mce_cmci_disabled || mce_ignore_ce) 56 if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
57 return 0; 57 return 0;
58 58
59 /* 59 /*
@@ -200,7 +200,7 @@ static void cmci_discover(int banks)
200 continue; 200 continue;
201 } 201 }
202 202
203 if (!mce_bios_cmci_threshold) { 203 if (!mca_cfg.bios_cmci_threshold) {
204 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; 204 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
205 val |= CMCI_THRESHOLD; 205 val |= CMCI_THRESHOLD;
206 } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { 206 } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
@@ -227,7 +227,7 @@ static void cmci_discover(int banks)
227 * set the thresholds properly or does not work with 227 * set the thresholds properly or does not work with
228 * this boot option. Note down now and report later. 228 * this boot option. Note down now and report later.
229 */ 229 */
230 if (mce_bios_cmci_threshold && bios_zero_thresh && 230 if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
231 (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) 231 (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
232 bios_wrong_thresh = 1; 232 bios_wrong_thresh = 1;
233 } else { 233 } else {
@@ -235,7 +235,7 @@ static void cmci_discover(int banks)
235 } 235 }
236 } 236 }
237 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 237 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
238 if (mce_bios_cmci_threshold && bios_wrong_thresh) { 238 if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
239 pr_info_once( 239 pr_info_once(
240 "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); 240 "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
241 pr_info_once( 241 pr_info_once(
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6b96110bb0c3..726bf963c227 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -606,7 +606,7 @@ void __init mtrr_bp_init(void)
606 606
607 /* 607 /*
608 * This is an AMD specific MSR, but we assume(hope?) that 608 * This is an AMD specific MSR, but we assume(hope?) that
609 * Intel will implement it to when they extend the address 609 * Intel will implement it too when they extend the address
610 * bus of the Xeon. 610 * bus of the Xeon.
611 */ 611 */
612 if (cpuid_eax(0x80000000) >= 0x80000008) { 612 if (cpuid_eax(0x80000000) >= 0x80000008) {
@@ -695,11 +695,16 @@ void mtrr_ap_init(void)
695} 695}
696 696
697/** 697/**
698 * Save current fixed-range MTRR state of the BSP 698 * Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
699 */ 699 */
700void mtrr_save_state(void) 700void mtrr_save_state(void)
701{ 701{
702 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); 702 int first_cpu;
703
704 get_online_cpus();
705 first_cpu = cpumask_first(cpu_online_mask);
706 smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
707 put_online_cpus();
703} 708}
704 709
705void set_mtrr_aps_delayed_init(void) 710void set_mtrr_aps_delayed_init(void)
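mtrr_save_state() above no longer hardcodes CPU 0: with CPU0 hotplug, the boot CPU may be offline, so the fixed-range MTRRs are saved on whichever CPU comes first in cpu_online_mask, with hotplug held off for the duration of the cross-CPU call. The same pattern as a small sketch (the wrapper function and its callback are placeholders):

#include <linux/cpu.h>		/* get_online_cpus() / put_online_cpus() */
#include <linux/cpumask.h>	/* cpumask_first(), cpu_online_mask */
#include <linux/smp.h>		/* smp_call_function_single() */

/* Sketch: run fn(info) on the first online CPU, hotplug-safely. */
static void call_on_first_online_cpu(void (*fn)(void *info), void *info)
{
	int cpu;

	get_online_cpus();			/* block CPU hotplug while we pick a target */
	cpu = cpumask_first(cpu_online_mask);	/* CPU 0 may be offline, so don't assume it */
	smp_call_function_single(cpu, fn, info, 1 /* wait for completion */);
	put_online_cpus();
}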
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4a3374e61a93..4428fd178bce 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1316,6 +1316,121 @@ static struct attribute_group x86_pmu_format_group = {
1316 .attrs = NULL, 1316 .attrs = NULL,
1317}; 1317};
1318 1318
1319struct perf_pmu_events_attr {
1320 struct device_attribute attr;
1321 u64 id;
1322};
1323
1324/*
1325 * Remove all undefined events (x86_pmu.event_map(id) == 0)
 1326 * from the events_attr array.
1327 */
1328static void __init filter_events(struct attribute **attrs)
1329{
1330 int i, j;
1331
1332 for (i = 0; attrs[i]; i++) {
1333 if (x86_pmu.event_map(i))
1334 continue;
1335
1336 for (j = i; attrs[j]; j++)
1337 attrs[j] = attrs[j + 1];
1338
1339 /* Check the shifted attr. */
1340 i--;
1341 }
1342}
1343
1344static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
1345 char *page)
1346{
1347 struct perf_pmu_events_attr *pmu_attr = \
1348 container_of(attr, struct perf_pmu_events_attr, attr);
1349
1350 u64 config = x86_pmu.event_map(pmu_attr->id);
1351 return x86_pmu.events_sysfs_show(page, config);
1352}
1353
1354#define EVENT_VAR(_id) event_attr_##_id
1355#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
1356
1357#define EVENT_ATTR(_name, _id) \
1358static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
1359 .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
1360 .id = PERF_COUNT_HW_##_id, \
1361};
1362
1363EVENT_ATTR(cpu-cycles, CPU_CYCLES );
1364EVENT_ATTR(instructions, INSTRUCTIONS );
1365EVENT_ATTR(cache-references, CACHE_REFERENCES );
1366EVENT_ATTR(cache-misses, CACHE_MISSES );
1367EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS );
1368EVENT_ATTR(branch-misses, BRANCH_MISSES );
1369EVENT_ATTR(bus-cycles, BUS_CYCLES );
1370EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND );
1371EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND );
1372EVENT_ATTR(ref-cycles, REF_CPU_CYCLES );
1373
1374static struct attribute *empty_attrs;
1375
1376static struct attribute *events_attr[] = {
1377 EVENT_PTR(CPU_CYCLES),
1378 EVENT_PTR(INSTRUCTIONS),
1379 EVENT_PTR(CACHE_REFERENCES),
1380 EVENT_PTR(CACHE_MISSES),
1381 EVENT_PTR(BRANCH_INSTRUCTIONS),
1382 EVENT_PTR(BRANCH_MISSES),
1383 EVENT_PTR(BUS_CYCLES),
1384 EVENT_PTR(STALLED_CYCLES_FRONTEND),
1385 EVENT_PTR(STALLED_CYCLES_BACKEND),
1386 EVENT_PTR(REF_CPU_CYCLES),
1387 NULL,
1388};
1389
1390static struct attribute_group x86_pmu_events_group = {
1391 .name = "events",
1392 .attrs = events_attr,
1393};
1394
1395ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
1396{
1397 u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
1398 u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
1399 bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE);
1400 bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
1401 bool any = (config & ARCH_PERFMON_EVENTSEL_ANY);
1402 bool inv = (config & ARCH_PERFMON_EVENTSEL_INV);
1403 ssize_t ret;
1404
1405 /*
 1406 * We have a whole page to spend and only a little data
 1407 * to write, so sprintf is safe here.
1408 */
1409 ret = sprintf(page, "event=0x%02llx", event);
1410
1411 if (umask)
1412 ret += sprintf(page + ret, ",umask=0x%02llx", umask);
1413
1414 if (edge)
1415 ret += sprintf(page + ret, ",edge");
1416
1417 if (pc)
1418 ret += sprintf(page + ret, ",pc");
1419
1420 if (any)
1421 ret += sprintf(page + ret, ",any");
1422
1423 if (inv)
1424 ret += sprintf(page + ret, ",inv");
1425
1426 if (cmask)
1427 ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
1428
1429 ret += sprintf(page + ret, "\n");
1430
1431 return ret;
1432}
1433
1319static int __init init_hw_perf_events(void) 1434static int __init init_hw_perf_events(void)
1320{ 1435{
1321 struct x86_pmu_quirk *quirk; 1436 struct x86_pmu_quirk *quirk;
@@ -1362,6 +1477,11 @@ static int __init init_hw_perf_events(void)
1362 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ 1477 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1363 x86_pmu_format_group.attrs = x86_pmu.format_attrs; 1478 x86_pmu_format_group.attrs = x86_pmu.format_attrs;
1364 1479
1480 if (!x86_pmu.events_sysfs_show)
1481 x86_pmu_events_group.attrs = &empty_attrs;
1482 else
1483 filter_events(x86_pmu_events_group.attrs);
1484
1365 pr_info("... version: %d\n", x86_pmu.version); 1485 pr_info("... version: %d\n", x86_pmu.version);
1366 pr_info("... bit width: %d\n", x86_pmu.cntval_bits); 1486 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
1367 pr_info("... generic registers: %d\n", x86_pmu.num_counters); 1487 pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1651,6 +1771,7 @@ static struct attribute_group x86_pmu_attr_group = {
1651static const struct attribute_group *x86_pmu_attr_groups[] = { 1771static const struct attribute_group *x86_pmu_attr_groups[] = {
1652 &x86_pmu_attr_group, 1772 &x86_pmu_attr_group,
1653 &x86_pmu_format_group, 1773 &x86_pmu_format_group,
1774 &x86_pmu_events_group,
1654 NULL, 1775 NULL,
1655}; 1776};
1656 1777
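For readers following the new "events" sysfs group: EVENT_ATTR() above stamps out one perf_pmu_events_attr per generic hardware event, and each attribute reads back through events_sysfs_show(). Expanding the macro by hand shows the shape of one entry (derived mechanically from the definitions in this hunk):

/* EVENT_ATTR(cpu-cycles, CPU_CYCLES) expands to: */
static struct perf_pmu_events_attr event_attr_CPU_CYCLES = {
	.attr = __ATTR(cpu-cycles, 0444, events_sysfs_show, NULL),
	.id   = PERF_COUNT_HW_CPU_CYCLES,
};

/* EVENT_PTR(CPU_CYCLES) is then &event_attr_CPU_CYCLES.attr.attr, which is
 * what events_attr[] collects for the "events" attribute group added to
 * x86_pmu_attr_groups[] further down. */

filter_events() walks that NULL-terminated array at init time and drops any entry whose x86_pmu.event_map() result is 0, so a PMU only exposes the generic events it actually implements.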
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 271d25700297..115c1ea97746 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -354,6 +354,8 @@ struct x86_pmu {
354 int attr_rdpmc; 354 int attr_rdpmc;
355 struct attribute **format_attrs; 355 struct attribute **format_attrs;
356 356
357 ssize_t (*events_sysfs_show)(char *page, u64 config);
358
357 /* 359 /*
358 * CPU Hotplug hooks 360 * CPU Hotplug hooks
359 */ 361 */
@@ -536,6 +538,9 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
536 regs->ip = ip; 538 regs->ip = ip;
537} 539}
538 540
541ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
542ssize_t intel_event_sysfs_show(char *page, u64 config);
543
539#ifdef CONFIG_CPU_SUP_AMD 544#ifdef CONFIG_CPU_SUP_AMD
540 545
541int amd_pmu_init(void); 546int amd_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 4528ae7b6ec4..c93bc4e813a0 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -568,6 +568,14 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
568 } 568 }
569} 569}
570 570
571static ssize_t amd_event_sysfs_show(char *page, u64 config)
572{
573 u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
574 (config & AMD64_EVENTSEL_EVENT) >> 24;
575
576 return x86_event_sysfs_show(page, config, event);
577}
578
571static __initconst const struct x86_pmu amd_pmu = { 579static __initconst const struct x86_pmu amd_pmu = {
572 .name = "AMD", 580 .name = "AMD",
573 .handle_irq = x86_pmu_handle_irq, 581 .handle_irq = x86_pmu_handle_irq,
@@ -591,6 +599,7 @@ static __initconst const struct x86_pmu amd_pmu = {
591 .put_event_constraints = amd_put_event_constraints, 599 .put_event_constraints = amd_put_event_constraints,
592 600
593 .format_attrs = amd_format_attr, 601 .format_attrs = amd_format_attr,
602 .events_sysfs_show = amd_event_sysfs_show,
594 603
595 .cpu_prepare = amd_pmu_cpu_prepare, 604 .cpu_prepare = amd_pmu_cpu_prepare,
596 .cpu_starting = amd_pmu_cpu_starting, 605 .cpu_starting = amd_pmu_cpu_starting,
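amd_event_sysfs_show() reassembles AMD's split event-select field before handing it to x86_event_sysfs_show(): the low 8 bits sit in config bits 7:0 and, assuming AMD64_EVENTSEL_EVENT also covers the extended select bits 35:32 (the family 10h+ layout), shifting the masked value right by 24 drops those high bits into bits 11:8. A small standalone illustration of that arithmetic; the mask values below are assumptions for the example, not quoted from the kernel headers:

#include <stdint.h>
#include <stdio.h>

#define SEL_LOW   0x00000000000000FFULL	/* event select bits 7:0 */
#define SEL_FULL  0x0000000F000000FFULL	/* plus extended bits 35:32 (assumed layout) */

static uint64_t amd_event_code(uint64_t config)
{
	/* bits 35:32 shifted right by 24 land in bits 11:8; bits 7:0 stay put */
	return (config & SEL_LOW) | ((config & SEL_FULL) >> 24);
}

int main(void)
{
	/* extended event 0x1D0: 0x1 in bits 35:32, 0xD0 in bits 7:0 */
	uint64_t config = (0x1ULL << 32) | 0xD0;

	printf("event=0x%03llx\n", (unsigned long long)amd_event_code(config)); /* 0x1d0 */
	return 0;
}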
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 324bb523d9d9..93b9e1181f83 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1603,6 +1603,13 @@ static struct attribute *intel_arch_formats_attr[] = {
1603 NULL, 1603 NULL,
1604}; 1604};
1605 1605
1606ssize_t intel_event_sysfs_show(char *page, u64 config)
1607{
1608 u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
1609
1610 return x86_event_sysfs_show(page, config, event);
1611}
1612
1606static __initconst const struct x86_pmu core_pmu = { 1613static __initconst const struct x86_pmu core_pmu = {
1607 .name = "core", 1614 .name = "core",
1608 .handle_irq = x86_pmu_handle_irq, 1615 .handle_irq = x86_pmu_handle_irq,
@@ -1628,6 +1635,7 @@ static __initconst const struct x86_pmu core_pmu = {
1628 .event_constraints = intel_core_event_constraints, 1635 .event_constraints = intel_core_event_constraints,
1629 .guest_get_msrs = core_guest_get_msrs, 1636 .guest_get_msrs = core_guest_get_msrs,
1630 .format_attrs = intel_arch_formats_attr, 1637 .format_attrs = intel_arch_formats_attr,
1638 .events_sysfs_show = intel_event_sysfs_show,
1631}; 1639};
1632 1640
1633struct intel_shared_regs *allocate_shared_regs(int cpu) 1641struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1766,6 +1774,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1766 .pebs_aliases = intel_pebs_aliases_core2, 1774 .pebs_aliases = intel_pebs_aliases_core2,
1767 1775
1768 .format_attrs = intel_arch3_formats_attr, 1776 .format_attrs = intel_arch3_formats_attr,
1777 .events_sysfs_show = intel_event_sysfs_show,
1769 1778
1770 .cpu_prepare = intel_pmu_cpu_prepare, 1779 .cpu_prepare = intel_pmu_cpu_prepare,
1771 .cpu_starting = intel_pmu_cpu_starting, 1780 .cpu_starting = intel_pmu_cpu_starting,
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 7d0270bd793e..f2af39f5dc3d 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -227,6 +227,8 @@ static __initconst const struct x86_pmu p6_pmu = {
227 .event_constraints = p6_event_constraints, 227 .event_constraints = p6_event_constraints,
228 228
229 .format_attrs = intel_p6_formats_attr, 229 .format_attrs = intel_p6_formats_attr,
230 .events_sysfs_show = intel_event_sysfs_show,
231
230}; 232};
231 233
232__init int p6_pmu_init(void) 234__init int p6_pmu_init(void)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 13ad89971d47..74467feb4dc5 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -16,6 +16,7 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/elf.h> 17#include <linux/elf.h>
18#include <linux/elfcore.h> 18#include <linux/elfcore.h>
19#include <linux/module.h>
19 20
20#include <asm/processor.h> 21#include <asm/processor.h>
21#include <asm/hardirq.h> 22#include <asm/hardirq.h>
@@ -30,6 +31,27 @@
30 31
31int in_crash_kexec; 32int in_crash_kexec;
32 33
34/*
35 * This is used to VMCLEAR all VMCSs loaded on the
 36 * processor. The callback pointer is assigned when
 37 * the kvm_intel module is loaded.
 38 *
 39 * Protected by RCU.
40 */
41crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
42EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
43
44static inline void cpu_crash_vmclear_loaded_vmcss(void)
45{
46 crash_vmclear_fn *do_vmclear_operation = NULL;
47
48 rcu_read_lock();
49 do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
50 if (do_vmclear_operation)
51 do_vmclear_operation();
52 rcu_read_unlock();
53}
54
33#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 55#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
34 56
35static void kdump_nmi_callback(int cpu, struct pt_regs *regs) 57static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
46#endif 68#endif
47 crash_save_cpu(regs, cpu); 69 crash_save_cpu(regs, cpu);
48 70
71 /*
72 * VMCLEAR VMCSs loaded on all cpus if needed.
73 */
74 cpu_crash_vmclear_loaded_vmcss();
75
49 /* Disable VMX or SVM if needed. 76 /* Disable VMX or SVM if needed.
50 * 77 *
51 * We need to disable virtualization on all CPUs. 78 * We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
88 115
89 kdump_nmi_shootdown_cpus(); 116 kdump_nmi_shootdown_cpus();
90 117
118 /*
119 * VMCLEAR VMCSs loaded on this cpu if needed.
120 */
121 cpu_crash_vmclear_loaded_vmcss();
122
91 /* Booting kdump kernel with VMX or SVM enabled won't work, 123 /* Booting kdump kernel with VMX or SVM enabled won't work,
92 * because (among other limitations) we can't disable paging 124 * because (among other limitations) we can't disable paging
93 * with the virt flags. 125 * with the virt flags.
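crash_vmclear_loaded_vmcss above is an RCU-protected hook: the kdump path dereferences it under rcu_read_lock() and calls it, if set, before VMX is disabled, so a hypervisor module can flush cached VMCS state back to memory for the crash kernel. A sketch of how a module might publish and retract such a callback; the names are placeholders, this is not quoted from the kvm_intel side of the series, and it assumes crash_vmclear_fn is a plain void(void) function type, as the call site above implies:

/* Placeholder callback: VMCLEAR every VMCS still cached on this CPU. */
static void my_vmclear_loaded_vmcss(void)
{
	/* ... per-CPU VMCLEAR work goes here ... */
}

static int __init my_vmx_init(void)
{
	/* publish the hook; readers use rcu_dereference() as in the hunk above */
	rcu_assign_pointer(crash_vmclear_loaded_vmcss, my_vmclear_loaded_vmcss);
	return 0;
}

static void __exit my_vmx_exit(void)
{
	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
	synchronize_rcu();	/* wait until no reader can still see the old pointer */
}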
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 88b725aa1d52..c763116c5359 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -739,30 +739,12 @@ ENTRY(ptregs_##name) ; \
739ENDPROC(ptregs_##name) 739ENDPROC(ptregs_##name)
740 740
741PTREGSCALL1(iopl) 741PTREGSCALL1(iopl)
742PTREGSCALL0(fork)
743PTREGSCALL0(vfork)
744PTREGSCALL2(sigaltstack) 742PTREGSCALL2(sigaltstack)
745PTREGSCALL0(sigreturn) 743PTREGSCALL0(sigreturn)
746PTREGSCALL0(rt_sigreturn) 744PTREGSCALL0(rt_sigreturn)
747PTREGSCALL2(vm86) 745PTREGSCALL2(vm86)
748PTREGSCALL1(vm86old) 746PTREGSCALL1(vm86old)
749 747
750/* Clone is an oddball. The 4th arg is in %edi */
751ENTRY(ptregs_clone)
752 CFI_STARTPROC
753 leal 4(%esp),%eax
754 pushl_cfi %eax
755 pushl_cfi PT_EDI(%eax)
756 movl PT_EDX(%eax),%ecx
757 movl PT_ECX(%eax),%edx
758 movl PT_EBX(%eax),%eax
759 call sys_clone
760 addl $8,%esp
761 CFI_ADJUST_CFA_OFFSET -8
762 ret
763 CFI_ENDPROC
764ENDPROC(ptregs_clone)
765
766.macro FIXUP_ESPFIX_STACK 748.macro FIXUP_ESPFIX_STACK
767/* 749/*
768 * Switch back for ESPFIX stack to the normal zerobased stack 750 * Switch back for ESPFIX stack to the normal zerobased stack
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1328fe49a3f1..70641aff0c25 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,7 +56,7 @@
56#include <asm/ftrace.h> 56#include <asm/ftrace.h>
57#include <asm/percpu.h> 57#include <asm/percpu.h>
58#include <asm/asm.h> 58#include <asm/asm.h>
59#include <asm/rcu.h> 59#include <asm/context_tracking.h>
60#include <asm/smap.h> 60#include <asm/smap.h>
61#include <linux/err.h> 61#include <linux/err.h>
62 62
@@ -845,9 +845,25 @@ ENTRY(\label)
845END(\label) 845END(\label)
846 .endm 846 .endm
847 847
848 PTREGSCALL stub_clone, sys_clone, %r8 848 .macro FORK_LIKE func
849 PTREGSCALL stub_fork, sys_fork, %rdi 849ENTRY(stub_\func)
850 PTREGSCALL stub_vfork, sys_vfork, %rdi 850 CFI_STARTPROC
851 popq %r11 /* save return address */
852 PARTIAL_FRAME 0
853 SAVE_REST
854 pushq %r11 /* put it back on stack */
855 FIXUP_TOP_OF_STACK %r11, 8
856 DEFAULT_FRAME 0 8 /* offset 8: return address */
857 call sys_\func
858 RESTORE_TOP_OF_STACK %r11, 8
859 ret $REST_SKIP /* pop extended registers */
860 CFI_ENDPROC
861END(stub_\func)
862 .endm
863
864 FORK_LIKE clone
865 FORK_LIKE fork
866 FORK_LIKE vfork
851 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx 867 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
852 PTREGSCALL stub_iopl, sys_iopl, %rsi 868 PTREGSCALL stub_iopl, sys_iopl, %rsi
853 869
@@ -1699,9 +1715,10 @@ nested_nmi:
1699 1715
17001: 17161:
1701 /* Set up the interrupted NMIs stack to jump to repeat_nmi */ 1717 /* Set up the interrupted NMIs stack to jump to repeat_nmi */
1702 leaq -6*8(%rsp), %rdx 1718 leaq -1*8(%rsp), %rdx
1703 movq %rdx, %rsp 1719 movq %rdx, %rsp
1704 CFI_ADJUST_CFA_OFFSET 6*8 1720 CFI_ADJUST_CFA_OFFSET 1*8
1721 leaq -10*8(%rsp), %rdx
1705 pushq_cfi $__KERNEL_DS 1722 pushq_cfi $__KERNEL_DS
1706 pushq_cfi %rdx 1723 pushq_cfi %rdx
1707 pushfq_cfi 1724 pushfq_cfi
@@ -1709,8 +1726,8 @@ nested_nmi:
1709 pushq_cfi $repeat_nmi 1726 pushq_cfi $repeat_nmi
1710 1727
1711 /* Put stack back */ 1728 /* Put stack back */
1712 addq $(11*8), %rsp 1729 addq $(6*8), %rsp
1713 CFI_ADJUST_CFA_OFFSET -11*8 1730 CFI_ADJUST_CFA_OFFSET -6*8
1714 1731
1715nested_nmi_out: 1732nested_nmi_out:
1716 popq_cfi %rdx 1733 popq_cfi %rdx
@@ -1736,18 +1753,18 @@ first_nmi:
1736 * +-------------------------+ 1753 * +-------------------------+
1737 * | NMI executing variable | 1754 * | NMI executing variable |
1738 * +-------------------------+ 1755 * +-------------------------+
1739 * | Saved SS |
1740 * | Saved Return RSP |
1741 * | Saved RFLAGS |
1742 * | Saved CS |
1743 * | Saved RIP |
1744 * +-------------------------+
1745 * | copied SS | 1756 * | copied SS |
1746 * | copied Return RSP | 1757 * | copied Return RSP |
1747 * | copied RFLAGS | 1758 * | copied RFLAGS |
1748 * | copied CS | 1759 * | copied CS |
1749 * | copied RIP | 1760 * | copied RIP |
1750 * +-------------------------+ 1761 * +-------------------------+
1762 * | Saved SS |
1763 * | Saved Return RSP |
1764 * | Saved RFLAGS |
1765 * | Saved CS |
1766 * | Saved RIP |
1767 * +-------------------------+
1751 * | pt_regs | 1768 * | pt_regs |
1752 * +-------------------------+ 1769 * +-------------------------+
1753 * 1770 *
@@ -1763,9 +1780,14 @@ first_nmi:
1763 /* Set the NMI executing variable on the stack. */ 1780 /* Set the NMI executing variable on the stack. */
1764 pushq_cfi $1 1781 pushq_cfi $1
1765 1782
1783 /*
1784 * Leave room for the "copied" frame
1785 */
1786 subq $(5*8), %rsp
1787
1766 /* Copy the stack frame to the Saved frame */ 1788 /* Copy the stack frame to the Saved frame */
1767 .rept 5 1789 .rept 5
1768 pushq_cfi 6*8(%rsp) 1790 pushq_cfi 11*8(%rsp)
1769 .endr 1791 .endr
1770 CFI_DEF_CFA_OFFSET SS+8-RIP 1792 CFI_DEF_CFA_OFFSET SS+8-RIP
1771 1793
@@ -1786,12 +1808,15 @@ repeat_nmi:
1786 * is benign for the non-repeat case, where 1 was pushed just above 1808 * is benign for the non-repeat case, where 1 was pushed just above
1787 * to this very stack slot). 1809 * to this very stack slot).
1788 */ 1810 */
1789 movq $1, 5*8(%rsp) 1811 movq $1, 10*8(%rsp)
1790 1812
1791 /* Make another copy, this one may be modified by nested NMIs */ 1813 /* Make another copy, this one may be modified by nested NMIs */
1814 addq $(10*8), %rsp
1815 CFI_ADJUST_CFA_OFFSET -10*8
1792 .rept 5 1816 .rept 5
1793 pushq_cfi 4*8(%rsp) 1817 pushq_cfi -6*8(%rsp)
1794 .endr 1818 .endr
1819 subq $(5*8), %rsp
1795 CFI_DEF_CFA_OFFSET SS+8-RIP 1820 CFI_DEF_CFA_OFFSET SS+8-RIP
1796end_repeat_nmi: 1821end_repeat_nmi:
1797 1822
@@ -1842,8 +1867,12 @@ nmi_swapgs:
1842 SWAPGS_UNSAFE_STACK 1867 SWAPGS_UNSAFE_STACK
1843nmi_restore: 1868nmi_restore:
1844 RESTORE_ALL 8 1869 RESTORE_ALL 8
1870
1871 /* Pop the extra iret frame */
1872 addq $(5*8), %rsp
1873
1845 /* Clear the NMI executing stack variable */ 1874 /* Clear the NMI executing stack variable */
1846 movq $0, 10*8(%rsp) 1875 movq $0, 5*8(%rsp)
1847 jmp irq_return 1876 jmp irq_return
1848 CFI_ENDPROC 1877 CFI_ENDPROC
1849END(nmi) 1878END(nmi)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 4dac2f68ed4a..8e7f6556028f 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -266,6 +266,19 @@ num_subarch_entries = (. - subarch_entries) / 4
266 jmp default_entry 266 jmp default_entry
267#endif /* CONFIG_PARAVIRT */ 267#endif /* CONFIG_PARAVIRT */
268 268
269#ifdef CONFIG_HOTPLUG_CPU
270/*
 271 * Boot CPU0 entry point, called from play_dead(). Everything except the stack
 272 * has already been set up; we only set up the stack here, then jump to
 273 * start_secondary().
274 */
275ENTRY(start_cpu0)
276 movl stack_start, %ecx
277 movl %ecx, %esp
278 jmp *(initial_code)
279ENDPROC(start_cpu0)
280#endif
281
269/* 282/*
270 * Non-boot CPU entry point; entered from trampoline.S 283 * Non-boot CPU entry point; entered from trampoline.S
271 * We can't lgdt here, because lgdt itself uses a data segment, but 284 * We can't lgdt here, because lgdt itself uses a data segment, but
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 94bf9cc2c7ee..980053c4b9cc 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -252,6 +252,22 @@ ENTRY(secondary_startup_64)
252 pushq %rax # target address in negative space 252 pushq %rax # target address in negative space
253 lretq 253 lretq
254 254
255#ifdef CONFIG_HOTPLUG_CPU
256/*
 257 * Boot CPU0 entry point, called from play_dead(). Everything except the stack
 258 * has already been set up; we only set up the stack here, then jump to
 259 * start_secondary().
260 */
261ENTRY(start_cpu0)
262 movq stack_start(%rip),%rsp
263 movq initial_code(%rip),%rax
264 pushq $0 # fake return address to stop unwinder
265 pushq $__KERNEL_CS # set correct cs
266 pushq %rax # target address in negative space
267 lretq
268ENDPROC(start_cpu0)
269#endif
270
255 /* SMP bootup changes these two */ 271 /* SMP bootup changes these two */
256 __REFDATA 272 __REFDATA
257 .align 8 273 .align 8
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 1460a5df92f7..e28670f9a589 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -434,7 +434,7 @@ void hpet_msi_unmask(struct irq_data *data)
434 434
435 /* unmask it */ 435 /* unmask it */
436 cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); 436 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
437 cfg |= HPET_TN_FSB; 437 cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
438 hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); 438 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
439} 439}
440 440
@@ -445,7 +445,7 @@ void hpet_msi_mask(struct irq_data *data)
445 445
446 /* mask it */ 446 /* mask it */
447 cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); 447 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
448 cfg &= ~HPET_TN_FSB; 448 cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
449 hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); 449 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
450} 450}
451 451
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 675a05012449..245a71db401a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -175,7 +175,11 @@ void __cpuinit fpu_init(void)
175 cr0 |= X86_CR0_EM; 175 cr0 |= X86_CR0_EM;
176 write_cr0(cr0); 176 write_cr0(cr0);
177 177
178 if (!smp_processor_id()) 178 /*
179 * init_thread_xstate is only called once to avoid overriding
180 * xstate_size during boot time or during CPU hotplug.
181 */
182 if (xstate_size == 0)
179 init_thread_xstate(); 183 init_thread_xstate();
180 184
181 mxcsr_feature_mask_init(); 185 mxcsr_feature_mask_init();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4180a874c764..08b973f64032 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -42,6 +42,7 @@
42#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/apicdef.h> 43#include <asm/apicdef.h>
44#include <asm/hypervisor.h> 44#include <asm/hypervisor.h>
45#include <asm/kvm_guest.h>
45 46
46static int kvmapf = 1; 47static int kvmapf = 1;
47 48
@@ -62,6 +63,15 @@ static int parse_no_stealacc(char *arg)
62 63
63early_param("no-steal-acc", parse_no_stealacc); 64early_param("no-steal-acc", parse_no_stealacc);
64 65
66static int kvmclock_vsyscall = 1;
67static int parse_no_kvmclock_vsyscall(char *arg)
68{
69 kvmclock_vsyscall = 0;
70 return 0;
71}
72
73early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
74
65static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); 75static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
66static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); 76static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
67static int has_steal_clock = 0; 77static int has_steal_clock = 0;
@@ -110,11 +120,6 @@ void kvm_async_pf_task_wait(u32 token)
110 struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; 120 struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
111 struct kvm_task_sleep_node n, *e; 121 struct kvm_task_sleep_node n, *e;
112 DEFINE_WAIT(wait); 122 DEFINE_WAIT(wait);
113 int cpu, idle;
114
115 cpu = get_cpu();
116 idle = idle_cpu(cpu);
117 put_cpu();
118 123
119 spin_lock(&b->lock); 124 spin_lock(&b->lock);
120 e = _find_apf_task(b, token); 125 e = _find_apf_task(b, token);
@@ -128,7 +133,7 @@ void kvm_async_pf_task_wait(u32 token)
128 133
129 n.token = token; 134 n.token = token;
130 n.cpu = smp_processor_id(); 135 n.cpu = smp_processor_id();
131 n.halted = idle || preempt_count() > 1; 136 n.halted = is_idle_task(current) || preempt_count() > 1;
132 init_waitqueue_head(&n.wq); 137 init_waitqueue_head(&n.wq);
133 hlist_add_head(&n.link, &b->list); 138 hlist_add_head(&n.link, &b->list);
134 spin_unlock(&b->lock); 139 spin_unlock(&b->lock);
@@ -471,6 +476,9 @@ void __init kvm_guest_init(void)
471 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) 476 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
472 apic_set_eoi_write(kvm_guest_apic_eoi_write); 477 apic_set_eoi_write(kvm_guest_apic_eoi_write);
473 478
479 if (kvmclock_vsyscall)
480 kvm_setup_vsyscall_timeinfo();
481
474#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
475 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; 483 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
476 register_cpu_notifier(&kvm_cpu_notifier); 484 register_cpu_notifier(&kvm_cpu_notifier);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f1b42b3a186c..220a360010f8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -23,6 +23,7 @@
23#include <asm/apic.h> 23#include <asm/apic.h>
24#include <linux/percpu.h> 24#include <linux/percpu.h>
25#include <linux/hardirq.h> 25#include <linux/hardirq.h>
26#include <linux/memblock.h>
26 27
27#include <asm/x86_init.h> 28#include <asm/x86_init.h>
28#include <asm/reboot.h> 29#include <asm/reboot.h>
@@ -39,7 +40,7 @@ static int parse_no_kvmclock(char *arg)
39early_param("no-kvmclock", parse_no_kvmclock); 40early_param("no-kvmclock", parse_no_kvmclock);
40 41
41/* The hypervisor will put information about time periodically here */ 42/* The hypervisor will put information about time periodically here */
42static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); 43static struct pvclock_vsyscall_time_info *hv_clock;
43static struct pvclock_wall_clock wall_clock; 44static struct pvclock_wall_clock wall_clock;
44 45
45/* 46/*
@@ -52,15 +53,20 @@ static unsigned long kvm_get_wallclock(void)
52 struct pvclock_vcpu_time_info *vcpu_time; 53 struct pvclock_vcpu_time_info *vcpu_time;
53 struct timespec ts; 54 struct timespec ts;
54 int low, high; 55 int low, high;
56 int cpu;
55 57
56 low = (int)__pa_symbol(&wall_clock); 58 low = (int)__pa_symbol(&wall_clock);
57 high = ((u64)__pa_symbol(&wall_clock) >> 32); 59 high = ((u64)__pa_symbol(&wall_clock) >> 32);
58 60
59 native_write_msr(msr_kvm_wall_clock, low, high); 61 native_write_msr(msr_kvm_wall_clock, low, high);
60 62
61 vcpu_time = &get_cpu_var(hv_clock); 63 preempt_disable();
64 cpu = smp_processor_id();
65
66 vcpu_time = &hv_clock[cpu].pvti;
62 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); 67 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
63 put_cpu_var(hv_clock); 68
69 preempt_enable();
64 70
65 return ts.tv_sec; 71 return ts.tv_sec;
66} 72}
@@ -74,9 +80,11 @@ static cycle_t kvm_clock_read(void)
74{ 80{
75 struct pvclock_vcpu_time_info *src; 81 struct pvclock_vcpu_time_info *src;
76 cycle_t ret; 82 cycle_t ret;
83 int cpu;
77 84
78 preempt_disable_notrace(); 85 preempt_disable_notrace();
79 src = &__get_cpu_var(hv_clock); 86 cpu = smp_processor_id();
87 src = &hv_clock[cpu].pvti;
80 ret = pvclock_clocksource_read(src); 88 ret = pvclock_clocksource_read(src);
81 preempt_enable_notrace(); 89 preempt_enable_notrace();
82 return ret; 90 return ret;
@@ -99,8 +107,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
99static unsigned long kvm_get_tsc_khz(void) 107static unsigned long kvm_get_tsc_khz(void)
100{ 108{
101 struct pvclock_vcpu_time_info *src; 109 struct pvclock_vcpu_time_info *src;
102 src = &per_cpu(hv_clock, 0); 110 int cpu;
103 return pvclock_tsc_khz(src); 111 unsigned long tsc_khz;
112
113 preempt_disable();
114 cpu = smp_processor_id();
115 src = &hv_clock[cpu].pvti;
116 tsc_khz = pvclock_tsc_khz(src);
117 preempt_enable();
118 return tsc_khz;
104} 119}
105 120
106static void kvm_get_preset_lpj(void) 121static void kvm_get_preset_lpj(void)
@@ -119,10 +134,14 @@ bool kvm_check_and_clear_guest_paused(void)
119{ 134{
120 bool ret = false; 135 bool ret = false;
121 struct pvclock_vcpu_time_info *src; 136 struct pvclock_vcpu_time_info *src;
137 int cpu = smp_processor_id();
122 138
123 src = &__get_cpu_var(hv_clock); 139 if (!hv_clock)
140 return ret;
141
142 src = &hv_clock[cpu].pvti;
124 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { 143 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
125 __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED); 144 src->flags &= ~PVCLOCK_GUEST_STOPPED;
126 ret = true; 145 ret = true;
127 } 146 }
128 147
@@ -141,9 +160,10 @@ int kvm_register_clock(char *txt)
141{ 160{
142 int cpu = smp_processor_id(); 161 int cpu = smp_processor_id();
143 int low, high, ret; 162 int low, high, ret;
163 struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
144 164
145 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; 165 low = (int)__pa(src) | 1;
146 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 166 high = ((u64)__pa(src) >> 32);
147 ret = native_write_msr_safe(msr_kvm_system_time, low, high); 167 ret = native_write_msr_safe(msr_kvm_system_time, low, high);
148 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 168 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
149 cpu, high, low, txt); 169 cpu, high, low, txt);
@@ -197,6 +217,8 @@ static void kvm_shutdown(void)
197 217
198void __init kvmclock_init(void) 218void __init kvmclock_init(void)
199{ 219{
220 unsigned long mem;
221
200 if (!kvm_para_available()) 222 if (!kvm_para_available())
201 return; 223 return;
202 224
@@ -209,8 +231,18 @@ void __init kvmclock_init(void)
209 printk(KERN_INFO "kvm-clock: Using msrs %x and %x", 231 printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
210 msr_kvm_system_time, msr_kvm_wall_clock); 232 msr_kvm_system_time, msr_kvm_wall_clock);
211 233
212 if (kvm_register_clock("boot clock")) 234 mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
235 PAGE_SIZE);
236 if (!mem)
237 return;
238 hv_clock = __va(mem);
239
240 if (kvm_register_clock("boot clock")) {
241 hv_clock = NULL;
242 memblock_free(mem,
243 sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
213 return; 244 return;
245 }
214 pv_time_ops.sched_clock = kvm_clock_read; 246 pv_time_ops.sched_clock = kvm_clock_read;
215 x86_platform.calibrate_tsc = kvm_get_tsc_khz; 247 x86_platform.calibrate_tsc = kvm_get_tsc_khz;
216 x86_platform.get_wallclock = kvm_get_wallclock; 248 x86_platform.get_wallclock = kvm_get_wallclock;
@@ -233,3 +265,37 @@ void __init kvmclock_init(void)
233 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) 265 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
234 pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); 266 pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
235} 267}
268
269int __init kvm_setup_vsyscall_timeinfo(void)
270{
271#ifdef CONFIG_X86_64
272 int cpu;
273 int ret;
274 u8 flags;
275 struct pvclock_vcpu_time_info *vcpu_time;
276 unsigned int size;
277
278 size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;
279
280 preempt_disable();
281 cpu = smp_processor_id();
282
283 vcpu_time = &hv_clock[cpu].pvti;
284 flags = pvclock_read_flags(vcpu_time);
285
286 if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
287 preempt_enable();
288 return 1;
289 }
290
291 if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
292 preempt_enable();
293 return ret;
294 }
295
296 preempt_enable();
297
298 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
299#endif
300 return 0;
301}
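With hv_clock now a flat array of pvclock_vsyscall_time_info records (one slot per possible CPU, page-aligned via memblock), both the kernel clocksource and, once pvclock_init_vsyscall() has wired it up, the vDSO can read the same per-CPU data. For background, a simplified sketch of the pvclock read protocol such readers follow: the hypervisor makes the version field odd while it updates a record, so the reader retries until it sees a stable, even version. pvclock_clocksource_read() is the real helper; the TSC read below is a placeholder:

/* Simplified pvclock reader; not the kernel's exact implementation. */
static u64 pvclock_read_sketch(const struct pvclock_vcpu_time_info *src)
{
	u32 version;
	u64 tsc, delta, ns;

	do {
		version = src->version;
		smp_rmb();			/* read version before the payload */
		tsc   = read_tsc_placeholder();	/* placeholder for the raw TSC read */
		delta = tsc - src->tsc_timestamp;
		ns    = src->system_time +
			pvclock_scale_delta(delta, src->tsc_to_system_mul,
					    src->tsc_shift);
		smp_rmb();			/* payload before re-checking version */
	} while ((version & 1) || version != src->version);

	return ns;
}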
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b644e1c765dc..2ed787f15bf0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -262,36 +262,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
262 propagate_user_return_notify(prev_p, next_p); 262 propagate_user_return_notify(prev_p, next_p);
263} 263}
264 264
265int sys_fork(struct pt_regs *regs)
266{
267 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
268}
269
270/*
271 * This is trivial, and on the face of it looks like it
272 * could equally well be done in user mode.
273 *
274 * Not so, for quite unobvious reasons - register pressure.
275 * In user mode vfork() cannot have a stack frame, and if
276 * done by calling the "clone()" system call directly, you
277 * do not have enough call-clobbered registers to hold all
278 * the information you need.
279 */
280int sys_vfork(struct pt_regs *regs)
281{
282 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
283 NULL, NULL);
284}
285
286long
287sys_clone(unsigned long clone_flags, unsigned long newsp,
288 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
289{
290 if (!newsp)
291 newsp = regs->sp;
292 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
293}
294
295/* 265/*
296 * Idle related variables and functions 266 * Idle related variables and functions
297 */ 267 */
@@ -306,11 +276,6 @@ void (*pm_idle)(void);
306EXPORT_SYMBOL(pm_idle); 276EXPORT_SYMBOL(pm_idle);
307#endif 277#endif
308 278
309static inline int hlt_use_halt(void)
310{
311 return 1;
312}
313
314#ifndef CONFIG_SMP 279#ifndef CONFIG_SMP
315static inline void play_dead(void) 280static inline void play_dead(void)
316{ 281{
@@ -410,28 +375,22 @@ void cpu_idle(void)
410 */ 375 */
411void default_idle(void) 376void default_idle(void)
412{ 377{
413 if (hlt_use_halt()) { 378 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
414 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); 379 trace_cpu_idle_rcuidle(1, smp_processor_id());
415 trace_cpu_idle_rcuidle(1, smp_processor_id()); 380 current_thread_info()->status &= ~TS_POLLING;
416 current_thread_info()->status &= ~TS_POLLING; 381 /*
417 /* 382 * TS_POLLING-cleared state must be visible before we
418 * TS_POLLING-cleared state must be visible before we 383 * test NEED_RESCHED:
419 * test NEED_RESCHED: 384 */
420 */ 385 smp_mb();
421 smp_mb();
422 386
423 if (!need_resched()) 387 if (!need_resched())
424 safe_halt(); /* enables interrupts racelessly */ 388 safe_halt(); /* enables interrupts racelessly */
425 else 389 else
426 local_irq_enable();
427 current_thread_info()->status |= TS_POLLING;
428 trace_power_end_rcuidle(smp_processor_id());
429 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
430 } else {
431 local_irq_enable(); 390 local_irq_enable();
432 /* loop is done by the caller */ 391 current_thread_info()->status |= TS_POLLING;
433 cpu_relax(); 392 trace_power_end_rcuidle(smp_processor_id());
434 } 393 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
435} 394}
436#ifdef CONFIG_APM_MODULE 395#ifdef CONFIG_APM_MODULE
437EXPORT_SYMBOL(default_idle); 396EXPORT_SYMBOL(default_idle);
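Dropping sys_fork()/sys_vfork()/sys_clone() from process.c relies on the generic wrappers in kernel/fork.c taking over once the architecture no longer needs a struct pt_regs argument; in spirit they reduce to plain do_fork() calls. A rough sketch of that shape, with the argument lists simplified since the exact do_fork() prototype at this point in the series is not shown here:

/* Roughly what the generic replacements look like (simplified). */
SYSCALL_DEFINE0(fork)
{
	return do_fork(SIGCHLD, 0, 0, NULL, NULL);
}

SYSCALL_DEFINE0(vfork)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL);
}

Note how this pairs with the copy_thread() changes below: the stack pointer argument is now optional, and when it is 0 the child simply keeps the value copied from current_pt_regs().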
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 44e0bff38e72..b5a8905785e6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -128,8 +128,7 @@ void release_thread(struct task_struct *dead_task)
128} 128}
129 129
130int copy_thread(unsigned long clone_flags, unsigned long sp, 130int copy_thread(unsigned long clone_flags, unsigned long sp,
131 unsigned long arg, 131 unsigned long arg, struct task_struct *p)
132 struct task_struct *p, struct pt_regs *regs)
133{ 132{
134 struct pt_regs *childregs = task_pt_regs(p); 133 struct pt_regs *childregs = task_pt_regs(p);
135 struct task_struct *tsk; 134 struct task_struct *tsk;
@@ -138,7 +137,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
138 p->thread.sp = (unsigned long) childregs; 137 p->thread.sp = (unsigned long) childregs;
139 p->thread.sp0 = (unsigned long) (childregs+1); 138 p->thread.sp0 = (unsigned long) (childregs+1);
140 139
141 if (unlikely(!regs)) { 140 if (unlikely(p->flags & PF_KTHREAD)) {
142 /* kernel thread */ 141 /* kernel thread */
143 memset(childregs, 0, sizeof(struct pt_regs)); 142 memset(childregs, 0, sizeof(struct pt_regs));
144 p->thread.ip = (unsigned long) ret_from_kernel_thread; 143 p->thread.ip = (unsigned long) ret_from_kernel_thread;
@@ -156,12 +155,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
156 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 155 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
157 return 0; 156 return 0;
158 } 157 }
159 *childregs = *regs; 158 *childregs = *current_pt_regs();
160 childregs->ax = 0; 159 childregs->ax = 0;
161 childregs->sp = sp; 160 if (sp)
161 childregs->sp = sp;
162 162
163 p->thread.ip = (unsigned long) ret_from_fork; 163 p->thread.ip = (unsigned long) ret_from_fork;
164 task_user_gs(p) = get_user_gs(regs); 164 task_user_gs(p) = get_user_gs(current_pt_regs());
165 165
166 p->fpu_counter = 0; 166 p->fpu_counter = 0;
167 p->thread.io_bitmap_ptr = NULL; 167 p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 16c6365e2b86..6e68a6194965 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,8 +146,7 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
146} 146}
147 147
148int copy_thread(unsigned long clone_flags, unsigned long sp, 148int copy_thread(unsigned long clone_flags, unsigned long sp,
149 unsigned long arg, 149 unsigned long arg, struct task_struct *p)
150 struct task_struct *p, struct pt_regs *regs)
151{ 150{
152 int err; 151 int err;
153 struct pt_regs *childregs; 152 struct pt_regs *childregs;
@@ -169,7 +168,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
169 savesegment(ds, p->thread.ds); 168 savesegment(ds, p->thread.ds);
170 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 169 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
171 170
172 if (unlikely(!regs)) { 171 if (unlikely(p->flags & PF_KTHREAD)) {
173 /* kernel thread */ 172 /* kernel thread */
174 memset(childregs, 0, sizeof(struct pt_regs)); 173 memset(childregs, 0, sizeof(struct pt_regs));
175 childregs->sp = (unsigned long)childregs; 174 childregs->sp = (unsigned long)childregs;
@@ -181,10 +180,11 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
181 childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; 180 childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
182 return 0; 181 return 0;
183 } 182 }
184 *childregs = *regs; 183 *childregs = *current_pt_regs();
185 184
186 childregs->ax = 0; 185 childregs->ax = 0;
187 childregs->sp = sp; 186 if (sp)
187 childregs->sp = sp;
188 188
189 err = -ENOMEM; 189 err = -ENOMEM;
190 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 190 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
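
Both copy_thread() variants above drop the pt_regs argument: kernel threads are now recognised by PF_KTHREAD, user threads copy the parent's register image from current_pt_regs(), and the child's stack pointer is only overridden when the caller actually supplied one. A compressed, hedged model of that decision structure (plain user-space C; struct regs and the globals are stand-ins, not kernel types):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct regs { unsigned long ax, sp, ip; };

static bool child_is_kthread;                  /* models p->flags & PF_KTHREAD */
static struct regs parent_regs = { 1, 0x7fff0000, 0x400000 };

static void model_copy_thread(struct regs *childregs, unsigned long sp)
{
	if (child_is_kthread) {
		/* kernel thread: no user register image to inherit */
		memset(childregs, 0, sizeof(*childregs));
		return;
	}
	*childregs = parent_regs;          /* models *current_pt_regs()      */
	childregs->ax = 0;                 /* child sees 0 from fork()/clone */
	if (sp)                            /* clone() with a new user stack? */
		childregs->sp = sp;
}

int main(void)
{
	struct regs child;
	model_copy_thread(&child, 0);
	printf("child ax=%lu sp=%#lx\n", child.ax, child.sp);
	return 0;
}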
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 974b67e46dd0..b629bbe0d9bd 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -23,6 +23,7 @@
23#include <linux/hw_breakpoint.h> 23#include <linux/hw_breakpoint.h>
24#include <linux/rcupdate.h> 24#include <linux/rcupdate.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/context_tracking.h>
26 27
27#include <asm/uaccess.h> 28#include <asm/uaccess.h>
28#include <asm/pgtable.h> 29#include <asm/pgtable.h>
@@ -1491,7 +1492,7 @@ long syscall_trace_enter(struct pt_regs *regs)
1491{ 1492{
1492 long ret = 0; 1493 long ret = 0;
1493 1494
1494 rcu_user_exit(); 1495 user_exit();
1495 1496
1496 /* 1497 /*
1497 * If we stepped into a sysenter/syscall insn, it trapped in 1498 * If we stepped into a sysenter/syscall insn, it trapped in
@@ -1546,7 +1547,7 @@ void syscall_trace_leave(struct pt_regs *regs)
1546 * or do_notify_resume(), in which case we can be in RCU 1547 * or do_notify_resume(), in which case we can be in RCU
1547 * user mode. 1548 * user mode.
1548 */ 1549 */
1549 rcu_user_exit(); 1550 user_exit();
1550 1551
1551 audit_syscall_exit(regs); 1552 audit_syscall_exit(regs);
1552 1553
@@ -1564,5 +1565,5 @@ void syscall_trace_leave(struct pt_regs *regs)
1564 if (step || test_thread_flag(TIF_SYSCALL_TRACE)) 1565 if (step || test_thread_flag(TIF_SYSCALL_TRACE))
1565 tracehook_report_syscall_exit(regs, step); 1566 tracehook_report_syscall_exit(regs, step);
1566 1567
1567 rcu_user_enter(); 1568 user_enter();
1568} 1569}
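
The ptrace hunks above swap rcu_user_exit()/rcu_user_enter() for user_exit()/user_enter() from <linux/context_tracking.h>, but the bracketing discipline is unchanged: mark the transition out of user mode on every entry path and back in just before resuming user mode. A minimal stand-alone model of that pairing (illustrative only; these _model functions are not the kernel API):

#include <stdio.h>

enum ctx_state { CTX_USER, CTX_KERNEL };
static enum ctx_state state = CTX_USER;

static void user_exit_model(void)  { state = CTX_KERNEL; } /* entering kernel */
static void user_enter_model(void) { state = CTX_USER;   } /* resuming user   */

/* Every entry path (syscall tracing, signal delivery, traps) brackets its
 * work the same way, so context tracking always knows which side it is on. */
static void syscall_trace_enter_model(void) { user_exit_model(); /* ... */ }
static void syscall_trace_leave_model(void) { /* ... */ user_enter_model(); }

int main(void)
{
	syscall_trace_enter_model();
	printf("in kernel: %d\n", state == CTX_KERNEL);
	syscall_trace_leave_model();
	printf("back in user: %d\n", state == CTX_USER);
	return 0;
}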
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 42eb3300dfc6..85c39590c1a4 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -17,23 +17,13 @@
17 17
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/percpu.h> 19#include <linux/percpu.h>
20#include <linux/notifier.h>
21#include <linux/sched.h>
22#include <linux/gfp.h>
23#include <linux/bootmem.h>
24#include <asm/fixmap.h>
20#include <asm/pvclock.h> 25#include <asm/pvclock.h>
21 26
22/*
23 * These are perodically updated
24 * xen: magic shared_info page
25 * kvm: gpa registered via msr
26 * and then copied here.
27 */
28struct pvclock_shadow_time {
29 u64 tsc_timestamp; /* TSC at last update of time vals. */
30 u64 system_timestamp; /* Time, in nanosecs, since boot. */
31 u32 tsc_to_nsec_mul;
32 int tsc_shift;
33 u32 version;
34 u8 flags;
35};
36
37static u8 valid_flags __read_mostly = 0; 27static u8 valid_flags __read_mostly = 0;
38 28
39void pvclock_set_flags(u8 flags) 29void pvclock_set_flags(u8 flags)
@@ -41,34 +31,6 @@ void pvclock_set_flags(u8 flags)
41 valid_flags = flags; 31 valid_flags = flags;
42} 32}
43 33
44static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
45{
46 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
47 return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
48 shadow->tsc_shift);
49}
50
51/*
52 * Reads a consistent set of time-base values from hypervisor,
53 * into a shadow data area.
54 */
55static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
56 struct pvclock_vcpu_time_info *src)
57{
58 do {
59 dst->version = src->version;
60 rmb(); /* fetch version before data */
61 dst->tsc_timestamp = src->tsc_timestamp;
62 dst->system_timestamp = src->system_time;
63 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
64 dst->tsc_shift = src->tsc_shift;
65 dst->flags = src->flags;
66 rmb(); /* test version after fetching data */
67 } while ((src->version & 1) || (dst->version != src->version));
68
69 return dst->version;
70}
71
72unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) 34unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
73{ 35{
74 u64 pv_tsc_khz = 1000000ULL << 32; 36 u64 pv_tsc_khz = 1000000ULL << 32;
@@ -88,23 +50,32 @@ void pvclock_resume(void)
88 atomic64_set(&last_value, 0); 50 atomic64_set(&last_value, 0);
89} 51}
90 52
53u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
54{
55 unsigned version;
56 cycle_t ret;
57 u8 flags;
58
59 do {
60 version = __pvclock_read_cycles(src, &ret, &flags);
61 } while ((src->version & 1) || version != src->version);
62
63 return flags & valid_flags;
64}
65
91cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 66cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
92{ 67{
93 struct pvclock_shadow_time shadow;
94 unsigned version; 68 unsigned version;
95 cycle_t ret, offset; 69 cycle_t ret;
96 u64 last; 70 u64 last;
71 u8 flags;
97 72
98 do { 73 do {
99 version = pvclock_get_time_values(&shadow, src); 74 version = __pvclock_read_cycles(src, &ret, &flags);
100 barrier(); 75 } while ((src->version & 1) || version != src->version);
101 offset = pvclock_get_nsec_offset(&shadow);
102 ret = shadow.system_timestamp + offset;
103 barrier();
104 } while (version != src->version);
105 76
106 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && 77 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
107 (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) 78 (flags & PVCLOCK_TSC_STABLE_BIT))
108 return ret; 79 return ret;
109 80
110 /* 81 /*
@@ -156,3 +127,71 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
156 127
157 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); 128 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
158} 129}
130
131static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
132
133static struct pvclock_vsyscall_time_info *
134pvclock_get_vsyscall_user_time_info(int cpu)
135{
136 if (!pvclock_vdso_info) {
137 BUG();
138 return NULL;
139 }
140
141 return &pvclock_vdso_info[cpu];
142}
143
144struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
145{
146 return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
147}
148
149#ifdef CONFIG_X86_64
150static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
151 void *v)
152{
153 struct task_migration_notifier *mn = v;
154 struct pvclock_vsyscall_time_info *pvti;
155
156 pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
157
158 /* this is NULL when pvclock vsyscall is not initialized */
159 if (unlikely(pvti == NULL))
160 return NOTIFY_DONE;
161
162 pvti->migrate_count++;
163
164 return NOTIFY_DONE;
165}
166
167static struct notifier_block pvclock_migrate = {
168 .notifier_call = pvclock_task_migrate,
169};
170
171/*
172 * Initialize the generic pvclock vsyscall state. This will allocate

173 * one or more pages for the per-vcpu pvclock information and set up
174 * a fixmap mapping for those pages.
175 */
176
177int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
178 int size)
179{
180 int idx;
181
182 WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
183
184 pvclock_vdso_info = i;
185
186 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
187 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
188 __pa_symbol(i) + (idx*PAGE_SIZE),
189 PAGE_KERNEL_VVAR);
190 }
191
192
193 register_task_migration_notifier(&pvclock_migrate);
194
195 return 0;
196}
197#endif
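
Both pvclock_read_flags() and pvclock_clocksource_read() above now spin on the same check around __pvclock_read_cycles(): retry while the publisher's version is odd (update in progress) or has changed during the read. A minimal single-writer model of that seqcount-style protocol, purely for illustration (struct time_info and these helpers are stand-ins, and a real concurrent version also needs the rmb()/wmb() pairing the old shadow code carried):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct time_info {
	atomic_uint version;     /* odd while the hypervisor is updating */
	uint64_t    system_time;
	uint64_t    tsc_timestamp;
};

/* Writer: bump to odd, update the payload, bump back to even. */
static void publish(struct time_info *ti, uint64_t st, uint64_t tsc)
{
	atomic_fetch_add(&ti->version, 1);          /* now odd    */
	ti->system_time   = st;
	ti->tsc_timestamp = tsc;
	atomic_fetch_add(&ti->version, 1);          /* even again */
}

/* Reader: retry while the snapshot straddles an update. */
static uint64_t read_time(struct time_info *ti)
{
	unsigned v;
	uint64_t st;

	do {
		v  = atomic_load(&ti->version);
		st = ti->system_time;            /* the payload snapshot */
	} while ((v & 1) || v != atomic_load(&ti->version));

	return st;
}

int main(void)
{
	struct time_info ti = { 0 };
	publish(&ti, 123456789, 42);
	printf("system_time=%llu\n", (unsigned long long)read_time(&ti));
	return 0;
}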
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 4929c1be0ac0..801602b5d745 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -195,12 +195,6 @@ void read_persistent_clock(struct timespec *ts)
195 ts->tv_nsec = 0; 195 ts->tv_nsec = 0;
196} 196}
197 197
198unsigned long long native_read_tsc(void)
199{
200 return __native_read_tsc();
201}
202EXPORT_SYMBOL(native_read_tsc);
203
204 198
205static struct resource rtc_resources[] = { 199static struct resource rtc_resources[] = {
206 [0] = { 200 [0] = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ca45696f30fb..23ddd558fbd5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -143,11 +143,7 @@ int default_check_phys_apicid_present(int phys_apicid)
143} 143}
144#endif 144#endif
145 145
146#ifndef CONFIG_DEBUG_BOOT_PARAMS
147struct boot_params __initdata boot_params;
148#else
149struct boot_params boot_params; 146struct boot_params boot_params;
150#endif
151 147
152/* 148/*
153 * Machine setup.. 149 * Machine setup..
@@ -956,6 +952,10 @@ void __init setup_arch(char **cmdline_p)
956 952
957 reserve_initrd(); 953 reserve_initrd();
958 954
955#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD)
956 acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
957#endif
958
959 reserve_crashkernel(); 959 reserve_crashkernel();
960 960
961 vsmp_init(); 961 vsmp_init();
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 70b27ee6118e..fbbb604313a2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -22,6 +22,7 @@
22#include <linux/uaccess.h> 22#include <linux/uaccess.h>
23#include <linux/user-return-notifier.h> 23#include <linux/user-return-notifier.h>
24#include <linux/uprobes.h> 24#include <linux/uprobes.h>
25#include <linux/context_tracking.h>
25 26
26#include <asm/processor.h> 27#include <asm/processor.h>
27#include <asm/ucontext.h> 28#include <asm/ucontext.h>
@@ -816,7 +817,7 @@ static void do_signal(struct pt_regs *regs)
816void 817void
817do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 818do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
818{ 819{
819 rcu_user_exit(); 820 user_exit();
820 821
821#ifdef CONFIG_X86_MCE 822#ifdef CONFIG_X86_MCE
822 /* notify userspace of pending MCEs */ 823 /* notify userspace of pending MCEs */
@@ -838,7 +839,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
838 if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) 839 if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
839 fire_user_return_notifiers(); 840 fire_user_return_notifiers();
840 841
841 rcu_user_enter(); 842 user_enter();
842} 843}
843 844
844void signal_fault(struct pt_regs *regs, void __user *frame, char *where) 845void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f3e2ec878b8c..ed0fe385289d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -127,8 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
127atomic_t init_deasserted; 127atomic_t init_deasserted;
128 128
129/* 129/*
130 * Report back to the Boot Processor. 130 * Report back to the Boot Processor during boot time or to the caller processor
131 * Running on AP. 131 * during CPU online.
132 */ 132 */
133static void __cpuinit smp_callin(void) 133static void __cpuinit smp_callin(void)
134{ 134{
@@ -140,15 +140,17 @@ static void __cpuinit smp_callin(void)
140 * we may get here before an INIT-deassert IPI reaches 140 * we may get here before an INIT-deassert IPI reaches
141 * our local APIC. We have to wait for the IPI or we'll 141 * our local APIC. We have to wait for the IPI or we'll
142 * lock up on an APIC access. 142 * lock up on an APIC access.
143 *
144 * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
143 */ 145 */
144 if (apic->wait_for_init_deassert) 146 cpuid = smp_processor_id();
147 if (apic->wait_for_init_deassert && cpuid != 0)
145 apic->wait_for_init_deassert(&init_deasserted); 148 apic->wait_for_init_deassert(&init_deasserted);
146 149
147 /* 150 /*
148 * (This works even if the APIC is not enabled.) 151 * (This works even if the APIC is not enabled.)
149 */ 152 */
150 phys_id = read_apic_id(); 153 phys_id = read_apic_id();
151 cpuid = smp_processor_id();
152 if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { 154 if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
153 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 155 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
154 phys_id, cpuid); 156 phys_id, cpuid);
@@ -230,6 +232,8 @@ static void __cpuinit smp_callin(void)
230 cpumask_set_cpu(cpuid, cpu_callin_mask); 232 cpumask_set_cpu(cpuid, cpu_callin_mask);
231} 233}
232 234
235static int cpu0_logical_apicid;
236static int enable_start_cpu0;
233/* 237/*
234 * Activate a secondary processor. 238 * Activate a secondary processor.
235 */ 239 */
@@ -245,6 +249,8 @@ notrace static void __cpuinit start_secondary(void *unused)
245 preempt_disable(); 249 preempt_disable();
246 smp_callin(); 250 smp_callin();
247 251
252 enable_start_cpu0 = 0;
253
248#ifdef CONFIG_X86_32 254#ifdef CONFIG_X86_32
249 /* switch away from the initial page table */ 255 /* switch away from the initial page table */
250 load_cr3(swapper_pg_dir); 256 load_cr3(swapper_pg_dir);
@@ -281,19 +287,30 @@ notrace static void __cpuinit start_secondary(void *unused)
281 cpu_idle(); 287 cpu_idle();
282} 288}
283 289
290void __init smp_store_boot_cpu_info(void)
291{
292 int id = 0; /* CPU 0 */
293 struct cpuinfo_x86 *c = &cpu_data(id);
294
295 *c = boot_cpu_data;
296 c->cpu_index = id;
297}
298
284/* 299/*
285 * The bootstrap kernel entry code has set these up. Save them for 300 * The bootstrap kernel entry code has set these up. Save them for
286 * a given CPU 301 * a given CPU
287 */ 302 */
288
289void __cpuinit smp_store_cpu_info(int id) 303void __cpuinit smp_store_cpu_info(int id)
290{ 304{
291 struct cpuinfo_x86 *c = &cpu_data(id); 305 struct cpuinfo_x86 *c = &cpu_data(id);
292 306
293 *c = boot_cpu_data; 307 *c = boot_cpu_data;
294 c->cpu_index = id; 308 c->cpu_index = id;
295 if (id != 0) 309 /*
296 identify_secondary_cpu(c); 310 * During boot time, CPU0 has this setup already. Save the info when
311 * bringing up an AP or an offlined CPU0.
312 */
313 identify_secondary_cpu(c);
297} 314}
298 315
299static bool __cpuinit 316static bool __cpuinit
@@ -315,7 +332,7 @@ do { \
315 332
316static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) 333static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
317{ 334{
318 if (cpu_has(c, X86_FEATURE_TOPOEXT)) { 335 if (cpu_has_topoext) {
319 int cpu1 = c->cpu_index, cpu2 = o->cpu_index; 336 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
320 337
321 if (c->phys_proc_id == o->phys_proc_id && 338 if (c->phys_proc_id == o->phys_proc_id &&
@@ -483,7 +500,7 @@ void __inquire_remote_apic(int apicid)
483 * won't ... remember to clear down the APIC, etc later. 500 * won't ... remember to clear down the APIC, etc later.
484 */ 501 */
485int __cpuinit 502int __cpuinit
486wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) 503wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
487{ 504{
488 unsigned long send_status, accept_status = 0; 505 unsigned long send_status, accept_status = 0;
489 int maxlvt; 506 int maxlvt;
@@ -491,7 +508,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
491 /* Target chip */ 508 /* Target chip */
492 /* Boot on the stack */ 509 /* Boot on the stack */
493 /* Kick the second */ 510 /* Kick the second */
494 apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); 511 apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);
495 512
496 pr_debug("Waiting for send to finish...\n"); 513 pr_debug("Waiting for send to finish...\n");
497 send_status = safe_apic_wait_icr_idle(); 514 send_status = safe_apic_wait_icr_idle();
@@ -651,6 +668,63 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
651 node, cpu, apicid); 668 node, cpu, apicid);
652} 669}
653 670
671static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
672{
673 int cpu;
674
675 cpu = smp_processor_id();
676 if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
677 return NMI_HANDLED;
678
679 return NMI_DONE;
680}
681
682/*
683 * Wake up AP by INIT, INIT, STARTUP sequence.
684 *
685 * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
686 * boot-strap code which is not a desired behavior for waking up BSP. To
687 * avoid the boot-strap code, wake up CPU0 by NMI instead.
688 *
689 * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
690 * (i.e. physically hot removed and then hot added), NMI won't wake it up.
691 * We'll change this code in the future to wake up hard offlined CPU0 if
692 * real platform and request are available.
693 */
694static int __cpuinit
695wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
696 int *cpu0_nmi_registered)
697{
698 int id;
699 int boot_error;
700
701 /*
702 * Wake up AP by INIT, INIT, STARTUP sequence.
703 */
704 if (cpu)
705 return wakeup_secondary_cpu_via_init(apicid, start_ip);
706
707 /*
708 * Wake up the BSP by NMI.
709 *
710 * Register an NMI handler to help wake up CPU0.
711 */
712 boot_error = register_nmi_handler(NMI_LOCAL,
713 wakeup_cpu0_nmi, 0, "wake_cpu0");
714
715 if (!boot_error) {
716 enable_start_cpu0 = 1;
717 *cpu0_nmi_registered = 1;
718 if (apic->dest_logical == APIC_DEST_LOGICAL)
719 id = cpu0_logical_apicid;
720 else
721 id = apicid;
722 boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
723 }
724
725 return boot_error;
726}
727
654/* 728/*
655 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 729 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
656 * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 730 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -666,6 +740,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
666 740
667 unsigned long boot_error = 0; 741 unsigned long boot_error = 0;
668 int timeout; 742 int timeout;
743 int cpu0_nmi_registered = 0;
669 744
670 /* Just in case we booted with a single CPU. */ 745 /* Just in case we booted with a single CPU. */
671 alternatives_enable_smp(); 746 alternatives_enable_smp();
@@ -713,13 +788,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
713 } 788 }
714 789
715 /* 790 /*
716 * Kick the secondary CPU. Use the method in the APIC driver 791 * Wake up a CPU in different cases:
717 * if it's defined - or use an INIT boot APIC message otherwise: 792 * - Use the method in the APIC driver if it's defined
793 * Otherwise,
794 * - Use an INIT boot APIC message for APs or NMI for BSP.
718 */ 795 */
719 if (apic->wakeup_secondary_cpu) 796 if (apic->wakeup_secondary_cpu)
720 boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); 797 boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
721 else 798 else
722 boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); 799 boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
800 &cpu0_nmi_registered);
723 801
724 if (!boot_error) { 802 if (!boot_error) {
725 /* 803 /*
@@ -784,6 +862,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
784 */ 862 */
785 smpboot_restore_warm_reset_vector(); 863 smpboot_restore_warm_reset_vector();
786 } 864 }
865 /*
866 * Clean up the NMI handler. Do this after the callin and callout sync
867 * to avoid the impact of a possibly long unregister time.
868 */
869 if (cpu0_nmi_registered)
870 unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
871
787 return boot_error; 872 return boot_error;
788} 873}
789 874
@@ -797,7 +882,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
797 882
798 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); 883 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
799 884
800 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || 885 if (apicid == BAD_APICID ||
801 !physid_isset(apicid, phys_cpu_present_map) || 886 !physid_isset(apicid, phys_cpu_present_map) ||
802 !apic->apic_id_valid(apicid)) { 887 !apic->apic_id_valid(apicid)) {
803 pr_err("%s: bad cpu %d\n", __func__, cpu); 888 pr_err("%s: bad cpu %d\n", __func__, cpu);
@@ -995,7 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
995 /* 1080 /*
996 * Setup boot CPU information 1081 * Setup boot CPU information
997 */ 1082 */
998 smp_store_cpu_info(0); /* Final full version of the data */ 1083 smp_store_boot_cpu_info(); /* Final full version of the data */
999 cpumask_copy(cpu_callin_mask, cpumask_of(0)); 1084 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1000 mb(); 1085 mb();
1001 1086
@@ -1031,6 +1116,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1031 */ 1116 */
1032 setup_local_APIC(); 1117 setup_local_APIC();
1033 1118
1119 if (x2apic_mode)
1120 cpu0_logical_apicid = apic_read(APIC_LDR);
1121 else
1122 cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
1123
1034 /* 1124 /*
1035 * Enable IO APIC before setting up error vector 1125 * Enable IO APIC before setting up error vector
1036 */ 1126 */
@@ -1219,19 +1309,6 @@ void cpu_disable_common(void)
1219 1309
1220int native_cpu_disable(void) 1310int native_cpu_disable(void)
1221{ 1311{
1222 int cpu = smp_processor_id();
1223
1224 /*
1225 * Perhaps use cpufreq to drop frequency, but that could go
1226 * into generic code.
1227 *
1228 * We won't take down the boot processor on i386 due to some
1229 * interrupts only being able to be serviced by the BSP.
1230 * Especially so if we're not using an IOAPIC -zwane
1231 */
1232 if (cpu == 0)
1233 return -EBUSY;
1234
1235 clear_local_APIC(); 1312 clear_local_APIC();
1236 1313
1237 cpu_disable_common(); 1314 cpu_disable_common();
@@ -1271,6 +1348,14 @@ void play_dead_common(void)
1271 local_irq_disable(); 1348 local_irq_disable();
1272} 1349}
1273 1350
1351static bool wakeup_cpu0(void)
1352{
1353 if (smp_processor_id() == 0 && enable_start_cpu0)
1354 return true;
1355
1356 return false;
1357}
1358
1274/* 1359/*
1275 * We need to flush the caches before going to sleep, lest we have 1360 * We need to flush the caches before going to sleep, lest we have
1276 * dirty data in our caches when we come back up. 1361 * dirty data in our caches when we come back up.
@@ -1334,6 +1419,11 @@ static inline void mwait_play_dead(void)
1334 __monitor(mwait_ptr, 0, 0); 1419 __monitor(mwait_ptr, 0, 0);
1335 mb(); 1420 mb();
1336 __mwait(eax, 0); 1421 __mwait(eax, 0);
1422 /*
1423 * If NMI wants to wake up CPU0, start CPU0.
1424 */
1425 if (wakeup_cpu0())
1426 start_cpu0();
1337 } 1427 }
1338} 1428}
1339 1429
@@ -1344,6 +1434,11 @@ static inline void hlt_play_dead(void)
1344 1434
1345 while (1) { 1435 while (1) {
1346 native_halt(); 1436 native_halt();
1437 /*
1438 * If NMI wants to wake up CPU0, start CPU0.
1439 */
1440 if (wakeup_cpu0())
1441 start_cpu0();
1347 } 1442 }
1348} 1443}
1349 1444
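
The smpboot hunks above funnel every wakeup through one decision: APs still get the INIT, INIT, STARTUP sequence, while a soft-offlined CPU0 is kicked with an NMI whose handler only claims the event when CPU0 is offline and a wakeup was actually requested. A condensed, hedged model of that claim logic (stand-alone C; the globals and helper names are stand-ins for the fields used in the hunks):

#include <stdbool.h>
#include <stdio.h>

enum nmi_result { NMI_DONE, NMI_HANDLED };

static bool cpu0_online;          /* models cpu_online(0)        */
static bool enable_start_cpu0;    /* set just before sending NMI */

/* Mirrors wakeup_cpu0_nmi(): claim the NMI only if it is really the
 * wakeup we asked for; otherwise let other handlers inspect it. */
static enum nmi_result wakeup_cpu0_nmi_model(int cpu)
{
	if (cpu == 0 && !cpu0_online && enable_start_cpu0)
		return NMI_HANDLED;
	return NMI_DONE;
}

/* Mirrors wakeup_cpu_via_init_nmi(): APs use INIT/SIPI, CPU0 uses NMI. */
static const char *pick_wakeup_method(int cpu)
{
	return cpu ? "INIT, INIT, STARTUP" : "NMI";
}

int main(void)
{
	enable_start_cpu0 = true;
	printf("cpu1: %s\n", pick_wakeup_method(1));
	printf("cpu0: %s, NMI claimed: %d\n", pick_wakeup_method(0),
	       wakeup_cpu0_nmi_model(0) == NMI_HANDLED);
	return 0;
}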
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index b4d3c3927dd8..97ef74b88e0f 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -21,37 +21,23 @@
21 21
22/* 22/*
23 * Align a virtual address to avoid aliasing in the I$ on AMD F15h. 23 * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
24 *
25 * @flags denotes the allocation direction - bottomup or topdown -
26 * or vDSO; see call sites below.
27 */ 24 */
28unsigned long align_addr(unsigned long addr, struct file *filp, 25static unsigned long get_align_mask(void)
29 enum align_flags flags)
30{ 26{
31 unsigned long tmp_addr;
32
33 /* handle 32- and 64-bit case with a single conditional */ 27 /* handle 32- and 64-bit case with a single conditional */
34 if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) 28 if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
35 return addr; 29 return 0;
36 30
37 if (!(current->flags & PF_RANDOMIZE)) 31 if (!(current->flags & PF_RANDOMIZE))
38 return addr; 32 return 0;
39
40 if (!((flags & ALIGN_VDSO) || filp))
41 return addr;
42
43 tmp_addr = addr;
44
45 /*
46 * We need an address which is <= than the original
47 * one only when in topdown direction.
48 */
49 if (!(flags & ALIGN_TOPDOWN))
50 tmp_addr += va_align.mask;
51 33
52 tmp_addr &= ~va_align.mask; 34 return va_align.mask;
35}
53 36
54 return tmp_addr; 37unsigned long align_vdso_addr(unsigned long addr)
38{
39 unsigned long align_mask = get_align_mask();
40 return (addr + align_mask) & ~align_mask;
55} 41}
56 42
57static int __init control_va_addr_alignment(char *str) 43static int __init control_va_addr_alignment(char *str)
@@ -126,7 +112,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
126{ 112{
127 struct mm_struct *mm = current->mm; 113 struct mm_struct *mm = current->mm;
128 struct vm_area_struct *vma; 114 struct vm_area_struct *vma;
129 unsigned long start_addr; 115 struct vm_unmapped_area_info info;
130 unsigned long begin, end; 116 unsigned long begin, end;
131 117
132 if (flags & MAP_FIXED) 118 if (flags & MAP_FIXED)
@@ -144,50 +130,16 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
144 (!vma || addr + len <= vma->vm_start)) 130 (!vma || addr + len <= vma->vm_start))
145 return addr; 131 return addr;
146 } 132 }
147 if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32))
148 && len <= mm->cached_hole_size) {
149 mm->cached_hole_size = 0;
150 mm->free_area_cache = begin;
151 }
152 addr = mm->free_area_cache;
153 if (addr < begin)
154 addr = begin;
155 start_addr = addr;
156
157full_search:
158
159 addr = align_addr(addr, filp, 0);
160
161 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
162 /* At this point: (!vma || addr < vma->vm_end). */
163 if (end - len < addr) {
164 /*
165 * Start a new search - just in case we missed
166 * some holes.
167 */
168 if (start_addr != begin) {
169 start_addr = addr = begin;
170 mm->cached_hole_size = 0;
171 goto full_search;
172 }
173 return -ENOMEM;
174 }
175 if (!vma || addr + len <= vma->vm_start) {
176 /*
177 * Remember the place where we stopped the search:
178 */
179 mm->free_area_cache = addr + len;
180 return addr;
181 }
182 if (addr + mm->cached_hole_size < vma->vm_start)
183 mm->cached_hole_size = vma->vm_start - addr;
184 133
185 addr = vma->vm_end; 134 info.flags = 0;
186 addr = align_addr(addr, filp, 0); 135 info.length = len;
187 } 136 info.low_limit = begin;
137 info.high_limit = end;
138 info.align_mask = filp ? get_align_mask() : 0;
139 info.align_offset = pgoff << PAGE_SHIFT;
140 return vm_unmapped_area(&info);
188} 141}
189 142
190
191unsigned long 143unsigned long
192arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, 144arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
193 const unsigned long len, const unsigned long pgoff, 145 const unsigned long len, const unsigned long pgoff,
@@ -195,7 +147,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
195{ 147{
196 struct vm_area_struct *vma; 148 struct vm_area_struct *vma;
197 struct mm_struct *mm = current->mm; 149 struct mm_struct *mm = current->mm;
198 unsigned long addr = addr0, start_addr; 150 unsigned long addr = addr0;
151 struct vm_unmapped_area_info info;
199 152
200 /* requested length too big for entire address space */ 153 /* requested length too big for entire address space */
201 if (len > TASK_SIZE) 154 if (len > TASK_SIZE)
@@ -217,51 +170,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
217 return addr; 170 return addr;
218 } 171 }
219 172
220 /* check if free_area_cache is useful for us */ 173 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
221 if (len <= mm->cached_hole_size) { 174 info.length = len;
222 mm->cached_hole_size = 0; 175 info.low_limit = PAGE_SIZE;
223 mm->free_area_cache = mm->mmap_base; 176 info.high_limit = mm->mmap_base;
224 } 177 info.align_mask = filp ? get_align_mask() : 0;
225 178 info.align_offset = pgoff << PAGE_SHIFT;
226try_again: 179 addr = vm_unmapped_area(&info);
227 /* either no address requested or can't fit in requested address hole */ 180 if (!(addr & ~PAGE_MASK))
228 start_addr = addr = mm->free_area_cache; 181 return addr;
229 182 VM_BUG_ON(addr != -ENOMEM);
230 if (addr < len)
231 goto fail;
232
233 addr -= len;
234 do {
235 addr = align_addr(addr, filp, ALIGN_TOPDOWN);
236
237 /*
238 * Lookup failure means no vma is above this address,
239 * else if new region fits below vma->vm_start,
240 * return with success:
241 */
242 vma = find_vma(mm, addr);
243 if (!vma || addr+len <= vma->vm_start)
244 /* remember the address as a hint for next time */
245 return mm->free_area_cache = addr;
246
247 /* remember the largest hole we saw so far */
248 if (addr + mm->cached_hole_size < vma->vm_start)
249 mm->cached_hole_size = vma->vm_start - addr;
250
251 /* try just below the current vma->vm_start */
252 addr = vma->vm_start-len;
253 } while (len < vma->vm_start);
254
255fail:
256 /*
257 * if hint left us with no space for the requested
258 * mapping then try again:
259 */
260 if (start_addr != mm->mmap_base) {
261 mm->free_area_cache = mm->mmap_base;
262 mm->cached_hole_size = 0;
263 goto try_again;
264 }
265 183
266bottomup: 184bottomup:
267 /* 185 /*
@@ -270,14 +188,5 @@ bottomup:
270 * can happen with large stack limits and large mmap() 188 * can happen with large stack limits and large mmap()
271 * allocations. 189 * allocations.
272 */ 190 */
273 mm->cached_hole_size = ~0UL; 191 return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
274 mm->free_area_cache = TASK_UNMAPPED_BASE;
275 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
276 /*
277 * Restore the topdown base:
278 */
279 mm->free_area_cache = mm->mmap_base;
280 mm->cached_hole_size = ~0UL;
281
282 return addr;
283} 192}
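
The sys_x86_64.c hunks above reduce align_addr() to get_align_mask(), which is handed to vm_unmapped_area() via info.align_mask, while align_vdso_addr() keeps the classic round-up idiom. A small worked example of that arithmetic (the 2 MiB-style mask is purely illustrative):

#include <stdio.h>

/* Round addr up to the next boundary implied by align_mask
 * (mask = alignment - 1): the same (addr + mask) & ~mask idiom
 * align_vdso_addr() uses above. */
static unsigned long align_up(unsigned long addr, unsigned long align_mask)
{
	return (addr + align_mask) & ~align_mask;
}

int main(void)
{
	unsigned long mask = 0x1fffff;              /* 2 MiB - 1, illustrative */

	/* 0x7f0000123456 rounds up to the next 2 MiB boundary, 0x7f0000200000 */
	printf("%#lx -> %#lx\n", 0x7f0000123456UL,
	       align_up(0x7f0000123456UL, mask));
	return 0;
}

Passing the same mask in info.align_mask lets vm_unmapped_area() perform this rounding itself while it walks the free gaps, which is why the open-coded search loops could be deleted.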
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 76ee97709a00..6e60b5fe2244 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -30,23 +30,110 @@
30#include <linux/mmzone.h> 30#include <linux/mmzone.h>
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/smp.h> 32#include <linux/smp.h>
33#include <linux/irq.h>
33#include <asm/cpu.h> 34#include <asm/cpu.h>
34 35
35static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); 36static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
36 37
37#ifdef CONFIG_HOTPLUG_CPU 38#ifdef CONFIG_HOTPLUG_CPU
39
40#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0
41static int cpu0_hotpluggable = 1;
42#else
43static int cpu0_hotpluggable;
44static int __init enable_cpu0_hotplug(char *str)
45{
46 cpu0_hotpluggable = 1;
47 return 1;
48}
49
50__setup("cpu0_hotplug", enable_cpu0_hotplug);
51#endif
52
53#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
54/*
55 * This function offlines a CPU as early as possible and allows userspace to
56 * boot up without the CPU. The CPU can be onlined back by the user after boot.
57 *
58 * This is only called for debugging the CPU offline/online feature.
59 */
60int __ref _debug_hotplug_cpu(int cpu, int action)
61{
62 struct device *dev = get_cpu_device(cpu);
63 int ret;
64
65 if (!cpu_is_hotpluggable(cpu))
66 return -EINVAL;
67
68 cpu_hotplug_driver_lock();
69
70 switch (action) {
71 case 0:
72 ret = cpu_down(cpu);
73 if (!ret) {
74 pr_info("CPU %u is now offline\n", cpu);
75 kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
76 } else
77 pr_debug("Can't offline CPU%d.\n", cpu);
78 break;
79 case 1:
80 ret = cpu_up(cpu);
81 if (!ret)
82 kobject_uevent(&dev->kobj, KOBJ_ONLINE);
83 else
84 pr_debug("Can't online CPU%d.\n", cpu);
85 break;
86 default:
87 ret = -EINVAL;
88 }
89
90 cpu_hotplug_driver_unlock();
91
92 return ret;
93}
94
95static int __init debug_hotplug_cpu(void)
96{
97 _debug_hotplug_cpu(0, 0);
98 return 0;
99}
100
101late_initcall_sync(debug_hotplug_cpu);
102#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */
103
38int __ref arch_register_cpu(int num) 104int __ref arch_register_cpu(int num)
39{ 105{
106 struct cpuinfo_x86 *c = &cpu_data(num);
107
108 /*
109 * Currently CPU0 is only hotpluggable on Intel platforms. Other
110 * vendors can add hotplug support later.
111 */
112 if (c->x86_vendor != X86_VENDOR_INTEL)
113 cpu0_hotpluggable = 0;
114
40 /* 115 /*
41 * CPU0 cannot be offlined due to several 116 * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate
42 * restrictions and assumptions in kernel. This basically 117 * depends on BSP. PIC interrupts depend on BSP.
43 * doesn't add a control file, one cannot attempt to offline
44 * BSP.
45 * 118 *
46 * Also certain PCI quirks require not to enable hotplug control 119 * If the BSP dependencies are under control, one can tell the kernel to
47 * for all CPU's. 120 * enable BSP hotplug. This basically adds a control file and
121 * one can attempt to offline BSP.
48 */ 122 */
49 if (num) 123 if (num == 0 && cpu0_hotpluggable) {
124 unsigned int irq;
125 /*
126 * We won't take down the boot processor on i386 if some
127 * interrupts can only be serviced by the BSP in PIC mode.
128 */
129 for_each_active_irq(irq) {
130 if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) {
131 cpu0_hotpluggable = 0;
132 break;
133 }
134 }
135 }
136 if (num || cpu0_hotpluggable)
50 per_cpu(cpu_devices, num).cpu.hotpluggable = 1; 137 per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
51 138
52 return register_cpu(&per_cpu(cpu_devices, num).cpu, num); 139 return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c
new file mode 100644
index 000000000000..25b993729f9b
--- /dev/null
+++ b/arch/x86/kernel/trace_clock.c
@@ -0,0 +1,21 @@
1/*
2 * X86 trace clocks
3 */
4#include <asm/trace_clock.h>
5#include <asm/barrier.h>
6#include <asm/msr.h>
7
8/*
9 * trace_clock_x86_tsc(): A clock that is just the cycle counter.
10 *
11 * Unlike the other clocks, this is not in nanoseconds.
12 */
13u64 notrace trace_clock_x86_tsc(void)
14{
15 u64 ret;
16
17 rdtsc_barrier();
18 rdtscll(ret);
19
20 return ret;
21}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8276dc6794cc..eb8586693e0b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,7 +55,7 @@
55#include <asm/i387.h> 55#include <asm/i387.h>
56#include <asm/fpu-internal.h> 56#include <asm/fpu-internal.h>
57#include <asm/mce.h> 57#include <asm/mce.h>
58#include <asm/rcu.h> 58#include <asm/context_tracking.h>
59 59
60#include <asm/mach_traps.h> 60#include <asm/mach_traps.h>
61 61
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cfa5d4f7ca56..06ccb5073a3f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -77,6 +77,12 @@ unsigned long long
77sched_clock(void) __attribute__((alias("native_sched_clock"))); 77sched_clock(void) __attribute__((alias("native_sched_clock")));
78#endif 78#endif
79 79
80unsigned long long native_read_tsc(void)
81{
82 return __native_read_tsc();
83}
84EXPORT_SYMBOL(native_read_tsc);
85
80int check_tsc_unstable(void) 86int check_tsc_unstable(void)
81{ 87{
82 return tsc_unstable; 88 return tsc_unstable;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index aafa5557b396..c71025b67462 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -478,6 +478,11 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
478 regs->ip = current->utask->xol_vaddr; 478 regs->ip = current->utask->xol_vaddr;
479 pre_xol_rip_insn(auprobe, regs, autask); 479 pre_xol_rip_insn(auprobe, regs, autask);
480 480
481 autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
482 regs->flags |= X86_EFLAGS_TF;
483 if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
484 set_task_blockstep(current, false);
485
481 return 0; 486 return 0;
482} 487}
483 488
@@ -603,6 +608,16 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
603 if (auprobe->fixups & UPROBE_FIX_CALL) 608 if (auprobe->fixups & UPROBE_FIX_CALL)
604 result = adjust_ret_addr(regs->sp, correction); 609 result = adjust_ret_addr(regs->sp, correction);
605 610
611 /*
612 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
613 * so we can get an extra SIGTRAP if we do not clear TF. We need
614 * to examine the opcode to make it right.
615 */
616 if (utask->autask.saved_tf)
617 send_sig(SIGTRAP, current, 0);
618 else if (!(auprobe->fixups & UPROBE_FIX_SETF))
619 regs->flags &= ~X86_EFLAGS_TF;
620
606 return result; 621 return result;
607} 622}
608 623
@@ -647,6 +662,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
647 current->thread.trap_nr = utask->autask.saved_trap_nr; 662 current->thread.trap_nr = utask->autask.saved_trap_nr;
648 handle_riprel_post_xol(auprobe, regs, NULL); 663 handle_riprel_post_xol(auprobe, regs, NULL);
649 instruction_pointer_set(regs, utask->vaddr); 664 instruction_pointer_set(regs, utask->vaddr);
665
666 /* clear TF if it was set by us in arch_uprobe_pre_xol() */
667 if (!utask->autask.saved_tf)
668 regs->flags &= ~X86_EFLAGS_TF;
650} 669}
651 670
652/* 671/*
@@ -676,38 +695,3 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
676 send_sig(SIGTRAP, current, 0); 695 send_sig(SIGTRAP, current, 0);
677 return ret; 696 return ret;
678} 697}
679
680void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
681{
682 struct task_struct *task = current;
683 struct arch_uprobe_task *autask = &task->utask->autask;
684 struct pt_regs *regs = task_pt_regs(task);
685
686 autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
687
688 regs->flags |= X86_EFLAGS_TF;
689 if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
690 set_task_blockstep(task, false);
691}
692
693void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
694{
695 struct task_struct *task = current;
696 struct arch_uprobe_task *autask = &task->utask->autask;
697 bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
698 struct pt_regs *regs = task_pt_regs(task);
699 /*
700 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
701 * SIGTRAP if we do not clear TF. We need to examine the opcode to
702 * make it right.
703 */
704 if (unlikely(trapped)) {
705 if (!autask->saved_tf)
706 regs->flags &= ~X86_EFLAGS_TF;
707 } else {
708 if (autask->saved_tf)
709 send_sig(SIGTRAP, task, 0);
710 else if (!(auprobe->fixups & UPROBE_FIX_SETF))
711 regs->flags &= ~X86_EFLAGS_TF;
712 }
713}
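
The uprobes hunks above fold the old enable/disable_step helpers into the pre/post/abort_xol paths: remember whether TF was already set, force it on for the out-of-line single step, and afterwards either deliver the SIGTRAP the task was already expecting or clear the TF we set. A hedged sketch of that save/restore discipline (stand-alone model; struct model_task and these helpers are not the kernel API, and the UPROBE_FIX_SETF case is omitted):

#include <stdbool.h>
#include <stdio.h>

#define X86_EFLAGS_TF 0x100UL

struct model_task {
	unsigned long flags;     /* models regs->flags      */
	bool saved_tf;           /* models autask->saved_tf */
};

/* pre_xol: remember the caller's TF, then force single-stepping on. */
static void pre_xol(struct model_task *t)
{
	t->saved_tf = !!(t->flags & X86_EFLAGS_TF);
	t->flags |= X86_EFLAGS_TF;
}

/* post_xol: if the task was already single-stepping, report the trap it
 * expects; otherwise clear the TF we set ourselves. */
static void post_xol(struct model_task *t)
{
	if (t->saved_tf)
		puts("deliver SIGTRAP to the debugger");
	else
		t->flags &= ~X86_EFLAGS_TF;
}

int main(void)
{
	struct model_task t = { .flags = 0 };
	pre_xol(&t);
	post_xol(&t);
	printf("TF after xol: %lu\n", t.flags & X86_EFLAGS_TF);
	return 0;
}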
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5c9687b1bde6..1dfe69cc78a8 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -182,7 +182,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
182 if (pud_none_or_clear_bad(pud)) 182 if (pud_none_or_clear_bad(pud))
183 goto out; 183 goto out;
184 pmd = pmd_offset(pud, 0xA0000); 184 pmd = pmd_offset(pud, 0xA0000);
185 split_huge_page_pmd(mm, pmd); 185 split_huge_page_pmd_mm(mm, 0xA0000, pmd);
186 if (pmd_none_or_clear_bad(pmd)) 186 if (pmd_none_or_clear_bad(pmd))
187 goto out; 187 goto out;
188 pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); 188 pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3a3e8c9e280d..9a907a67be8f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
145 return nr; 145 return nr;
146} 146}
147 147
148#ifdef CONFIG_SECCOMP
149static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
150{
151 if (!seccomp_mode(&tsk->seccomp))
152 return 0;
153 task_pt_regs(tsk)->orig_ax = syscall_nr;
154 task_pt_regs(tsk)->ax = syscall_nr;
155 return __secure_computing(syscall_nr);
156}
157#else
158#define vsyscall_seccomp(_tsk, _nr) 0
159#endif
160
161static bool write_ok_or_segv(unsigned long ptr, size_t size) 148static bool write_ok_or_segv(unsigned long ptr, size_t size)
162{ 149{
163 /* 150 /*
@@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
190{ 177{
191 struct task_struct *tsk; 178 struct task_struct *tsk;
192 unsigned long caller; 179 unsigned long caller;
193 int vsyscall_nr; 180 int vsyscall_nr, syscall_nr, tmp;
194 int prev_sig_on_uaccess_error; 181 int prev_sig_on_uaccess_error;
195 long ret; 182 long ret;
196 int skip;
197 183
198 /* 184 /*
199 * No point in checking CS -- the only way to get here is a user mode 185 * No point in checking CS -- the only way to get here is a user mode
@@ -225,56 +211,84 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
225 } 211 }
226 212
227 tsk = current; 213 tsk = current;
228 /*
229 * With a real vsyscall, page faults cause SIGSEGV. We want to
230 * preserve that behavior to make writing exploits harder.
231 */
232 prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
233 current_thread_info()->sig_on_uaccess_error = 1;
234 214
235 /* 215 /*
216 * Check for access_ok violations and find the syscall nr.
217 *
236 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and 218 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
237 * 64-bit, so we don't need to special-case it here. For all the 219 * 64-bit, so we don't need to special-case it here. For all the
238 * vsyscalls, NULL means "don't write anything" not "write it at 220 * vsyscalls, NULL means "don't write anything" not "write it at
239 * address 0". 221 * address 0".
240 */ 222 */
241 ret = -EFAULT;
242 skip = 0;
243 switch (vsyscall_nr) { 223 switch (vsyscall_nr) {
244 case 0: 224 case 0:
245 skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
246 if (skip)
247 break;
248
249 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || 225 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
250 !write_ok_or_segv(regs->si, sizeof(struct timezone))) 226 !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
251 break; 227 ret = -EFAULT;
228 goto check_fault;
229 }
230
231 syscall_nr = __NR_gettimeofday;
232 break;
233
234 case 1:
235 if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
236 ret = -EFAULT;
237 goto check_fault;
238 }
239
240 syscall_nr = __NR_time;
241 break;
242
243 case 2:
244 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
245 !write_ok_or_segv(regs->si, sizeof(unsigned))) {
246 ret = -EFAULT;
247 goto check_fault;
248 }
249
250 syscall_nr = __NR_getcpu;
251 break;
252 }
253
254 /*
255 * Handle seccomp. regs->ip must be the original value.
256 * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
257 *
258 * We could optimize the seccomp disabled case, but performance
259 * here doesn't matter.
260 */
261 regs->orig_ax = syscall_nr;
262 regs->ax = -ENOSYS;
263 tmp = secure_computing(syscall_nr);
264 if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
265 warn_bad_vsyscall(KERN_DEBUG, regs,
266 "seccomp tried to change syscall nr or ip");
267 do_exit(SIGSYS);
268 }
269 if (tmp)
270 goto do_ret; /* skip requested */
252 271
272 /*
273 * With a real vsyscall, page faults cause SIGSEGV. We want to
274 * preserve that behavior to make writing exploits harder.
275 */
276 prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
277 current_thread_info()->sig_on_uaccess_error = 1;
278
279 ret = -EFAULT;
280 switch (vsyscall_nr) {
281 case 0:
253 ret = sys_gettimeofday( 282 ret = sys_gettimeofday(
254 (struct timeval __user *)regs->di, 283 (struct timeval __user *)regs->di,
255 (struct timezone __user *)regs->si); 284 (struct timezone __user *)regs->si);
256 break; 285 break;
257 286
258 case 1: 287 case 1:
259 skip = vsyscall_seccomp(tsk, __NR_time);
260 if (skip)
261 break;
262
263 if (!write_ok_or_segv(regs->di, sizeof(time_t)))
264 break;
265
266 ret = sys_time((time_t __user *)regs->di); 288 ret = sys_time((time_t __user *)regs->di);
267 break; 289 break;
268 290
269 case 2: 291 case 2:
270 skip = vsyscall_seccomp(tsk, __NR_getcpu);
271 if (skip)
272 break;
273
274 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
275 !write_ok_or_segv(regs->si, sizeof(unsigned)))
276 break;
277
278 ret = sys_getcpu((unsigned __user *)regs->di, 292 ret = sys_getcpu((unsigned __user *)regs->di,
279 (unsigned __user *)regs->si, 293 (unsigned __user *)regs->si,
280 NULL); 294 NULL);
@@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
283 297
284 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; 298 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
285 299
286 if (skip) { 300check_fault:
287 if ((long)regs->ax <= 0L) /* seccomp errno emulation */
288 goto do_ret;
289 goto done; /* seccomp trace/trap */
290 }
291
292 if (ret == -EFAULT) { 301 if (ret == -EFAULT) {
293 /* Bad news -- userspace fed a bad pointer to a vsyscall. */ 302 /* Bad news -- userspace fed a bad pointer to a vsyscall. */
294 warn_bad_vsyscall(KERN_INFO, regs, 303 warn_bad_vsyscall(KERN_INFO, regs,
@@ -311,7 +320,6 @@ do_ret:
311 /* Emulate a ret instruction. */ 320 /* Emulate a ret instruction. */
312 regs->ip = caller; 321 regs->ip = caller;
313 regs->sp += 8; 322 regs->sp += 8;
314done:
315 return true; 323 return true;
316 324
317sigsegv: 325sigsegv:
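
The restructured emulate_vsyscall() above resolves the syscall number and checks the user pointers first, then lets seccomp see the call with the original ip, and only afterwards performs the emulation; if a filter rewrites the number or the ip, the task is killed instead of being silently obeyed. A compressed model of that ordering (hedged sketch; struct model_regs, seccomp_check() and exit(1) are stand-ins for pt_regs, secure_computing() and do_exit(SIGSYS)):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct model_regs { long orig_ax, ax; unsigned long ip; };

/* Stand-in for secure_computing(): 0 = allow, nonzero = skip the call. */
static int seccomp_check(struct model_regs *regs, long nr)
{
	(void)regs; (void)nr;
	return 0;
}

static void emulate_one(struct model_regs *regs, long syscall_nr,
			unsigned long fault_addr)
{
	regs->orig_ax = syscall_nr;
	regs->ax = -38;                      /* -ENOSYS until emulated */

	int verdict = seccomp_check(regs, syscall_nr);
	if ((!verdict && regs->orig_ax != syscall_nr) || regs->ip != fault_addr) {
		puts("seccomp changed nr or ip: refuse and kill the task");
		exit(1);
	}
	if (verdict) {
		puts("seccomp asked to skip: just emulate the return");
		return;
	}
	puts("perform the real emulation (gettimeofday/time/getcpu)");
}

int main(void)
{
	struct model_regs regs = { .ip = 0xffffffffff600000UL };
	emulate_one(&regs, 96 /* __NR_gettimeofday, illustrative */, regs.ip);
	return 0;
}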
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ec79e773342e..a20ecb5b6cbf 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -320,6 +320,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
320 if (index == 0) { 320 if (index == 0) {
321 entry->ebx &= kvm_supported_word9_x86_features; 321 entry->ebx &= kvm_supported_word9_x86_features;
322 cpuid_mask(&entry->ebx, 9); 322 cpuid_mask(&entry->ebx, 9);
323 // TSC_ADJUST is emulated
324 entry->ebx |= F(TSC_ADJUST);
323 } else 325 } else
324 entry->ebx = 0; 326 entry->ebx = 0;
325 entry->eax = 0; 327 entry->eax = 0;
@@ -659,6 +661,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
659 } else 661 } else
660 *eax = *ebx = *ecx = *edx = 0; 662 *eax = *ebx = *ecx = *edx = 0;
661} 663}
664EXPORT_SYMBOL_GPL(kvm_cpuid);
662 665
663void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 666void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
664{ 667{
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 58fc51488828..b7fd07984888 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -31,6 +31,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
31 return best && (best->ecx & bit(X86_FEATURE_XSAVE)); 31 return best && (best->ecx & bit(X86_FEATURE_XSAVE));
32} 32}
33 33
34static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
35{
36 struct kvm_cpuid_entry2 *best;
37
38 best = kvm_find_cpuid_entry(vcpu, 7, 0);
39 return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
40}
41
34static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) 42static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
35{ 43{
36 struct kvm_cpuid_entry2 *best; 44 struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index bba39bfa1c4b..a27e76371108 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -676,8 +676,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
676 addr.seg); 676 addr.seg);
677 if (!usable) 677 if (!usable)
678 goto bad; 678 goto bad;
679 /* code segment or read-only data segment */ 679 /* code segment in protected mode or read-only data segment */
680 if (((desc.type & 8) || !(desc.type & 2)) && write) 680 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
681 || !(desc.type & 2)) && write)
681 goto bad; 682 goto bad;
682 /* unreadable code segment */ 683 /* unreadable code segment */
683 if (!fetch && (desc.type & 8) && !(desc.type & 2)) 684 if (!fetch && (desc.type & 8) && !(desc.type & 2))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 43e9fadca5d0..9392f527f107 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1011,7 +1011,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
1011 local_irq_save(flags); 1011 local_irq_save(flags);
1012 1012
1013 now = apic->lapic_timer.timer.base->get_time(); 1013 now = apic->lapic_timer.timer.base->get_time();
1014 guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); 1014 guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
1015 if (likely(tscdeadline > guest_tsc)) { 1015 if (likely(tscdeadline > guest_tsc)) {
1016 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1016 ns = (tscdeadline - guest_tsc) * 1000000ULL;
1017 do_div(ns, this_tsc_khz); 1017 do_div(ns, this_tsc_khz);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6f85fe0bf958..01d7c2ad05f5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2382,12 +2382,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2382 || (!vcpu->arch.mmu.direct_map && write_fault 2382 || (!vcpu->arch.mmu.direct_map && write_fault
2383 && !is_write_protection(vcpu) && !user_fault)) { 2383 && !is_write_protection(vcpu) && !user_fault)) {
2384 2384
2385 /*
2386 * There are two cases:
2387 * - the one is other vcpu creates new sp in the window
2388 * between mapping_level() and acquiring mmu-lock.
2389 * - the another case is the new sp is created by itself
2390 * (page-fault path) when guest uses the target gfn as
2391 * its page table.
2392 * Both of these cases can be fixed by allowing guest to
2393 * retry the access, it will refault, then we can establish
2394 * the mapping by using small page.
2395 */
2385 if (level > PT_PAGE_TABLE_LEVEL && 2396 if (level > PT_PAGE_TABLE_LEVEL &&
2386 has_wrprotected_page(vcpu->kvm, gfn, level)) { 2397 has_wrprotected_page(vcpu->kvm, gfn, level))
2387 ret = 1;
2388 drop_spte(vcpu->kvm, sptep);
2389 goto done; 2398 goto done;
2390 }
2391 2399
2392 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; 2400 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
2393 2401
@@ -2505,6 +2513,14 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
2505 mmu_free_roots(vcpu); 2513 mmu_free_roots(vcpu);
2506} 2514}
2507 2515
2516static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
2517{
2518 int bit7;
2519
2520 bit7 = (gpte >> 7) & 1;
2521 return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
2522}
2523
2508static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, 2524static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
2509 bool no_dirty_log) 2525 bool no_dirty_log)
2510{ 2526{
@@ -2517,6 +2533,26 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
2517 return gfn_to_pfn_memslot_atomic(slot, gfn); 2533 return gfn_to_pfn_memslot_atomic(slot, gfn);
2518} 2534}
2519 2535
2536static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu,
2537 struct kvm_mmu_page *sp, u64 *spte,
2538 u64 gpte)
2539{
2540 if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
2541 goto no_present;
2542
2543 if (!is_present_gpte(gpte))
2544 goto no_present;
2545
2546 if (!(gpte & PT_ACCESSED_MASK))
2547 goto no_present;
2548
2549 return false;
2550
2551no_present:
2552 drop_spte(vcpu->kvm, spte);
2553 return true;
2554}
2555
2520static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, 2556static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
2521 struct kvm_mmu_page *sp, 2557 struct kvm_mmu_page *sp,
2522 u64 *start, u64 *end) 2558 u64 *start, u64 *end)
@@ -2671,7 +2707,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
2671 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done 2707 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
2672 * here. 2708 * here.
2673 */ 2709 */
2674 if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) && 2710 if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
2675 level == PT_PAGE_TABLE_LEVEL && 2711 level == PT_PAGE_TABLE_LEVEL &&
2676 PageTransCompound(pfn_to_page(pfn)) && 2712 PageTransCompound(pfn_to_page(pfn)) &&
2677 !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) { 2713 !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
@@ -2699,18 +2735,13 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
2699 } 2735 }
2700} 2736}
2701 2737
2702static bool mmu_invalid_pfn(pfn_t pfn)
2703{
2704 return unlikely(is_invalid_pfn(pfn));
2705}
2706
2707static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, 2738static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
2708 pfn_t pfn, unsigned access, int *ret_val) 2739 pfn_t pfn, unsigned access, int *ret_val)
2709{ 2740{
2710 bool ret = true; 2741 bool ret = true;
2711 2742
2712 /* The pfn is invalid, report the error! */ 2743 /* The pfn is invalid, report the error! */
2713 if (unlikely(is_invalid_pfn(pfn))) { 2744 if (unlikely(is_error_pfn(pfn))) {
2714 *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn); 2745 *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn);
2715 goto exit; 2746 goto exit;
2716 } 2747 }
@@ -2862,7 +2893,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
2862 return r; 2893 return r;
2863 2894
2864 spin_lock(&vcpu->kvm->mmu_lock); 2895 spin_lock(&vcpu->kvm->mmu_lock);
2865 if (mmu_notifier_retry(vcpu, mmu_seq)) 2896 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
2866 goto out_unlock; 2897 goto out_unlock;
2867 kvm_mmu_free_some_pages(vcpu); 2898 kvm_mmu_free_some_pages(vcpu);
2868 if (likely(!force_pt_level)) 2899 if (likely(!force_pt_level))
@@ -3331,7 +3362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
3331 return r; 3362 return r;
3332 3363
3333 spin_lock(&vcpu->kvm->mmu_lock); 3364 spin_lock(&vcpu->kvm->mmu_lock);
3334 if (mmu_notifier_retry(vcpu, mmu_seq)) 3365 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
3335 goto out_unlock; 3366 goto out_unlock;
3336 kvm_mmu_free_some_pages(vcpu); 3367 kvm_mmu_free_some_pages(vcpu);
3337 if (likely(!force_pt_level)) 3368 if (likely(!force_pt_level))
@@ -3399,14 +3430,6 @@ static void paging_free(struct kvm_vcpu *vcpu)
3399 nonpaging_free(vcpu); 3430 nonpaging_free(vcpu);
3400} 3431}
3401 3432
3402static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
3403{
3404 int bit7;
3405
3406 bit7 = (gpte >> 7) & 1;
3407 return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
3408}
3409
3410static inline void protect_clean_gpte(unsigned *access, unsigned gpte) 3433static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
3411{ 3434{
3412 unsigned mask; 3435 unsigned mask;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 714e2c01a6fe..891eb6d93b8b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -305,51 +305,43 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
305 addr, access); 305 addr, access);
306} 306}
307 307
308static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, 308static bool
309 struct kvm_mmu_page *sp, u64 *spte, 309FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
310 pt_element_t gpte) 310 u64 *spte, pt_element_t gpte, bool no_dirty_log)
311{ 311{
312 if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
313 goto no_present;
314
315 if (!is_present_gpte(gpte))
316 goto no_present;
317
318 if (!(gpte & PT_ACCESSED_MASK))
319 goto no_present;
320
321 return false;
322
323no_present:
324 drop_spte(vcpu->kvm, spte);
325 return true;
326}
327
328static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
329 u64 *spte, const void *pte)
330{
331 pt_element_t gpte;
332 unsigned pte_access; 312 unsigned pte_access;
313 gfn_t gfn;
333 pfn_t pfn; 314 pfn_t pfn;
334 315
335 gpte = *(const pt_element_t *)pte; 316 if (prefetch_invalid_gpte(vcpu, sp, spte, gpte))
336 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) 317 return false;
337 return;
338 318
339 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); 319 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
320
321 gfn = gpte_to_gfn(gpte);
340 pte_access = sp->role.access & gpte_access(vcpu, gpte); 322 pte_access = sp->role.access & gpte_access(vcpu, gpte);
341 protect_clean_gpte(&pte_access, gpte); 323 protect_clean_gpte(&pte_access, gpte);
342 pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); 324 pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
343 if (mmu_invalid_pfn(pfn)) 325 no_dirty_log && (pte_access & ACC_WRITE_MASK));
344 return; 326 if (is_error_pfn(pfn))
327 return false;
345 328
346 /* 329 /*
347 * we call mmu_set_spte() with host_writable = true because that 330 * we call mmu_set_spte() with host_writable = true because
348 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). 331 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
349 */ 332 */
350 mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, 333 mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
351 NULL, PT_PAGE_TABLE_LEVEL, 334 NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
352 gpte_to_gfn(gpte), pfn, true, true); 335
336 return true;
337}
338
339static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
340 u64 *spte, const void *pte)
341{
342 pt_element_t gpte = *(const pt_element_t *)pte;
343
344 FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false);
353} 345}
354 346
355static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, 347static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
@@ -395,53 +387,34 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
395 spte = sp->spt + i; 387 spte = sp->spt + i;
396 388
397 for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { 389 for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
398 pt_element_t gpte;
399 unsigned pte_access;
400 gfn_t gfn;
401 pfn_t pfn;
402
403 if (spte == sptep) 390 if (spte == sptep)
404 continue; 391 continue;
405 392
406 if (is_shadow_present_pte(*spte)) 393 if (is_shadow_present_pte(*spte))
407 continue; 394 continue;
408 395
409 gpte = gptep[i]; 396 if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
410
411 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
412 continue;
413
414 pte_access = sp->role.access & gpte_access(vcpu, gpte);
415 protect_clean_gpte(&pte_access, gpte);
416 gfn = gpte_to_gfn(gpte);
417 pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
418 pte_access & ACC_WRITE_MASK);
419 if (mmu_invalid_pfn(pfn))
420 break; 397 break;
421
422 mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
423 NULL, PT_PAGE_TABLE_LEVEL, gfn,
424 pfn, true, true);
425 } 398 }
426} 399}
427 400
428/* 401/*
429 * Fetch a shadow pte for a specific level in the paging hierarchy. 402 * Fetch a shadow pte for a specific level in the paging hierarchy.
403 * If the guest tries to write a write-protected page, we need to
 404 * emulate the operation and return 1 to indicate this case.
430 */ 405 */
431static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 406static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
432 struct guest_walker *gw, 407 struct guest_walker *gw,
433 int user_fault, int write_fault, int hlevel, 408 int user_fault, int write_fault, int hlevel,
434 int *emulate, pfn_t pfn, bool map_writable, 409 pfn_t pfn, bool map_writable, bool prefault)
435 bool prefault)
436{ 410{
437 unsigned access = gw->pt_access;
438 struct kvm_mmu_page *sp = NULL; 411 struct kvm_mmu_page *sp = NULL;
439 int top_level;
440 unsigned direct_access;
441 struct kvm_shadow_walk_iterator it; 412 struct kvm_shadow_walk_iterator it;
413 unsigned direct_access, access = gw->pt_access;
414 int top_level, emulate = 0;
442 415
443 if (!is_present_gpte(gw->ptes[gw->level - 1])) 416 if (!is_present_gpte(gw->ptes[gw->level - 1]))
444 return NULL; 417 return 0;
445 418
446 direct_access = gw->pte_access; 419 direct_access = gw->pte_access;
447 420
@@ -505,17 +478,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
505 478
506 clear_sp_write_flooding_count(it.sptep); 479 clear_sp_write_flooding_count(it.sptep);
507 mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, 480 mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
508 user_fault, write_fault, emulate, it.level, 481 user_fault, write_fault, &emulate, it.level,
509 gw->gfn, pfn, prefault, map_writable); 482 gw->gfn, pfn, prefault, map_writable);
510 FNAME(pte_prefetch)(vcpu, gw, it.sptep); 483 FNAME(pte_prefetch)(vcpu, gw, it.sptep);
511 484
512 return it.sptep; 485 return emulate;
513 486
514out_gpte_changed: 487out_gpte_changed:
515 if (sp) 488 if (sp)
516 kvm_mmu_put_page(sp, it.sptep); 489 kvm_mmu_put_page(sp, it.sptep);
517 kvm_release_pfn_clean(pfn); 490 kvm_release_pfn_clean(pfn);
518 return NULL; 491 return 0;
519} 492}
520 493
521/* 494/*
@@ -538,8 +511,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
538 int write_fault = error_code & PFERR_WRITE_MASK; 511 int write_fault = error_code & PFERR_WRITE_MASK;
539 int user_fault = error_code & PFERR_USER_MASK; 512 int user_fault = error_code & PFERR_USER_MASK;
540 struct guest_walker walker; 513 struct guest_walker walker;
541 u64 *sptep;
542 int emulate = 0;
543 int r; 514 int r;
544 pfn_t pfn; 515 pfn_t pfn;
545 int level = PT_PAGE_TABLE_LEVEL; 516 int level = PT_PAGE_TABLE_LEVEL;
@@ -594,24 +565,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
594 return r; 565 return r;
595 566
596 spin_lock(&vcpu->kvm->mmu_lock); 567 spin_lock(&vcpu->kvm->mmu_lock);
597 if (mmu_notifier_retry(vcpu, mmu_seq)) 568 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
598 goto out_unlock; 569 goto out_unlock;
599 570
600 kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); 571 kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
601 kvm_mmu_free_some_pages(vcpu); 572 kvm_mmu_free_some_pages(vcpu);
602 if (!force_pt_level) 573 if (!force_pt_level)
603 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); 574 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
604 sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 575 r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
605 level, &emulate, pfn, map_writable, prefault); 576 level, pfn, map_writable, prefault);
606 (void)sptep;
607 pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
608 sptep, *sptep, emulate);
609
610 ++vcpu->stat.pf_fixed; 577 ++vcpu->stat.pf_fixed;
611 kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); 578 kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
612 spin_unlock(&vcpu->kvm->mmu_lock); 579 spin_unlock(&vcpu->kvm->mmu_lock);
613 580
614 return emulate; 581 return r;
615 582
616out_unlock: 583out_unlock:
617 spin_unlock(&vcpu->kvm->mmu_lock); 584 spin_unlock(&vcpu->kvm->mmu_lock);
@@ -757,7 +724,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
757 sizeof(pt_element_t))) 724 sizeof(pt_element_t)))
758 return -EINVAL; 725 return -EINVAL;
759 726
760 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { 727 if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) {
761 vcpu->kvm->tlbs_dirty++; 728 vcpu->kvm->tlbs_dirty++;
762 continue; 729 continue;
763 } 730 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d017df3899ef..d29d3cd1c156 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -20,6 +20,7 @@
20#include "mmu.h" 20#include "mmu.h"
21#include "kvm_cache_regs.h" 21#include "kvm_cache_regs.h"
22#include "x86.h" 22#include "x86.h"
23#include "cpuid.h"
23 24
24#include <linux/module.h> 25#include <linux/module.h>
25#include <linux/mod_devicetable.h> 26#include <linux/mod_devicetable.h>
@@ -630,15 +631,12 @@ static int svm_hardware_enable(void *garbage)
630 return -EBUSY; 631 return -EBUSY;
631 632
632 if (!has_svm()) { 633 if (!has_svm()) {
633 printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", 634 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
634 me);
635 return -EINVAL; 635 return -EINVAL;
636 } 636 }
637 sd = per_cpu(svm_data, me); 637 sd = per_cpu(svm_data, me);
638
639 if (!sd) { 638 if (!sd) {
640 printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", 639 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
641 me);
642 return -EINVAL; 640 return -EINVAL;
643 } 641 }
644 642
@@ -1012,6 +1010,13 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1012 svm->tsc_ratio = ratio; 1010 svm->tsc_ratio = ratio;
1013} 1011}
1014 1012
1013static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
1014{
1015 struct vcpu_svm *svm = to_svm(vcpu);
1016
1017 return svm->vmcb->control.tsc_offset;
1018}
1019
1015static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) 1020static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1016{ 1021{
1017 struct vcpu_svm *svm = to_svm(vcpu); 1022 struct vcpu_svm *svm = to_svm(vcpu);
@@ -1189,6 +1194,8 @@ static void init_vmcb(struct vcpu_svm *svm)
1189static int svm_vcpu_reset(struct kvm_vcpu *vcpu) 1194static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
1190{ 1195{
1191 struct vcpu_svm *svm = to_svm(vcpu); 1196 struct vcpu_svm *svm = to_svm(vcpu);
1197 u32 dummy;
1198 u32 eax = 1;
1192 1199
1193 init_vmcb(svm); 1200 init_vmcb(svm);
1194 1201
@@ -1197,8 +1204,9 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
1197 svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; 1204 svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
1198 svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; 1205 svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
1199 } 1206 }
1200 vcpu->arch.regs_avail = ~0; 1207
1201 vcpu->arch.regs_dirty = ~0; 1208 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
1209 kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
1202 1210
1203 return 0; 1211 return 0;
1204} 1212}
@@ -1254,11 +1262,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1254 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 1262 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1255 svm->asid_generation = 0; 1263 svm->asid_generation = 0;
1256 init_vmcb(svm); 1264 init_vmcb(svm);
1257 kvm_write_tsc(&svm->vcpu, 0);
1258
1259 err = fx_init(&svm->vcpu);
1260 if (err)
1261 goto free_page4;
1262 1265
1263 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 1266 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1264 if (kvm_vcpu_is_bsp(&svm->vcpu)) 1267 if (kvm_vcpu_is_bsp(&svm->vcpu))
@@ -1268,8 +1271,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1268 1271
1269 return &svm->vcpu; 1272 return &svm->vcpu;
1270 1273
1271free_page4:
1272 __free_page(hsave_page);
1273free_page3: 1274free_page3:
1274 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); 1275 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1275free_page2: 1276free_page2:
@@ -3008,11 +3009,11 @@ static int cr8_write_interception(struct vcpu_svm *svm)
3008 return 0; 3009 return 0;
3009} 3010}
3010 3011
3011u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu) 3012u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
3012{ 3013{
3013 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); 3014 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
3014 return vmcb->control.tsc_offset + 3015 return vmcb->control.tsc_offset +
3015 svm_scale_tsc(vcpu, native_read_tsc()); 3016 svm_scale_tsc(vcpu, host_tsc);
3016} 3017}
3017 3018
3018static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) 3019static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
@@ -3131,13 +3132,15 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
3131 return 0; 3132 return 0;
3132} 3133}
3133 3134
3134static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) 3135static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
3135{ 3136{
3136 struct vcpu_svm *svm = to_svm(vcpu); 3137 struct vcpu_svm *svm = to_svm(vcpu);
3137 3138
3139 u32 ecx = msr->index;
3140 u64 data = msr->data;
3138 switch (ecx) { 3141 switch (ecx) {
3139 case MSR_IA32_TSC: 3142 case MSR_IA32_TSC:
3140 kvm_write_tsc(vcpu, data); 3143 kvm_write_tsc(vcpu, msr);
3141 break; 3144 break;
3142 case MSR_STAR: 3145 case MSR_STAR:
3143 svm->vmcb->save.star = data; 3146 svm->vmcb->save.star = data;
@@ -3192,20 +3195,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
3192 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 3195 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3193 break; 3196 break;
3194 default: 3197 default:
3195 return kvm_set_msr_common(vcpu, ecx, data); 3198 return kvm_set_msr_common(vcpu, msr);
3196 } 3199 }
3197 return 0; 3200 return 0;
3198} 3201}
3199 3202
3200static int wrmsr_interception(struct vcpu_svm *svm) 3203static int wrmsr_interception(struct vcpu_svm *svm)
3201{ 3204{
3205 struct msr_data msr;
3202 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 3206 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3203 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) 3207 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
3204 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); 3208 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3205 3209
3210 msr.data = data;
3211 msr.index = ecx;
3212 msr.host_initiated = false;
3206 3213
3207 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3214 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3208 if (svm_set_msr(&svm->vcpu, ecx, data)) { 3215 if (svm_set_msr(&svm->vcpu, &msr)) {
3209 trace_kvm_msr_write_ex(ecx, data); 3216 trace_kvm_msr_write_ex(ecx, data);
3210 kvm_inject_gp(&svm->vcpu, 0); 3217 kvm_inject_gp(&svm->vcpu, 0);
3211 } else { 3218 } else {
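The wrmsr exit path above now packages the register contents into a struct msr_data before calling the vendor set_msr hook, so common code can tell guest-initiated writes apart from host-initiated (ioctl) ones. A minimal sketch of the pattern, assuming the msr_data layout this series introduces (host_initiated, index, data); the handler name is illustrative:

	/* Sketch only: how an exit handler packages a guest WRMSR. */
	struct msr_data {
		bool host_initiated;	/* false for guest traps, true for ioctl paths */
		u32 index;		/* MSR number, from guest RCX */
		u64 data;		/* value, from guest RDX:RAX */
	};

	static int example_wrmsr_exit(struct kvm_vcpu *vcpu)
	{
		struct msr_data msr = {
			.index		= kvm_register_read(vcpu, VCPU_REGS_RCX),
			.data		= (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u) |
					  ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32),
			.host_initiated	= false,
		};

		/* non-zero means the write was rejected and a #GP should be injected */
		return kvm_set_msr(vcpu, &msr);
	}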
@@ -4302,6 +4309,7 @@ static struct kvm_x86_ops svm_x86_ops = {
4302 .has_wbinvd_exit = svm_has_wbinvd_exit, 4309 .has_wbinvd_exit = svm_has_wbinvd_exit,
4303 4310
4304 .set_tsc_khz = svm_set_tsc_khz, 4311 .set_tsc_khz = svm_set_tsc_khz,
4312 .read_tsc_offset = svm_read_tsc_offset,
4305 .write_tsc_offset = svm_write_tsc_offset, 4313 .write_tsc_offset = svm_write_tsc_offset,
4306 .adjust_tsc_offset = svm_adjust_tsc_offset, 4314 .adjust_tsc_offset = svm_adjust_tsc_offset,
4307 .compute_tsc_offset = svm_compute_tsc_offset, 4315 .compute_tsc_offset = svm_compute_tsc_offset,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index bca63f04dccb..fe5e00ed7036 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -4,6 +4,7 @@
4#include <linux/tracepoint.h> 4#include <linux/tracepoint.h>
5#include <asm/vmx.h> 5#include <asm/vmx.h>
6#include <asm/svm.h> 6#include <asm/svm.h>
7#include <asm/clocksource.h>
7 8
8#undef TRACE_SYSTEM 9#undef TRACE_SYSTEM
9#define TRACE_SYSTEM kvm 10#define TRACE_SYSTEM kvm
@@ -754,6 +755,68 @@ TRACE_EVENT(
754 __entry->write ? "Write" : "Read", 755 __entry->write ? "Write" : "Read",
755 __entry->gpa_match ? "GPA" : "GVA") 756 __entry->gpa_match ? "GPA" : "GVA")
756); 757);
758
759#ifdef CONFIG_X86_64
760
761#define host_clocks \
762 {VCLOCK_NONE, "none"}, \
763 {VCLOCK_TSC, "tsc"}, \
764 {VCLOCK_HPET, "hpet"} \
765
766TRACE_EVENT(kvm_update_master_clock,
767 TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
768 TP_ARGS(use_master_clock, host_clock, offset_matched),
769
770 TP_STRUCT__entry(
771 __field( bool, use_master_clock )
772 __field( unsigned int, host_clock )
773 __field( bool, offset_matched )
774 ),
775
776 TP_fast_assign(
777 __entry->use_master_clock = use_master_clock;
778 __entry->host_clock = host_clock;
779 __entry->offset_matched = offset_matched;
780 ),
781
782 TP_printk("masterclock %d hostclock %s offsetmatched %u",
783 __entry->use_master_clock,
784 __print_symbolic(__entry->host_clock, host_clocks),
785 __entry->offset_matched)
786);
787
788TRACE_EVENT(kvm_track_tsc,
789 TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched,
790 unsigned int online_vcpus, bool use_master_clock,
791 unsigned int host_clock),
792 TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock,
793 host_clock),
794
795 TP_STRUCT__entry(
796 __field( unsigned int, vcpu_id )
797 __field( unsigned int, nr_vcpus_matched_tsc )
798 __field( unsigned int, online_vcpus )
799 __field( bool, use_master_clock )
800 __field( unsigned int, host_clock )
801 ),
802
803 TP_fast_assign(
804 __entry->vcpu_id = vcpu_id;
805 __entry->nr_vcpus_matched_tsc = nr_matched;
806 __entry->online_vcpus = online_vcpus;
807 __entry->use_master_clock = use_master_clock;
808 __entry->host_clock = host_clock;
809 ),
810
811 TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u"
812 " hostclock %s",
813 __entry->vcpu_id, __entry->use_master_clock,
814 __entry->nr_vcpus_matched_tsc, __entry->online_vcpus,
815 __print_symbolic(__entry->host_clock, host_clocks))
816);
817
818#endif /* CONFIG_X86_64 */
819
757#endif /* _TRACE_KVM_H */ 820#endif /* _TRACE_KVM_H */
758 821
759#undef TRACE_INCLUDE_PATH 822#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f85815945fc6..9120ae1901e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -42,6 +42,7 @@
42#include <asm/i387.h> 42#include <asm/i387.h>
43#include <asm/xcr.h> 43#include <asm/xcr.h>
44#include <asm/perf_event.h> 44#include <asm/perf_event.h>
45#include <asm/kexec.h>
45 46
46#include "trace.h" 47#include "trace.h"
47 48
@@ -802,11 +803,6 @@ static inline bool cpu_has_vmx_ept_ad_bits(void)
802 return vmx_capability.ept & VMX_EPT_AD_BIT; 803 return vmx_capability.ept & VMX_EPT_AD_BIT;
803} 804}
804 805
805static inline bool cpu_has_vmx_invept_individual_addr(void)
806{
807 return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
808}
809
810static inline bool cpu_has_vmx_invept_context(void) 806static inline bool cpu_has_vmx_invept_context(void)
811{ 807{
812 return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; 808 return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
@@ -992,6 +988,46 @@ static void vmcs_load(struct vmcs *vmcs)
992 vmcs, phys_addr); 988 vmcs, phys_addr);
993} 989}
994 990
991#ifdef CONFIG_KEXEC
992/*
 993 * This bitmap indicates whether the vmclear operation is
 994 * enabled on each cpu. All cpus are disabled by
 995 * default.
996 */
997static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
998
999static inline void crash_enable_local_vmclear(int cpu)
1000{
1001 cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
1002}
1003
1004static inline void crash_disable_local_vmclear(int cpu)
1005{
1006 cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
1007}
1008
1009static inline int crash_local_vmclear_enabled(int cpu)
1010{
1011 return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
1012}
1013
1014static void crash_vmclear_local_loaded_vmcss(void)
1015{
1016 int cpu = raw_smp_processor_id();
1017 struct loaded_vmcs *v;
1018
1019 if (!crash_local_vmclear_enabled(cpu))
1020 return;
1021
1022 list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
1023 loaded_vmcss_on_cpu_link)
1024 vmcs_clear(v->vmcs);
1025}
1026#else
1027static inline void crash_enable_local_vmclear(int cpu) { }
1028static inline void crash_disable_local_vmclear(int cpu) { }
1029#endif /* CONFIG_KEXEC */
1030
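The enable bitmap above exists so that a crashing kernel can VMCLEAR whatever VMCSs are still loaded on the panicking cpu before jumping into the kdump kernel; a VMCS left loaded could otherwise be flushed back by the processor over memory the capture kernel is using. Later in this diff, vmx_init() publishes crash_vmclear_local_loaded_vmcss through an RCU-protected pointer; a rough sketch of the expected crash-side consumer (the consumer function name is illustrative, the pointer is declared in asm/kexec.h by this series):

	extern void (*crash_vmclear_loaded_vmcss)(void);	/* set by vmx_init() */

	static void example_crash_vmclear(void)
	{
		void (*do_vmclear)(void);

		rcu_read_lock();
		do_vmclear = rcu_dereference(crash_vmclear_loaded_vmcss);
		if (do_vmclear)
			do_vmclear();	/* only acts on cpus that enabled the bit */
		rcu_read_unlock();
	}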
995static void __loaded_vmcs_clear(void *arg) 1031static void __loaded_vmcs_clear(void *arg)
996{ 1032{
997 struct loaded_vmcs *loaded_vmcs = arg; 1033 struct loaded_vmcs *loaded_vmcs = arg;
@@ -1001,15 +1037,28 @@ static void __loaded_vmcs_clear(void *arg)
1001 return; /* vcpu migration can race with cpu offline */ 1037 return; /* vcpu migration can race with cpu offline */
1002 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) 1038 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
1003 per_cpu(current_vmcs, cpu) = NULL; 1039 per_cpu(current_vmcs, cpu) = NULL;
1040 crash_disable_local_vmclear(cpu);
1004 list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); 1041 list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
1042
1043 /*
 1044 * Ensure that the update of loaded_vmcs->loaded_vmcss_on_cpu_link
 1045 * is visible before loaded_vmcs->cpu is set to -1, which is done in
 1046 * loaded_vmcs_init(). Otherwise another cpu could see cpu == -1 first
 1047 * and add the vmcs to its percpu list before it has been deleted here.
1048 */
1049 smp_wmb();
1050
1005 loaded_vmcs_init(loaded_vmcs); 1051 loaded_vmcs_init(loaded_vmcs);
1052 crash_enable_local_vmclear(cpu);
1006} 1053}
1007 1054
1008static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) 1055static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
1009{ 1056{
1010 if (loaded_vmcs->cpu != -1) 1057 int cpu = loaded_vmcs->cpu;
1011 smp_call_function_single( 1058
1012 loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1); 1059 if (cpu != -1)
1060 smp_call_function_single(cpu,
1061 __loaded_vmcs_clear, loaded_vmcs, 1);
1013} 1062}
1014 1063
1015static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) 1064static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
@@ -1051,17 +1100,6 @@ static inline void ept_sync_context(u64 eptp)
1051 } 1100 }
1052} 1101}
1053 1102
1054static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
1055{
1056 if (enable_ept) {
1057 if (cpu_has_vmx_invept_individual_addr())
1058 __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
1059 eptp, gpa);
1060 else
1061 ept_sync_context(eptp);
1062 }
1063}
1064
1065static __always_inline unsigned long vmcs_readl(unsigned long field) 1103static __always_inline unsigned long vmcs_readl(unsigned long field)
1066{ 1104{
1067 unsigned long value; 1105 unsigned long value;
@@ -1535,8 +1573,18 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1535 1573
1536 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 1574 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1537 local_irq_disable(); 1575 local_irq_disable();
1576 crash_disable_local_vmclear(cpu);
1577
1578 /*
 1579 * The read of loaded_vmcs->cpu must happen before fetching
 1580 * loaded_vmcs->loaded_vmcss_on_cpu_link.
1581 * See the comments in __loaded_vmcs_clear().
1582 */
1583 smp_rmb();
1584
1538 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, 1585 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
1539 &per_cpu(loaded_vmcss_on_cpu, cpu)); 1586 &per_cpu(loaded_vmcss_on_cpu, cpu));
1587 crash_enable_local_vmclear(cpu);
1540 local_irq_enable(); 1588 local_irq_enable();
1541 1589
1542 /* 1590 /*
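The smp_rmb() added to vmx_vcpu_load() pairs with the smp_wmb() inserted in __loaded_vmcs_clear() above. Reduced to the two fields involved, the ordering requirement looks roughly like this (a sketch, not the literal kernel code):

	/*
	 * remote cpu (__loaded_vmcs_clear)            local cpu (vmx_vcpu_load)
	 *
	 *   list_del(&v->loaded_vmcss_on_cpu_link);     cpu = v->cpu;
	 *   smp_wmb();                                  smp_rmb();
	 *   v->cpu = -1;  (loaded_vmcs_init)            list_add(&v->...link, this cpu's list);
	 *
	 * Without the barriers, the loading cpu could observe cpu == -1 (and so
	 * skip the clearing IPI) while the list_del() is not yet visible,
	 * leaving the same vmcs linked on two cpus' loaded_vmcss_on_cpu lists.
	 */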
@@ -1839,11 +1887,10 @@ static u64 guest_read_tsc(void)
1839 * Like guest_read_tsc, but always returns L1's notion of the timestamp 1887 * Like guest_read_tsc, but always returns L1's notion of the timestamp
1840 * counter, even if a nested guest (L2) is currently running. 1888 * counter, even if a nested guest (L2) is currently running.
1841 */ 1889 */
1842u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu) 1890u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
1843{ 1891{
1844 u64 host_tsc, tsc_offset; 1892 u64 tsc_offset;
1845 1893
1846 rdtscll(host_tsc);
1847 tsc_offset = is_guest_mode(vcpu) ? 1894 tsc_offset = is_guest_mode(vcpu) ?
1848 to_vmx(vcpu)->nested.vmcs01_tsc_offset : 1895 to_vmx(vcpu)->nested.vmcs01_tsc_offset :
1849 vmcs_read64(TSC_OFFSET); 1896 vmcs_read64(TSC_OFFSET);
@@ -1866,6 +1913,11 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1866 WARN(1, "user requested TSC rate below hardware speed\n"); 1913 WARN(1, "user requested TSC rate below hardware speed\n");
1867} 1914}
1868 1915
1916static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
1917{
1918 return vmcs_read64(TSC_OFFSET);
1919}
1920
1869/* 1921/*
1870 * writes 'offset' into guest's timestamp counter offset register 1922 * writes 'offset' into guest's timestamp counter offset register
1871 */ 1923 */
@@ -2202,15 +2254,17 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2202 * Returns 0 on success, non-0 otherwise. 2254 * Returns 0 on success, non-0 otherwise.
2203 * Assumes vcpu_load() was already called. 2255 * Assumes vcpu_load() was already called.
2204 */ 2256 */
2205static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 2257static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2206{ 2258{
2207 struct vcpu_vmx *vmx = to_vmx(vcpu); 2259 struct vcpu_vmx *vmx = to_vmx(vcpu);
2208 struct shared_msr_entry *msr; 2260 struct shared_msr_entry *msr;
2209 int ret = 0; 2261 int ret = 0;
2262 u32 msr_index = msr_info->index;
2263 u64 data = msr_info->data;
2210 2264
2211 switch (msr_index) { 2265 switch (msr_index) {
2212 case MSR_EFER: 2266 case MSR_EFER:
2213 ret = kvm_set_msr_common(vcpu, msr_index, data); 2267 ret = kvm_set_msr_common(vcpu, msr_info);
2214 break; 2268 break;
2215#ifdef CONFIG_X86_64 2269#ifdef CONFIG_X86_64
2216 case MSR_FS_BASE: 2270 case MSR_FS_BASE:
@@ -2236,7 +2290,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
2236 vmcs_writel(GUEST_SYSENTER_ESP, data); 2290 vmcs_writel(GUEST_SYSENTER_ESP, data);
2237 break; 2291 break;
2238 case MSR_IA32_TSC: 2292 case MSR_IA32_TSC:
2239 kvm_write_tsc(vcpu, data); 2293 kvm_write_tsc(vcpu, msr_info);
2240 break; 2294 break;
2241 case MSR_IA32_CR_PAT: 2295 case MSR_IA32_CR_PAT:
2242 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { 2296 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -2244,7 +2298,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
2244 vcpu->arch.pat = data; 2298 vcpu->arch.pat = data;
2245 break; 2299 break;
2246 } 2300 }
2247 ret = kvm_set_msr_common(vcpu, msr_index, data); 2301 ret = kvm_set_msr_common(vcpu, msr_info);
2302 break;
2303 case MSR_IA32_TSC_ADJUST:
2304 ret = kvm_set_msr_common(vcpu, msr_info);
2248 break; 2305 break;
2249 case MSR_TSC_AUX: 2306 case MSR_TSC_AUX:
2250 if (!vmx->rdtscp_enabled) 2307 if (!vmx->rdtscp_enabled)
@@ -2267,7 +2324,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
2267 } 2324 }
2268 break; 2325 break;
2269 } 2326 }
2270 ret = kvm_set_msr_common(vcpu, msr_index, data); 2327 ret = kvm_set_msr_common(vcpu, msr_info);
2271 } 2328 }
2272 2329
2273 return ret; 2330 return ret;
@@ -2341,6 +2398,18 @@ static int hardware_enable(void *garbage)
2341 return -EBUSY; 2398 return -EBUSY;
2342 2399
2343 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); 2400 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
2401
2402 /*
2403 * Now we can enable the vmclear operation in kdump
2404 * since the loaded_vmcss_on_cpu list on this cpu
2405 * has been initialized.
2406 *
 2407 * Though the cpu is not in VMX operation yet, it is
 2408 * safe to enable the vmclear operation here because the
 2409 * loaded_vmcss_on_cpu list is still empty.
2410 */
2411 crash_enable_local_vmclear(cpu);
2412
2344 rdmsrl(MSR_IA32_FEATURE_CONTROL, old); 2413 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
2345 2414
2346 test_bits = FEATURE_CONTROL_LOCKED; 2415 test_bits = FEATURE_CONTROL_LOCKED;
@@ -2697,6 +2766,7 @@ static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment
2697 if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { 2766 if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
2698 tmp.base = vmcs_readl(sf->base); 2767 tmp.base = vmcs_readl(sf->base);
2699 tmp.selector = vmcs_read16(sf->selector); 2768 tmp.selector = vmcs_read16(sf->selector);
2769 tmp.dpl = tmp.selector & SELECTOR_RPL_MASK;
2700 tmp.s = 1; 2770 tmp.s = 1;
2701 } 2771 }
2702 vmx_set_segment(vcpu, &tmp, seg); 2772 vmx_set_segment(vcpu, &tmp, seg);
@@ -3246,7 +3316,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 3246 * unrestricted guest like Westmere to older hosts that don't have 3316 * unrestricted guest like Westmere to older hosts that don't have
 3247 * unrestricted guest like Nehalem. 3317 * unrestricted guest like Nehalem.
3248 */ 3318 */
3249 if (!enable_unrestricted_guest && vmx->rmode.vm86_active) { 3319 if (vmx->rmode.vm86_active) {
3250 switch (seg) { 3320 switch (seg) {
3251 case VCPU_SREG_CS: 3321 case VCPU_SREG_CS:
3252 vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); 3322 vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
@@ -3897,8 +3967,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
3897 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 3967 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
3898 set_cr4_guest_host_mask(vmx); 3968 set_cr4_guest_host_mask(vmx);
3899 3969
3900 kvm_write_tsc(&vmx->vcpu, 0);
3901
3902 return 0; 3970 return 0;
3903} 3971}
3904 3972
@@ -3908,8 +3976,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
3908 u64 msr; 3976 u64 msr;
3909 int ret; 3977 int ret;
3910 3978
3911 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
3912
3913 vmx->rmode.vm86_active = 0; 3979 vmx->rmode.vm86_active = 0;
3914 3980
3915 vmx->soft_vnmi_blocked = 0; 3981 vmx->soft_vnmi_blocked = 0;
@@ -3921,10 +3987,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
3921 msr |= MSR_IA32_APICBASE_BSP; 3987 msr |= MSR_IA32_APICBASE_BSP;
3922 kvm_set_apic_base(&vmx->vcpu, msr); 3988 kvm_set_apic_base(&vmx->vcpu, msr);
3923 3989
3924 ret = fx_init(&vmx->vcpu);
3925 if (ret != 0)
3926 goto out;
3927
3928 vmx_segment_cache_clear(vmx); 3990 vmx_segment_cache_clear(vmx);
3929 3991
3930 seg_setup(VCPU_SREG_CS); 3992 seg_setup(VCPU_SREG_CS);
@@ -3965,7 +4027,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
3965 kvm_rip_write(vcpu, 0xfff0); 4027 kvm_rip_write(vcpu, 0xfff0);
3966 else 4028 else
3967 kvm_rip_write(vcpu, 0); 4029 kvm_rip_write(vcpu, 0);
3968 kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
3969 4030
3970 vmcs_writel(GUEST_GDTR_BASE, 0); 4031 vmcs_writel(GUEST_GDTR_BASE, 0);
3971 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); 4032 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
@@ -4015,7 +4076,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4015 /* HACK: Don't enable emulation on guest boot/reset */ 4076 /* HACK: Don't enable emulation on guest boot/reset */
4016 vmx->emulation_required = 0; 4077 vmx->emulation_required = 0;
4017 4078
4018out:
4019 return ret; 4079 return ret;
4020} 4080}
4021 4081
@@ -4287,16 +4347,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4287 if (is_machine_check(intr_info)) 4347 if (is_machine_check(intr_info))
4288 return handle_machine_check(vcpu); 4348 return handle_machine_check(vcpu);
4289 4349
4290 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
4291 !is_page_fault(intr_info)) {
4292 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4293 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
4294 vcpu->run->internal.ndata = 2;
4295 vcpu->run->internal.data[0] = vect_info;
4296 vcpu->run->internal.data[1] = intr_info;
4297 return 0;
4298 }
4299
4300 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) 4350 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
4301 return 1; /* already handled by vmx_vcpu_run() */ 4351 return 1; /* already handled by vmx_vcpu_run() */
4302 4352
@@ -4315,6 +4365,22 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4315 error_code = 0; 4365 error_code = 0;
4316 if (intr_info & INTR_INFO_DELIVER_CODE_MASK) 4366 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
4317 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 4367 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
4368
4369 /*
 4370 * A #PF with PFEC.RSVD = 1 indicates that the guest is accessing
 4371 * MMIO; in that case it is better to report an internal error.
4372 * See the comments in vmx_handle_exit.
4373 */
4374 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
4375 !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
4376 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4377 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
4378 vcpu->run->internal.ndata = 2;
4379 vcpu->run->internal.data[0] = vect_info;
4380 vcpu->run->internal.data[1] = intr_info;
4381 return 0;
4382 }
4383
4318 if (is_page_fault(intr_info)) { 4384 if (is_page_fault(intr_info)) {
4319 /* EPT won't cause page fault directly */ 4385 /* EPT won't cause page fault directly */
4320 BUG_ON(enable_ept); 4386 BUG_ON(enable_ept);
@@ -4626,11 +4692,15 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
4626 4692
4627static int handle_wrmsr(struct kvm_vcpu *vcpu) 4693static int handle_wrmsr(struct kvm_vcpu *vcpu)
4628{ 4694{
4695 struct msr_data msr;
4629 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; 4696 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
4630 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) 4697 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
4631 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); 4698 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
4632 4699
4633 if (vmx_set_msr(vcpu, ecx, data) != 0) { 4700 msr.data = data;
4701 msr.index = ecx;
4702 msr.host_initiated = false;
4703 if (vmx_set_msr(vcpu, &msr) != 0) {
4634 trace_kvm_msr_write_ex(ecx, data); 4704 trace_kvm_msr_write_ex(ecx, data);
4635 kvm_inject_gp(vcpu, 0); 4705 kvm_inject_gp(vcpu, 0);
4636 return 1; 4706 return 1;
@@ -4827,11 +4897,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
4827 4897
4828 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 4898 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4829 4899
4830 if (exit_qualification & (1 << 6)) {
4831 printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
4832 return -EINVAL;
4833 }
4834
4835 gla_validity = (exit_qualification >> 7) & 0x3; 4900 gla_validity = (exit_qualification >> 7) & 0x3;
4836 if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) { 4901 if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
4837 printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); 4902 printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
@@ -5979,13 +6044,24 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
5979 return 0; 6044 return 0;
5980 } 6045 }
5981 6046
6047 /*
6048 * Note:
 6049 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it was caused by a
 6050 * delivery event, since that indicates the guest is accessing MMIO.
 6051 * The vm-exit could be triggered again after returning to the guest,
 6052 * which would cause an infinite loop.
6053 */
5982 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 6054 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
5983 (exit_reason != EXIT_REASON_EXCEPTION_NMI && 6055 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
5984 exit_reason != EXIT_REASON_EPT_VIOLATION && 6056 exit_reason != EXIT_REASON_EPT_VIOLATION &&
5985 exit_reason != EXIT_REASON_TASK_SWITCH)) 6057 exit_reason != EXIT_REASON_TASK_SWITCH)) {
5986 printk(KERN_WARNING "%s: unexpected, valid vectoring info " 6058 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5987 "(0x%x) and exit reason is 0x%x\n", 6059 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
5988 __func__, vectoring_info, exit_reason); 6060 vcpu->run->internal.ndata = 2;
6061 vcpu->run->internal.data[0] = vectoring_info;
6062 vcpu->run->internal.data[1] = exit_reason;
6063 return 0;
6064 }
5989 6065
5990 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && 6066 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
5991 !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( 6067 !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
@@ -7309,6 +7385,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
7309 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, 7385 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
7310 7386
7311 .set_tsc_khz = vmx_set_tsc_khz, 7387 .set_tsc_khz = vmx_set_tsc_khz,
7388 .read_tsc_offset = vmx_read_tsc_offset,
7312 .write_tsc_offset = vmx_write_tsc_offset, 7389 .write_tsc_offset = vmx_write_tsc_offset,
7313 .adjust_tsc_offset = vmx_adjust_tsc_offset, 7390 .adjust_tsc_offset = vmx_adjust_tsc_offset,
7314 .compute_tsc_offset = vmx_compute_tsc_offset, 7391 .compute_tsc_offset = vmx_compute_tsc_offset,
@@ -7367,6 +7444,11 @@ static int __init vmx_init(void)
7367 if (r) 7444 if (r)
7368 goto out3; 7445 goto out3;
7369 7446
7447#ifdef CONFIG_KEXEC
7448 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
7449 crash_vmclear_local_loaded_vmcss);
7450#endif
7451
7370 vmx_disable_intercept_for_msr(MSR_FS_BASE, false); 7452 vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
7371 vmx_disable_intercept_for_msr(MSR_GS_BASE, false); 7453 vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
7372 vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); 7454 vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -7404,6 +7486,11 @@ static void __exit vmx_exit(void)
7404 free_page((unsigned long)vmx_io_bitmap_b); 7486 free_page((unsigned long)vmx_io_bitmap_b);
7405 free_page((unsigned long)vmx_io_bitmap_a); 7487 free_page((unsigned long)vmx_io_bitmap_a);
7406 7488
7489#ifdef CONFIG_KEXEC
7490 rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
7491 synchronize_rcu();
7492#endif
7493
7407 kvm_exit(); 7494 kvm_exit();
7408} 7495}
7409 7496
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f7641756be2..76f54461f7cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -46,6 +46,8 @@
46#include <linux/uaccess.h> 46#include <linux/uaccess.h>
47#include <linux/hash.h> 47#include <linux/hash.h>
48#include <linux/pci.h> 48#include <linux/pci.h>
49#include <linux/timekeeper_internal.h>
50#include <linux/pvclock_gtod.h>
49#include <trace/events/kvm.h> 51#include <trace/events/kvm.h>
50 52
51#define CREATE_TRACE_POINTS 53#define CREATE_TRACE_POINTS
@@ -158,7 +160,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
158 160
159u64 __read_mostly host_xcr0; 161u64 __read_mostly host_xcr0;
160 162
161int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); 163static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
164
165static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
162 166
163static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) 167static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
164{ 168{
@@ -633,7 +637,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
633 } 637 }
634 638
635 if (is_long_mode(vcpu)) { 639 if (is_long_mode(vcpu)) {
636 if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) { 640 if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
637 if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS) 641 if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
638 return 1; 642 return 1;
639 } else 643 } else
@@ -827,6 +831,7 @@ static u32 msrs_to_save[] = {
827static unsigned num_msrs_to_save; 831static unsigned num_msrs_to_save;
828 832
829static const u32 emulated_msrs[] = { 833static const u32 emulated_msrs[] = {
834 MSR_IA32_TSC_ADJUST,
830 MSR_IA32_TSCDEADLINE, 835 MSR_IA32_TSCDEADLINE,
831 MSR_IA32_MISC_ENABLE, 836 MSR_IA32_MISC_ENABLE,
832 MSR_IA32_MCG_STATUS, 837 MSR_IA32_MCG_STATUS,
@@ -886,9 +891,9 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
886 * Returns 0 on success, non-0 otherwise. 891 * Returns 0 on success, non-0 otherwise.
887 * Assumes vcpu_load() was already called. 892 * Assumes vcpu_load() was already called.
888 */ 893 */
889int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 894int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
890{ 895{
891 return kvm_x86_ops->set_msr(vcpu, msr_index, data); 896 return kvm_x86_ops->set_msr(vcpu, msr);
892} 897}
893 898
894/* 899/*
@@ -896,9 +901,63 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
896 */ 901 */
897static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) 902static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
898{ 903{
899 return kvm_set_msr(vcpu, index, *data); 904 struct msr_data msr;
905
906 msr.data = *data;
907 msr.index = index;
908 msr.host_initiated = true;
909 return kvm_set_msr(vcpu, &msr);
900} 910}
901 911
912#ifdef CONFIG_X86_64
913struct pvclock_gtod_data {
914 seqcount_t seq;
915
916 struct { /* extract of a clocksource struct */
917 int vclock_mode;
918 cycle_t cycle_last;
919 cycle_t mask;
920 u32 mult;
921 u32 shift;
922 } clock;
923
924 /* open coded 'struct timespec' */
925 u64 monotonic_time_snsec;
926 time_t monotonic_time_sec;
927};
928
929static struct pvclock_gtod_data pvclock_gtod_data;
930
931static void update_pvclock_gtod(struct timekeeper *tk)
932{
933 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
934
935 write_seqcount_begin(&vdata->seq);
936
937 /* copy pvclock gtod data */
938 vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
939 vdata->clock.cycle_last = tk->clock->cycle_last;
940 vdata->clock.mask = tk->clock->mask;
941 vdata->clock.mult = tk->mult;
942 vdata->clock.shift = tk->shift;
943
944 vdata->monotonic_time_sec = tk->xtime_sec
945 + tk->wall_to_monotonic.tv_sec;
946 vdata->monotonic_time_snsec = tk->xtime_nsec
947 + (tk->wall_to_monotonic.tv_nsec
948 << tk->shift);
949 while (vdata->monotonic_time_snsec >=
950 (((u64)NSEC_PER_SEC) << tk->shift)) {
951 vdata->monotonic_time_snsec -=
952 ((u64)NSEC_PER_SEC) << tk->shift;
953 vdata->monotonic_time_sec++;
954 }
955
956 write_seqcount_end(&vdata->seq);
957}
958#endif
959
960
902static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) 961static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
903{ 962{
904 int version; 963 int version;
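update_pvclock_gtod() keeps a seqcount-protected copy of just the timekeeper fields KVM needs (vclock mode, cycle_last, mult/shift and the monotonic base). Elsewhere in this series it is driven from a pvclock_gtod notifier so the copy is refreshed on every timekeeping update; a sketch of that wiring, assuming the pvclock_gtod_register_notifier() interface from <linux/pvclock_gtod.h> (the callback and notifier names here are illustrative):

	static int example_gtod_notify(struct notifier_block *nb, unsigned long unused,
				       void *priv)
	{
		struct timekeeper *tk = priv;

		update_pvclock_gtod(tk);	/* refresh the snapshot under the seqcount */
		return 0;
	}

	static struct notifier_block example_gtod_nb = {
		.notifier_call = example_gtod_notify,
	};

	/* e.g. from kvm_arch_init():
	 *	pvclock_gtod_register_notifier(&example_gtod_nb);
	 */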
@@ -995,6 +1054,10 @@ static inline u64 get_kernel_ns(void)
995 return timespec_to_ns(&ts); 1054 return timespec_to_ns(&ts);
996} 1055}
997 1056
1057#ifdef CONFIG_X86_64
1058static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
1059#endif
1060
998static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); 1061static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
999unsigned long max_tsc_khz; 1062unsigned long max_tsc_khz;
1000 1063
@@ -1046,12 +1109,47 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1046 return tsc; 1109 return tsc;
1047} 1110}
1048 1111
1049void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) 1112void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1113{
1114#ifdef CONFIG_X86_64
1115 bool vcpus_matched;
1116 bool do_request = false;
1117 struct kvm_arch *ka = &vcpu->kvm->arch;
1118 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1119
1120 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1121 atomic_read(&vcpu->kvm->online_vcpus));
1122
1123 if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
1124 if (!ka->use_master_clock)
1125 do_request = 1;
1126
1127 if (!vcpus_matched && ka->use_master_clock)
1128 do_request = 1;
1129
1130 if (do_request)
1131 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1132
1133 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
1134 atomic_read(&vcpu->kvm->online_vcpus),
1135 ka->use_master_clock, gtod->clock.vclock_mode);
1136#endif
1137}
1138
1139static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
1140{
1141 u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
1142 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
1143}
1144
1145void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1050{ 1146{
1051 struct kvm *kvm = vcpu->kvm; 1147 struct kvm *kvm = vcpu->kvm;
1052 u64 offset, ns, elapsed; 1148 u64 offset, ns, elapsed;
1053 unsigned long flags; 1149 unsigned long flags;
1054 s64 usdiff; 1150 s64 usdiff;
1151 bool matched;
1152 u64 data = msr->data;
1055 1153
1056 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1154 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1057 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); 1155 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
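kvm_track_tsc_matching() only requests KVM_REQ_MASTERCLOCK_UPDATE when the master-clock decision could actually flip: either every vcpu now has a matching TSC write while the host clocksource is TSC (enable), or the writes stopped matching while the master clock is in use (disable). The decision it feeds, computed in pvclock_update_vm_gtod_copy() further down, boils down to:

	use_master_clock = (nr_vcpus_matched_tsc + 1 == online_vcpus) &&
			   (host vclock_mode == VCLOCK_TSC);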
@@ -1094,6 +1192,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1094 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); 1192 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1095 pr_debug("kvm: adjusted tsc offset by %llu\n", delta); 1193 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1096 } 1194 }
1195 matched = true;
1097 } else { 1196 } else {
1098 /* 1197 /*
1099 * We split periods of matched TSC writes into generations. 1198 * We split periods of matched TSC writes into generations.
@@ -1108,6 +1207,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1108 kvm->arch.cur_tsc_nsec = ns; 1207 kvm->arch.cur_tsc_nsec = ns;
1109 kvm->arch.cur_tsc_write = data; 1208 kvm->arch.cur_tsc_write = data;
1110 kvm->arch.cur_tsc_offset = offset; 1209 kvm->arch.cur_tsc_offset = offset;
1210 matched = false;
1111 pr_debug("kvm: new tsc generation %u, clock %llu\n", 1211 pr_debug("kvm: new tsc generation %u, clock %llu\n",
1112 kvm->arch.cur_tsc_generation, data); 1212 kvm->arch.cur_tsc_generation, data);
1113 } 1213 }
@@ -1129,26 +1229,195 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1129 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; 1229 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
1130 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; 1230 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
1131 1231
1232 if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
1233 update_ia32_tsc_adjust_msr(vcpu, offset);
1132 kvm_x86_ops->write_tsc_offset(vcpu, offset); 1234 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1133 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); 1235 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1236
1237 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
1238 if (matched)
1239 kvm->arch.nr_vcpus_matched_tsc++;
1240 else
1241 kvm->arch.nr_vcpus_matched_tsc = 0;
1242
1243 kvm_track_tsc_matching(vcpu);
1244 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
1134} 1245}
1135 1246
1136EXPORT_SYMBOL_GPL(kvm_write_tsc); 1247EXPORT_SYMBOL_GPL(kvm_write_tsc);
1137 1248
1249#ifdef CONFIG_X86_64
1250
1251static cycle_t read_tsc(void)
1252{
1253 cycle_t ret;
1254 u64 last;
1255
1256 /*
1257 * Empirically, a fence (of type that depends on the CPU)
1258 * before rdtsc is enough to ensure that rdtsc is ordered
1259 * with respect to loads. The various CPU manuals are unclear
1260 * as to whether rdtsc can be reordered with later loads,
1261 * but no one has ever seen it happen.
1262 */
1263 rdtsc_barrier();
1264 ret = (cycle_t)vget_cycles();
1265
1266 last = pvclock_gtod_data.clock.cycle_last;
1267
1268 if (likely(ret >= last))
1269 return ret;
1270
1271 /*
1272 * GCC likes to generate cmov here, but this branch is extremely
 1273 * predictable (it's just a function of time and the likely is
1274 * very likely) and there's a data dependence, so force GCC
1275 * to generate a branch instead. I don't barrier() because
1276 * we don't actually need a barrier, and if this function
1277 * ever gets inlined it will generate worse code.
1278 */
1279 asm volatile ("");
1280 return last;
1281}
1282
1283static inline u64 vgettsc(cycle_t *cycle_now)
1284{
1285 long v;
1286 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1287
1288 *cycle_now = read_tsc();
1289
1290 v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
1291 return v * gtod->clock.mult;
1292}
1293
1294static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
1295{
1296 unsigned long seq;
1297 u64 ns;
1298 int mode;
1299 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1300
1301 ts->tv_nsec = 0;
1302 do {
1303 seq = read_seqcount_begin(&gtod->seq);
1304 mode = gtod->clock.vclock_mode;
1305 ts->tv_sec = gtod->monotonic_time_sec;
1306 ns = gtod->monotonic_time_snsec;
1307 ns += vgettsc(cycle_now);
1308 ns >>= gtod->clock.shift;
1309 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
1310 timespec_add_ns(ts, ns);
1311
1312 return mode;
1313}
1314
1315/* returns true if host is using tsc clocksource */
1316static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
1317{
1318 struct timespec ts;
1319
1320 /* checked again under seqlock below */
1321 if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
1322 return false;
1323
1324 if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
1325 return false;
1326
1327 monotonic_to_bootbased(&ts);
1328 *kernel_ns = timespec_to_ns(&ts);
1329
1330 return true;
1331}
1332#endif
1333
1334/*
1335 *
 1336 * Assuming a stable TSC across physical CPUs, and a stable TSC
1337 * across virtual CPUs, the following condition is possible.
1338 * Each numbered line represents an event visible to both
1339 * CPUs at the next numbered event.
1340 *
1341 * "timespecX" represents host monotonic time. "tscX" represents
1342 * RDTSC value.
1343 *
1344 * VCPU0 on CPU0 | VCPU1 on CPU1
1345 *
1346 * 1. read timespec0,tsc0
1347 * 2. | timespec1 = timespec0 + N
1348 * | tsc1 = tsc0 + M
1349 * 3. transition to guest | transition to guest
1350 * 4. ret0 = timespec0 + (rdtsc - tsc0) |
1351 * 5. | ret1 = timespec1 + (rdtsc - tsc1)
1352 * | ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
1353 *
1354 * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
1355 *
1356 * - ret0 < ret1
1357 * - timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
1358 * ...
1359 * - 0 < N - M => M < N
1360 *
1361 * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
1362 * always the case (the difference between two distinct xtime instances
 1363 * might be smaller than the difference between corresponding TSC reads,
1364 * when updating guest vcpus pvclock areas).
1365 *
1366 * To avoid that problem, do not allow visibility of distinct
1367 * system_timestamp/tsc_timestamp values simultaneously: use a master
1368 * copy of host monotonic time values. Update that master copy
1369 * in lockstep.
1370 *
1371 * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
1372 *
1373 */
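A concrete instance of the hazard described above, with invented numbers and 1 TSC cycle = 1 ns for simplicity:

	timespec0 = 1000 ns, tsc0 = 5000
	timespec1 = 1010 ns (N = 10), tsc1 = 5015 (M = 15), i.e. M > N

	Both vcpus later sample rdtsc = 5100:
		ret0 = 1000 + (5100 - 5000) = 1100 ns
		ret1 = 1010 + (5100 - 5015) = 1095 ns  < ret0

A guest task migrating from VCPU0 to VCPU1 would see time jump backwards by 5 ns, which is exactly what the single master <kernel_ns, tsc> copy below prevents.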
1374
1375static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
1376{
1377#ifdef CONFIG_X86_64
1378 struct kvm_arch *ka = &kvm->arch;
1379 int vclock_mode;
1380 bool host_tsc_clocksource, vcpus_matched;
1381
1382 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1383 atomic_read(&kvm->online_vcpus));
1384
1385 /*
1386 * If the host uses TSC clock, then passthrough TSC as stable
1387 * to the guest.
1388 */
1389 host_tsc_clocksource = kvm_get_time_and_clockread(
1390 &ka->master_kernel_ns,
1391 &ka->master_cycle_now);
1392
1393 ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
1394
1395 if (ka->use_master_clock)
1396 atomic_set(&kvm_guest_has_master_clock, 1);
1397
1398 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
1399 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
1400 vcpus_matched);
1401#endif
1402}
1403
1138static int kvm_guest_time_update(struct kvm_vcpu *v) 1404static int kvm_guest_time_update(struct kvm_vcpu *v)
1139{ 1405{
1140 unsigned long flags; 1406 unsigned long flags, this_tsc_khz;
1141 struct kvm_vcpu_arch *vcpu = &v->arch; 1407 struct kvm_vcpu_arch *vcpu = &v->arch;
1408 struct kvm_arch *ka = &v->kvm->arch;
1142 void *shared_kaddr; 1409 void *shared_kaddr;
1143 unsigned long this_tsc_khz;
1144 s64 kernel_ns, max_kernel_ns; 1410 s64 kernel_ns, max_kernel_ns;
1145 u64 tsc_timestamp; 1411 u64 tsc_timestamp, host_tsc;
1412 struct pvclock_vcpu_time_info *guest_hv_clock;
1146 u8 pvclock_flags; 1413 u8 pvclock_flags;
1414 bool use_master_clock;
1415
1416 kernel_ns = 0;
1417 host_tsc = 0;
1147 1418
1148 /* Keep irq disabled to prevent changes to the clock */ 1419 /* Keep irq disabled to prevent changes to the clock */
1149 local_irq_save(flags); 1420 local_irq_save(flags);
1150 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
1151 kernel_ns = get_kernel_ns();
1152 this_tsc_khz = __get_cpu_var(cpu_tsc_khz); 1421 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1153 if (unlikely(this_tsc_khz == 0)) { 1422 if (unlikely(this_tsc_khz == 0)) {
1154 local_irq_restore(flags); 1423 local_irq_restore(flags);
@@ -1157,6 +1426,24 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1157 } 1426 }
1158 1427
1159 /* 1428 /*
1429 * If the host uses TSC clock, then passthrough TSC as stable
1430 * to the guest.
1431 */
1432 spin_lock(&ka->pvclock_gtod_sync_lock);
1433 use_master_clock = ka->use_master_clock;
1434 if (use_master_clock) {
1435 host_tsc = ka->master_cycle_now;
1436 kernel_ns = ka->master_kernel_ns;
1437 }
1438 spin_unlock(&ka->pvclock_gtod_sync_lock);
1439 if (!use_master_clock) {
1440 host_tsc = native_read_tsc();
1441 kernel_ns = get_kernel_ns();
1442 }
1443
1444 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
1445
1446 /*
1160 * We may have to catch up the TSC to match elapsed wall clock 1447 * We may have to catch up the TSC to match elapsed wall clock
1161 * time for two reasons, even if kvmclock is used. 1448 * time for two reasons, even if kvmclock is used.
1162 * 1) CPU could have been running below the maximum TSC rate 1449 * 1) CPU could have been running below the maximum TSC rate
@@ -1217,23 +1504,20 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1217 vcpu->hw_tsc_khz = this_tsc_khz; 1504 vcpu->hw_tsc_khz = this_tsc_khz;
1218 } 1505 }
1219 1506
1220 if (max_kernel_ns > kernel_ns) 1507 /* with a master <monotonic time, tsc value> tuple,
1221 kernel_ns = max_kernel_ns; 1508 * pvclock clock reads always increase at the (scaled) rate
1222 1509 * of guest TSC - no need to deal with sampling errors.
1510 */
1511 if (!use_master_clock) {
1512 if (max_kernel_ns > kernel_ns)
1513 kernel_ns = max_kernel_ns;
1514 }
1223 /* With all the info we got, fill in the values */ 1515 /* With all the info we got, fill in the values */
1224 vcpu->hv_clock.tsc_timestamp = tsc_timestamp; 1516 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1225 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; 1517 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1226 vcpu->last_kernel_ns = kernel_ns; 1518 vcpu->last_kernel_ns = kernel_ns;
1227 vcpu->last_guest_tsc = tsc_timestamp; 1519 vcpu->last_guest_tsc = tsc_timestamp;
1228 1520
1229 pvclock_flags = 0;
1230 if (vcpu->pvclock_set_guest_stopped_request) {
1231 pvclock_flags |= PVCLOCK_GUEST_STOPPED;
1232 vcpu->pvclock_set_guest_stopped_request = false;
1233 }
1234
1235 vcpu->hv_clock.flags = pvclock_flags;
1236
1237 /* 1521 /*
1238 * The interface expects us to write an even number signaling that the 1522 * The interface expects us to write an even number signaling that the
1239 * update is finished. Since the guest won't see the intermediate 1523 * update is finished. Since the guest won't see the intermediate
@@ -1243,6 +1527,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1243 1527
1244 shared_kaddr = kmap_atomic(vcpu->time_page); 1528 shared_kaddr = kmap_atomic(vcpu->time_page);
1245 1529
1530 guest_hv_clock = shared_kaddr + vcpu->time_offset;
1531
1532 /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
1533 pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
1534
1535 if (vcpu->pvclock_set_guest_stopped_request) {
1536 pvclock_flags |= PVCLOCK_GUEST_STOPPED;
1537 vcpu->pvclock_set_guest_stopped_request = false;
1538 }
1539
1540 /* If the host uses TSC clocksource, then it is stable */
1541 if (use_master_clock)
1542 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
1543
1544 vcpu->hv_clock.flags = pvclock_flags;
1545
1246 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, 1546 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
1247 sizeof(vcpu->hv_clock)); 1547 sizeof(vcpu->hv_clock));
1248 1548
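Setting PVCLOCK_TSC_STABLE_BIT when the master clock is in use tells the guest-side kvmclock reader that per-vcpu readings are already globally monotonic, so it can skip the cross-cpu "never go backwards" clamp it otherwise needs. That clamp is essentially an atomic running maximum; a self-contained sketch of it, modelled on the generic pvclock reader rather than copied from it:

	#include <linux/atomic.h>

	static atomic64_t last_value = ATOMIC64_INIT(0);

	/* Fold a per-vcpu reading into a globally monotonic value. */
	static u64 example_clamp_monotonic(u64 ns)
	{
		u64 last;

		do {
			last = atomic64_read(&last_value);
			if (ns < last)
				return last;	/* another vcpu is already ahead */
		} while (atomic64_cmpxchg(&last_value, last, ns) != last);

		return ns;
	}

With the stable bit set, the guest can return its per-vcpu reading directly and avoid this shared cacheline entirely.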
@@ -1572,9 +1872,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
1572 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); 1872 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
1573} 1873}
1574 1874
1575int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1875int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1576{ 1876{
1577 bool pr = false; 1877 bool pr = false;
1878 u32 msr = msr_info->index;
1879 u64 data = msr_info->data;
1578 1880
1579 switch (msr) { 1881 switch (msr) {
1580 case MSR_EFER: 1882 case MSR_EFER:
@@ -1625,6 +1927,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1625 case MSR_IA32_TSCDEADLINE: 1927 case MSR_IA32_TSCDEADLINE:
1626 kvm_set_lapic_tscdeadline_msr(vcpu, data); 1928 kvm_set_lapic_tscdeadline_msr(vcpu, data);
1627 break; 1929 break;
1930 case MSR_IA32_TSC_ADJUST:
1931 if (guest_cpuid_has_tsc_adjust(vcpu)) {
1932 if (!msr_info->host_initiated) {
1933 u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
1934 kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
1935 }
1936 vcpu->arch.ia32_tsc_adjust_msr = data;
1937 }
1938 break;
1628 case MSR_IA32_MISC_ENABLE: 1939 case MSR_IA32_MISC_ENABLE:
1629 vcpu->arch.ia32_misc_enable_msr = data; 1940 vcpu->arch.ia32_misc_enable_msr = data;
1630 break; 1941 break;
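Together with update_ia32_tsc_adjust_msr() earlier in this file, the MSR_IA32_TSC_ADJUST case above preserves the architectural coupling between the two MSRs: a guest write to IA32_TSC_ADJUST moves the TSC by the same delta, and a guest write to the TSC is reflected back into IA32_TSC_ADJUST. A small worked example with invented numbers:

	Initially: guest TSC offset delta = 0, ia32_tsc_adjust_msr = 0.

	1. Guest writes IA32_TSC_ADJUST = -1000:
		adj = -1000 - 0 = -1000
		adjust_tsc_offset(vcpu, -1000)	-> guest-visible TSC drops by 1000
		ia32_tsc_adjust_msr = -1000

	2. Guest later writes the TSC so that the offset grows by +250:
		update_ia32_tsc_adjust_msr(): ia32_tsc_adjust_msr += 250
		a subsequent RDMSR of IA32_TSC_ADJUST now returns -750,
		matching what bare metal would report.

Host-initiated writes (msr->host_initiated) skip the offset adjustment, so migration can restore the MSR value without perturbing the guest clock.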
@@ -1984,6 +2295,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1984 case MSR_IA32_TSCDEADLINE: 2295 case MSR_IA32_TSCDEADLINE:
1985 data = kvm_get_lapic_tscdeadline_msr(vcpu); 2296 data = kvm_get_lapic_tscdeadline_msr(vcpu);
1986 break; 2297 break;
2298 case MSR_IA32_TSC_ADJUST:
2299 data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2300 break;
1987 case MSR_IA32_MISC_ENABLE: 2301 case MSR_IA32_MISC_ENABLE:
1988 data = vcpu->arch.ia32_misc_enable_msr; 2302 data = vcpu->arch.ia32_misc_enable_msr;
1989 break; 2303 break;
@@ -2342,7 +2656,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2342 kvm_x86_ops->write_tsc_offset(vcpu, offset); 2656 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2343 vcpu->arch.tsc_catchup = 1; 2657 vcpu->arch.tsc_catchup = 1;
2344 } 2658 }
2345 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 2659 /*
2660 * On a host with synchronized TSC, there is no need to update
2661 * kvmclock on vcpu->cpu migration
2662 */
2663 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2664 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2346 if (vcpu->cpu != cpu) 2665 if (vcpu->cpu != cpu)
2347 kvm_migrate_timers(vcpu); 2666 kvm_migrate_timers(vcpu);
2348 vcpu->cpu = cpu; 2667 vcpu->cpu = cpu;
@@ -2691,15 +3010,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2691 if (!vcpu->arch.apic) 3010 if (!vcpu->arch.apic)
2692 goto out; 3011 goto out;
2693 u.lapic = memdup_user(argp, sizeof(*u.lapic)); 3012 u.lapic = memdup_user(argp, sizeof(*u.lapic));
2694 if (IS_ERR(u.lapic)) { 3013 if (IS_ERR(u.lapic))
2695 r = PTR_ERR(u.lapic); 3014 return PTR_ERR(u.lapic);
2696 goto out;
2697 }
2698 3015
2699 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); 3016 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
2700 if (r)
2701 goto out;
2702 r = 0;
2703 break; 3017 break;
2704 } 3018 }
2705 case KVM_INTERRUPT: { 3019 case KVM_INTERRUPT: {
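The ioctl cleanups in this and the following hunks lean on memdup_user() returning either a valid copy or an error encoded in the pointer itself, so the caller can return PTR_ERR() directly instead of juggling a separate r/goto pair. A simplified userspace reimplementation of that encoding (not the kernel's own headers) shows the idiom:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR(). */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)      { return (void *)error; }
static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int   IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* A memdup_user()-like helper: one return value carries data or an errno. */
static void *memdup(const void *src, size_t len)
{
        void *p = malloc(len);

        if (!p)
                return ERR_PTR(-ENOMEM);
        memcpy(p, src, len);
        return p;
}

int main(void)
{
        int arg = 42;
        int *copy = memdup(&arg, sizeof(arg));

        if (IS_ERR(copy))
                return (int)-PTR_ERR(copy);  /* early return, no goto needed */
        printf("%d\n", *copy);
        free(copy);
        return 0;
}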
@@ -2709,16 +3023,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2709 if (copy_from_user(&irq, argp, sizeof irq)) 3023 if (copy_from_user(&irq, argp, sizeof irq))
2710 goto out; 3024 goto out;
2711 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 3025 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
2712 if (r)
2713 goto out;
2714 r = 0;
2715 break; 3026 break;
2716 } 3027 }
2717 case KVM_NMI: { 3028 case KVM_NMI: {
2718 r = kvm_vcpu_ioctl_nmi(vcpu); 3029 r = kvm_vcpu_ioctl_nmi(vcpu);
2719 if (r)
2720 goto out;
2721 r = 0;
2722 break; 3030 break;
2723 } 3031 }
2724 case KVM_SET_CPUID: { 3032 case KVM_SET_CPUID: {
@@ -2729,8 +3037,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2729 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 3037 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2730 goto out; 3038 goto out;
2731 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); 3039 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
2732 if (r)
2733 goto out;
2734 break; 3040 break;
2735 } 3041 }
2736 case KVM_SET_CPUID2: { 3042 case KVM_SET_CPUID2: {
@@ -2742,8 +3048,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2742 goto out; 3048 goto out;
2743 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, 3049 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
2744 cpuid_arg->entries); 3050 cpuid_arg->entries);
2745 if (r)
2746 goto out;
2747 break; 3051 break;
2748 } 3052 }
2749 case KVM_GET_CPUID2: { 3053 case KVM_GET_CPUID2: {
@@ -2875,10 +3179,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2875 } 3179 }
2876 case KVM_SET_XSAVE: { 3180 case KVM_SET_XSAVE: {
2877 u.xsave = memdup_user(argp, sizeof(*u.xsave)); 3181 u.xsave = memdup_user(argp, sizeof(*u.xsave));
2878 if (IS_ERR(u.xsave)) { 3182 if (IS_ERR(u.xsave))
2879 r = PTR_ERR(u.xsave); 3183 return PTR_ERR(u.xsave);
2880 goto out;
2881 }
2882 3184
2883 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); 3185 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
2884 break; 3186 break;
@@ -2900,10 +3202,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2900 } 3202 }
2901 case KVM_SET_XCRS: { 3203 case KVM_SET_XCRS: {
2902 u.xcrs = memdup_user(argp, sizeof(*u.xcrs)); 3204 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
2903 if (IS_ERR(u.xcrs)) { 3205 if (IS_ERR(u.xcrs))
2904 r = PTR_ERR(u.xcrs); 3206 return PTR_ERR(u.xcrs);
2905 goto out;
2906 }
2907 3207
2908 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); 3208 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
2909 break; 3209 break;
@@ -2951,7 +3251,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
2951 int ret; 3251 int ret;
2952 3252
2953 if (addr > (unsigned int)(-3 * PAGE_SIZE)) 3253 if (addr > (unsigned int)(-3 * PAGE_SIZE))
2954 return -1; 3254 return -EINVAL;
2955 ret = kvm_x86_ops->set_tss_addr(kvm, addr); 3255 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
2956 return ret; 3256 return ret;
2957} 3257}
@@ -3212,8 +3512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
3212 switch (ioctl) { 3512 switch (ioctl) {
3213 case KVM_SET_TSS_ADDR: 3513 case KVM_SET_TSS_ADDR:
3214 r = kvm_vm_ioctl_set_tss_addr(kvm, arg); 3514 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3215 if (r < 0)
3216 goto out;
3217 break; 3515 break;
3218 case KVM_SET_IDENTITY_MAP_ADDR: { 3516 case KVM_SET_IDENTITY_MAP_ADDR: {
3219 u64 ident_addr; 3517 u64 ident_addr;
@@ -3222,14 +3520,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
3222 if (copy_from_user(&ident_addr, argp, sizeof ident_addr)) 3520 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3223 goto out; 3521 goto out;
3224 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr); 3522 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3225 if (r < 0)
3226 goto out;
3227 break; 3523 break;
3228 } 3524 }
3229 case KVM_SET_NR_MMU_PAGES: 3525 case KVM_SET_NR_MMU_PAGES:
3230 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); 3526 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3231 if (r)
3232 goto out;
3233 break; 3527 break;
3234 case KVM_GET_NR_MMU_PAGES: 3528 case KVM_GET_NR_MMU_PAGES:
3235 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); 3529 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
@@ -3320,8 +3614,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
3320 r = 0; 3614 r = 0;
3321 get_irqchip_out: 3615 get_irqchip_out:
3322 kfree(chip); 3616 kfree(chip);
3323 if (r)
3324 goto out;
3325 break; 3617 break;
3326 } 3618 }
3327 case KVM_SET_IRQCHIP: { 3619 case KVM_SET_IRQCHIP: {
@@ -3343,8 +3635,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
3343 r = 0; 3635 r = 0;
3344 set_irqchip_out: 3636 set_irqchip_out:
3345 kfree(chip); 3637 kfree(chip);
3346 if (r)
3347 goto out;
3348 break; 3638 break;
3349 } 3639 }
3350 case KVM_GET_PIT: { 3640 case KVM_GET_PIT: {
@@ -3371,9 +3661,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
3371 if (!kvm->arch.vpit) 3661 if (!kvm->arch.vpit)
3372 goto out; 3662 goto out;
3373 r = kvm_vm_ioctl_set_pit(kvm, &u.ps); 3663 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3374 if (r)
3375 goto out;
3376 r = 0;
3377 break; 3664 break;
3378 } 3665 }
3379 case KVM_GET_PIT2: { 3666 case KVM_GET_PIT2: {
@@ -3397,9 +3684,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
3397 if (!kvm->arch.vpit) 3684 if (!kvm->arch.vpit)
3398 goto out; 3685 goto out;
3399 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); 3686 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3400 if (r)
3401 goto out;
3402 r = 0;
3403 break; 3687 break;
3404 } 3688 }
3405 case KVM_REINJECT_CONTROL: { 3689 case KVM_REINJECT_CONTROL: {
@@ -3408,9 +3692,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
3408 if (copy_from_user(&control, argp, sizeof(control))) 3692 if (copy_from_user(&control, argp, sizeof(control)))
3409 goto out; 3693 goto out;
3410 r = kvm_vm_ioctl_reinject(kvm, &control); 3694 r = kvm_vm_ioctl_reinject(kvm, &control);
3411 if (r)
3412 goto out;
3413 r = 0;
3414 break; 3695 break;
3415 } 3696 }
3416 case KVM_XEN_HVM_CONFIG: { 3697 case KVM_XEN_HVM_CONFIG: {
@@ -4273,7 +4554,12 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4273static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, 4554static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4274 u32 msr_index, u64 data) 4555 u32 msr_index, u64 data)
4275{ 4556{
4276 return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); 4557 struct msr_data msr;
4558
4559 msr.data = data;
4560 msr.index = msr_index;
4561 msr.host_initiated = false;
4562 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4277} 4563}
4278 4564
4279static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, 4565static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -4495,7 +4781,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4495 * instruction -> ... 4781 * instruction -> ...
4496 */ 4782 */
4497 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); 4783 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4498 if (!is_error_pfn(pfn)) { 4784 if (!is_error_noslot_pfn(pfn)) {
4499 kvm_release_pfn_clean(pfn); 4785 kvm_release_pfn_clean(pfn);
4500 return true; 4786 return true;
4501 } 4787 }
@@ -4881,6 +5167,50 @@ static void kvm_set_mmio_spte_mask(void)
4881 kvm_mmu_set_mmio_spte_mask(mask); 5167 kvm_mmu_set_mmio_spte_mask(mask);
4882} 5168}
4883 5169
5170#ifdef CONFIG_X86_64
5171static void pvclock_gtod_update_fn(struct work_struct *work)
5172{
5173 struct kvm *kvm;
5174
5175 struct kvm_vcpu *vcpu;
5176 int i;
5177
5178 raw_spin_lock(&kvm_lock);
5179 list_for_each_entry(kvm, &vm_list, vm_list)
5180 kvm_for_each_vcpu(i, vcpu, kvm)
5181 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
5182 atomic_set(&kvm_guest_has_master_clock, 0);
5183 raw_spin_unlock(&kvm_lock);
5184}
5185
5186static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
5187
5188/*
5189 * Notification about pvclock gtod data update.
5190 */
5191static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
5192 void *priv)
5193{
5194 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
5195 struct timekeeper *tk = priv;
5196
5197 update_pvclock_gtod(tk);
5198
5199 /* disable master clock if host does not trust, or does not
5200 * use, TSC clocksource
5201 */
5202 if (gtod->clock.vclock_mode != VCLOCK_TSC &&
5203 atomic_read(&kvm_guest_has_master_clock) != 0)
5204 queue_work(system_long_wq, &pvclock_gtod_work);
5205
5206 return 0;
5207}
5208
5209static struct notifier_block pvclock_gtod_notifier = {
5210 .notifier_call = pvclock_gtod_notify,
5211};
5212#endif
5213
4884int kvm_arch_init(void *opaque) 5214int kvm_arch_init(void *opaque)
4885{ 5215{
4886 int r; 5216 int r;
@@ -4922,6 +5252,10 @@ int kvm_arch_init(void *opaque)
4922 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 5252 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
4923 5253
4924 kvm_lapic_init(); 5254 kvm_lapic_init();
5255#ifdef CONFIG_X86_64
5256 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
5257#endif
5258
4925 return 0; 5259 return 0;
4926 5260
4927out: 5261out:
@@ -4936,6 +5270,9 @@ void kvm_arch_exit(void)
4936 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, 5270 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
4937 CPUFREQ_TRANSITION_NOTIFIER); 5271 CPUFREQ_TRANSITION_NOTIFIER);
4938 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block); 5272 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5273#ifdef CONFIG_X86_64
5274 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
5275#endif
4939 kvm_x86_ops = NULL; 5276 kvm_x86_ops = NULL;
4940 kvm_mmu_module_exit(); 5277 kvm_mmu_module_exit();
4941} 5278}
@@ -5059,7 +5396,7 @@ out:
5059} 5396}
5060EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); 5397EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
5061 5398
5062int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) 5399static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5063{ 5400{
5064 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 5401 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5065 char instruction[3]; 5402 char instruction[3];
@@ -5235,6 +5572,29 @@ static void process_nmi(struct kvm_vcpu *vcpu)
5235 kvm_make_request(KVM_REQ_EVENT, vcpu); 5572 kvm_make_request(KVM_REQ_EVENT, vcpu);
5236} 5573}
5237 5574
5575static void kvm_gen_update_masterclock(struct kvm *kvm)
5576{
5577#ifdef CONFIG_X86_64
5578 int i;
5579 struct kvm_vcpu *vcpu;
5580 struct kvm_arch *ka = &kvm->arch;
5581
5582 spin_lock(&ka->pvclock_gtod_sync_lock);
5583 kvm_make_mclock_inprogress_request(kvm);
5584 /* no guest entries from this point */
5585 pvclock_update_vm_gtod_copy(kvm);
5586
5587 kvm_for_each_vcpu(i, vcpu, kvm)
5588 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
5589
5590 /* guest entries allowed */
5591 kvm_for_each_vcpu(i, vcpu, kvm)
5592 clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
5593
5594 spin_unlock(&ka->pvclock_gtod_sync_lock);
5595#endif
5596}
5597
5238static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5598static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5239{ 5599{
5240 int r; 5600 int r;
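kvm_gen_update_masterclock() above follows a strict ordering: every vcpu first gets KVM_REQ_MCLOCK_INPROGRESS so none of them can re-enter the guest, then the per-VM gtod copy is recomputed, then KVM_REQ_CLOCK_UPDATE is posted and the in-progress bits are cleared. A minimal sketch of that request-bit choreography, using plain C atomics instead of the kernel's request machinery (bit numbers and names are illustrative):

#include <stdatomic.h>
#include <stdio.h>

/* Illustrative request bits; the kernel defines its own KVM_REQ_* values. */
enum { REQ_CLOCK_UPDATE = 0, REQ_MCLOCK_INPROGRESS = 1 };

#define NR_VCPUS 4
static atomic_ulong requests[NR_VCPUS];

static void set_req(int cpu, int bit)
{
        atomic_fetch_or(&requests[cpu], 1UL << bit);
}

static void clear_req(int cpu, int bit)
{
        atomic_fetch_and(&requests[cpu], ~(1UL << bit));
}

static void gen_update_masterclock(void)
{
        int i;

        /* 1. block guest entry on every vcpu */
        for (i = 0; i < NR_VCPUS; i++)
                set_req(i, REQ_MCLOCK_INPROGRESS);

        /* 2. no guest entries from this point: recompute the shared copy */
        /*    (pvclock_update_vm_gtod_copy() in the hunk above)           */

        /* 3. ask each vcpu to refresh its kvmclock page on next entry */
        for (i = 0; i < NR_VCPUS; i++)
                set_req(i, REQ_CLOCK_UPDATE);

        /* 4. guest entries allowed again */
        for (i = 0; i < NR_VCPUS; i++)
                clear_req(i, REQ_MCLOCK_INPROGRESS);
}

int main(void)
{
        gen_update_masterclock();
        printf("vcpu0 requests = %#lx\n", atomic_load(&requests[0]));
        return 0;
}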
@@ -5247,6 +5607,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5247 kvm_mmu_unload(vcpu); 5607 kvm_mmu_unload(vcpu);
5248 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) 5608 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
5249 __kvm_migrate_timers(vcpu); 5609 __kvm_migrate_timers(vcpu);
5610 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
5611 kvm_gen_update_masterclock(vcpu->kvm);
5250 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { 5612 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5251 r = kvm_guest_time_update(vcpu); 5613 r = kvm_guest_time_update(vcpu);
5252 if (unlikely(r)) 5614 if (unlikely(r))
@@ -5362,7 +5724,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5362 if (hw_breakpoint_active()) 5724 if (hw_breakpoint_active())
5363 hw_breakpoint_restore(); 5725 hw_breakpoint_restore();
5364 5726
5365 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); 5727 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
5728 native_read_tsc());
5366 5729
5367 vcpu->mode = OUTSIDE_GUEST_MODE; 5730 vcpu->mode = OUTSIDE_GUEST_MODE;
5368 smp_wmb(); 5731 smp_wmb();
@@ -5419,7 +5782,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
5419 pr_debug("vcpu %d received sipi with vector # %x\n", 5782 pr_debug("vcpu %d received sipi with vector # %x\n",
5420 vcpu->vcpu_id, vcpu->arch.sipi_vector); 5783 vcpu->vcpu_id, vcpu->arch.sipi_vector);
5421 kvm_lapic_reset(vcpu); 5784 kvm_lapic_reset(vcpu);
5422 r = kvm_arch_vcpu_reset(vcpu); 5785 r = kvm_vcpu_reset(vcpu);
5423 if (r) 5786 if (r)
5424 return r; 5787 return r;
5425 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5788 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -6047,7 +6410,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6047 r = vcpu_load(vcpu); 6410 r = vcpu_load(vcpu);
6048 if (r) 6411 if (r)
6049 return r; 6412 return r;
6050 r = kvm_arch_vcpu_reset(vcpu); 6413 r = kvm_vcpu_reset(vcpu);
6051 if (r == 0) 6414 if (r == 0)
6052 r = kvm_mmu_setup(vcpu); 6415 r = kvm_mmu_setup(vcpu);
6053 vcpu_put(vcpu); 6416 vcpu_put(vcpu);
@@ -6055,6 +6418,23 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6055 return r; 6418 return r;
6056} 6419}
6057 6420
6421int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6422{
6423 int r;
6424 struct msr_data msr;
6425
6426 r = vcpu_load(vcpu);
6427 if (r)
6428 return r;
6429 msr.data = 0x0;
6430 msr.index = MSR_IA32_TSC;
6431 msr.host_initiated = true;
6432 kvm_write_tsc(vcpu, &msr);
6433 vcpu_put(vcpu);
6434
6435 return r;
6436}
6437
6058void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 6438void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6059{ 6439{
6060 int r; 6440 int r;
@@ -6069,7 +6449,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6069 kvm_x86_ops->vcpu_free(vcpu); 6449 kvm_x86_ops->vcpu_free(vcpu);
6070} 6450}
6071 6451
6072int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 6452static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6073{ 6453{
6074 atomic_set(&vcpu->arch.nmi_queued, 0); 6454 atomic_set(&vcpu->arch.nmi_queued, 0);
6075 vcpu->arch.nmi_pending = 0; 6455 vcpu->arch.nmi_pending = 0;
@@ -6092,6 +6472,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
6092 6472
6093 kvm_pmu_reset(vcpu); 6473 kvm_pmu_reset(vcpu);
6094 6474
6475 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
6476 vcpu->arch.regs_avail = ~0;
6477 vcpu->arch.regs_dirty = ~0;
6478
6095 return kvm_x86_ops->vcpu_reset(vcpu); 6479 return kvm_x86_ops->vcpu_reset(vcpu);
6096} 6480}
6097 6481
@@ -6168,6 +6552,8 @@ int kvm_arch_hardware_enable(void *garbage)
6168 kvm_for_each_vcpu(i, vcpu, kvm) { 6552 kvm_for_each_vcpu(i, vcpu, kvm) {
6169 vcpu->arch.tsc_offset_adjustment += delta_cyc; 6553 vcpu->arch.tsc_offset_adjustment += delta_cyc;
6170 vcpu->arch.last_host_tsc = local_tsc; 6554 vcpu->arch.last_host_tsc = local_tsc;
6555 set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
6556 &vcpu->requests);
6171 } 6557 }
6172 6558
6173 /* 6559 /*
@@ -6258,10 +6644,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6258 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) 6644 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
6259 goto fail_free_mce_banks; 6645 goto fail_free_mce_banks;
6260 6646
6647 r = fx_init(vcpu);
6648 if (r)
6649 goto fail_free_wbinvd_dirty_mask;
6650
6651 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
6261 kvm_async_pf_hash_reset(vcpu); 6652 kvm_async_pf_hash_reset(vcpu);
6262 kvm_pmu_init(vcpu); 6653 kvm_pmu_init(vcpu);
6263 6654
6264 return 0; 6655 return 0;
6656fail_free_wbinvd_dirty_mask:
6657 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6265fail_free_mce_banks: 6658fail_free_mce_banks:
6266 kfree(vcpu->arch.mce_banks); 6659 kfree(vcpu->arch.mce_banks);
6267fail_free_lapic: 6660fail_free_lapic:
@@ -6305,6 +6698,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6305 6698
6306 raw_spin_lock_init(&kvm->arch.tsc_write_lock); 6699 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
6307 mutex_init(&kvm->arch.apic_map_lock); 6700 mutex_init(&kvm->arch.apic_map_lock);
6701 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
6702
6703 pvclock_update_vm_gtod_copy(kvm);
6308 6704
6309 return 0; 6705 return 0;
6310} 6706}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2b5219c12ac8..e224f7a671b6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -112,7 +112,7 @@ void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
112void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 112void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
113int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); 113int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
114 114
115void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); 115void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
116 116
117int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, 117int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
118 gva_t addr, void *val, unsigned int bytes, 118 gva_t addr, void *val, unsigned int bytes,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 642d8805bc1b..df4176cdbb32 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1412,7 +1412,7 @@ __init void lguest_init(void)
1412 1412
1413 /* We don't have features. We have puppies! Puppies! */ 1413 /* We don't have features. We have puppies! Puppies! */
1414#ifdef CONFIG_X86_MCE 1414#ifdef CONFIG_X86_MCE
1415 mce_disabled = 1; 1415 mca_cfg.disabled = true;
1416#endif 1416#endif
1417#ifdef CONFIG_ACPI 1417#ifdef CONFIG_ACPI
1418 acpi_disabled = 1; 1418 acpi_disabled = 1;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index b00f6785da74..96b2c6697c9d 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -32,7 +32,6 @@ ifeq ($(CONFIG_X86_32),y)
32 lib-y += checksum_32.o 32 lib-y += checksum_32.o
33 lib-y += strstr_32.o 33 lib-y += strstr_32.o
34 lib-y += string_32.o 34 lib-y += string_32.o
35 lib-y += cmpxchg.o
36ifneq ($(CONFIG_X86_CMPXCHG64),y) 35ifneq ($(CONFIG_X86_CMPXCHG64),y)
37 lib-y += cmpxchg8b_emu.o atomic64_386_32.o 36 lib-y += cmpxchg8b_emu.o atomic64_386_32.o
38endif 37endif
diff --git a/arch/x86/lib/cmpxchg.c b/arch/x86/lib/cmpxchg.c
deleted file mode 100644
index 5d619f6df3ee..000000000000
--- a/arch/x86/lib/cmpxchg.c
+++ /dev/null
@@ -1,54 +0,0 @@
1/*
2 * cmpxchg*() fallbacks for CPU not supporting these instructions
3 */
4
5#include <linux/kernel.h>
6#include <linux/smp.h>
7#include <linux/module.h>
8
9#ifndef CONFIG_X86_CMPXCHG
10unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
11{
12 u8 prev;
13 unsigned long flags;
14
15 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
16 local_irq_save(flags);
17 prev = *(u8 *)ptr;
18 if (prev == old)
19 *(u8 *)ptr = new;
20 local_irq_restore(flags);
21 return prev;
22}
23EXPORT_SYMBOL(cmpxchg_386_u8);
24
25unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
26{
27 u16 prev;
28 unsigned long flags;
29
30 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
31 local_irq_save(flags);
32 prev = *(u16 *)ptr;
33 if (prev == old)
34 *(u16 *)ptr = new;
35 local_irq_restore(flags);
36 return prev;
37}
38EXPORT_SYMBOL(cmpxchg_386_u16);
39
40unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
41{
42 u32 prev;
43 unsigned long flags;
44
45 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
46 local_irq_save(flags);
47 prev = *(u32 *)ptr;
48 if (prev == old)
49 *(u32 *)ptr = new;
50 local_irq_restore(flags);
51 return prev;
52}
53EXPORT_SYMBOL(cmpxchg_386_u32);
54#endif
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6b34d04d096a..176cca67212b 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -5,91 +5,89 @@
5#include <asm/alternative-asm.h> 5#include <asm/alternative-asm.h>
6 6
7 ALIGN 7 ALIGN
8copy_page_c: 8copy_page_rep:
9 CFI_STARTPROC 9 CFI_STARTPROC
10 movl $4096/8,%ecx 10 movl $4096/8, %ecx
11 rep movsq 11 rep movsq
12 ret 12 ret
13 CFI_ENDPROC 13 CFI_ENDPROC
14ENDPROC(copy_page_c) 14ENDPROC(copy_page_rep)
15 15
16/* Don't use streaming store because it's better when the target 16/*
17 ends up in cache. */ 17 * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
18 18 * Could vary the prefetch distance based on SMP/UP.
19/* Could vary the prefetch distance based on SMP/UP */ 19*/
20 20
21ENTRY(copy_page) 21ENTRY(copy_page)
22 CFI_STARTPROC 22 CFI_STARTPROC
23 subq $2*8,%rsp 23 subq $2*8, %rsp
24 CFI_ADJUST_CFA_OFFSET 2*8 24 CFI_ADJUST_CFA_OFFSET 2*8
25 movq %rbx,(%rsp) 25 movq %rbx, (%rsp)
26 CFI_REL_OFFSET rbx, 0 26 CFI_REL_OFFSET rbx, 0
27 movq %r12,1*8(%rsp) 27 movq %r12, 1*8(%rsp)
28 CFI_REL_OFFSET r12, 1*8 28 CFI_REL_OFFSET r12, 1*8
29 29
30 movl $(4096/64)-5,%ecx 30 movl $(4096/64)-5, %ecx
31 .p2align 4 31 .p2align 4
32.Loop64: 32.Loop64:
33 dec %rcx 33 dec %rcx
34 34 movq 0x8*0(%rsi), %rax
35 movq (%rsi), %rax 35 movq 0x8*1(%rsi), %rbx
36 movq 8 (%rsi), %rbx 36 movq 0x8*2(%rsi), %rdx
37 movq 16 (%rsi), %rdx 37 movq 0x8*3(%rsi), %r8
38 movq 24 (%rsi), %r8 38 movq 0x8*4(%rsi), %r9
39 movq 32 (%rsi), %r9 39 movq 0x8*5(%rsi), %r10
40 movq 40 (%rsi), %r10 40 movq 0x8*6(%rsi), %r11
41 movq 48 (%rsi), %r11 41 movq 0x8*7(%rsi), %r12
42 movq 56 (%rsi), %r12
43 42
44 prefetcht0 5*64(%rsi) 43 prefetcht0 5*64(%rsi)
45 44
46 movq %rax, (%rdi) 45 movq %rax, 0x8*0(%rdi)
47 movq %rbx, 8 (%rdi) 46 movq %rbx, 0x8*1(%rdi)
48 movq %rdx, 16 (%rdi) 47 movq %rdx, 0x8*2(%rdi)
49 movq %r8, 24 (%rdi) 48 movq %r8, 0x8*3(%rdi)
50 movq %r9, 32 (%rdi) 49 movq %r9, 0x8*4(%rdi)
51 movq %r10, 40 (%rdi) 50 movq %r10, 0x8*5(%rdi)
52 movq %r11, 48 (%rdi) 51 movq %r11, 0x8*6(%rdi)
53 movq %r12, 56 (%rdi) 52 movq %r12, 0x8*7(%rdi)
54 53
55 leaq 64 (%rsi), %rsi 54 leaq 64 (%rsi), %rsi
56 leaq 64 (%rdi), %rdi 55 leaq 64 (%rdi), %rdi
57 56
58 jnz .Loop64 57 jnz .Loop64
59 58
60 movl $5,%ecx 59 movl $5, %ecx
61 .p2align 4 60 .p2align 4
62.Loop2: 61.Loop2:
63 decl %ecx 62 decl %ecx
64 63
65 movq (%rsi), %rax 64 movq 0x8*0(%rsi), %rax
66 movq 8 (%rsi), %rbx 65 movq 0x8*1(%rsi), %rbx
67 movq 16 (%rsi), %rdx 66 movq 0x8*2(%rsi), %rdx
68 movq 24 (%rsi), %r8 67 movq 0x8*3(%rsi), %r8
69 movq 32 (%rsi), %r9 68 movq 0x8*4(%rsi), %r9
70 movq 40 (%rsi), %r10 69 movq 0x8*5(%rsi), %r10
71 movq 48 (%rsi), %r11 70 movq 0x8*6(%rsi), %r11
72 movq 56 (%rsi), %r12 71 movq 0x8*7(%rsi), %r12
73 72
74 movq %rax, (%rdi) 73 movq %rax, 0x8*0(%rdi)
75 movq %rbx, 8 (%rdi) 74 movq %rbx, 0x8*1(%rdi)
76 movq %rdx, 16 (%rdi) 75 movq %rdx, 0x8*2(%rdi)
77 movq %r8, 24 (%rdi) 76 movq %r8, 0x8*3(%rdi)
78 movq %r9, 32 (%rdi) 77 movq %r9, 0x8*4(%rdi)
79 movq %r10, 40 (%rdi) 78 movq %r10, 0x8*5(%rdi)
80 movq %r11, 48 (%rdi) 79 movq %r11, 0x8*6(%rdi)
81 movq %r12, 56 (%rdi) 80 movq %r12, 0x8*7(%rdi)
82 81
83 leaq 64(%rdi),%rdi 82 leaq 64(%rdi), %rdi
84 leaq 64(%rsi),%rsi 83 leaq 64(%rsi), %rsi
85
86 jnz .Loop2 84 jnz .Loop2
87 85
88 movq (%rsp),%rbx 86 movq (%rsp), %rbx
89 CFI_RESTORE rbx 87 CFI_RESTORE rbx
90 movq 1*8(%rsp),%r12 88 movq 1*8(%rsp), %r12
91 CFI_RESTORE r12 89 CFI_RESTORE r12
92 addq $2*8,%rsp 90 addq $2*8, %rsp
93 CFI_ADJUST_CFA_OFFSET -2*8 91 CFI_ADJUST_CFA_OFFSET -2*8
94 ret 92 ret
95.Lcopy_page_end: 93.Lcopy_page_end:
@@ -103,7 +101,7 @@ ENDPROC(copy_page)
103 101
104 .section .altinstr_replacement,"ax" 102 .section .altinstr_replacement,"ax"
1051: .byte 0xeb /* jmp <disp8> */ 1031: .byte 0xeb /* jmp <disp8> */
106 .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ 104 .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
1072: 1052:
108 .previous 106 .previous
109 .section .altinstructions,"a" 107 .section .altinstructions,"a"
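The rewritten copy_page above is an 8-quadword unrolled loop with a prefetch five cache lines ahead, with the last five iterations split out so the prefetch never reaches past the source page; the alternatives entry at the bottom patches copy_page into a jump to the rep movsq variant on CPUs that advertise X86_FEATURE_REP_GOOD. Roughly the same structure in portable C, as a reference sketch only (the assembly exists precisely because compiler codegen is not guaranteed to match it):

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Unrolled 64-bytes-per-iteration copy, analogous to .Loop64 above. */
static void copy_page_unrolled(void *dst, const void *src)
{
        uint64_t *d = dst;
        const uint64_t *s = src;
        size_t i;

        /* main loop: 8 quadwords per iteration, prefetch 5 lines ahead */
        for (i = 0; i < PAGE_SIZE / 64 - 5; i++) {
                __builtin_prefetch(s + 5 * 8);   /* prefetcht0 5*64(%rsi) */
                d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
                d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
                s += 8;
                d += 8;
        }
        /* last 5 iterations without prefetch (matches .Loop2 above) */
        for (i = 0; i < 5; i++) {
                d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
                d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
                s += 8;
                d += 8;
        }
}

/* The rep-movsq alternative is roughly what memcpy() resolves to here. */
static void copy_page_rep(void *dst, const void *src)
{
        memcpy(dst, src, PAGE_SIZE);
}

int main(void)
{
        static uint64_t a[PAGE_SIZE / 8], b[PAGE_SIZE / 8];

        a[0] = 0x1234;
        copy_page_unrolled(b, a);
        copy_page_rep(a, b);
        return a[0] == 0x1234 ? 0 : 1;
}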
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 98f6d6b68f5a..f0312d746402 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -570,63 +570,6 @@ do { \
570unsigned long __copy_to_user_ll(void __user *to, const void *from, 570unsigned long __copy_to_user_ll(void __user *to, const void *from,
571 unsigned long n) 571 unsigned long n)
572{ 572{
573#ifndef CONFIG_X86_WP_WORKS_OK
574 if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
575 ((unsigned long)to) < TASK_SIZE) {
576 /*
577 * When we are in an atomic section (see
578 * mm/filemap.c:file_read_actor), return the full
579 * length to take the slow path.
580 */
581 if (in_atomic())
582 return n;
583
584 /*
585 * CPU does not honor the WP bit when writing
586 * from supervisory mode, and due to preemption or SMP,
587 * the page tables can change at any time.
588 * Do it manually. Manfred <manfred@colorfullife.com>
589 */
590 while (n) {
591 unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
592 unsigned long len = PAGE_SIZE - offset;
593 int retval;
594 struct page *pg;
595 void *maddr;
596
597 if (len > n)
598 len = n;
599
600survive:
601 down_read(&current->mm->mmap_sem);
602 retval = get_user_pages(current, current->mm,
603 (unsigned long)to, 1, 1, 0, &pg, NULL);
604
605 if (retval == -ENOMEM && is_global_init(current)) {
606 up_read(&current->mm->mmap_sem);
607 congestion_wait(BLK_RW_ASYNC, HZ/50);
608 goto survive;
609 }
610
611 if (retval != 1) {
612 up_read(&current->mm->mmap_sem);
613 break;
614 }
615
616 maddr = kmap_atomic(pg);
617 memcpy(maddr + offset, from, len);
618 kunmap_atomic(maddr);
619 set_page_dirty_lock(pg);
620 put_page(pg);
621 up_read(&current->mm->mmap_sem);
622
623 from += len;
624 to += len;
625 n -= len;
626 }
627 return n;
628 }
629#endif
630 stac(); 573 stac();
631 if (movsl_is_ok(to, from, n)) 574 if (movsl_is_ok(to, from, n))
632 __copy_user(to, from, n); 575 __copy_user(to, from, n);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8e13ecb41bee..027088f2f7dd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,7 +18,7 @@
18#include <asm/pgalloc.h> /* pgd_*(), ... */ 18#include <asm/pgalloc.h> /* pgd_*(), ... */
19#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ 19#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
20#include <asm/fixmap.h> /* VSYSCALL_START */ 20#include <asm/fixmap.h> /* VSYSCALL_START */
21#include <asm/rcu.h> /* exception_enter(), ... */ 21#include <asm/context_tracking.h> /* exception_enter(), ... */
22 22
23/* 23/*
24 * Page fault error code bits: 24 * Page fault error code bits:
@@ -803,20 +803,6 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
803 __bad_area(regs, error_code, address, SEGV_ACCERR); 803 __bad_area(regs, error_code, address, SEGV_ACCERR);
804} 804}
805 805
806/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
807static void
808out_of_memory(struct pt_regs *regs, unsigned long error_code,
809 unsigned long address)
810{
811 /*
812 * We ran out of memory, call the OOM killer, and return the userspace
813 * (which will retry the fault, or kill us if we got oom-killed):
814 */
815 up_read(&current->mm->mmap_sem);
816
817 pagefault_out_of_memory();
818}
819
820static void 806static void
821do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, 807do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
822 unsigned int fault) 808 unsigned int fault)
@@ -879,7 +865,14 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
879 return 1; 865 return 1;
880 } 866 }
881 867
882 out_of_memory(regs, error_code, address); 868 up_read(&current->mm->mmap_sem);
869
870 /*
871 * We ran out of memory, call the OOM killer, and return to
872 * userspace (which will retry the fault, or kill us if we got
873 * oom-killed):
874 */
875 pagefault_out_of_memory();
883 } else { 876 } else {
884 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 877 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
885 VM_FAULT_HWPOISON_LARGE)) 878 VM_FAULT_HWPOISON_LARGE))
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 937bff5cdaa7..ae1aa71d0115 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -274,42 +274,15 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
274 unsigned long pgoff, unsigned long flags) 274 unsigned long pgoff, unsigned long flags)
275{ 275{
276 struct hstate *h = hstate_file(file); 276 struct hstate *h = hstate_file(file);
277 struct mm_struct *mm = current->mm; 277 struct vm_unmapped_area_info info;
278 struct vm_area_struct *vma; 278
279 unsigned long start_addr; 279 info.flags = 0;
280 280 info.length = len;
281 if (len > mm->cached_hole_size) { 281 info.low_limit = TASK_UNMAPPED_BASE;
282 start_addr = mm->free_area_cache; 282 info.high_limit = TASK_SIZE;
283 } else { 283 info.align_mask = PAGE_MASK & ~huge_page_mask(h);
284 start_addr = TASK_UNMAPPED_BASE; 284 info.align_offset = 0;
285 mm->cached_hole_size = 0; 285 return vm_unmapped_area(&info);
286 }
287
288full_search:
289 addr = ALIGN(start_addr, huge_page_size(h));
290
291 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
292 /* At this point: (!vma || addr < vma->vm_end). */
293 if (TASK_SIZE - len < addr) {
294 /*
295 * Start a new search - just in case we missed
296 * some holes.
297 */
298 if (start_addr != TASK_UNMAPPED_BASE) {
299 start_addr = TASK_UNMAPPED_BASE;
300 mm->cached_hole_size = 0;
301 goto full_search;
302 }
303 return -ENOMEM;
304 }
305 if (!vma || addr + len <= vma->vm_start) {
306 mm->free_area_cache = addr + len;
307 return addr;
308 }
309 if (addr + mm->cached_hole_size < vma->vm_start)
310 mm->cached_hole_size = vma->vm_start - addr;
311 addr = ALIGN(vma->vm_end, huge_page_size(h));
312 }
313} 286}
314 287
315static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, 288static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
@@ -317,83 +290,30 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
317 unsigned long pgoff, unsigned long flags) 290 unsigned long pgoff, unsigned long flags)
318{ 291{
319 struct hstate *h = hstate_file(file); 292 struct hstate *h = hstate_file(file);
320 struct mm_struct *mm = current->mm; 293 struct vm_unmapped_area_info info;
321 struct vm_area_struct *vma; 294 unsigned long addr;
322 unsigned long base = mm->mmap_base;
323 unsigned long addr = addr0;
324 unsigned long largest_hole = mm->cached_hole_size;
325 unsigned long start_addr;
326
327 /* don't allow allocations above current base */
328 if (mm->free_area_cache > base)
329 mm->free_area_cache = base;
330
331 if (len <= largest_hole) {
332 largest_hole = 0;
333 mm->free_area_cache = base;
334 }
335try_again:
336 start_addr = mm->free_area_cache;
337
338 /* make sure it can fit in the remaining address space */
339 if (mm->free_area_cache < len)
340 goto fail;
341
342 /* either no address requested or can't fit in requested address hole */
343 addr = (mm->free_area_cache - len) & huge_page_mask(h);
344 do {
345 /*
346 * Lookup failure means no vma is above this address,
347 * i.e. return with success:
348 */
349 vma = find_vma(mm, addr);
350 if (!vma)
351 return addr;
352 295
353 if (addr + len <= vma->vm_start) { 296 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
354 /* remember the address as a hint for next time */ 297 info.length = len;
355 mm->cached_hole_size = largest_hole; 298 info.low_limit = PAGE_SIZE;
356 return (mm->free_area_cache = addr); 299 info.high_limit = current->mm->mmap_base;
357 } else if (mm->free_area_cache == vma->vm_end) { 300 info.align_mask = PAGE_MASK & ~huge_page_mask(h);
358 /* pull free_area_cache down to the first hole */ 301 info.align_offset = 0;
359 mm->free_area_cache = vma->vm_start; 302 addr = vm_unmapped_area(&info);
360 mm->cached_hole_size = largest_hole;
361 }
362 303
363 /* remember the largest hole we saw so far */
364 if (addr + largest_hole < vma->vm_start)
365 largest_hole = vma->vm_start - addr;
366
367 /* try just below the current vma->vm_start */
368 addr = (vma->vm_start - len) & huge_page_mask(h);
369 } while (len <= vma->vm_start);
370
371fail:
372 /*
373 * if hint left us with no space for the requested
374 * mapping then try again:
375 */
376 if (start_addr != base) {
377 mm->free_area_cache = base;
378 largest_hole = 0;
379 goto try_again;
380 }
381 /* 304 /*
382 * A failed mmap() very likely causes application failure, 305 * A failed mmap() very likely causes application failure,
383 * so fall back to the bottom-up function here. This scenario 306 * so fall back to the bottom-up function here. This scenario
384 * can happen with large stack limits and large mmap() 307 * can happen with large stack limits and large mmap()
385 * allocations. 308 * allocations.
386 */ 309 */
387 mm->free_area_cache = TASK_UNMAPPED_BASE; 310 if (addr & ~PAGE_MASK) {
388 mm->cached_hole_size = ~0UL; 311 VM_BUG_ON(addr != -ENOMEM);
389 addr = hugetlb_get_unmapped_area_bottomup(file, addr0, 312 info.flags = 0;
390 len, pgoff, flags); 313 info.low_limit = TASK_UNMAPPED_BASE;
391 314 info.high_limit = TASK_SIZE;
392 /* 315 addr = vm_unmapped_area(&info);
393 * Restore the topdown base: 316 }
394 */
395 mm->free_area_cache = base;
396 mm->cached_hole_size = ~0UL;
397 317
398 return addr; 318 return addr;
399} 319}
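Both hugetlb paths above now delegate the search to vm_unmapped_area() and only describe the constraints; the interesting parameter is align_mask = PAGE_MASK & ~huge_page_mask(h), which isolates exactly the bits between the base-page size and the huge-page size that the returned address must have clear. A tiny sketch of that mask for a 4 KiB base page and a 2 MiB huge page (sizes assumed for the example):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long huge_page_size = 2UL << 20;               /* 2 MiB */
        unsigned long huge_page_mask = ~(huge_page_size - 1);

        /* bits that vm_unmapped_area() must keep zero in the result */
        unsigned long align_mask = PAGE_MASK & ~huge_page_mask;

        printf("align_mask = %#lx\n", align_mask);               /* 0x1ff000 */

        /* a candidate address is acceptable iff (addr & align_mask) == 0 */
        unsigned long addr = 0x7f0000200000UL;
        printf("aligned: %d\n", (addr & align_mask) == 0);
        return 0;
}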
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 11a58001b4ce..745d66b843c8 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -715,10 +715,7 @@ static void __init test_wp_bit(void)
715 715
716 if (!boot_cpu_data.wp_works_ok) { 716 if (!boot_cpu_data.wp_works_ok) {
717 printk(KERN_CONT "No.\n"); 717 printk(KERN_CONT "No.\n");
718#ifdef CONFIG_X86_WP_WORKS_OK 718 panic("Linux doesn't support CPUs with broken WP.");
719 panic(
720 "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
721#endif
722 } else { 719 } else {
723 printk(KERN_CONT "Ok.\n"); 720 printk(KERN_CONT "Ok.\n");
724 } 721 }
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3baff255adac..2ead3c8a4c84 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -630,7 +630,9 @@ void __init paging_init(void)
630 * numa support is not compiled in, and later node_set_state 630 * numa support is not compiled in, and later node_set_state
631 * will not set it back. 631 * will not set it back.
632 */ 632 */
633 node_clear_state(0, N_NORMAL_MEMORY); 633 node_clear_state(0, N_MEMORY);
634 if (N_MEMORY != N_NORMAL_MEMORY)
635 node_clear_state(0, N_NORMAL_MEMORY);
634 636
635 zone_sizes_init(); 637 zone_sizes_init();
636} 638}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8573b83a63d0..e27fbf887f3b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -137,7 +137,7 @@ static void pgd_dtor(pgd_t *pgd)
137 * against pageattr.c; it is the unique case in which a valid change 137 * against pageattr.c; it is the unique case in which a valid change
138 * of kernel pagetables can't be lazily synchronized by vmalloc faults. 138 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
139 * vmalloc faults work because attached pagetables are never freed. 139 * vmalloc faults work because attached pagetables are never freed.
140 * -- wli 140 * -- nyc
141 */ 141 */
142 142
143#ifdef CONFIG_X86_PAE 143#ifdef CONFIG_X86_PAE
@@ -301,6 +301,13 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
301 free_page((unsigned long)pgd); 301 free_page((unsigned long)pgd);
302} 302}
303 303
304/*
305 * Used to set accessed or dirty bits in the page table entries
306 * on other architectures. On x86, the accessed and dirty bits
307 * are tracked by hardware. However, do_wp_page calls this function
308 * to also make the pte writeable at the same time the dirty bit is
309 * set. In that case we do actually need to write the PTE.
310 */
304int ptep_set_access_flags(struct vm_area_struct *vma, 311int ptep_set_access_flags(struct vm_area_struct *vma,
305 unsigned long address, pte_t *ptep, 312 unsigned long address, pte_t *ptep,
306 pte_t entry, int dirty) 313 pte_t entry, int dirty)
@@ -310,7 +317,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
310 if (changed && dirty) { 317 if (changed && dirty) {
311 *ptep = entry; 318 *ptep = entry;
312 pte_update_defer(vma->vm_mm, address, ptep); 319 pte_update_defer(vma->vm_mm, address, ptep);
313 flush_tlb_page(vma, address);
314 } 320 }
315 321
316 return changed; 322 return changed;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 60f926cd8b0e..13a6b29e2e5d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -104,7 +104,7 @@ static void flush_tlb_func(void *info)
104 return; 104 return;
105 105
106 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { 106 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
107 if (f->flush_end == TLB_FLUSH_ALL || !cpu_has_invlpg) 107 if (f->flush_end == TLB_FLUSH_ALL)
108 local_flush_tlb(); 108 local_flush_tlb();
109 else if (!f->flush_end) 109 else if (!f->flush_end)
110 __flush_tlb_single(f->flush_start); 110 __flush_tlb_single(f->flush_start);
@@ -337,10 +337,8 @@ static const struct file_operations fops_tlbflush = {
337 337
338static int __cpuinit create_tlb_flushall_shift(void) 338static int __cpuinit create_tlb_flushall_shift(void)
339{ 339{
340 if (cpu_has_invlpg) { 340 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
341 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, 341 arch_debugfs_dir, NULL, &fops_tlbflush);
342 arch_debugfs_dir, NULL, &fops_tlbflush);
343 }
344 return 0; 342 return 0;
345} 343}
346late_initcall(create_tlb_flushall_shift); 344late_initcall(create_tlb_flushall_shift);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 520d2bd0b9c5..d11a47099d33 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -11,6 +11,7 @@
11#include <asm/cacheflush.h> 11#include <asm/cacheflush.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/filter.h> 13#include <linux/filter.h>
14#include <linux/if_vlan.h>
14 15
15/* 16/*
16 * Conventions : 17 * Conventions :
@@ -212,6 +213,8 @@ void bpf_jit_compile(struct sk_filter *fp)
212 case BPF_S_ANC_MARK: 213 case BPF_S_ANC_MARK:
213 case BPF_S_ANC_RXHASH: 214 case BPF_S_ANC_RXHASH:
214 case BPF_S_ANC_CPU: 215 case BPF_S_ANC_CPU:
216 case BPF_S_ANC_VLAN_TAG:
217 case BPF_S_ANC_VLAN_TAG_PRESENT:
215 case BPF_S_ANC_QUEUE: 218 case BPF_S_ANC_QUEUE:
216 case BPF_S_LD_W_ABS: 219 case BPF_S_LD_W_ABS:
217 case BPF_S_LD_H_ABS: 220 case BPF_S_LD_H_ABS:
@@ -515,6 +518,24 @@ void bpf_jit_compile(struct sk_filter *fp)
515 CLEAR_A(); 518 CLEAR_A();
516#endif 519#endif
517 break; 520 break;
521 case BPF_S_ANC_VLAN_TAG:
522 case BPF_S_ANC_VLAN_TAG_PRESENT:
523 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
524 if (is_imm8(offsetof(struct sk_buff, vlan_tci))) {
525 /* movzwl off8(%rdi),%eax */
526 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci));
527 } else {
528 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
529 EMIT(offsetof(struct sk_buff, vlan_tci), 4);
530 }
531 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
532 if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
533 EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */
534 } else {
535 EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */
536 EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */
537 }
538 break;
518 case BPF_S_LD_W_ABS: 539 case BPF_S_LD_W_ABS:
519 func = CHOOSE_LOAD_FUNC(K, sk_load_word); 540 func = CHOOSE_LOAD_FUNC(K, sk_load_word);
520common_load: seen |= SEEN_DATAREF; 541common_load: seen |= SEEN_DATAREF;
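The new JIT cases above emit a movzwl of skb->vlan_tci into %eax and then either mask off the VLAN_TAG_PRESENT bit (BPF_S_ANC_VLAN_TAG) or shift it down to a 0/1 flag (BPF_S_ANC_VLAN_TAG_PRESENT). The C equivalent of what the generated code computes, with VLAN_TAG_PRESENT taken as 0x1000 as the BUILD_BUG_ON asserts:

#include <stdint.h>
#include <stdio.h>

#define VLAN_TAG_PRESENT 0x1000   /* asserted by the BUILD_BUG_ON above */

/* what "and $0xef,%ah" yields after the movzwl: the tag without the flag */
static uint32_t anc_vlan_tag(uint16_t vlan_tci)
{
        return vlan_tci & ~VLAN_TAG_PRESENT;
}

/* what "shr $0xc,%eax; and $0x1,%eax" yields: 1 if a tag is present */
static uint32_t anc_vlan_tag_present(uint16_t vlan_tci)
{
        return (vlan_tci >> 12) & 1;
}

int main(void)
{
        uint16_t tci = VLAN_TAG_PRESENT | 100;   /* VLAN id 100, tag present */

        printf("tag=%u present=%u\n", anc_vlan_tag(tci),
               anc_vlan_tag_present(tci));
        return 0;
}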
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 3af5a1e79c9c..ee0af58ca5bd 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11) += sta2x11-fixup.o
16obj-$(CONFIG_X86_VISWS) += visws.o 16obj-$(CONFIG_X86_VISWS) += visws.o
17 17
18obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 18obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
19obj-$(CONFIG_X86_NUMACHIP) += numachip.o
19 20
20obj-$(CONFIG_X86_INTEL_MID) += mrst.o 21obj-$(CONFIG_X86_INTEL_MID) += mrst.o
21 22
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 192397c98606..0c01261fe5a8 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -12,6 +12,7 @@ struct pci_root_info {
12 char name[16]; 12 char name[16];
13 unsigned int res_num; 13 unsigned int res_num;
14 struct resource *res; 14 struct resource *res;
15 resource_size_t *res_offset;
15 struct pci_sysdata sd; 16 struct pci_sysdata sd;
16#ifdef CONFIG_PCI_MMCONFIG 17#ifdef CONFIG_PCI_MMCONFIG
17 bool mcfg_added; 18 bool mcfg_added;
@@ -22,6 +23,7 @@ struct pci_root_info {
22}; 23};
23 24
24static bool pci_use_crs = true; 25static bool pci_use_crs = true;
26static bool pci_ignore_seg = false;
25 27
26static int __init set_use_crs(const struct dmi_system_id *id) 28static int __init set_use_crs(const struct dmi_system_id *id)
27{ 29{
@@ -35,7 +37,14 @@ static int __init set_nouse_crs(const struct dmi_system_id *id)
35 return 0; 37 return 0;
36} 38}
37 39
38static const struct dmi_system_id pci_use_crs_table[] __initconst = { 40static int __init set_ignore_seg(const struct dmi_system_id *id)
41{
42 printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident);
43 pci_ignore_seg = true;
44 return 0;
45}
46
47static const struct dmi_system_id pci_crs_quirks[] __initconst = {
39 /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ 48 /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */
40 { 49 {
41 .callback = set_use_crs, 50 .callback = set_use_crs,
@@ -98,6 +107,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
98 DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), 107 DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"),
99 }, 108 },
100 }, 109 },
110
111 /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */
112 {
113 .callback = set_ignore_seg,
114 .ident = "HP xw9300",
115 .matches = {
116 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
117 DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation"),
118 },
119 },
101 {} 120 {}
102}; 121};
103 122
@@ -108,7 +127,7 @@ void __init pci_acpi_crs_quirks(void)
108 if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) 127 if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008)
109 pci_use_crs = false; 128 pci_use_crs = false;
110 129
111 dmi_check_system(pci_use_crs_table); 130 dmi_check_system(pci_crs_quirks);
112 131
113 /* 132 /*
114 * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that 133 * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that
@@ -305,6 +324,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
305 res->flags = flags; 324 res->flags = flags;
306 res->start = start; 325 res->start = start;
307 res->end = end; 326 res->end = end;
327 info->res_offset[info->res_num] = addr.translation_offset;
308 328
309 if (!pci_use_crs) { 329 if (!pci_use_crs) {
310 dev_printk(KERN_DEBUG, &info->bridge->dev, 330 dev_printk(KERN_DEBUG, &info->bridge->dev,
@@ -374,7 +394,8 @@ static void add_resources(struct pci_root_info *info,
374 "ignoring host bridge window %pR (conflicts with %s %pR)\n", 394 "ignoring host bridge window %pR (conflicts with %s %pR)\n",
375 res, conflict->name, conflict); 395 res, conflict->name, conflict);
376 else 396 else
377 pci_add_resource(resources, res); 397 pci_add_resource_offset(resources, res,
398 info->res_offset[i]);
378 } 399 }
379} 400}
380 401
@@ -382,6 +403,8 @@ static void free_pci_root_info_res(struct pci_root_info *info)
382{ 403{
383 kfree(info->res); 404 kfree(info->res);
384 info->res = NULL; 405 info->res = NULL;
406 kfree(info->res_offset);
407 info->res_offset = NULL;
385 info->res_num = 0; 408 info->res_num = 0;
386} 409}
387 410
@@ -432,10 +455,20 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
432 return; 455 return;
433 456
434 size = sizeof(*info->res) * info->res_num; 457 size = sizeof(*info->res) * info->res_num;
435 info->res_num = 0;
436 info->res = kzalloc(size, GFP_KERNEL); 458 info->res = kzalloc(size, GFP_KERNEL);
437 if (!info->res) 459 if (!info->res) {
460 info->res_num = 0;
461 return;
462 }
463
464 size = sizeof(*info->res_offset) * info->res_num;
465 info->res_num = 0;
466 info->res_offset = kzalloc(size, GFP_KERNEL);
467 if (!info->res_offset) {
468 kfree(info->res);
469 info->res = NULL;
438 return; 470 return;
471 }
439 472
440 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, 473 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
441 info); 474 info);
@@ -455,6 +488,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
455 int pxm; 488 int pxm;
456#endif 489#endif
457 490
491 if (pci_ignore_seg)
492 domain = 0;
493
458 if (domain && !pci_domains_supported) { 494 if (domain && !pci_domains_supported) {
459 printk(KERN_WARNING "pci_bus %04x:%02x: " 495 printk(KERN_WARNING "pci_bus %04x:%02x: "
460 "ignored (multiple domains not supported)\n", 496 "ignored (multiple domains not supported)\n",
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 720e973fc34a..1b1dda90a945 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -17,6 +17,7 @@
17#include <asm/io.h> 17#include <asm/io.h>
18#include <asm/smp.h> 18#include <asm/smp.h>
19#include <asm/pci_x86.h> 19#include <asm/pci_x86.h>
20#include <asm/setup.h>
20 21
21unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | 22unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
22 PCI_PROBE_MMCONF; 23 PCI_PROBE_MMCONF;
@@ -608,6 +609,35 @@ unsigned int pcibios_assign_all_busses(void)
608 return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; 609 return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
609} 610}
610 611
612int pcibios_add_device(struct pci_dev *dev)
613{
614 struct setup_data *data;
615 struct pci_setup_rom *rom;
616 u64 pa_data;
617
618 pa_data = boot_params.hdr.setup_data;
619 while (pa_data) {
620 data = phys_to_virt(pa_data);
621
622 if (data->type == SETUP_PCI) {
623 rom = (struct pci_setup_rom *)data;
624
625 if ((pci_domain_nr(dev->bus) == rom->segment) &&
626 (dev->bus->number == rom->bus) &&
627 (PCI_SLOT(dev->devfn) == rom->device) &&
628 (PCI_FUNC(dev->devfn) == rom->function) &&
629 (dev->vendor == rom->vendor) &&
630 (dev->device == rom->devid)) {
631 dev->rom = pa_data +
632 offsetof(struct pci_setup_rom, romdata);
633 dev->romlen = rom->pcilen;
634 }
635 }
636 pa_data = data->next;
637 }
638 return 0;
639}
640
611int pcibios_enable_device(struct pci_dev *dev, int mask) 641int pcibios_enable_device(struct pci_dev *dev, int mask)
612{ 642{
613 int err; 643 int err;
@@ -626,7 +656,7 @@ void pcibios_disable_device (struct pci_dev *dev)
626 pcibios_disable_irq(dev); 656 pcibios_disable_irq(dev);
627} 657}
628 658
629int pci_ext_cfg_avail(struct pci_dev *dev) 659int pci_ext_cfg_avail(void)
630{ 660{
631 if (raw_pci_ext_ops) 661 if (raw_pci_ext_ops)
632 return 1; 662 return 1;
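pcibios_add_device() above walks the setup_data chain left by the boot loader and, when it finds a SETUP_PCI entry whose segment/bus/devfn and vendor/device IDs match the device being added, records where the stashed option ROM image lives (dev->rom, dev->romlen). A hedged sketch of just the matching predicate; the struct layout here is illustrative, not the exact kernel definition:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative subset of the fields compared in the hunk above. */
struct rom_entry {
        uint16_t segment, bus, device, function;
        uint16_t vendor, devid;
};

struct pci_id {
        uint16_t segment, bus, slot, func;
        uint16_t vendor, device;
};

static bool rom_matches(const struct rom_entry *rom, const struct pci_id *dev)
{
        return rom->segment  == dev->segment &&
               rom->bus      == dev->bus &&
               rom->device   == dev->slot &&
               rom->function == dev->func &&
               rom->vendor   == dev->vendor &&
               rom->devid    == dev->device;
}

int main(void)
{
        struct rom_entry rom = { 0, 3, 0, 0, 0x10de, 0x1234 };
        struct pci_id dev    = { 0, 3, 0, 0, 0x10de, 0x1234 };

        printf("match: %d\n", rom_matches(&rom, &dev));
        return 0;
}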
diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
new file mode 100644
index 000000000000..7307d9d12d15
--- /dev/null
+++ b/arch/x86/pci/numachip.c
@@ -0,0 +1,129 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-specific PCI code
7 *
8 * Copyright (C) 2012 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 * PCI accessor functions derived from mmconfig_64.c
13 *
14 */
15
16#include <linux/pci.h>
17#include <asm/pci_x86.h>
18
19static u8 limit __read_mostly;
20
21static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
22{
23 struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
24
25 if (cfg && cfg->virt)
26 return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
27 return NULL;
28}
29
30static int pci_mmcfg_read_numachip(unsigned int seg, unsigned int bus,
31 unsigned int devfn, int reg, int len, u32 *value)
32{
33 char __iomem *addr;
34
35 /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
36 if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
37err: *value = -1;
38 return -EINVAL;
39 }
40
41 /* Ensure AMD Northbridges don't decode reads to other devices */
42 if (unlikely(bus == 0 && devfn >= limit)) {
43 *value = -1;
44 return 0;
45 }
46
47 rcu_read_lock();
48 addr = pci_dev_base(seg, bus, devfn);
49 if (!addr) {
50 rcu_read_unlock();
51 goto err;
52 }
53
54 switch (len) {
55 case 1:
56 *value = mmio_config_readb(addr + reg);
57 break;
58 case 2:
59 *value = mmio_config_readw(addr + reg);
60 break;
61 case 4:
62 *value = mmio_config_readl(addr + reg);
63 break;
64 }
65 rcu_read_unlock();
66
67 return 0;
68}
69
70static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus,
71 unsigned int devfn, int reg, int len, u32 value)
72{
73 char __iomem *addr;
74
75 /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
76 if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
77 return -EINVAL;
78
79 /* Ensure AMD Northbridges don't decode writes to other devices */
80 if (unlikely(bus == 0 && devfn >= limit))
81 return 0;
82
83 rcu_read_lock();
84 addr = pci_dev_base(seg, bus, devfn);
85 if (!addr) {
86 rcu_read_unlock();
87 return -EINVAL;
88 }
89
90 switch (len) {
91 case 1:
92 mmio_config_writeb(addr + reg, value);
93 break;
94 case 2:
95 mmio_config_writew(addr + reg, value);
96 break;
97 case 4:
98 mmio_config_writel(addr + reg, value);
99 break;
100 }
101 rcu_read_unlock();
102
103 return 0;
104}
105
106const struct pci_raw_ops pci_mmcfg_numachip = {
107 .read = pci_mmcfg_read_numachip,
108 .write = pci_mmcfg_write_numachip,
109};
110
111int __init pci_numachip_init(void)
112{
113 int ret = 0;
114 u32 val;
115
116 /* For remote I/O, restrict bus 0 access to the actual number of AMD
117 Northbridges, which starts at device number 0x18 */
118 ret = raw_pci_read(0, 0, PCI_DEVFN(0x18, 0), 0x60, sizeof(val), &val);
119 if (ret)
120 goto out;
121
122 /* HyperTransport fabric size in bits 6:4 */
123 limit = PCI_DEVFN(0x18 + ((val >> 4) & 7) + 1, 0);
124
125 /* Use NumaChip PCI accessors for non-extended and extended access */
126 raw_pci_ops = raw_pci_ext_ops = &pci_mmcfg_numachip;
127out:
128 return ret;
129}
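pci_numachip_init() above reads the HyperTransport fabric size from the first Northbridge's config space (bits 6:4 of register 0x60) and turns it into a bus-0 devfn limit, so accesses past the real Northbridges are refused rather than being decoded by another node. A small sketch of that limit calculation, with PCI_DEVFN defined the usual way:

#include <stdint.h>
#include <stdio.h>

/* Standard encoding: device in bits 7:3, function in bits 2:0. */
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))

static uint8_t northbridge_limit(uint32_t reg60)
{
        /* HyperTransport fabric size in bits 6:4 => number of nodes - 1 */
        unsigned int nodes = ((reg60 >> 4) & 7) + 1;

        /* AMD Northbridges start at device 0x18; refuse anything past them */
        return PCI_DEVFN(0x18 + nodes, 0);
}

int main(void)
{
        /* e.g. a 4-node fabric reports 3 in bits 6:4 */
        printf("limit devfn = %#x\n", northbridge_limit(3 << 4));
        return 0;
}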
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 92525cb8e54c..f8ab4945892e 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -105,8 +105,11 @@ static void ce4100_serial_fixup(int port, struct uart_port *up,
105 up->membase = 105 up->membase =
106 (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE); 106 (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
107 up->membase += up->mapbase & ~PAGE_MASK; 107 up->membase += up->mapbase & ~PAGE_MASK;
108 up->mapbase += port * 0x100;
109 up->membase += port * 0x100;
108 up->iotype = UPIO_MEM32; 110 up->iotype = UPIO_MEM32;
109 up->regshift = 2; 111 up->regshift = 2;
112 up->irq = 4;
110 } 113 }
111#endif 114#endif
112 up->iobase = 0; 115 up->iobase = 0;
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f6a0c1b8e518..d9c1b95af17c 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -39,6 +39,8 @@ void efi_bgrt_init(void)
39 if (ACPI_FAILURE(status)) 39 if (ACPI_FAILURE(status))
40 return; 40 return;
41 41
42 if (bgrt_tab->header.length < sizeof(*bgrt_tab))
43 return;
42 if (bgrt_tab->version != 1) 44 if (bgrt_tab->version != 1)
43 return; 45 return;
44 if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) 46 if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
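The added check above rejects a BGRT table whose header.length is smaller than the structure the code is about to dereference, before any field past the header is read. The same defensive pattern in a standalone sketch (the struct layout is illustrative, not the ACPI definition):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative table layout: a length-prefixed header followed by fields. */
struct bgrt_like {
        struct { uint32_t length; } header;
        uint8_t  version;
        uint8_t  status;
        uint8_t  image_type;
        uint64_t image_address;
};

static int parse_table(const void *blob, size_t blob_len)
{
        struct bgrt_like tab;

        if (blob_len < sizeof(tab))
                return -1;                       /* truncated buffer */
        memcpy(&tab, blob, sizeof(tab));

        /* reject tables that claim to be shorter than what we will read */
        if (tab.header.length < sizeof(tab))
                return -1;
        if (tab.version != 1)
                return -1;
        if (tab.image_type != 0 || !tab.image_address)
                return -1;
        return 0;
}

int main(void)
{
        struct bgrt_like ok = { { sizeof(ok) }, 1, 0, 0, 0x100000 };

        printf("%d\n", parse_table(&ok, sizeof(ok)));
        return 0;
}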
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 218cdb16163c..120cee1c3f8d 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -21,6 +21,7 @@
21#include <asm/suspend.h> 21#include <asm/suspend.h>
22#include <asm/debugreg.h> 22#include <asm/debugreg.h>
23#include <asm/fpu-internal.h> /* pcntxt_mask */ 23#include <asm/fpu-internal.h> /* pcntxt_mask */
24#include <asm/cpu.h>
24 25
25#ifdef CONFIG_X86_32 26#ifdef CONFIG_X86_32
26static struct saved_context saved_context; 27static struct saved_context saved_context;
@@ -237,3 +238,84 @@ void restore_processor_state(void)
237#ifdef CONFIG_X86_32 238#ifdef CONFIG_X86_32
238EXPORT_SYMBOL(restore_processor_state); 239EXPORT_SYMBOL(restore_processor_state);
239#endif 240#endif
241
242/*
243 * When bsp_check() is called in hibernate and suspend, cpu hotplug
244 * is disabled already. So it's unnecessary to handle the race condition between
245 * cpumask query and cpu hotplug.
246 */
247static int bsp_check(void)
248{
249 if (cpumask_first(cpu_online_mask) != 0) {
250 pr_warn("CPU0 is offline.\n");
251 return -ENODEV;
252 }
253
254 return 0;
255}
256
257static int bsp_pm_callback(struct notifier_block *nb, unsigned long action,
258 void *ptr)
259{
260 int ret = 0;
261
262 switch (action) {
263 case PM_SUSPEND_PREPARE:
264 case PM_HIBERNATION_PREPARE:
265 ret = bsp_check();
266 break;
267#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
268 case PM_RESTORE_PREPARE:
269 /*
270 * When system resumes from hibernation, online CPU0 because
271 * 1. it's required for resume and
272 * 2. the CPU was online before hibernation
273 */
274 if (!cpu_online(0))
275 _debug_hotplug_cpu(0, 1);
276 break;
277 case PM_POST_RESTORE:
278 /*
279 * When a resume really happens, this code won't be called.
280 *
281 * This code is called only when user space hibernation software
282 * prepares the snapshot device during boot time. So we just
283 * call _debug_hotplug_cpu() to restore to CPU0's state prior to
284 * preparing the snapshot device.
285 *
286 * This works for normal boot case in our CPU0 hotplug debug
287 * mode, i.e. CPU0 is offline and user mode hibernation
288 * software initializes during boot time.
289 *
290 * If CPU0 is online and user application accesses snapshot
291 * device after boot time, this will offline CPU0 and user may
292 * see different CPU0 state before and after accessing
293 * the snapshot device. But hopefully this is not a case when
294 * the snapshot device. But hopefully this is not the case when a
295 * user is debugging CPU0 hotplug. Even if users hit this case,
296 *
297 * To simplify this debug code, we only consider normal boot
298 * case. Otherwise we need to remember CPU0's state and restore
299 * to that state and resolve racy conditions etc.
300 */
301 _debug_hotplug_cpu(0, 0);
302 break;
303#endif
304 default:
305 break;
306 }
307 return notifier_from_errno(ret);
308}
309
310static int __init bsp_pm_check_init(void)
311{
312 /*
313 * Set this bsp_pm_callback as lower priority than
314 * cpu_hotplug_pm_callback. So cpu_hotplug_pm_callback will be called
315 * earlier to disable cpu hotplug before bsp online check.
316 */
317 pm_notifier(bsp_pm_callback, -INT_MAX);
318 return 0;
319}
320
321core_initcall(bsp_pm_check_init);
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index a47103fbc692..ee3c220ee500 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -8,7 +8,7 @@
8# 8#
90 i386 restart_syscall sys_restart_syscall 90 i386 restart_syscall sys_restart_syscall
101 i386 exit sys_exit 101 i386 exit sys_exit
112 i386 fork ptregs_fork stub32_fork 112 i386 fork sys_fork stub32_fork
123 i386 read sys_read 123 i386 read sys_read
134 i386 write sys_write 134 i386 write sys_write
145 i386 open sys_open compat_sys_open 145 i386 open sys_open compat_sys_open
@@ -126,7 +126,7 @@
126117 i386 ipc sys_ipc sys32_ipc 126117 i386 ipc sys_ipc sys32_ipc
127118 i386 fsync sys_fsync 127118 i386 fsync sys_fsync
128119 i386 sigreturn ptregs_sigreturn stub32_sigreturn 128119 i386 sigreturn ptregs_sigreturn stub32_sigreturn
129120 i386 clone ptregs_clone stub32_clone 129120 i386 clone sys_clone stub32_clone
130121 i386 setdomainname sys_setdomainname 130121 i386 setdomainname sys_setdomainname
131122 i386 uname sys_newuname 131122 i386 uname sys_newuname
132123 i386 modify_ldt sys_modify_ldt 132123 i386 modify_ldt sys_modify_ldt
@@ -196,7 +196,7 @@
196187 i386 sendfile sys_sendfile sys32_sendfile 196187 i386 sendfile sys_sendfile sys32_sendfile
197188 i386 getpmsg 197188 i386 getpmsg
198189 i386 putpmsg 198189 i386 putpmsg
199190 i386 vfork ptregs_vfork stub32_vfork 199190 i386 vfork sys_vfork stub32_vfork
200191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 200191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
201192 i386 mmap2 sys_mmap_pgoff 201192 i386 mmap2 sys_mmap_pgoff
202193 i386 truncate64 sys_truncate64 sys32_truncate64 202193 i386 truncate64 sys_truncate64 sys32_truncate64
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index ddcf39b1a18d..e6773dc8ac41 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -356,7 +356,7 @@ END {
356 exit 1 356 exit 1
357 # print escape opcode map's array 357 # print escape opcode map's array
358 print "/* Escape opcode map array */" 358 print "/* Escape opcode map array */"
359 print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ 359 print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
360 "[INAT_LSTPFX_MAX + 1] = {" 360 "[INAT_LSTPFX_MAX + 1] = {"
361 for (i = 0; i < geid; i++) 361 for (i = 0; i < geid; i++)
362 for (j = 0; j < max_lprefix; j++) 362 for (j = 0; j < max_lprefix; j++)
@@ -365,7 +365,7 @@ END {
365 print "};\n" 365 print "};\n"
366 # print group opcode map's array 366 # print group opcode map's array
367 print "/* Group opcode map array */" 367 print "/* Group opcode map array */"
368 print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ 368 print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
369 "[INAT_LSTPFX_MAX + 1] = {" 369 "[INAT_LSTPFX_MAX + 1] = {"
370 for (i = 0; i < ggid; i++) 370 for (i = 0; i < ggid; i++)
371 for (j = 0; j < max_lprefix; j++) 371 for (j = 0; j < max_lprefix; j++)
@@ -374,7 +374,7 @@ END {
374 print "};\n" 374 print "};\n"
375 # print AVX opcode map's array 375 # print AVX opcode map's array
376 print "/* AVX opcode map array */" 376 print "/* AVX opcode map array */"
377 print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ 377 print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
378 "[INAT_LSTPFX_MAX + 1] = {" 378 "[INAT_LSTPFX_MAX + 1] = {"
379 for (i = 0; i < gaid; i++) 379 for (i = 0; i < gaid; i++)
380 for (j = 0; j < max_lprefix; j++) 380 for (j = 0; j < max_lprefix; j++)
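The awk fix above corrects a misplaced qualifier: the original output duplicated const on the pointed-to type, leaving the pointer arrays themselves writable, whereas the intent is a const pointer to const data so the generated tables can go into .rodata. A standalone illustration of the difference (types and names here are illustrative, not from the generated code):

typedef unsigned int insn_attr_t;

static const insn_attr_t row[2] = { 0, 1 };

/* Duplicate qualifier: both consts apply to the element type, so the
 * pointer itself may still be reassigned (compilers may warn). */
const insn_attr_t const *dup_const = row;

/* What the generator now emits: the pointer itself is const as well. */
const insn_attr_t *const ptr_const = row;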
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 07611759ce35..983997041963 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -25,13 +25,14 @@ config X86_32
25 select HAVE_AOUT 25 select HAVE_AOUT
26 select ARCH_WANT_IPC_PARSE_VERSION 26 select ARCH_WANT_IPC_PARSE_VERSION
27 select MODULES_USE_ELF_REL 27 select MODULES_USE_ELF_REL
28 select CLONE_BACKWARDS
28 29
29config X86_64 30config X86_64
30 def_bool 64BIT 31 def_bool 64BIT
31 select MODULES_USE_ELF_RELA 32 select MODULES_USE_ELF_RELA
32 33
33config RWSEM_XCHGADD_ALGORITHM 34config RWSEM_XCHGADD_ALGORITHM
34 def_bool X86_XADD && 64BIT 35 def_bool 64BIT
35 36
36config RWSEM_GENERIC_SPINLOCK 37config RWSEM_GENERIC_SPINLOCK
37 def_bool !RWSEM_XCHGADD_ALGORITHM 38 def_bool !RWSEM_XCHGADD_ALGORITHM
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index ca255a805ed9..bd9a89b67e41 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,5 +1,3 @@
1extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
2 void __user *parent_tid, void __user *child_tid);
3#ifdef __i386__ 1#ifdef __i386__
4#include "syscalls_32.h" 2#include "syscalls_32.h"
5#else 3#else
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 232e60504b3a..812e98c098e4 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -24,13 +24,10 @@
24 24
25#define old_mmap sys_old_mmap 25#define old_mmap sys_old_mmap
26 26
27#define ptregs_fork sys_fork
28#define ptregs_iopl sys_iopl 27#define ptregs_iopl sys_iopl
29#define ptregs_vm86old sys_vm86old 28#define ptregs_vm86old sys_vm86old
30#define ptregs_clone i386_clone
31#define ptregs_vm86 sys_vm86 29#define ptregs_vm86 sys_vm86
32#define ptregs_sigaltstack sys_sigaltstack 30#define ptregs_sigaltstack sys_sigaltstack
33#define ptregs_vfork sys_vfork
34 31
35#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; 32#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
36#include <asm/syscalls_32.h> 33#include <asm/syscalls_32.h>
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index db444c7218fe..e8bcea99acdb 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -6,21 +6,6 @@
6#include <linux/syscalls.h> 6#include <linux/syscalls.h>
7#include <sysdep/syscalls.h> 7#include <sysdep/syscalls.h>
8 8
9/*
10 * The prototype on i386 is:
11 *
12 * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls
13 *
14 * and the "newtls" arg. on i386 is read by copy_thread directly from the
15 * register saved on the stack.
16 */
17long i386_clone(unsigned long clone_flags, unsigned long newsp,
18 int __user *parent_tid, void *newtls, int __user *child_tid)
19{
20 return sys_clone(clone_flags, newsp, parent_tid, child_tid);
21}
22
23
24long sys_sigaction(int sig, const struct old_sigaction __user *act, 9long sys_sigaction(int sig, const struct old_sigaction __user *act,
25 struct old_sigaction __user *oact) 10 struct old_sigaction __user *oact)
26{ 11{
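The i386_clone() wrapper becomes redundant because UML's X86_32 now selects CLONE_BACKWARDS (see the um/Kconfig hunk above), so the generic sys_clone() already takes its arguments in i386 order. A hedged sketch of the two orderings, paraphrased from include/linux/syscalls.h of this era (__user annotations omitted for brevity; not copied from the patch):

#ifdef CONFIG_CLONE_BACKWARDS
/* i386 argument order: (flags, stack, parent_tid, tls, child_tid).
 * This matches what the 32-bit syscall passes, so UML no longer needs
 * its i386_clone() reordering shim. */
long sys_clone(unsigned long clone_flags, unsigned long newsp,
	       int *parent_tidptr, int tls_val, int *child_tidptr);
#else
/* Default order: child_tidptr comes before the TLS argument. */
long sys_clone(unsigned long clone_flags, unsigned long newsp,
	       int *parent_tidptr, int *child_tidptr, int tls_val);
#endif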
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 4df6c373421a..205ad328aa52 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -22,6 +22,7 @@
22#include <asm/hpet.h> 22#include <asm/hpet.h>
23#include <asm/unistd.h> 23#include <asm/unistd.h>
24#include <asm/io.h> 24#include <asm/io.h>
25#include <asm/pvclock.h>
25 26
26#define gtod (&VVAR(vsyscall_gtod_data)) 27#define gtod (&VVAR(vsyscall_gtod_data))
27 28
@@ -62,6 +63,76 @@ static notrace cycle_t vread_hpet(void)
62 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
63} 64}
64 65
66#ifdef CONFIG_PARAVIRT_CLOCK
67
68static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
69{
70 const struct pvclock_vsyscall_time_info *pvti_base;
71 int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
72 int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
73
74 BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
75
76 pvti_base = (struct pvclock_vsyscall_time_info *)
77 __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
78
79 return &pvti_base[offset];
80}
81
82static notrace cycle_t vread_pvclock(int *mode)
83{
84 const struct pvclock_vsyscall_time_info *pvti;
85 cycle_t ret;
86 u64 last;
87 u32 version;
88 u32 migrate_count;
89 u8 flags;
90 unsigned cpu, cpu1;
91
92
93 /*
94 * When looping to get a consistent (time-info, tsc) pair, we
95 * also need to deal with the possibility of being migrated to
96 * another vcpu, so always re-fetch the time-info for the current vcpu.
97 */
98 do {
99 cpu = __getcpu() & VGETCPU_CPU_MASK;
100 /* TODO: We can put vcpu id into higher bits of pvti.version.
101 * This will save a couple of cycles by getting rid of
102 * __getcpu() calls (Gleb).
103 */
104
105 pvti = get_pvti(cpu);
106
107 migrate_count = pvti->migrate_count;
108
109 version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
110
111 /*
112 * Test that we are still on the same cpu, as well as the version.
113 * We could have been migrated just after the first
114 * vgetcpu but before fetching the version, so we
115 * wouldn't notice a version change.
116 */
117 cpu1 = __getcpu() & VGETCPU_CPU_MASK;
118 } while (unlikely(cpu != cpu1 ||
119 (pvti->pvti.version & 1) ||
120 pvti->pvti.version != version ||
121 pvti->migrate_count != migrate_count));
122
123 if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
124 *mode = VCLOCK_NONE;
125
126 /* refer to tsc.c read_tsc() comment for rationale */
127 last = VVAR(vsyscall_gtod_data).clock.cycle_last;
128
129 if (likely(ret >= last))
130 return ret;
131
132 return last;
133}
134#endif
135
65notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 136notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
66{ 137{
67 long ret; 138 long ret;
@@ -80,7 +151,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
80} 151}
81 152
82 153
83notrace static inline u64 vgetsns(void) 154notrace static inline u64 vgetsns(int *mode)
84{ 155{
85 long v; 156 long v;
86 cycles_t cycles; 157 cycles_t cycles;
@@ -88,6 +159,10 @@ notrace static inline u64 vgetsns(void)
88 cycles = vread_tsc(); 159 cycles = vread_tsc();
89 else if (gtod->clock.vclock_mode == VCLOCK_HPET) 160 else if (gtod->clock.vclock_mode == VCLOCK_HPET)
90 cycles = vread_hpet(); 161 cycles = vread_hpet();
162#ifdef CONFIG_PARAVIRT_CLOCK
163 else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
164 cycles = vread_pvclock(mode);
165#endif
91 else 166 else
92 return 0; 167 return 0;
93 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; 168 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
@@ -107,7 +182,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
107 mode = gtod->clock.vclock_mode; 182 mode = gtod->clock.vclock_mode;
108 ts->tv_sec = gtod->wall_time_sec; 183 ts->tv_sec = gtod->wall_time_sec;
109 ns = gtod->wall_time_snsec; 184 ns = gtod->wall_time_snsec;
110 ns += vgetsns(); 185 ns += vgetsns(&mode);
111 ns >>= gtod->clock.shift; 186 ns >>= gtod->clock.shift;
112 } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 187 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
113 188
@@ -127,7 +202,7 @@ notrace static int do_monotonic(struct timespec *ts)
127 mode = gtod->clock.vclock_mode; 202 mode = gtod->clock.vclock_mode;
128 ts->tv_sec = gtod->monotonic_time_sec; 203 ts->tv_sec = gtod->monotonic_time_sec;
129 ns = gtod->monotonic_time_snsec; 204 ns = gtod->monotonic_time_snsec;
130 ns += vgetsns(); 205 ns += vgetsns(&mode);
131 ns >>= gtod->clock.shift; 206 ns >>= gtod->clock.shift;
132 } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 207 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
133 timespec_add_ns(ts, ns); 208 timespec_add_ns(ts, ns);
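For orientation, the new VCLOCK_PVCLOCK path above lets a KVM guest satisfy ordinary clock_gettime() calls entirely in the vDSO, falling back to the syscall only when the mode is VCLOCK_NONE. A minimal user-space illustration (not part of the patch):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* Resolved via __vdso_clock_gettime(); with a stable pvclock the
	 * kernel is never entered. */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("%ld.%09ld\n", (long)ts.tv_sec, (long)ts.tv_nsec);
	return 0;
}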
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 5463ad558573..2f94b039e55b 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -17,15 +17,10 @@ __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
17{ 17{
18 unsigned int p; 18 unsigned int p;
19 19
20 if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { 20 p = __getcpu();
21 /* Load per CPU data from RDTSCP */ 21
22 native_read_tscp(&p);
23 } else {
24 /* Load per CPU data from GDT */
25 asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
26 }
27 if (cpu) 22 if (cpu)
28 *cpu = p & 0xfff; 23 *cpu = p & VGETCPU_CPU_MASK;
29 if (node) 24 if (node)
30 *node = p >> 12; 25 *node = p >> 12;
31 return 0; 26 return 0;
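From user space this path is typically reached through sched_getcpu(), which glibc routes to __vdso_getcpu(); the CPU number comes from the low bits of the per-CPU word, now masked with VGETCPU_CPU_MASK in the kernel. A small illustration (not part of the patch):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* Resolved via the vDSO, no syscall on the fast path. */
	printf("running on cpu %d\n", sched_getcpu());
	return 0;
}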
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 00aaf047b39f..431e87544411 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -141,7 +141,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
141 * unaligned here as a result of stack start randomization. 141 * unaligned here as a result of stack start randomization.
142 */ 142 */
143 addr = PAGE_ALIGN(addr); 143 addr = PAGE_ALIGN(addr);
144 addr = align_addr(addr, NULL, ALIGN_VDSO); 144 addr = align_vdso_addr(addr);
145 145
146 return addr; 146 return addr;
147} 147}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index fdce49c7aff6..131dacd2748a 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,8 +6,9 @@ config XEN
6 bool "Xen guest support" 6 bool "Xen guest support"
7 select PARAVIRT 7 select PARAVIRT
8 select PARAVIRT_CLOCK 8 select PARAVIRT_CLOCK
9 select XEN_HAVE_PVMMU
9 depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) 10 depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
10 depends on X86_CMPXCHG && X86_TSC 11 depends on X86_TSC
11 help 12 help
12 This is the Linux Xen port. Enabling this will allow the 13 This is the Linux Xen port. Enabling this will allow the
13 kernel to boot in a paravirtualized environment under the 14 kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 586d83812b67..138e5667409a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -193,10 +193,11 @@ void xen_vcpu_restore(void)
193{ 193{
194 int cpu; 194 int cpu;
195 195
196 for_each_online_cpu(cpu) { 196 for_each_possible_cpu(cpu) {
197 bool other_cpu = (cpu != smp_processor_id()); 197 bool other_cpu = (cpu != smp_processor_id());
198 bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
198 199
199 if (other_cpu && 200 if (other_cpu && is_up &&
200 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) 201 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
201 BUG(); 202 BUG();
202 203
@@ -205,7 +206,7 @@ void xen_vcpu_restore(void)
205 if (have_vcpu_info_placement) 206 if (have_vcpu_info_placement)
206 xen_vcpu_setup(cpu); 207 xen_vcpu_setup(cpu);
207 208
208 if (other_cpu && 209 if (other_cpu && is_up &&
209 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) 210 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
210 BUG(); 211 BUG();
211 } 212 }
@@ -223,6 +224,21 @@ static void __init xen_banner(void)
223 version >> 16, version & 0xffff, extra.extraversion, 224 version >> 16, version & 0xffff, extra.extraversion,
224 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); 225 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
225} 226}
227/* Check if running on Xen version (major, minor) or later */
228bool
229xen_running_on_version_or_later(unsigned int major, unsigned int minor)
230{
231 unsigned int version;
232
233 if (!xen_domain())
234 return false;
235
236 version = HYPERVISOR_xen_version(XENVER_version, NULL);
237 if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) ||
238 ((version >> 16) > major))
239 return true;
240 return false;
241}
226 242
227#define CPUID_THERM_POWER_LEAF 6 243#define CPUID_THERM_POWER_LEAF 6
228#define APERFMPERF_PRESENT 0 244#define APERFMPERF_PRESENT 0
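A hedged sketch of how callers are expected to use the new helper; the real usage appears in xen_check_mwait() just below, and the feature name here is illustrative:

static bool example_feature_supported(void)
{
	/* Only expose the feature on Xen 4.2 or newer; on older hypervisors,
	 * or when not running under Xen at all, keep the safe default. */
	return xen_running_on_version_or_later(4, 2);
}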
@@ -287,8 +303,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
287 303
288static bool __init xen_check_mwait(void) 304static bool __init xen_check_mwait(void)
289{ 305{
290#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \ 306#ifdef CONFIG_ACPI
291 !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE)
292 struct xen_platform_op op = { 307 struct xen_platform_op op = {
293 .cmd = XENPF_set_processor_pminfo, 308 .cmd = XENPF_set_processor_pminfo,
294 .u.set_pminfo.id = -1, 309 .u.set_pminfo.id = -1,
@@ -309,6 +324,13 @@ static bool __init xen_check_mwait(void)
309 if (!xen_initial_domain()) 324 if (!xen_initial_domain())
310 return false; 325 return false;
311 326
327 /*
328 * When running on a platform earlier than Xen 4.2, do not expose
329 * MWAIT, to avoid the risk of loading the native ACPI PAD driver.
330 */
331 if (!xen_running_on_version_or_later(4, 2))
332 return false;
333
312 ax = 1; 334 ax = 1;
313 cx = 0; 335 cx = 0;
314 336
@@ -1495,51 +1517,72 @@ asmlinkage void __init xen_start_kernel(void)
1495#endif 1517#endif
1496} 1518}
1497 1519
1498void __ref xen_hvm_init_shared_info(void) 1520#ifdef CONFIG_XEN_PVHVM
1521#define HVM_SHARED_INFO_ADDR 0xFE700000UL
1522static struct shared_info *xen_hvm_shared_info;
1523static unsigned long xen_hvm_sip_phys;
1524static int xen_major, xen_minor;
1525
1526static void xen_hvm_connect_shared_info(unsigned long pfn)
1499{ 1527{
1500 int cpu;
1501 struct xen_add_to_physmap xatp; 1528 struct xen_add_to_physmap xatp;
1502 static struct shared_info *shared_info_page = 0;
1503 1529
1504 if (!shared_info_page)
1505 shared_info_page = (struct shared_info *)
1506 extend_brk(PAGE_SIZE, PAGE_SIZE);
1507 xatp.domid = DOMID_SELF; 1530 xatp.domid = DOMID_SELF;
1508 xatp.idx = 0; 1531 xatp.idx = 0;
1509 xatp.space = XENMAPSPACE_shared_info; 1532 xatp.space = XENMAPSPACE_shared_info;
1510 xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; 1533 xatp.gpfn = pfn;
1511 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 1534 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
1512 BUG(); 1535 BUG();
1513 1536
1514 HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; 1537}
1538static void __init xen_hvm_set_shared_info(struct shared_info *sip)
1539{
1540 int cpu;
1541
1542 HYPERVISOR_shared_info = sip;
1515 1543
1516 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info 1544 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
1517 * page, we use it in the event channel upcall and in some pvclock 1545 * page, we use it in the event channel upcall and in some pvclock
1518 * related functions. We don't need the vcpu_info placement 1546 * related functions. We don't need the vcpu_info placement
1519 * optimizations because we don't use any pv_mmu or pv_irq op on 1547 * optimizations because we don't use any pv_mmu or pv_irq op on
1520 * HVM. 1548 * HVM. */
1521 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is 1549 for_each_online_cpu(cpu)
1522 * online but xen_hvm_init_shared_info is run at resume time too and
1523 * in that case multiple vcpus might be online. */
1524 for_each_online_cpu(cpu) {
1525 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1550 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1551}
1552
1553/* Reconnect the shared_info pfn to a (new) mfn */
1554void xen_hvm_resume_shared_info(void)
1555{
1556 xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
1557}
1558
1559/* Xen tools prior to Xen 4 do not provide an E820_Reserved area for guest usage.
1560 * On these old tools the shared info page will be placed in E820_Ram.
1561 * Xen 4 provides an E820_Reserved area at 0xFC000000, and this code expects
1562 * that nothing is mapped up to HVM_SHARED_INFO_ADDR.
1563 * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used
1564 * here for the shared info page. */
1565static void __init xen_hvm_init_shared_info(void)
1566{
1567 if (xen_major < 4) {
1568 xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
1569 xen_hvm_sip_phys = __pa(xen_hvm_shared_info);
1570 } else {
1571 xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR;
1572 set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys);
1573 xen_hvm_shared_info =
1574 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
1526 } 1575 }
1576 xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
1577 xen_hvm_set_shared_info(xen_hvm_shared_info);
1527} 1578}
1528 1579
1529#ifdef CONFIG_XEN_PVHVM
1530static void __init init_hvm_pv_info(void) 1580static void __init init_hvm_pv_info(void)
1531{ 1581{
1532 int major, minor; 1582 uint32_t ecx, edx, pages, msr, base;
1533 uint32_t eax, ebx, ecx, edx, pages, msr, base;
1534 u64 pfn; 1583 u64 pfn;
1535 1584
1536 base = xen_cpuid_base(); 1585 base = xen_cpuid_base();
1537 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1538
1539 major = eax >> 16;
1540 minor = eax & 0xffff;
1541 printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
1542
1543 cpuid(base + 2, &pages, &msr, &ecx, &edx); 1586 cpuid(base + 2, &pages, &msr, &ecx, &edx);
1544 1587
1545 pfn = __pa(hypercall_page); 1588 pfn = __pa(hypercall_page);
@@ -1590,12 +1633,22 @@ static void __init xen_hvm_guest_init(void)
1590 1633
1591static bool __init xen_hvm_platform(void) 1634static bool __init xen_hvm_platform(void)
1592{ 1635{
1636 uint32_t eax, ebx, ecx, edx, base;
1637
1593 if (xen_pv_domain()) 1638 if (xen_pv_domain())
1594 return false; 1639 return false;
1595 1640
1596 if (!xen_cpuid_base()) 1641 base = xen_cpuid_base();
1642 if (!base)
1597 return false; 1643 return false;
1598 1644
1645 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1646
1647 xen_major = eax >> 16;
1648 xen_minor = eax & 0xffff;
1649
1650 printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor);
1651
1599 return true; 1652 return true;
1600} 1653}
1601 1654
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dcf5f2dd91ec..01de35c77221 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2497,8 +2497,10 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
2497 2497
2498int xen_remap_domain_mfn_range(struct vm_area_struct *vma, 2498int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
2499 unsigned long addr, 2499 unsigned long addr,
2500 unsigned long mfn, int nr, 2500 xen_pfn_t mfn, int nr,
2501 pgprot_t prot, unsigned domid) 2501 pgprot_t prot, unsigned domid,
2502 struct page **pages)
2503
2502{ 2504{
2503 struct remap_data rmd; 2505 struct remap_data rmd;
2504 struct mmu_update mmu_update[REMAP_BATCH_SIZE]; 2506 struct mmu_update mmu_update[REMAP_BATCH_SIZE];
@@ -2542,3 +2544,14 @@ out:
2542 return err; 2544 return err;
2543} 2545}
2544EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); 2546EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
2547
2548/* Returns: 0 success */
2549int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
2550 int numpgs, struct page **pages)
2551{
2552 if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
2553 return 0;
2554
2555 return -EINVAL;
2556}
2557EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
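A hedged sketch of how a caller (for example the privcmd driver) might pair the updated remap export with the new unmap export; everything other than the two exported functions is illustrative:

#include <linux/mm.h>
#include <xen/xen-ops.h>

static int example_map_then_unmap(struct vm_area_struct *vma,
				  xen_pfn_t mfn, int nr, unsigned domid)
{
	struct page **pages = NULL;	/* only used by auto-translated guests */
	int err;

	err = xen_remap_domain_mfn_range(vma, vma->vm_start, mfn, nr,
					 vma->vm_page_prot, domid, pages);
	if (err)
		return err;

	/* On PV guests this is a no-op returning 0; auto-translated
	 * (XENFEAT_auto_translated_physmap) guests are not handled yet
	 * and get -EINVAL. */
	return xen_unmap_domain_mfn_range(vma, nr, pages);
}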
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 353c50f18702..4f7d2599b484 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -254,7 +254,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
254 } 254 }
255 xen_init_lock_cpu(0); 255 xen_init_lock_cpu(0);
256 256
257 smp_store_cpu_info(0); 257 smp_store_boot_cpu_info();
258 cpu_data(0).x86_max_cores = 1; 258 cpu_data(0).x86_max_cores = 1;
259 259
260 for_each_possible_cpu(i) { 260 for_each_possible_cpu(i) {
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 45329c8c226e..ae8a00c39de4 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM 31#ifdef CONFIG_XEN_PVHVM
32 int cpu; 32 int cpu;
33 xen_hvm_init_shared_info(); 33 xen_hvm_resume_shared_info();
34 xen_callback_vector(); 34 xen_callback_vector();
35 xen_unplug_emulated_devices(); 35 xen_unplug_emulated_devices();
36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) { 36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index a95b41744ad0..d2e73d19d366 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -40,7 +40,7 @@ void xen_enable_syscall(void);
40void xen_vcpu_restore(void); 40void xen_vcpu_restore(void);
41 41
42void xen_callback_vector(void); 42void xen_callback_vector(void);
43void xen_hvm_init_shared_info(void); 43void xen_hvm_resume_shared_info(void);
44void xen_unplug_emulated_devices(void); 44void xen_unplug_emulated_devices(void);
45 45
46void __init xen_build_dynamic_phys_to_machine(void); 46void __init xen_build_dynamic_phys_to_machine(void);