aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig98
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/boot/Makefile4
-rw-r--r--arch/x86/boot/compressed/Makefile3
-rw-r--r--arch/x86/boot/compressed/eboot.c34
-rw-r--r--arch/x86/boot/compressed/eboot.h4
-rw-r--r--arch/x86/boot/header.S4
-rw-r--r--arch/x86/boot/mkcpustr.c2
-rw-r--r--arch/x86/configs/i386_defconfig23
-rw-r--r--arch/x86/configs/x86_64_defconfig23
-rw-r--r--arch/x86/ia32/ia32_signal.c31
-rw-r--r--arch/x86/ia32/ia32entry.S6
-rw-r--r--arch/x86/ia32/sys_ia32.c2
-rw-r--r--arch/x86/include/asm/alternative-asm.h9
-rw-r--r--arch/x86/include/asm/alternative.h36
-rw-r--r--arch/x86/include/asm/atomic.h4
-rw-r--r--arch/x86/include/asm/bitops.h14
-rw-r--r--arch/x86/include/asm/calling.h50
-rw-r--r--arch/x86/include/asm/checksum.h4
-rw-r--r--arch/x86/include/asm/cmpxchg.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h6
-rw-r--r--arch/x86/include/asm/fpu-internal.h422
-rw-r--r--arch/x86/include/asm/ftrace.h56
-rw-r--r--arch/x86/include/asm/futex.h19
-rw-r--r--arch/x86/include/asm/hardirq.h4
-rw-r--r--arch/x86/include/asm/hpet.h2
-rw-r--r--arch/x86/include/asm/i387.h29
-rw-r--r--arch/x86/include/asm/iommu_table.h6
-rw-r--r--arch/x86/include/asm/kprobes.h1
-rw-r--r--arch/x86/include/asm/kvm.h16
-rw-r--r--arch/x86/include/asm/kvm_host.h16
-rw-r--r--arch/x86/include/asm/mce.h13
-rw-r--r--arch/x86/include/asm/microcode.h10
-rw-r--r--arch/x86/include/asm/mmzone.h4
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/mutex.h4
-rw-r--r--arch/x86/include/asm/numa.h4
-rw-r--r--arch/x86/include/asm/pci.h2
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/include/asm/perf_regs.h33
-rw-r--r--arch/x86/include/asm/pgtable.h4
-rw-r--r--arch/x86/include/asm/pgtable_types.h10
-rw-r--r--arch/x86/include/asm/posix_types.h10
-rw-r--r--arch/x86/include/asm/processor-flags.h1
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/rcu.h32
-rw-r--r--arch/x86/include/asm/seccomp.h4
-rw-r--r--arch/x86/include/asm/signal.h4
-rw-r--r--arch/x86/include/asm/smap.h91
-rw-r--r--arch/x86/include/asm/string.h4
-rw-r--r--arch/x86/include/asm/suspend.h4
-rw-r--r--arch/x86/include/asm/svm.h205
-rw-r--r--arch/x86/include/asm/sys_ia32.h2
-rw-r--r--arch/x86/include/asm/thread_info.h10
-rw-r--r--arch/x86/include/asm/uaccess.h32
-rw-r--r--arch/x86/include/asm/uaccess_32.h3
-rw-r--r--arch/x86/include/asm/uaccess_64.h3
-rw-r--r--arch/x86/include/asm/uprobes.h3
-rw-r--r--arch/x86/include/asm/user.h4
-rw-r--r--arch/x86/include/asm/vdso.h3
-rw-r--r--arch/x86/include/asm/vmx.h127
-rw-r--r--arch/x86/include/asm/x86_init.h9
-rw-r--r--arch/x86/include/asm/xen/interface.h11
-rw-r--r--arch/x86/include/asm/xen/swiotlb-xen.h2
-rw-r--r--arch/x86/include/asm/xor.h4
-rw-r--r--arch/x86/include/asm/xor_32.h58
-rw-r--r--arch/x86/include/asm/xor_64.h63
-rw-r--r--arch/x86/include/asm/xor_avx.h54
-rw-r--r--arch/x86/include/asm/xsave.h23
-rw-r--r--arch/x86/include/uapi/asm/Kbuild6
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--arch/x86/kernel/acpi/sleep.c15
-rw-r--r--arch/x86/kernel/alternative.c111
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c67
-rw-r--r--arch/x86/kernel/cpu/bugs.c7
-rw-r--r--arch/x86/kernel/cpu/common.c63
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h12
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c168
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl5
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c30
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h6
-rw-r--r--arch/x86/kernel/cpu/proc.c5
-rw-r--r--arch/x86/kernel/cpuid.c5
-rw-r--r--arch/x86/kernel/devicetree.c51
-rw-r--r--arch/x86/kernel/entry_32.S100
-rw-r--r--arch/x86/kernel/entry_64.S172
-rw-r--r--arch/x86/kernel/ftrace.c73
-rw-r--r--arch/x86/kernel/head_32.S31
-rw-r--r--arch/x86/kernel/i387.c292
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/kprobes.c67
-rw-r--r--arch/x86/kernel/microcode_amd.c357
-rw-r--r--arch/x86/kernel/microcode_core.c67
-rw-r--r--arch/x86/kernel/microcode_intel.c3
-rw-r--r--arch/x86/kernel/msr.c5
-rw-r--r--arch/x86/kernel/perf_regs.c105
-rw-r--r--arch/x86/kernel/probe_roms.c2
-rw-r--r--arch/x86/kernel/process.c22
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c8
-rw-r--r--arch/x86/kernel/setup.c4
-rw-r--r--arch/x86/kernel/signal.c231
-rw-r--r--arch/x86/kernel/smpboot.c20
-rw-r--r--arch/x86/kernel/step.c53
-rw-r--r--arch/x86/kernel/traps.c174
-rw-r--r--arch/x86/kernel/uprobes.c52
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c6
-rw-r--r--arch/x86/kernel/x86_init.c4
-rw-r--r--arch/x86/kernel/xsave.c517
-rw-r--r--arch/x86/kvm/trace.h89
-rw-r--r--arch/x86/kvm/vmx.c12
-rw-r--r--arch/x86/kvm/x86.c3
-rw-r--r--arch/x86/lib/copy_user_64.S7
-rw-r--r--arch/x86/lib/copy_user_nocache_64.S3
-rw-r--r--arch/x86/lib/getuser.S10
-rw-r--r--arch/x86/lib/insn.c4
-rw-r--r--arch/x86/lib/putuser.S8
-rw-r--r--arch/x86/lib/usercopy_32.c13
-rw-r--r--arch/x86/lib/usercopy_64.c3
-rw-r--r--arch/x86/mm/fault.c31
-rw-r--r--arch/x86/mm/init_32.c13
-rw-r--r--arch/x86/mm/tlb.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c34
-rw-r--r--arch/x86/pci/acpi.c3
-rw-r--r--arch/x86/pci/mmconfig-shared.c2
-rw-r--r--arch/x86/pci/visws.c5
-rw-r--r--arch/x86/platform/efi/Makefile1
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c76
-rw-r--r--arch/x86/platform/efi/efi.c66
-rw-r--r--arch/x86/realmode/rm/wakeup.h2
-rw-r--r--arch/x86/realmode/rm/wakeup_asm.S29
-rw-r--r--arch/x86/syscalls/Makefile17
-rw-r--r--arch/x86/tools/Makefile2
-rw-r--r--arch/x86/xen/apic.c3
-rw-r--r--arch/x86/xen/enlighten.c15
-rw-r--r--arch/x86/xen/mmu.c188
-rw-r--r--arch/x86/xen/p2m.c92
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c52
-rw-r--r--arch/x86/xen/platform-pci-unplug.c1
-rw-r--r--arch/x86/xen/setup.c18
-rw-r--r--arch/x86/xen/smp.c6
-rw-r--r--arch/x86/xen/vga.c7
-rw-r--r--arch/x86/xen/xen-head.S56
-rw-r--r--arch/x86/xen/xen-ops.h3
151 files changed, 3641 insertions, 2074 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50a1d1f9b6d3..7f9a395c5254 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -7,11 +7,13 @@ config 64BIT
7 Say no to build a 32-bit kernel - formerly known as i386 7 Say no to build a 32-bit kernel - formerly known as i386
8 8
9config X86_32 9config X86_32
10 def_bool !64BIT 10 def_bool y
11 depends on !64BIT
11 select CLKSRC_I8253 12 select CLKSRC_I8253
12 13
13config X86_64 14config X86_64
14 def_bool 64BIT 15 def_bool y
16 depends on 64BIT
15 select X86_DEV_DMA_OPS 17 select X86_DEV_DMA_OPS
16 18
17### Arch settings 19### Arch settings
@@ -36,6 +38,7 @@ config X86
36 select HAVE_KRETPROBES 38 select HAVE_KRETPROBES
37 select HAVE_OPTPROBES 39 select HAVE_OPTPROBES
38 select HAVE_FTRACE_MCOUNT_RECORD 40 select HAVE_FTRACE_MCOUNT_RECORD
41 select HAVE_FENTRY if X86_64
39 select HAVE_C_RECORDMCOUNT 42 select HAVE_C_RECORDMCOUNT
40 select HAVE_DYNAMIC_FTRACE 43 select HAVE_DYNAMIC_FTRACE
41 select HAVE_FUNCTION_TRACER 44 select HAVE_FUNCTION_TRACER
@@ -60,6 +63,8 @@ config X86
60 select HAVE_MIXED_BREAKPOINTS_REGS 63 select HAVE_MIXED_BREAKPOINTS_REGS
61 select PERF_EVENTS 64 select PERF_EVENTS
62 select HAVE_PERF_EVENTS_NMI 65 select HAVE_PERF_EVENTS_NMI
66 select HAVE_PERF_REGS
67 select HAVE_PERF_USER_STACK_DUMP
63 select ANON_INODES 68 select ANON_INODES
64 select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 69 select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
65 select HAVE_CMPXCHG_LOCAL if !M386 70 select HAVE_CMPXCHG_LOCAL if !M386
@@ -97,9 +102,12 @@ config X86
97 select KTIME_SCALAR if X86_32 102 select KTIME_SCALAR if X86_32
98 select GENERIC_STRNCPY_FROM_USER 103 select GENERIC_STRNCPY_FROM_USER
99 select GENERIC_STRNLEN_USER 104 select GENERIC_STRNLEN_USER
105 select HAVE_RCU_USER_QS if X86_64
106 select HAVE_IRQ_TIME_ACCOUNTING
100 107
101config INSTRUCTION_DECODER 108config INSTRUCTION_DECODER
102 def_bool (KPROBES || PERF_EVENTS || UPROBES) 109 def_bool y
110 depends on KPROBES || PERF_EVENTS || UPROBES
103 111
104config OUTPUT_FORMAT 112config OUTPUT_FORMAT
105 string 113 string
@@ -127,13 +135,15 @@ config SBUS
127 bool 135 bool
128 136
129config NEED_DMA_MAP_STATE 137config NEED_DMA_MAP_STATE
130 def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG) 138 def_bool y
139 depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
131 140
132config NEED_SG_DMA_LENGTH 141config NEED_SG_DMA_LENGTH
133 def_bool y 142 def_bool y
134 143
135config GENERIC_ISA_DMA 144config GENERIC_ISA_DMA
136 def_bool ISA_DMA_API 145 def_bool y
146 depends on ISA_DMA_API
137 147
138config GENERIC_BUG 148config GENERIC_BUG
139 def_bool y 149 def_bool y
@@ -150,13 +160,16 @@ config GENERIC_GPIO
150 bool 160 bool
151 161
152config ARCH_MAY_HAVE_PC_FDC 162config ARCH_MAY_HAVE_PC_FDC
153 def_bool ISA_DMA_API 163 def_bool y
164 depends on ISA_DMA_API
154 165
155config RWSEM_GENERIC_SPINLOCK 166config RWSEM_GENERIC_SPINLOCK
156 def_bool !X86_XADD 167 def_bool y
168 depends on !X86_XADD
157 169
158config RWSEM_XCHGADD_ALGORITHM 170config RWSEM_XCHGADD_ALGORITHM
159 def_bool X86_XADD 171 def_bool y
172 depends on X86_XADD
160 173
161config GENERIC_CALIBRATE_DELAY 174config GENERIC_CALIBRATE_DELAY
162 def_bool y 175 def_bool y
@@ -752,7 +765,8 @@ config SWIOTLB
752 If unsure, say Y. 765 If unsure, say Y.
753 766
754config IOMMU_HELPER 767config IOMMU_HELPER
755 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) 768 def_bool y
769 depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
756 770
757config MAXSMP 771config MAXSMP
758 bool "Enable Maximum number of SMP Processors and NUMA Nodes" 772 bool "Enable Maximum number of SMP Processors and NUMA Nodes"
@@ -796,17 +810,6 @@ config SCHED_MC
796 making when dealing with multi-core CPU chips at a cost of slightly 810 making when dealing with multi-core CPU chips at a cost of slightly
797 increased overhead in some places. If unsure say N here. 811 increased overhead in some places. If unsure say N here.
798 812
799config IRQ_TIME_ACCOUNTING
800 bool "Fine granularity task level IRQ time accounting"
801 default n
802 ---help---
803 Select this option to enable fine granularity task irq time
804 accounting. This is done by reading a timestamp on each
805 transitions between softirq and hardirq state, so there can be a
806 small performance impact.
807
808 If in doubt, say N here.
809
810source "kernel/Kconfig.preempt" 813source "kernel/Kconfig.preempt"
811 814
812config X86_UP_APIC 815config X86_UP_APIC
@@ -871,6 +874,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
871 874
872config X86_MCE 875config X86_MCE
873 bool "Machine Check / overheating reporting" 876 bool "Machine Check / overheating reporting"
877 default y
874 ---help--- 878 ---help---
875 Machine Check support allows the processor to notify the 879 Machine Check support allows the processor to notify the
876 kernel if it detects a problem (e.g. overheating, data corruption). 880 kernel if it detects a problem (e.g. overheating, data corruption).
@@ -982,25 +986,25 @@ config X86_REBOOTFIXUPS
982 Say N otherwise. 986 Say N otherwise.
983 987
984config MICROCODE 988config MICROCODE
985 tristate "/dev/cpu/microcode - microcode support" 989 tristate "CPU microcode loading support"
986 select FW_LOADER 990 select FW_LOADER
987 ---help--- 991 ---help---
992
988 If you say Y here, you will be able to update the microcode on 993 If you say Y here, you will be able to update the microcode on
989 certain Intel and AMD processors. The Intel support is for the 994 certain Intel and AMD processors. The Intel support is for the
990 IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, 995 IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
991 Pentium 4, Xeon etc. The AMD support is for family 0x10 and 996 Xeon etc. The AMD support is for families 0x10 and later. You will
992 0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra. 997 obviously need the actual microcode binary data itself which is not
993 You will obviously need the actual microcode binary data itself 998 shipped with the Linux kernel.
994 which is not shipped with the Linux kernel.
995 999
996 This option selects the general module only, you need to select 1000 This option selects the general module only, you need to select
997 at least one vendor specific module as well. 1001 at least one vendor specific module as well.
998 1002
999 To compile this driver as a module, choose M here: the 1003 To compile this driver as a module, choose M here: the module
1000 module will be called microcode. 1004 will be called microcode.
1001 1005
1002config MICROCODE_INTEL 1006config MICROCODE_INTEL
1003 bool "Intel microcode patch loading support" 1007 bool "Intel microcode loading support"
1004 depends on MICROCODE 1008 depends on MICROCODE
1005 default MICROCODE 1009 default MICROCODE
1006 select FW_LOADER 1010 select FW_LOADER
@@ -1013,7 +1017,7 @@ config MICROCODE_INTEL
1013 <http://www.urbanmyth.org/microcode/>. 1017 <http://www.urbanmyth.org/microcode/>.
1014 1018
1015config MICROCODE_AMD 1019config MICROCODE_AMD
1016 bool "AMD microcode patch loading support" 1020 bool "AMD microcode loading support"
1017 depends on MICROCODE 1021 depends on MICROCODE
1018 select FW_LOADER 1022 select FW_LOADER
1019 ---help--- 1023 ---help---
@@ -1159,10 +1163,12 @@ config X86_PAE
1159 consumes more pagetable space per process. 1163 consumes more pagetable space per process.
1160 1164
1161config ARCH_PHYS_ADDR_T_64BIT 1165config ARCH_PHYS_ADDR_T_64BIT
1162 def_bool X86_64 || X86_PAE 1166 def_bool y
1167 depends on X86_64 || X86_PAE
1163 1168
1164config ARCH_DMA_ADDR_T_64BIT 1169config ARCH_DMA_ADDR_T_64BIT
1165 def_bool X86_64 || HIGHMEM64G 1170 def_bool y
1171 depends on X86_64 || HIGHMEM64G
1166 1172
1167config DIRECT_GBPAGES 1173config DIRECT_GBPAGES
1168 bool "Enable 1GB pages for kernel pagetables" if EXPERT 1174 bool "Enable 1GB pages for kernel pagetables" if EXPERT
@@ -1285,8 +1291,8 @@ config ARCH_SELECT_MEMORY_MODEL
1285 depends on ARCH_SPARSEMEM_ENABLE 1291 depends on ARCH_SPARSEMEM_ENABLE
1286 1292
1287config ARCH_MEMORY_PROBE 1293config ARCH_MEMORY_PROBE
1288 def_bool X86_64 1294 def_bool y
1289 depends on MEMORY_HOTPLUG 1295 depends on X86_64 && MEMORY_HOTPLUG
1290 1296
1291config ARCH_PROC_KCORE_TEXT 1297config ARCH_PROC_KCORE_TEXT
1292 def_bool y 1298 def_bool y
@@ -1487,6 +1493,17 @@ config ARCH_RANDOM
1487 If supported, this is a high bandwidth, cryptographically 1493 If supported, this is a high bandwidth, cryptographically
1488 secure hardware random number generator. 1494 secure hardware random number generator.
1489 1495
1496config X86_SMAP
1497 def_bool y
1498 prompt "Supervisor Mode Access Prevention" if EXPERT
1499 ---help---
1500 Supervisor Mode Access Prevention (SMAP) is a security
1501 feature in newer Intel processors. There is a small
1502 performance cost if this enabled and turned on; there is
1503 also a small increase in the kernel size if this is enabled.
1504
1505 If unsure, say Y.
1506
1490config EFI 1507config EFI
1491 bool "EFI runtime service support" 1508 bool "EFI runtime service support"
1492 depends on ACPI 1509 depends on ACPI
@@ -1975,7 +1992,6 @@ config PCI_MMCONFIG
1975 1992
1976config PCI_CNB20LE_QUIRK 1993config PCI_CNB20LE_QUIRK
1977 bool "Read CNB20LE Host Bridge Windows" if EXPERT 1994 bool "Read CNB20LE Host Bridge Windows" if EXPERT
1978 default n
1979 depends on PCI && EXPERIMENTAL 1995 depends on PCI && EXPERIMENTAL
1980 help 1996 help
1981 Read the PCI windows out of the CNB20LE host bridge. This allows 1997 Read the PCI windows out of the CNB20LE host bridge. This allows
@@ -2186,18 +2202,18 @@ config COMPAT
2186 depends on IA32_EMULATION || X86_X32 2202 depends on IA32_EMULATION || X86_X32
2187 select ARCH_WANT_OLD_COMPAT_IPC 2203 select ARCH_WANT_OLD_COMPAT_IPC
2188 2204
2205if COMPAT
2189config COMPAT_FOR_U64_ALIGNMENT 2206config COMPAT_FOR_U64_ALIGNMENT
2190 def_bool COMPAT 2207 def_bool y
2191 depends on X86_64
2192 2208
2193config SYSVIPC_COMPAT 2209config SYSVIPC_COMPAT
2194 def_bool y 2210 def_bool y
2195 depends on COMPAT && SYSVIPC 2211 depends on SYSVIPC
2196 2212
2197config KEYS_COMPAT 2213config KEYS_COMPAT
2198 bool 2214 def_bool y
2199 depends on COMPAT && KEYS 2215 depends on KEYS
2200 default y 2216endif
2201 2217
2202endmenu 2218endmenu
2203 2219
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 706e12e9984b..f3b86d0df44e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT
306 default X86_L1_CACHE_SHIFT 306 default X86_L1_CACHE_SHIFT
307 307
308config X86_CMPXCHG 308config X86_CMPXCHG
309 def_bool X86_64 || (X86_32 && !M386) 309 def_bool y
310 depends on X86_64 || (X86_32 && !M386)
310 311
311config X86_L1_CACHE_SHIFT 312config X86_L1_CACHE_SHIFT
312 int 313 int
@@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT
317 318
318config X86_XADD 319config X86_XADD
319 def_bool y 320 def_bool y
320 depends on X86_64 || !M386 321 depends on !M386
321 322
322config X86_PPRO_FENCE 323config X86_PPRO_FENCE
323 bool "PentiumPro memory ordering errata workaround" 324 bool "PentiumPro memory ordering errata workaround"
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index f7535bedc33f..ce03476d8c8f 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,7 @@ setup-y += video-bios.o
37targets += $(setup-y) 37targets += $(setup-y)
38hostprogs-y := mkcpustr tools/build 38hostprogs-y := mkcpustr tools/build
39 39
40HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \ 40HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \
41 -D__EXPORTED_HEADERS__ 41 -D__EXPORTED_HEADERS__
42 42
43$(obj)/cpu.o: $(obj)/cpustr.h 43$(obj)/cpu.o: $(obj)/cpustr.h
@@ -52,7 +52,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE
52 52
53# How to compile the 16-bit code. Note we always compile for -march=i386, 53# How to compile the 16-bit code. Note we always compile for -march=i386,
54# that way we can complain to the user if the CPU is insufficient. 54# that way we can complain to the user if the CPU is insufficient.
55KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ 55KBUILD_CFLAGS := $(USERINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
56 -DDISABLE_BRANCH_PROFILING \ 56 -DDISABLE_BRANCH_PROFILING \
57 -Wall -Wstrict-prototypes \ 57 -Wall -Wstrict-prototypes \
58 -march=i386 -mregparm=3 \ 58 -march=i386 -mregparm=3 \
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e398bb5d63bb..8a84501acb1b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
28 $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ 28 $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
29 $(obj)/piggy.o 29 $(obj)/piggy.o
30 30
31$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
32$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone
33
31ifeq ($(CONFIG_EFI_STUB), y) 34ifeq ($(CONFIG_EFI_STUB), y)
32 VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o 35 VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
33endif 36endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index b3e0227df2c9..c760e073963e 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -276,8 +276,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
276 nr_gops = size / sizeof(void *); 276 nr_gops = size / sizeof(void *);
277 for (i = 0; i < nr_gops; i++) { 277 for (i = 0; i < nr_gops; i++) {
278 struct efi_graphics_output_mode_info *info; 278 struct efi_graphics_output_mode_info *info;
279 efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; 279 efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
280 void *pciio; 280 bool conout_found = false;
281 void *dummy;
281 void *h = gop_handle[i]; 282 void *h = gop_handle[i];
282 283
283 status = efi_call_phys3(sys_table->boottime->handle_protocol, 284 status = efi_call_phys3(sys_table->boottime->handle_protocol,
@@ -285,19 +286,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
285 if (status != EFI_SUCCESS) 286 if (status != EFI_SUCCESS)
286 continue; 287 continue;
287 288
288 efi_call_phys3(sys_table->boottime->handle_protocol, 289 status = efi_call_phys3(sys_table->boottime->handle_protocol,
289 h, &pciio_proto, &pciio); 290 h, &conout_proto, &dummy);
291
292 if (status == EFI_SUCCESS)
293 conout_found = true;
290 294
291 status = efi_call_phys4(gop->query_mode, gop, 295 status = efi_call_phys4(gop->query_mode, gop,
292 gop->mode->mode, &size, &info); 296 gop->mode->mode, &size, &info);
293 if (status == EFI_SUCCESS && (!first_gop || pciio)) { 297 if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
294 /* 298 /*
295 * Apple provide GOPs that are not backed by 299 * Systems that use the UEFI Console Splitter may
296 * real hardware (they're used to handle 300 * provide multiple GOP devices, not all of which are
297 * multiple displays). The workaround is to 301 * backed by real hardware. The workaround is to search
298 * search for a GOP implementing the PCIIO 302 * for a GOP implementing the ConOut protocol, and if
299 * protocol, and if one isn't found, to just 303 * one isn't found, to just fall back to the first GOP.
300 * fallback to the first GOP.
301 */ 304 */
302 width = info->horizontal_resolution; 305 width = info->horizontal_resolution;
303 height = info->vertical_resolution; 306 height = info->vertical_resolution;
@@ -308,10 +311,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
308 pixels_per_scan_line = info->pixels_per_scan_line; 311 pixels_per_scan_line = info->pixels_per_scan_line;
309 312
310 /* 313 /*
311 * Once we've found a GOP supporting PCIIO, 314 * Once we've found a GOP supporting ConOut,
312 * don't bother looking any further. 315 * don't bother looking any further.
313 */ 316 */
314 if (pciio) 317 if (conout_found)
315 break; 318 break;
316 319
317 first_gop = gop; 320 first_gop = gop;
@@ -328,7 +331,6 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
328 si->lfb_width = width; 331 si->lfb_width = width;
329 si->lfb_height = height; 332 si->lfb_height = height;
330 si->lfb_base = fb_base; 333 si->lfb_base = fb_base;
331 si->lfb_size = fb_size;
332 si->pages = 1; 334 si->pages = 1;
333 335
334 if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { 336 if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
@@ -376,6 +378,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
376 si->rsvd_pos = 0; 378 si->rsvd_pos = 0;
377 } 379 }
378 380
381 si->lfb_size = si->lfb_linelength * si->lfb_height;
382
383 si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
384
379free_handle: 385free_handle:
380 efi_call_phys1(sys_table->boottime->free_pool, gop_handle); 386 efi_call_phys1(sys_table->boottime->free_pool, gop_handle);
381 return status; 387 return status;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 3b6e15627c55..e5b0a8f91c5f 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -14,6 +14,10 @@
14#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) 14#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
15#define EFI_READ_CHUNK_SIZE (1024 * 1024) 15#define EFI_READ_CHUNK_SIZE (1024 * 1024)
16 16
17#define EFI_CONSOLE_OUT_DEVICE_GUID \
18 EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
19 0x3f, 0xc1, 0x4d)
20
17#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 21#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
18#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 22#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
19#define PIXEL_BIT_MASK 2 23#define PIXEL_BIT_MASK 2
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b4e15dd6786a..2a017441b8b2 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -32,10 +32,6 @@ SYSSEG = 0x1000 /* historical load address >> 4 */
32#define SVGA_MODE ASK_VGA 32#define SVGA_MODE ASK_VGA
33#endif 33#endif
34 34
35#ifndef RAMDISK
36#define RAMDISK 0
37#endif
38
39#ifndef ROOT_RDONLY 35#ifndef ROOT_RDONLY
40#define ROOT_RDONLY 1 36#define ROOT_RDONLY 1
41#endif 37#endif
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 919257f526f2..4579eff0ef4d 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -15,6 +15,8 @@
15 15
16#include <stdio.h> 16#include <stdio.h>
17 17
18#include "../include/asm/required-features.h"
19#include "../include/asm/cpufeature.h"
18#include "../kernel/cpu/capflags.c" 20#include "../kernel/cpu/capflags.c"
19 21
20int main(void) 22int main(void)
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 119db67dcb03..5598547281a7 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y
8CONFIG_TASK_XACCT=y 8CONFIG_TASK_XACCT=y
9CONFIG_TASK_IO_ACCOUNTING=y 9CONFIG_TASK_IO_ACCOUNTING=y
10CONFIG_AUDIT=y 10CONFIG_AUDIT=y
11CONFIG_NO_HZ=y
12CONFIG_HIGH_RES_TIMERS=y
11CONFIG_LOG_BUF_SHIFT=18 13CONFIG_LOG_BUF_SHIFT=18
12CONFIG_CGROUPS=y 14CONFIG_CGROUPS=y
13CONFIG_CGROUP_FREEZER=y 15CONFIG_CGROUP_FREEZER=y
@@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y
34CONFIG_SUN_PARTITION=y 36CONFIG_SUN_PARTITION=y
35CONFIG_KARMA_PARTITION=y 37CONFIG_KARMA_PARTITION=y
36CONFIG_EFI_PARTITION=y 38CONFIG_EFI_PARTITION=y
37CONFIG_NO_HZ=y
38CONFIG_HIGH_RES_TIMERS=y
39CONFIG_SMP=y 39CONFIG_SMP=y
40CONFIG_X86_GENERIC=y 40CONFIG_X86_GENERIC=y
41CONFIG_HPET_TIMER=y 41CONFIG_HPET_TIMER=y
@@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
144CONFIG_DEBUG_DEVRES=y 144CONFIG_DEBUG_DEVRES=y
145CONFIG_CONNECTOR=y 145CONFIG_CONNECTOR=y
146CONFIG_BLK_DEV_LOOP=y 146CONFIG_BLK_DEV_LOOP=y
147CONFIG_BLK_DEV_RAM=y
148CONFIG_BLK_DEV_RAM_SIZE=16384
149CONFIG_BLK_DEV_SD=y 147CONFIG_BLK_DEV_SD=y
150CONFIG_BLK_DEV_SR=y 148CONFIG_BLK_DEV_SR=y
151CONFIG_BLK_DEV_SR_VENDOR=y 149CONFIG_BLK_DEV_SR_VENDOR=y
@@ -231,8 +229,6 @@ CONFIG_SND_HRTIMER=y
231CONFIG_SND_HDA_INTEL=y 229CONFIG_SND_HDA_INTEL=y
232CONFIG_SND_HDA_HWDEP=y 230CONFIG_SND_HDA_HWDEP=y
233CONFIG_HIDRAW=y 231CONFIG_HIDRAW=y
234CONFIG_HID_PID=y
235CONFIG_USB_HIDDEV=y
236CONFIG_HID_GYRATION=y 232CONFIG_HID_GYRATION=y
237CONFIG_LOGITECH_FF=y 233CONFIG_LOGITECH_FF=y
238CONFIG_HID_NTRIG=y 234CONFIG_HID_NTRIG=y
@@ -243,11 +239,11 @@ CONFIG_HID_SAMSUNG=y
243CONFIG_HID_SONY=y 239CONFIG_HID_SONY=y
244CONFIG_HID_SUNPLUS=y 240CONFIG_HID_SUNPLUS=y
245CONFIG_HID_TOPSEED=y 241CONFIG_HID_TOPSEED=y
242CONFIG_HID_PID=y
243CONFIG_USB_HIDDEV=y
246CONFIG_USB=y 244CONFIG_USB=y
247CONFIG_USB_DEBUG=y 245CONFIG_USB_DEBUG=y
248CONFIG_USB_ANNOUNCE_NEW_DEVICES=y 246CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
249CONFIG_USB_DEVICEFS=y
250# CONFIG_USB_DEVICE_CLASS is not set
251CONFIG_USB_MON=y 247CONFIG_USB_MON=y
252CONFIG_USB_EHCI_HCD=y 248CONFIG_USB_EHCI_HCD=y
253# CONFIG_USB_EHCI_TT_NEWSCHED is not set 249# CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -262,10 +258,9 @@ CONFIG_RTC_CLASS=y
262CONFIG_DMADEVICES=y 258CONFIG_DMADEVICES=y
263CONFIG_EEEPC_LAPTOP=y 259CONFIG_EEEPC_LAPTOP=y
264CONFIG_EFI_VARS=y 260CONFIG_EFI_VARS=y
265CONFIG_EXT3_FS=y 261CONFIG_EXT4_FS=y
266# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set 262CONFIG_EXT4_FS_POSIX_ACL=y
267CONFIG_EXT3_FS_POSIX_ACL=y 263CONFIG_EXT4_FS_SECURITY=y
268CONFIG_EXT3_FS_SECURITY=y
269CONFIG_QUOTA=y 264CONFIG_QUOTA=y
270CONFIG_QUOTA_NETLINK_INTERFACE=y 265CONFIG_QUOTA_NETLINK_INTERFACE=y
271# CONFIG_PRINT_QUOTA_WARNING is not set 266# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y
280CONFIG_TMPFS_POSIX_ACL=y 275CONFIG_TMPFS_POSIX_ACL=y
281CONFIG_HUGETLBFS=y 276CONFIG_HUGETLBFS=y
282CONFIG_NFS_FS=y 277CONFIG_NFS_FS=y
283CONFIG_NFS_V3=y
284CONFIG_NFS_V3_ACL=y 278CONFIG_NFS_V3_ACL=y
285CONFIG_NFS_V4=y 279CONFIG_NFS_V4=y
286CONFIG_ROOT_NFS=y 280CONFIG_ROOT_NFS=y
@@ -299,13 +293,11 @@ CONFIG_DEBUG_KERNEL=y
299CONFIG_SCHEDSTATS=y 293CONFIG_SCHEDSTATS=y
300CONFIG_TIMER_STATS=y 294CONFIG_TIMER_STATS=y
301CONFIG_DEBUG_STACK_USAGE=y 295CONFIG_DEBUG_STACK_USAGE=y
302CONFIG_SYSCTL_SYSCALL_CHECK=y
303CONFIG_BLK_DEV_IO_TRACE=y 296CONFIG_BLK_DEV_IO_TRACE=y
304CONFIG_PROVIDE_OHCI1394_DMA_INIT=y 297CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
305CONFIG_EARLY_PRINTK_DBGP=y 298CONFIG_EARLY_PRINTK_DBGP=y
306CONFIG_DEBUG_STACKOVERFLOW=y 299CONFIG_DEBUG_STACKOVERFLOW=y
307# CONFIG_DEBUG_RODATA_TEST is not set 300# CONFIG_DEBUG_RODATA_TEST is not set
308CONFIG_DEBUG_NX_TEST=m
309CONFIG_DEBUG_BOOT_PARAMS=y 301CONFIG_DEBUG_BOOT_PARAMS=y
310CONFIG_OPTIMIZE_INLINING=y 302CONFIG_OPTIMIZE_INLINING=y
311CONFIG_KEYS_DEBUG_PROC_KEYS=y 303CONFIG_KEYS_DEBUG_PROC_KEYS=y
@@ -316,4 +308,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y
316CONFIG_SECURITY_SELINUX_DISABLE=y 308CONFIG_SECURITY_SELINUX_DISABLE=y
317CONFIG_CRYPTO_AES_586=y 309CONFIG_CRYPTO_AES_586=y
318# CONFIG_CRYPTO_ANSI_CPRNG is not set 310# CONFIG_CRYPTO_ANSI_CPRNG is not set
319CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 76eb2903809f..671524d0f6c0 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y
8CONFIG_TASK_XACCT=y 8CONFIG_TASK_XACCT=y
9CONFIG_TASK_IO_ACCOUNTING=y 9CONFIG_TASK_IO_ACCOUNTING=y
10CONFIG_AUDIT=y 10CONFIG_AUDIT=y
11CONFIG_NO_HZ=y
12CONFIG_HIGH_RES_TIMERS=y
11CONFIG_LOG_BUF_SHIFT=18 13CONFIG_LOG_BUF_SHIFT=18
12CONFIG_CGROUPS=y 14CONFIG_CGROUPS=y
13CONFIG_CGROUP_FREEZER=y 15CONFIG_CGROUP_FREEZER=y
@@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y
34CONFIG_SUN_PARTITION=y 36CONFIG_SUN_PARTITION=y
35CONFIG_KARMA_PARTITION=y 37CONFIG_KARMA_PARTITION=y
36CONFIG_EFI_PARTITION=y 38CONFIG_EFI_PARTITION=y
37CONFIG_NO_HZ=y
38CONFIG_HIGH_RES_TIMERS=y
39CONFIG_SMP=y 39CONFIG_SMP=y
40CONFIG_CALGARY_IOMMU=y 40CONFIG_CALGARY_IOMMU=y
41CONFIG_NR_CPUS=64 41CONFIG_NR_CPUS=64
@@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
144CONFIG_DEBUG_DEVRES=y 144CONFIG_DEBUG_DEVRES=y
145CONFIG_CONNECTOR=y 145CONFIG_CONNECTOR=y
146CONFIG_BLK_DEV_LOOP=y 146CONFIG_BLK_DEV_LOOP=y
147CONFIG_BLK_DEV_RAM=y
148CONFIG_BLK_DEV_RAM_SIZE=16384
149CONFIG_BLK_DEV_SD=y 147CONFIG_BLK_DEV_SD=y
150CONFIG_BLK_DEV_SR=y 148CONFIG_BLK_DEV_SR=y
151CONFIG_BLK_DEV_SR_VENDOR=y 149CONFIG_BLK_DEV_SR_VENDOR=y
@@ -227,8 +225,6 @@ CONFIG_SND_HRTIMER=y
227CONFIG_SND_HDA_INTEL=y 225CONFIG_SND_HDA_INTEL=y
228CONFIG_SND_HDA_HWDEP=y 226CONFIG_SND_HDA_HWDEP=y
229CONFIG_HIDRAW=y 227CONFIG_HIDRAW=y
230CONFIG_HID_PID=y
231CONFIG_USB_HIDDEV=y
232CONFIG_HID_GYRATION=y 228CONFIG_HID_GYRATION=y
233CONFIG_LOGITECH_FF=y 229CONFIG_LOGITECH_FF=y
234CONFIG_HID_NTRIG=y 230CONFIG_HID_NTRIG=y
@@ -239,11 +235,11 @@ CONFIG_HID_SAMSUNG=y
239CONFIG_HID_SONY=y 235CONFIG_HID_SONY=y
240CONFIG_HID_SUNPLUS=y 236CONFIG_HID_SUNPLUS=y
241CONFIG_HID_TOPSEED=y 237CONFIG_HID_TOPSEED=y
238CONFIG_HID_PID=y
239CONFIG_USB_HIDDEV=y
242CONFIG_USB=y 240CONFIG_USB=y
243CONFIG_USB_DEBUG=y 241CONFIG_USB_DEBUG=y
244CONFIG_USB_ANNOUNCE_NEW_DEVICES=y 242CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
245CONFIG_USB_DEVICEFS=y
246# CONFIG_USB_DEVICE_CLASS is not set
247CONFIG_USB_MON=y 243CONFIG_USB_MON=y
248CONFIG_USB_EHCI_HCD=y 244CONFIG_USB_EHCI_HCD=y
249# CONFIG_USB_EHCI_TT_NEWSCHED is not set 245# CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -262,10 +258,9 @@ CONFIG_AMD_IOMMU_STATS=y
262CONFIG_INTEL_IOMMU=y 258CONFIG_INTEL_IOMMU=y
263# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set 259# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
264CONFIG_EFI_VARS=y 260CONFIG_EFI_VARS=y
265CONFIG_EXT3_FS=y 261CONFIG_EXT4_FS=y
266# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set 262CONFIG_EXT4_FS_POSIX_ACL=y
267CONFIG_EXT3_FS_POSIX_ACL=y 263CONFIG_EXT4_FS_SECURITY=y
268CONFIG_EXT3_FS_SECURITY=y
269CONFIG_QUOTA=y 264CONFIG_QUOTA=y
270CONFIG_QUOTA_NETLINK_INTERFACE=y 265CONFIG_QUOTA_NETLINK_INTERFACE=y
271# CONFIG_PRINT_QUOTA_WARNING is not set 266# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y
280CONFIG_TMPFS_POSIX_ACL=y 275CONFIG_TMPFS_POSIX_ACL=y
281CONFIG_HUGETLBFS=y 276CONFIG_HUGETLBFS=y
282CONFIG_NFS_FS=y 277CONFIG_NFS_FS=y
283CONFIG_NFS_V3=y
284CONFIG_NFS_V3_ACL=y 278CONFIG_NFS_V3_ACL=y
285CONFIG_NFS_V4=y 279CONFIG_NFS_V4=y
286CONFIG_ROOT_NFS=y 280CONFIG_ROOT_NFS=y
@@ -298,13 +292,11 @@ CONFIG_DEBUG_KERNEL=y
298CONFIG_SCHEDSTATS=y 292CONFIG_SCHEDSTATS=y
299CONFIG_TIMER_STATS=y 293CONFIG_TIMER_STATS=y
300CONFIG_DEBUG_STACK_USAGE=y 294CONFIG_DEBUG_STACK_USAGE=y
301CONFIG_SYSCTL_SYSCALL_CHECK=y
302CONFIG_BLK_DEV_IO_TRACE=y 295CONFIG_BLK_DEV_IO_TRACE=y
303CONFIG_PROVIDE_OHCI1394_DMA_INIT=y 296CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
304CONFIG_EARLY_PRINTK_DBGP=y 297CONFIG_EARLY_PRINTK_DBGP=y
305CONFIG_DEBUG_STACKOVERFLOW=y 298CONFIG_DEBUG_STACKOVERFLOW=y
306# CONFIG_DEBUG_RODATA_TEST is not set 299# CONFIG_DEBUG_RODATA_TEST is not set
307CONFIG_DEBUG_NX_TEST=m
308CONFIG_DEBUG_BOOT_PARAMS=y 300CONFIG_DEBUG_BOOT_PARAMS=y
309CONFIG_OPTIMIZE_INLINING=y 301CONFIG_OPTIMIZE_INLINING=y
310CONFIG_KEYS_DEBUG_PROC_KEYS=y 302CONFIG_KEYS_DEBUG_PROC_KEYS=y
@@ -314,4 +306,3 @@ CONFIG_SECURITY_SELINUX=y
314CONFIG_SECURITY_SELINUX_BOOTPARAM=y 306CONFIG_SECURITY_SELINUX_BOOTPARAM=y
315CONFIG_SECURITY_SELINUX_DISABLE=y 307CONFIG_SECURITY_SELINUX_DISABLE=y
316# CONFIG_CRYPTO_ANSI_CPRNG is not set 308# CONFIG_CRYPTO_ANSI_CPRNG is not set
317CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 673ac9b63d6b..efc6a958b71d 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -32,6 +32,7 @@
32#include <asm/sigframe.h> 32#include <asm/sigframe.h>
33#include <asm/sighandling.h> 33#include <asm/sighandling.h>
34#include <asm/sys_ia32.h> 34#include <asm/sys_ia32.h>
35#include <asm/smap.h>
35 36
36#define FIX_EFLAGS __FIX_EFLAGS 37#define FIX_EFLAGS __FIX_EFLAGS
37 38
@@ -162,7 +163,8 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
162 } 163 }
163 seg = get_fs(); 164 seg = get_fs();
164 set_fs(KERNEL_DS); 165 set_fs(KERNEL_DS);
165 ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); 166 ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
167 (stack_t __force __user *) &uoss, regs->sp);
166 set_fs(seg); 168 set_fs(seg);
167 if (ret >= 0 && uoss_ptr) { 169 if (ret >= 0 && uoss_ptr) {
168 if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) 170 if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
@@ -250,11 +252,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
250 252
251 get_user_ex(tmp, &sc->fpstate); 253 get_user_ex(tmp, &sc->fpstate);
252 buf = compat_ptr(tmp); 254 buf = compat_ptr(tmp);
253 err |= restore_i387_xstate_ia32(buf);
254 255
255 get_user_ex(*pax, &sc->ax); 256 get_user_ex(*pax, &sc->ax);
256 } get_user_catch(err); 257 } get_user_catch(err);
257 258
259 err |= restore_xstate_sig(buf, 1);
260
258 return err; 261 return err;
259} 262}
260 263
@@ -361,7 +364,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
361 */ 364 */
362static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, 365static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
363 size_t frame_size, 366 size_t frame_size,
364 void **fpstate) 367 void __user **fpstate)
365{ 368{
366 unsigned long sp; 369 unsigned long sp;
367 370
@@ -381,9 +384,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
381 sp = (unsigned long) ka->sa.sa_restorer; 384 sp = (unsigned long) ka->sa.sa_restorer;
382 385
383 if (used_math()) { 386 if (used_math()) {
384 sp = sp - sig_xstate_ia32_size; 387 unsigned long fx_aligned, math_size;
385 *fpstate = (struct _fpstate_ia32 *) sp; 388
386 if (save_i387_xstate_ia32(*fpstate) < 0) 389 sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
390 *fpstate = (struct _fpstate_ia32 __user *) sp;
391 if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
392 math_size) < 0)
387 return (void __user *) -1L; 393 return (void __user *) -1L;
388 } 394 }
389 395
@@ -448,7 +454,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
448 * These are actually not used anymore, but left because some 454 * These are actually not used anymore, but left because some
449 * gdb versions depend on them as a marker. 455 * gdb versions depend on them as a marker.
450 */ 456 */
451 put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); 457 put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
452 } put_user_catch(err); 458 } put_user_catch(err);
453 459
454 if (err) 460 if (err)
@@ -502,7 +508,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
502 put_user_ex(sig, &frame->sig); 508 put_user_ex(sig, &frame->sig);
503 put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); 509 put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo);
504 put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); 510 put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
505 err |= copy_siginfo_to_user32(&frame->info, info);
506 511
507 /* Create the ucontext. */ 512 /* Create the ucontext. */
508 if (cpu_has_xsave) 513 if (cpu_has_xsave)
@@ -514,9 +519,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
514 put_user_ex(sas_ss_flags(regs->sp), 519 put_user_ex(sas_ss_flags(regs->sp),
515 &frame->uc.uc_stack.ss_flags); 520 &frame->uc.uc_stack.ss_flags);
516 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 521 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
517 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
518 regs, set->sig[0]);
519 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
520 522
521 if (ka->sa.sa_flags & SA_RESTORER) 523 if (ka->sa.sa_flags & SA_RESTORER)
522 restorer = ka->sa.sa_restorer; 524 restorer = ka->sa.sa_restorer;
@@ -529,9 +531,14 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
529 * Not actually used anymore, but left because some gdb 531 * Not actually used anymore, but left because some gdb
530 * versions need it. 532 * versions need it.
531 */ 533 */
532 put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); 534 put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
533 } put_user_catch(err); 535 } put_user_catch(err);
534 536
537 err |= copy_siginfo_to_user32(&frame->info, info);
538 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
539 regs, set->sig[0]);
540 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
541
535 if (err) 542 if (err)
536 return -EFAULT; 543 return -EFAULT;
537 544
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 20e5f7ba0e6b..9c289504e680 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -14,6 +14,7 @@
14#include <asm/segment.h> 14#include <asm/segment.h>
15#include <asm/irqflags.h> 15#include <asm/irqflags.h>
16#include <asm/asm.h> 16#include <asm/asm.h>
17#include <asm/smap.h>
17#include <linux/linkage.h> 18#include <linux/linkage.h>
18#include <linux/err.h> 19#include <linux/err.h>
19 20
@@ -146,8 +147,10 @@ ENTRY(ia32_sysenter_target)
146 SAVE_ARGS 0,1,0 147 SAVE_ARGS 0,1,0
147 /* no need to do an access_ok check here because rbp has been 148 /* no need to do an access_ok check here because rbp has been
148 32bit zero extended */ 149 32bit zero extended */
150 ASM_STAC
1491: movl (%rbp),%ebp 1511: movl (%rbp),%ebp
150 _ASM_EXTABLE(1b,ia32_badarg) 152 _ASM_EXTABLE(1b,ia32_badarg)
153 ASM_CLAC
151 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 154 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
152 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 155 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
153 CFI_REMEMBER_STATE 156 CFI_REMEMBER_STATE
@@ -301,8 +304,10 @@ ENTRY(ia32_cstar_target)
301 /* no need to do an access_ok check here because r8 has been 304 /* no need to do an access_ok check here because r8 has been
302 32bit zero extended */ 305 32bit zero extended */
303 /* hardware stack frame is complete now */ 306 /* hardware stack frame is complete now */
307 ASM_STAC
3041: movl (%r8),%r9d 3081: movl (%r8),%r9d
305 _ASM_EXTABLE(1b,ia32_badarg) 309 _ASM_EXTABLE(1b,ia32_badarg)
310 ASM_CLAC
306 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 311 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
307 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 312 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
308 CFI_REMEMBER_STATE 313 CFI_REMEMBER_STATE
@@ -365,6 +370,7 @@ cstar_tracesys:
365END(ia32_cstar_target) 370END(ia32_cstar_target)
366 371
367ia32_badarg: 372ia32_badarg:
373 ASM_CLAC
368 movq $-EFAULT,%rax 374 movq $-EFAULT,%rax
369 jmp ia32_sysret 375 jmp ia32_sysret
370 CFI_ENDPROC 376 CFI_ENDPROC
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 4540bece0946..c5b938d92eab 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -287,7 +287,7 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
287 return ret; 287 return ret;
288} 288}
289 289
290asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, 290asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
291 int options) 291 int options)
292{ 292{
293 return compat_sys_wait4(pid, stat_addr, options, NULL); 293 return compat_sys_wait4(pid, stat_addr, options, NULL);
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 952bd0100c5c..372231c22a47 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -1,3 +1,6 @@
1#ifndef _ASM_X86_ALTERNATIVE_ASM_H
2#define _ASM_X86_ALTERNATIVE_ASM_H
3
1#ifdef __ASSEMBLY__ 4#ifdef __ASSEMBLY__
2 5
3#include <asm/asm.h> 6#include <asm/asm.h>
@@ -5,10 +8,10 @@
5#ifdef CONFIG_SMP 8#ifdef CONFIG_SMP
6 .macro LOCK_PREFIX 9 .macro LOCK_PREFIX
7672: lock 10672: lock
8 .section .smp_locks,"a" 11 .pushsection .smp_locks,"a"
9 .balign 4 12 .balign 4
10 .long 672b - . 13 .long 672b - .
11 .previous 14 .popsection
12 .endm 15 .endm
13#else 16#else
14 .macro LOCK_PREFIX 17 .macro LOCK_PREFIX
@@ -24,3 +27,5 @@
24.endm 27.endm
25 28
26#endif /* __ASSEMBLY__ */ 29#endif /* __ASSEMBLY__ */
30
31#endif /* _ASM_X86_ALTERNATIVE_ASM_H */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 70780689599a..58ed6d96a6ac 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -29,10 +29,10 @@
29 29
30#ifdef CONFIG_SMP 30#ifdef CONFIG_SMP
31#define LOCK_PREFIX_HERE \ 31#define LOCK_PREFIX_HERE \
32 ".section .smp_locks,\"a\"\n" \ 32 ".pushsection .smp_locks,\"a\"\n" \
33 ".balign 4\n" \ 33 ".balign 4\n" \
34 ".long 671f - .\n" /* offset */ \ 34 ".long 671f - .\n" /* offset */ \
35 ".previous\n" \ 35 ".popsection\n" \
36 "671:" 36 "671:"
37 37
38#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " 38#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
@@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
60 void *locks, void *locks_end, 60 void *locks, void *locks_end,
61 void *text, void *text_end); 61 void *text, void *text_end);
62extern void alternatives_smp_module_del(struct module *mod); 62extern void alternatives_smp_module_del(struct module *mod);
63extern void alternatives_smp_switch(int smp); 63extern void alternatives_enable_smp(void);
64extern int alternatives_text_reserved(void *start, void *end); 64extern int alternatives_text_reserved(void *start, void *end);
65extern bool skip_smp_alternatives; 65extern bool skip_smp_alternatives;
66#else 66#else
@@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name,
68 void *locks, void *locks_end, 68 void *locks, void *locks_end,
69 void *text, void *text_end) {} 69 void *text, void *text_end) {}
70static inline void alternatives_smp_module_del(struct module *mod) {} 70static inline void alternatives_smp_module_del(struct module *mod) {}
71static inline void alternatives_smp_switch(int smp) {} 71static inline void alternatives_enable_smp(void) {}
72static inline int alternatives_text_reserved(void *start, void *end) 72static inline int alternatives_text_reserved(void *start, void *end)
73{ 73{
74 return 0; 74 return 0;
@@ -99,30 +99,30 @@ static inline int alternatives_text_reserved(void *start, void *end)
99/* alternative assembly primitive: */ 99/* alternative assembly primitive: */
100#define ALTERNATIVE(oldinstr, newinstr, feature) \ 100#define ALTERNATIVE(oldinstr, newinstr, feature) \
101 OLDINSTR(oldinstr) \ 101 OLDINSTR(oldinstr) \
102 ".section .altinstructions,\"a\"\n" \ 102 ".pushsection .altinstructions,\"a\"\n" \
103 ALTINSTR_ENTRY(feature, 1) \ 103 ALTINSTR_ENTRY(feature, 1) \
104 ".previous\n" \ 104 ".popsection\n" \
105 ".section .discard,\"aw\",@progbits\n" \ 105 ".pushsection .discard,\"aw\",@progbits\n" \
106 DISCARD_ENTRY(1) \ 106 DISCARD_ENTRY(1) \
107 ".previous\n" \ 107 ".popsection\n" \
108 ".section .altinstr_replacement, \"ax\"\n" \ 108 ".pushsection .altinstr_replacement, \"ax\"\n" \
109 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ 109 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
110 ".previous" 110 ".popsection"
111 111
112#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ 112#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
113 OLDINSTR(oldinstr) \ 113 OLDINSTR(oldinstr) \
114 ".section .altinstructions,\"a\"\n" \ 114 ".pushsection .altinstructions,\"a\"\n" \
115 ALTINSTR_ENTRY(feature1, 1) \ 115 ALTINSTR_ENTRY(feature1, 1) \
116 ALTINSTR_ENTRY(feature2, 2) \ 116 ALTINSTR_ENTRY(feature2, 2) \
117 ".previous\n" \ 117 ".popsection\n" \
118 ".section .discard,\"aw\",@progbits\n" \ 118 ".pushsection .discard,\"aw\",@progbits\n" \
119 DISCARD_ENTRY(1) \ 119 DISCARD_ENTRY(1) \
120 DISCARD_ENTRY(2) \ 120 DISCARD_ENTRY(2) \
121 ".previous\n" \ 121 ".popsection\n" \
122 ".section .altinstr_replacement, \"ax\"\n" \ 122 ".pushsection .altinstr_replacement, \"ax\"\n" \
123 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ 123 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
124 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ 124 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
125 ".previous" 125 ".popsection"
126 126
127/* 127/*
128 * This must be included *after* the definition of ALTERNATIVE due to 128 * This must be included *after* the definition of ALTERNATIVE due to
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 58cb6d4085f7..250b8774c158 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -309,9 +309,9 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
309#define smp_mb__after_atomic_inc() barrier() 309#define smp_mb__after_atomic_inc() barrier()
310 310
311#ifdef CONFIG_X86_32 311#ifdef CONFIG_X86_32
312# include "atomic64_32.h" 312# include <asm/atomic64_32.h>
313#else 313#else
314# include "atomic64_64.h" 314# include <asm/atomic64_64.h>
315#endif 315#endif
316 316
317#endif /* _ASM_X86_ATOMIC_H */ 317#endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 72f5009deb5a..6dfd0195bb55 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -355,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
355 */ 355 */
356static inline unsigned long __ffs(unsigned long word) 356static inline unsigned long __ffs(unsigned long word)
357{ 357{
358 asm("bsf %1,%0" 358 asm("rep; bsf %1,%0"
359 : "=r" (word) 359 : "=r" (word)
360 : "rm" (word)); 360 : "rm" (word));
361 return word; 361 return word;
@@ -369,7 +369,7 @@ static inline unsigned long __ffs(unsigned long word)
369 */ 369 */
370static inline unsigned long ffz(unsigned long word) 370static inline unsigned long ffz(unsigned long word)
371{ 371{
372 asm("bsf %1,%0" 372 asm("rep; bsf %1,%0"
373 : "=r" (word) 373 : "=r" (word)
374 : "r" (~word)); 374 : "r" (~word));
375 return word; 375 return word;
@@ -417,10 +417,9 @@ static inline int ffs(int x)
417 * We cannot do this on 32 bits because at the very least some 417 * We cannot do this on 32 bits because at the very least some
418 * 486 CPUs did not behave this way. 418 * 486 CPUs did not behave this way.
419 */ 419 */
420 long tmp = -1;
421 asm("bsfl %1,%0" 420 asm("bsfl %1,%0"
422 : "=r" (r) 421 : "=r" (r)
423 : "rm" (x), "0" (tmp)); 422 : "rm" (x), "0" (-1));
424#elif defined(CONFIG_X86_CMOV) 423#elif defined(CONFIG_X86_CMOV)
425 asm("bsfl %1,%0\n\t" 424 asm("bsfl %1,%0\n\t"
426 "cmovzl %2,%0" 425 "cmovzl %2,%0"
@@ -459,10 +458,9 @@ static inline int fls(int x)
459 * We cannot do this on 32 bits because at the very least some 458 * We cannot do this on 32 bits because at the very least some
460 * 486 CPUs did not behave this way. 459 * 486 CPUs did not behave this way.
461 */ 460 */
462 long tmp = -1;
463 asm("bsrl %1,%0" 461 asm("bsrl %1,%0"
464 : "=r" (r) 462 : "=r" (r)
465 : "rm" (x), "0" (tmp)); 463 : "rm" (x), "0" (-1));
466#elif defined(CONFIG_X86_CMOV) 464#elif defined(CONFIG_X86_CMOV)
467 asm("bsrl %1,%0\n\t" 465 asm("bsrl %1,%0\n\t"
468 "cmovzl %2,%0" 466 "cmovzl %2,%0"
@@ -490,13 +488,13 @@ static inline int fls(int x)
490#ifdef CONFIG_X86_64 488#ifdef CONFIG_X86_64
491static __always_inline int fls64(__u64 x) 489static __always_inline int fls64(__u64 x)
492{ 490{
493 long bitpos = -1; 491 int bitpos = -1;
494 /* 492 /*
495 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the 493 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
496 * dest reg is undefined if x==0, but their CPU architect says its 494 * dest reg is undefined if x==0, but their CPU architect says its
497 * value is written to set it to the same as before. 495 * value is written to set it to the same as before.
498 */ 496 */
499 asm("bsrq %1,%0" 497 asm("bsrq %1,%q0"
500 : "+r" (bitpos) 498 : "+r" (bitpos)
501 : "rm" (x)); 499 : "rm" (x));
502 return bitpos + 1; 500 return bitpos + 1;
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index a9e3a740f697..0fa675033912 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -46,41 +46,39 @@ For 32-bit we have the following conventions - kernel is built with
46 46
47*/ 47*/
48 48
49#include "dwarf2.h" 49#include <asm/dwarf2.h>
50 50
51/* 51/*
52 * 64-bit system call stack frame layout defines and helpers, for 52 * 64-bit system call stack frame layout defines and helpers,
53 * assembly code (note that the seemingly unnecessary parentheses 53 * for assembly code:
54 * are to prevent cpp from inserting spaces in expressions that get
55 * passed to macros):
56 */ 54 */
57 55
58#define R15 (0) 56#define R15 0
59#define R14 (8) 57#define R14 8
60#define R13 (16) 58#define R13 16
61#define R12 (24) 59#define R12 24
62#define RBP (32) 60#define RBP 32
63#define RBX (40) 61#define RBX 40
64 62
65/* arguments: interrupts/non tracing syscalls only save up to here: */ 63/* arguments: interrupts/non tracing syscalls only save up to here: */
66#define R11 (48) 64#define R11 48
67#define R10 (56) 65#define R10 56
68#define R9 (64) 66#define R9 64
69#define R8 (72) 67#define R8 72
70#define RAX (80) 68#define RAX 80
71#define RCX (88) 69#define RCX 88
72#define RDX (96) 70#define RDX 96
73#define RSI (104) 71#define RSI 104
74#define RDI (112) 72#define RDI 112
75#define ORIG_RAX (120) /* + error_code */ 73#define ORIG_RAX 120 /* + error_code */
76/* end of arguments */ 74/* end of arguments */
77 75
78/* cpu exception frame or undefined in case of fast syscall: */ 76/* cpu exception frame or undefined in case of fast syscall: */
79#define RIP (128) 77#define RIP 128
80#define CS (136) 78#define CS 136
81#define EFLAGS (144) 79#define EFLAGS 144
82#define RSP (152) 80#define RSP 152
83#define SS (160) 81#define SS 160
84 82
85#define ARGOFFSET R11 83#define ARGOFFSET R11
86#define SWFRAME ORIG_RAX 84#define SWFRAME ORIG_RAX
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index 848850fd7d62..5f5bb0f97361 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,5 +1,5 @@
1#ifdef CONFIG_X86_32 1#ifdef CONFIG_X86_32
2# include "checksum_32.h" 2# include <asm/checksum_32.h>
3#else 3#else
4# include "checksum_64.h" 4# include <asm/checksum_64.h>
5#endif 5#endif
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 99480e55973d..8d871eaddb66 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -138,9 +138,9 @@ extern void __add_wrong_size(void)
138 __raw_cmpxchg((ptr), (old), (new), (size), "") 138 __raw_cmpxchg((ptr), (old), (new), (size), "")
139 139
140#ifdef CONFIG_X86_32 140#ifdef CONFIG_X86_32
141# include "cmpxchg_32.h" 141# include <asm/cmpxchg_32.h>
142#else 142#else
143# include "cmpxchg_64.h" 143# include <asm/cmpxchg_64.h>
144#endif 144#endif
145 145
146#ifdef __HAVE_ARCH_CMPXCHG 146#ifdef __HAVE_ARCH_CMPXCHG
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 6b7ee5ff6820..8c297aa53eef 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -4,7 +4,9 @@
4#ifndef _ASM_X86_CPUFEATURE_H 4#ifndef _ASM_X86_CPUFEATURE_H
5#define _ASM_X86_CPUFEATURE_H 5#define _ASM_X86_CPUFEATURE_H
6 6
7#ifndef _ASM_X86_REQUIRED_FEATURES_H
7#include <asm/required-features.h> 8#include <asm/required-features.h>
9#endif
8 10
9#define NCAPINTS 10 /* N 32-bit words worth of info */ 11#define NCAPINTS 10 /* N 32-bit words worth of info */
10 12
@@ -97,6 +99,7 @@
97#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ 99#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
98#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ 100#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
99#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ 101#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
102#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */
100 103
101/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 104/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
102#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ 105#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
@@ -209,6 +212,7 @@
209#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ 212#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
210#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ 213#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */
211#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ 214#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */
215#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */
212 216
213#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 217#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
214 218
@@ -299,12 +303,14 @@ extern const char * const x86_power_flags[32];
299#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) 303#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
300#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) 304#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
301#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) 305#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
306#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT)
302#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) 307#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
303#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 308#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
304#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) 309#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
305#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) 310#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
306#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) 311#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
307#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) 312#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
313#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
308 314
309#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 315#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
310# define cpu_has_invlpg 1 316# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 75f4c6d6a331..831dbb9c6c02 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -12,6 +12,7 @@
12 12
13#include <linux/kernel_stat.h> 13#include <linux/kernel_stat.h>
14#include <linux/regset.h> 14#include <linux/regset.h>
15#include <linux/compat.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
16#include <asm/asm.h> 17#include <asm/asm.h>
17#include <asm/cpufeature.h> 18#include <asm/cpufeature.h>
@@ -20,43 +21,76 @@
20#include <asm/user.h> 21#include <asm/user.h>
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include <asm/xsave.h> 23#include <asm/xsave.h>
24#include <asm/smap.h>
23 25
24extern unsigned int sig_xstate_size; 26#ifdef CONFIG_X86_64
27# include <asm/sigcontext32.h>
28# include <asm/user32.h>
29int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
30 compat_sigset_t *set, struct pt_regs *regs);
31int ia32_setup_frame(int sig, struct k_sigaction *ka,
32 compat_sigset_t *set, struct pt_regs *regs);
33#else
34# define user_i387_ia32_struct user_i387_struct
35# define user32_fxsr_struct user_fxsr_struct
36# define ia32_setup_frame __setup_frame
37# define ia32_setup_rt_frame __setup_rt_frame
38#endif
39
40extern unsigned int mxcsr_feature_mask;
25extern void fpu_init(void); 41extern void fpu_init(void);
42extern void eager_fpu_init(void);
26 43
27DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); 44DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
28 45
46extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
47 struct task_struct *tsk);
48extern void convert_to_fxsr(struct task_struct *tsk,
49 const struct user_i387_ia32_struct *env);
50
29extern user_regset_active_fn fpregs_active, xfpregs_active; 51extern user_regset_active_fn fpregs_active, xfpregs_active;
30extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, 52extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
31 xstateregs_get; 53 xstateregs_get;
32extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, 54extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
33 xstateregs_set; 55 xstateregs_set;
34 56
35
36/* 57/*
37 * xstateregs_active == fpregs_active. Please refer to the comment 58 * xstateregs_active == fpregs_active. Please refer to the comment
38 * at the definition of fpregs_active. 59 * at the definition of fpregs_active.
39 */ 60 */
40#define xstateregs_active fpregs_active 61#define xstateregs_active fpregs_active
41 62
42extern struct _fpx_sw_bytes fx_sw_reserved;
43#ifdef CONFIG_IA32_EMULATION
44extern unsigned int sig_xstate_ia32_size;
45extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
46struct _fpstate_ia32;
47struct _xstate_ia32;
48extern int save_i387_xstate_ia32(void __user *buf);
49extern int restore_i387_xstate_ia32(void __user *buf);
50#endif
51
52#ifdef CONFIG_MATH_EMULATION 63#ifdef CONFIG_MATH_EMULATION
64# define HAVE_HWFP (boot_cpu_data.hard_math)
53extern void finit_soft_fpu(struct i387_soft_struct *soft); 65extern void finit_soft_fpu(struct i387_soft_struct *soft);
54#else 66#else
67# define HAVE_HWFP 1
55static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} 68static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
56#endif 69#endif
57 70
71static inline int is_ia32_compat_frame(void)
72{
73 return config_enabled(CONFIG_IA32_EMULATION) &&
74 test_thread_flag(TIF_IA32);
75}
76
77static inline int is_ia32_frame(void)
78{
79 return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
80}
81
82static inline int is_x32_frame(void)
83{
84 return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
85}
86
58#define X87_FSW_ES (1 << 7) /* Exception Summary */ 87#define X87_FSW_ES (1 << 7) /* Exception Summary */
59 88
89static __always_inline __pure bool use_eager_fpu(void)
90{
91 return static_cpu_has(X86_FEATURE_EAGER_FPU);
92}
93
60static __always_inline __pure bool use_xsaveopt(void) 94static __always_inline __pure bool use_xsaveopt(void)
61{ 95{
62 return static_cpu_has(X86_FEATURE_XSAVEOPT); 96 return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -72,6 +106,13 @@ static __always_inline __pure bool use_fxsr(void)
72 return static_cpu_has(X86_FEATURE_FXSR); 106 return static_cpu_has(X86_FEATURE_FXSR);
73} 107}
74 108
109static inline void fx_finit(struct i387_fxsave_struct *fx)
110{
111 memset(fx, 0, xstate_size);
112 fx->cwd = 0x37f;
113 fx->mxcsr = MXCSR_DEFAULT;
114}
115
75extern void __sanitize_i387_state(struct task_struct *); 116extern void __sanitize_i387_state(struct task_struct *);
76 117
77static inline void sanitize_i387_state(struct task_struct *tsk) 118static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -81,131 +122,121 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
81 __sanitize_i387_state(tsk); 122 __sanitize_i387_state(tsk);
82} 123}
83 124
84#ifdef CONFIG_X86_64 125#define user_insn(insn, output, input...) \
85static inline int fxrstor_checking(struct i387_fxsave_struct *fx) 126({ \
86{ 127 int err; \
87 int err; 128 asm volatile(ASM_STAC "\n" \
88 129 "1:" #insn "\n\t" \
89 /* See comment in fxsave() below. */ 130 "2: " ASM_CLAC "\n" \
90#ifdef CONFIG_AS_FXSAVEQ 131 ".section .fixup,\"ax\"\n" \
91 asm volatile("1: fxrstorq %[fx]\n\t" 132 "3: movl $-1,%[err]\n" \
92 "2:\n" 133 " jmp 2b\n" \
93 ".section .fixup,\"ax\"\n" 134 ".previous\n" \
94 "3: movl $-1,%[err]\n" 135 _ASM_EXTABLE(1b, 3b) \
95 " jmp 2b\n" 136 : [err] "=r" (err), output \
96 ".previous\n" 137 : "0"(0), input); \
97 _ASM_EXTABLE(1b, 3b) 138 err; \
98 : [err] "=r" (err) 139})
99 : [fx] "m" (*fx), "0" (0)); 140
100#else 141#define check_insn(insn, output, input...) \
101 asm volatile("1: rex64/fxrstor (%[fx])\n\t" 142({ \
102 "2:\n" 143 int err; \
103 ".section .fixup,\"ax\"\n" 144 asm volatile("1:" #insn "\n\t" \
104 "3: movl $-1,%[err]\n" 145 "2:\n" \
105 " jmp 2b\n" 146 ".section .fixup,\"ax\"\n" \
106 ".previous\n" 147 "3: movl $-1,%[err]\n" \
107 _ASM_EXTABLE(1b, 3b) 148 " jmp 2b\n" \
108 : [err] "=r" (err) 149 ".previous\n" \
109 : [fx] "R" (fx), "m" (*fx), "0" (0)); 150 _ASM_EXTABLE(1b, 3b) \
110#endif 151 : [err] "=r" (err), output \
111 return err; 152 : "0"(0), input); \
153 err; \
154})
155
156static inline int fsave_user(struct i387_fsave_struct __user *fx)
157{
158 return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
112} 159}
113 160
114static inline int fxsave_user(struct i387_fxsave_struct __user *fx) 161static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
115{ 162{
116 int err; 163 if (config_enabled(CONFIG_X86_32))
164 return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
165 else if (config_enabled(CONFIG_AS_FXSAVEQ))
166 return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
117 167
118 /* 168 /* See comment in fpu_fxsave() below. */
119 * Clear the bytes not touched by the fxsave and reserved 169 return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
120 * for the SW usage.
121 */
122 err = __clear_user(&fx->sw_reserved,
123 sizeof(struct _fpx_sw_bytes));
124 if (unlikely(err))
125 return -EFAULT;
126
127 /* See comment in fxsave() below. */
128#ifdef CONFIG_AS_FXSAVEQ
129 asm volatile("1: fxsaveq %[fx]\n\t"
130 "2:\n"
131 ".section .fixup,\"ax\"\n"
132 "3: movl $-1,%[err]\n"
133 " jmp 2b\n"
134 ".previous\n"
135 _ASM_EXTABLE(1b, 3b)
136 : [err] "=r" (err), [fx] "=m" (*fx)
137 : "0" (0));
138#else
139 asm volatile("1: rex64/fxsave (%[fx])\n\t"
140 "2:\n"
141 ".section .fixup,\"ax\"\n"
142 "3: movl $-1,%[err]\n"
143 " jmp 2b\n"
144 ".previous\n"
145 _ASM_EXTABLE(1b, 3b)
146 : [err] "=r" (err), "=m" (*fx)
147 : [fx] "R" (fx), "0" (0));
148#endif
149 if (unlikely(err) &&
150 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
151 err = -EFAULT;
152 /* No need to clear here because the caller clears USED_MATH */
153 return err;
154} 170}
155 171
156static inline void fpu_fxsave(struct fpu *fpu) 172static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
157{ 173{
158 /* Using "rex64; fxsave %0" is broken because, if the memory operand 174 if (config_enabled(CONFIG_X86_32))
159 uses any extended registers for addressing, a second REX prefix 175 return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
160 will be generated (to the assembler, rex64 followed by semicolon 176 else if (config_enabled(CONFIG_AS_FXSAVEQ))
161 is a separate instruction), and hence the 64-bitness is lost. */ 177 return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
162 178
163#ifdef CONFIG_AS_FXSAVEQ 179 /* See comment in fpu_fxsave() below. */
164 /* Using "fxsaveq %0" would be the ideal choice, but is only supported 180 return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
165 starting with gas 2.16. */ 181 "m" (*fx));
166 __asm__ __volatile__("fxsaveq %0"
167 : "=m" (fpu->state->fxsave));
168#else
169 /* Using, as a workaround, the properly prefixed form below isn't
170 accepted by any binutils version so far released, complaining that
171 the same type of prefix is used twice if an extended register is
172 needed for addressing (fix submitted to mainline 2005-11-21).
173 asm volatile("rex64/fxsave %0"
174 : "=m" (fpu->state->fxsave));
175 This, however, we can work around by forcing the compiler to select
176 an addressing mode that doesn't require extended registers. */
177 asm volatile("rex64/fxsave (%[fx])"
178 : "=m" (fpu->state->fxsave)
179 : [fx] "R" (&fpu->state->fxsave));
180#endif
181} 182}
182 183
183#else /* CONFIG_X86_32 */ 184static inline int fxrstor_user(struct i387_fxsave_struct __user *fx)
185{
186 if (config_enabled(CONFIG_X86_32))
187 return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
188 else if (config_enabled(CONFIG_AS_FXSAVEQ))
189 return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
184 190
185/* perform fxrstor iff the processor has extended states, otherwise frstor */ 191 /* See comment in fpu_fxsave() below. */
186static inline int fxrstor_checking(struct i387_fxsave_struct *fx) 192 return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
193 "m" (*fx));
194}
195
196static inline int frstor_checking(struct i387_fsave_struct *fx)
187{ 197{
188 /* 198 return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
189 * The "nop" is needed to make the instructions the same 199}
190 * length.
191 */
192 alternative_input(
193 "nop ; frstor %1",
194 "fxrstor %1",
195 X86_FEATURE_FXSR,
196 "m" (*fx));
197 200
198 return 0; 201static inline int frstor_user(struct i387_fsave_struct __user *fx)
202{
203 return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
199} 204}
200 205
201static inline void fpu_fxsave(struct fpu *fpu) 206static inline void fpu_fxsave(struct fpu *fpu)
202{ 207{
203 asm volatile("fxsave %[fx]" 208 if (config_enabled(CONFIG_X86_32))
204 : [fx] "=m" (fpu->state->fxsave)); 209 asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
210 else if (config_enabled(CONFIG_AS_FXSAVEQ))
211 asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
212 else {
213 /* Using "rex64; fxsave %0" is broken because, if the memory
214 * operand uses any extended registers for addressing, a second
215 * REX prefix will be generated (to the assembler, rex64
216 * followed by semicolon is a separate instruction), and hence
217 * the 64-bitness is lost.
218 *
219 * Using "fxsaveq %0" would be the ideal choice, but is only
220 * supported starting with gas 2.16.
221 *
222 * Using, as a workaround, the properly prefixed form below
223 * isn't accepted by any binutils version so far released,
224 * complaining that the same type of prefix is used twice if
225 * an extended register is needed for addressing (fix submitted
226 * to mainline 2005-11-21).
227 *
228 * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
229 *
230 * This, however, we can work around by forcing the compiler to
231 * select an addressing mode that doesn't require extended
232 * registers.
233 */
234 asm volatile( "rex64/fxsave (%[fx])"
235 : "=m" (fpu->state->fxsave)
236 : [fx] "R" (&fpu->state->fxsave));
237 }
205} 238}
206 239
207#endif /* CONFIG_X86_64 */
208
209/* 240/*
210 * These must be called with preempt disabled. Returns 241 * These must be called with preempt disabled. Returns
211 * 'true' if the FPU state is still intact. 242 * 'true' if the FPU state is still intact.
@@ -248,17 +279,14 @@ static inline int __save_init_fpu(struct task_struct *tsk)
248 return fpu_save_init(&tsk->thread.fpu); 279 return fpu_save_init(&tsk->thread.fpu);
249} 280}
250 281
251static inline int fpu_fxrstor_checking(struct fpu *fpu)
252{
253 return fxrstor_checking(&fpu->state->fxsave);
254}
255
256static inline int fpu_restore_checking(struct fpu *fpu) 282static inline int fpu_restore_checking(struct fpu *fpu)
257{ 283{
258 if (use_xsave()) 284 if (use_xsave())
259 return fpu_xrstor_checking(fpu); 285 return fpu_xrstor_checking(&fpu->state->xsave);
286 else if (use_fxsr())
287 return fxrstor_checking(&fpu->state->fxsave);
260 else 288 else
261 return fpu_fxrstor_checking(fpu); 289 return frstor_checking(&fpu->state->fsave);
262} 290}
263 291
264static inline int restore_fpu_checking(struct task_struct *tsk) 292static inline int restore_fpu_checking(struct task_struct *tsk)
@@ -310,15 +338,52 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
310static inline void __thread_fpu_end(struct task_struct *tsk) 338static inline void __thread_fpu_end(struct task_struct *tsk)
311{ 339{
312 __thread_clear_has_fpu(tsk); 340 __thread_clear_has_fpu(tsk);
313 stts(); 341 if (!use_eager_fpu())
342 stts();
314} 343}
315 344
316static inline void __thread_fpu_begin(struct task_struct *tsk) 345static inline void __thread_fpu_begin(struct task_struct *tsk)
317{ 346{
318 clts(); 347 if (!use_eager_fpu())
348 clts();
319 __thread_set_has_fpu(tsk); 349 __thread_set_has_fpu(tsk);
320} 350}
321 351
352static inline void __drop_fpu(struct task_struct *tsk)
353{
354 if (__thread_has_fpu(tsk)) {
355 /* Ignore delayed exceptions from user space */
356 asm volatile("1: fwait\n"
357 "2:\n"
358 _ASM_EXTABLE(1b, 2b));
359 __thread_fpu_end(tsk);
360 }
361}
362
363static inline void drop_fpu(struct task_struct *tsk)
364{
365 /*
366 * Forget coprocessor state..
367 */
368 preempt_disable();
369 tsk->fpu_counter = 0;
370 __drop_fpu(tsk);
371 clear_used_math();
372 preempt_enable();
373}
374
375static inline void drop_init_fpu(struct task_struct *tsk)
376{
377 if (!use_eager_fpu())
378 drop_fpu(tsk);
379 else {
380 if (use_xsave())
381 xrstor_state(init_xstate_buf, -1);
382 else
383 fxrstor_checking(&init_xstate_buf->i387);
384 }
385}
386
322/* 387/*
323 * FPU state switching for scheduling. 388 * FPU state switching for scheduling.
324 * 389 *
@@ -352,7 +417,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
352{ 417{
353 fpu_switch_t fpu; 418 fpu_switch_t fpu;
354 419
355 fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; 420 /*
421 * If the task has used the math, pre-load the FPU on xsave processors
422 * or if the past 5 consecutive context-switches used math.
423 */
424 fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
425 new->fpu_counter > 5);
356 if (__thread_has_fpu(old)) { 426 if (__thread_has_fpu(old)) {
357 if (!__save_init_fpu(old)) 427 if (!__save_init_fpu(old))
358 cpu = ~0; 428 cpu = ~0;
@@ -364,14 +434,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
364 new->fpu_counter++; 434 new->fpu_counter++;
365 __thread_set_has_fpu(new); 435 __thread_set_has_fpu(new);
366 prefetch(new->thread.fpu.state); 436 prefetch(new->thread.fpu.state);
367 } else 437 } else if (!use_eager_fpu())
368 stts(); 438 stts();
369 } else { 439 } else {
370 old->fpu_counter = 0; 440 old->fpu_counter = 0;
371 old->thread.fpu.last_cpu = ~0; 441 old->thread.fpu.last_cpu = ~0;
372 if (fpu.preload) { 442 if (fpu.preload) {
373 new->fpu_counter++; 443 new->fpu_counter++;
374 if (fpu_lazy_restore(new, cpu)) 444 if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
375 fpu.preload = 0; 445 fpu.preload = 0;
376 else 446 else
377 prefetch(new->thread.fpu.state); 447 prefetch(new->thread.fpu.state);
@@ -391,44 +461,40 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
391{ 461{
392 if (fpu.preload) { 462 if (fpu.preload) {
393 if (unlikely(restore_fpu_checking(new))) 463 if (unlikely(restore_fpu_checking(new)))
394 __thread_fpu_end(new); 464 drop_init_fpu(new);
395 } 465 }
396} 466}
397 467
398/* 468/*
399 * Signal frame handlers... 469 * Signal frame handlers...
400 */ 470 */
401extern int save_i387_xstate(void __user *buf); 471extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
402extern int restore_i387_xstate(void __user *buf); 472extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
403 473
404static inline void __clear_fpu(struct task_struct *tsk) 474static inline int xstate_sigframe_size(void)
405{ 475{
406 if (__thread_has_fpu(tsk)) { 476 return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
407 /* Ignore delayed exceptions from user space */ 477}
408 asm volatile("1: fwait\n" 478
409 "2:\n" 479static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
410 _ASM_EXTABLE(1b, 2b)); 480{
411 __thread_fpu_end(tsk); 481 void __user *buf_fx = buf;
482 int size = xstate_sigframe_size();
483
484 if (ia32_frame && use_fxsr()) {
485 buf_fx = buf + sizeof(struct i387_fsave_struct);
486 size += sizeof(struct i387_fsave_struct);
412 } 487 }
488
489 return __restore_xstate_sig(buf, buf_fx, size);
413} 490}
414 491
415/* 492/*
416 * The actual user_fpu_begin/end() functions 493 * Need to be preemption-safe.
417 * need to be preemption-safe.
418 * 494 *
419 * NOTE! user_fpu_end() must be used only after you 495 * NOTE! user_fpu_begin() must be used only immediately before restoring
420 * have saved the FP state, and user_fpu_begin() must 496 * it. This function does not do any save/restore on their own.
421 * be used only immediately before restoring it.
422 * These functions do not do any save/restore on
423 * their own.
424 */ 497 */
425static inline void user_fpu_end(void)
426{
427 preempt_disable();
428 __thread_fpu_end(current);
429 preempt_enable();
430}
431
432static inline void user_fpu_begin(void) 498static inline void user_fpu_begin(void)
433{ 499{
434 preempt_disable(); 500 preempt_disable();
@@ -437,25 +503,32 @@ static inline void user_fpu_begin(void)
437 preempt_enable(); 503 preempt_enable();
438} 504}
439 505
506static inline void __save_fpu(struct task_struct *tsk)
507{
508 if (use_xsave())
509 xsave_state(&tsk->thread.fpu.state->xsave, -1);
510 else
511 fpu_fxsave(&tsk->thread.fpu);
512}
513
440/* 514/*
441 * These disable preemption on their own and are safe 515 * These disable preemption on their own and are safe
442 */ 516 */
443static inline void save_init_fpu(struct task_struct *tsk) 517static inline void save_init_fpu(struct task_struct *tsk)
444{ 518{
445 WARN_ON_ONCE(!__thread_has_fpu(tsk)); 519 WARN_ON_ONCE(!__thread_has_fpu(tsk));
520
521 if (use_eager_fpu()) {
522 __save_fpu(tsk);
523 return;
524 }
525
446 preempt_disable(); 526 preempt_disable();
447 __save_init_fpu(tsk); 527 __save_init_fpu(tsk);
448 __thread_fpu_end(tsk); 528 __thread_fpu_end(tsk);
449 preempt_enable(); 529 preempt_enable();
450} 530}
451 531
452static inline void clear_fpu(struct task_struct *tsk)
453{
454 preempt_disable();
455 __clear_fpu(tsk);
456 preempt_enable();
457}
458
459/* 532/*
460 * i387 state interaction 533 * i387 state interaction
461 */ 534 */
@@ -510,11 +583,34 @@ static inline void fpu_free(struct fpu *fpu)
510 } 583 }
511} 584}
512 585
513static inline void fpu_copy(struct fpu *dst, struct fpu *src) 586static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
514{ 587{
515 memcpy(dst->state, src->state, xstate_size); 588 if (use_eager_fpu()) {
589 memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
590 __save_fpu(dst);
591 } else {
592 struct fpu *dfpu = &dst->thread.fpu;
593 struct fpu *sfpu = &src->thread.fpu;
594
595 unlazy_fpu(src);
596 memcpy(dfpu->state, sfpu->state, xstate_size);
597 }
516} 598}
517 599
518extern void fpu_finit(struct fpu *fpu); 600static inline unsigned long
601alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
602 unsigned long *size)
603{
604 unsigned long frame_size = xstate_sigframe_size();
605
606 *buf_fx = sp = round_down(sp - frame_size, 64);
607 if (ia32_frame && use_fxsr()) {
608 frame_size += sizeof(struct i387_fsave_struct);
609 sp -= sizeof(struct i387_fsave_struct);
610 }
611
612 *size = frame_size;
613 return sp;
614}
519 615
520#endif 616#endif
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b0767bc08740..9a25b522d377 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -3,38 +3,54 @@
3 3
4#ifdef __ASSEMBLY__ 4#ifdef __ASSEMBLY__
5 5
6 .macro MCOUNT_SAVE_FRAME 6 /* skip is set if the stack was already partially adjusted */
7 /* taken from glibc */ 7 .macro MCOUNT_SAVE_FRAME skip=0
8 subq $0x38, %rsp 8 /*
9 movq %rax, (%rsp) 9 * We add enough stack to save all regs.
10 movq %rcx, 8(%rsp) 10 */
11 movq %rdx, 16(%rsp) 11 subq $(SS+8-\skip), %rsp
12 movq %rsi, 24(%rsp) 12 movq %rax, RAX(%rsp)
13 movq %rdi, 32(%rsp) 13 movq %rcx, RCX(%rsp)
14 movq %r8, 40(%rsp) 14 movq %rdx, RDX(%rsp)
15 movq %r9, 48(%rsp) 15 movq %rsi, RSI(%rsp)
16 movq %rdi, RDI(%rsp)
17 movq %r8, R8(%rsp)
18 movq %r9, R9(%rsp)
19 /* Move RIP to its proper location */
20 movq SS+8(%rsp), %rdx
21 movq %rdx, RIP(%rsp)
16 .endm 22 .endm
17 23
18 .macro MCOUNT_RESTORE_FRAME 24 .macro MCOUNT_RESTORE_FRAME skip=0
19 movq 48(%rsp), %r9 25 movq R9(%rsp), %r9
20 movq 40(%rsp), %r8 26 movq R8(%rsp), %r8
21 movq 32(%rsp), %rdi 27 movq RDI(%rsp), %rdi
22 movq 24(%rsp), %rsi 28 movq RSI(%rsp), %rsi
23 movq 16(%rsp), %rdx 29 movq RDX(%rsp), %rdx
24 movq 8(%rsp), %rcx 30 movq RCX(%rsp), %rcx
25 movq (%rsp), %rax 31 movq RAX(%rsp), %rax
26 addq $0x38, %rsp 32 addq $(SS+8-\skip), %rsp
27 .endm 33 .endm
28 34
29#endif 35#endif
30 36
31#ifdef CONFIG_FUNCTION_TRACER 37#ifdef CONFIG_FUNCTION_TRACER
32#define MCOUNT_ADDR ((long)(mcount)) 38#ifdef CC_USING_FENTRY
39# define MCOUNT_ADDR ((long)(__fentry__))
40#else
41# define MCOUNT_ADDR ((long)(mcount))
42#endif
33#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ 43#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
34 44
45#ifdef CONFIG_DYNAMIC_FTRACE
46#define ARCH_SUPPORTS_FTRACE_OPS 1
47#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
48#endif
49
35#ifndef __ASSEMBLY__ 50#ifndef __ASSEMBLY__
36extern void mcount(void); 51extern void mcount(void);
37extern atomic_t modifying_ftrace_code; 52extern atomic_t modifying_ftrace_code;
53extern void __fentry__(void);
38 54
39static inline unsigned long ftrace_call_adjust(unsigned long addr) 55static inline unsigned long ftrace_call_adjust(unsigned long addr)
40{ 56{
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 71ecbcba1a4e..f373046e63ec 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -9,10 +9,13 @@
9#include <asm/asm.h> 9#include <asm/asm.h>
10#include <asm/errno.h> 10#include <asm/errno.h>
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/smap.h>
12 13
13#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ 14#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
14 asm volatile("1:\t" insn "\n" \ 15 asm volatile("\t" ASM_STAC "\n" \
15 "2:\t.section .fixup,\"ax\"\n" \ 16 "1:\t" insn "\n" \
17 "2:\t" ASM_CLAC "\n" \
18 "\t.section .fixup,\"ax\"\n" \
16 "3:\tmov\t%3, %1\n" \ 19 "3:\tmov\t%3, %1\n" \
17 "\tjmp\t2b\n" \ 20 "\tjmp\t2b\n" \
18 "\t.previous\n" \ 21 "\t.previous\n" \
@@ -21,12 +24,14 @@
21 : "i" (-EFAULT), "0" (oparg), "1" (0)) 24 : "i" (-EFAULT), "0" (oparg), "1" (0))
22 25
23#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ 26#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
24 asm volatile("1:\tmovl %2, %0\n" \ 27 asm volatile("\t" ASM_STAC "\n" \
28 "1:\tmovl %2, %0\n" \
25 "\tmovl\t%0, %3\n" \ 29 "\tmovl\t%0, %3\n" \
26 "\t" insn "\n" \ 30 "\t" insn "\n" \
27 "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ 31 "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
28 "\tjnz\t1b\n" \ 32 "\tjnz\t1b\n" \
29 "3:\t.section .fixup,\"ax\"\n" \ 33 "3:\t" ASM_CLAC "\n" \
34 "\t.section .fixup,\"ax\"\n" \
30 "4:\tmov\t%5, %1\n" \ 35 "4:\tmov\t%5, %1\n" \
31 "\tjmp\t3b\n" \ 36 "\tjmp\t3b\n" \
32 "\t.previous\n" \ 37 "\t.previous\n" \
@@ -122,8 +127,10 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
122 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) 127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
123 return -EFAULT; 128 return -EFAULT;
124 129
125 asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" 130 asm volatile("\t" ASM_STAC "\n"
126 "2:\t.section .fixup, \"ax\"\n" 131 "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
132 "2:\t" ASM_CLAC "\n"
133 "\t.section .fixup, \"ax\"\n"
127 "3:\tmov %3, %0\n" 134 "3:\tmov %3, %0\n"
128 "\tjmp 2b\n" 135 "\tjmp 2b\n"
129 "\t.previous\n" 136 "\t.previous\n"
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index d3895dbf4ddb..81f04cee5f74 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -18,6 +18,10 @@ typedef struct {
18#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
19 unsigned int irq_resched_count; 19 unsigned int irq_resched_count;
20 unsigned int irq_call_count; 20 unsigned int irq_call_count;
21 /*
22 * irq_tlb_count is double-counted in irq_call_count, so it must be
23 * subtracted from irq_call_count when displaying irq_call_count
24 */
21 unsigned int irq_tlb_count; 25 unsigned int irq_tlb_count;
22#endif 26#endif
23#ifdef CONFIG_X86_THERMAL_VECTOR 27#ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 2c392d663dce..434e2106cc87 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -35,8 +35,6 @@
35#define HPET_ID_NUMBER_SHIFT 8 35#define HPET_ID_NUMBER_SHIFT 8
36#define HPET_ID_VENDOR_SHIFT 16 36#define HPET_ID_VENDOR_SHIFT 16
37 37
38#define HPET_ID_VENDOR_8086 0x8086
39
40#define HPET_CFG_ENABLE 0x001 38#define HPET_CFG_ENABLE 0x001
41#define HPET_CFG_LEGACY 0x002 39#define HPET_CFG_LEGACY 0x002
42#define HPET_LEGACY_8254 2 40#define HPET_LEGACY_8254 2
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 257d9cca214f..ed8089d69094 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -19,12 +19,37 @@ struct pt_regs;
19struct user_i387_struct; 19struct user_i387_struct;
20 20
21extern int init_fpu(struct task_struct *child); 21extern int init_fpu(struct task_struct *child);
22extern void fpu_finit(struct fpu *fpu);
22extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); 23extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
23extern void math_state_restore(void); 24extern void math_state_restore(void);
24 25
25extern bool irq_fpu_usable(void); 26extern bool irq_fpu_usable(void);
26extern void kernel_fpu_begin(void); 27
27extern void kernel_fpu_end(void); 28/*
29 * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
30 * and they don't touch the preempt state on their own.
31 * If you enable preemption after __kernel_fpu_begin(), preempt notifier
32 * should call the __kernel_fpu_end() to prevent the kernel/user FPU
33 * state from getting corrupted. KVM for example uses this model.
34 *
35 * All other cases use kernel_fpu_begin/end() which disable preemption
36 * during kernel FPU usage.
37 */
38extern void __kernel_fpu_begin(void);
39extern void __kernel_fpu_end(void);
40
41static inline void kernel_fpu_begin(void)
42{
43 WARN_ON_ONCE(!irq_fpu_usable());
44 preempt_disable();
45 __kernel_fpu_begin();
46}
47
48static inline void kernel_fpu_end(void)
49{
50 __kernel_fpu_end();
51 preempt_enable();
52}
28 53
29/* 54/*
30 * Some instructions like VIA's padlock instructions generate a spurious 55 * Some instructions like VIA's padlock instructions generate a spurious
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
index f229b13a5f30..f42a04735a0a 100644
--- a/arch/x86/include/asm/iommu_table.h
+++ b/arch/x86/include/asm/iommu_table.h
@@ -48,7 +48,7 @@ struct iommu_table_entry {
48 48
49 49
50#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ 50#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\
51 static const struct iommu_table_entry const \ 51 static const struct iommu_table_entry \
52 __iommu_entry_##_detect __used \ 52 __iommu_entry_##_detect __used \
53 __attribute__ ((unused, __section__(".iommu_table"), \ 53 __attribute__ ((unused, __section__(".iommu_table"), \
54 aligned((sizeof(void *))))) \ 54 aligned((sizeof(void *))))) \
@@ -63,10 +63,10 @@ struct iommu_table_entry {
63 * to stop detecting the other IOMMUs after yours has been detected. 63 * to stop detecting the other IOMMUs after yours has been detected.
64 */ 64 */
65#define IOMMU_INIT_POST(_detect) \ 65#define IOMMU_INIT_POST(_detect) \
66 __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0) 66 __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0)
67 67
68#define IOMMU_INIT_POST_FINISH(detect) \ 68#define IOMMU_INIT_POST_FINISH(detect) \
69 __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1) 69 __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1)
70 70
71/* 71/*
72 * A more sophisticated version of IOMMU_INIT. This variant requires: 72 * A more sophisticated version of IOMMU_INIT. This variant requires:
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 547882539157..d3ddd17405d0 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -27,6 +27,7 @@
27#include <asm/insn.h> 27#include <asm/insn.h>
28 28
29#define __ARCH_WANT_KPROBES_INSN_SLOT 29#define __ARCH_WANT_KPROBES_INSN_SLOT
30#define ARCH_SUPPORTS_KPROBES_ON_FTRACE
30 31
31struct pt_regs; 32struct pt_regs;
32struct kprobe; 33struct kprobe;
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 246617efd67f..41e08cb6a092 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,22 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/ioctl.h> 10#include <linux/ioctl.h>
11 11
12#define DE_VECTOR 0
13#define DB_VECTOR 1
14#define BP_VECTOR 3
15#define OF_VECTOR 4
16#define BR_VECTOR 5
17#define UD_VECTOR 6
18#define NM_VECTOR 7
19#define DF_VECTOR 8
20#define TS_VECTOR 10
21#define NP_VECTOR 11
22#define SS_VECTOR 12
23#define GP_VECTOR 13
24#define PF_VECTOR 14
25#define MF_VECTOR 16
26#define MC_VECTOR 18
27
12/* Select x86 specific features in <linux/kvm.h> */ 28/* Select x86 specific features in <linux/kvm.h> */
13#define __KVM_HAVE_PIT 29#define __KVM_HAVE_PIT
14#define __KVM_HAVE_IOAPIC 30#define __KVM_HAVE_IOAPIC
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09155d64cf7e..1eaa6b056670 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -75,22 +75,6 @@
75#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 75#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
76#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) 76#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
77 77
78#define DE_VECTOR 0
79#define DB_VECTOR 1
80#define BP_VECTOR 3
81#define OF_VECTOR 4
82#define BR_VECTOR 5
83#define UD_VECTOR 6
84#define NM_VECTOR 7
85#define DF_VECTOR 8
86#define TS_VECTOR 10
87#define NP_VECTOR 11
88#define SS_VECTOR 12
89#define GP_VECTOR 13
90#define PF_VECTOR 14
91#define MF_VECTOR 16
92#define MC_VECTOR 18
93
94#define SELECTOR_TI_MASK (1 << 2) 78#define SELECTOR_TI_MASK (1 << 2)
95#define SELECTOR_RPL_MASK 0x03 79#define SELECTOR_RPL_MASK 0x03
96 80
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index a3ac52b29cbf..54d73b1f00a0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,19 +116,9 @@ struct mce_log {
116/* Software defined banks */ 116/* Software defined banks */
117#define MCE_EXTENDED_BANK 128 117#define MCE_EXTENDED_BANK 128
118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
119 119#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
120#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */
121#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9)
122#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9)
123#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9)
124#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9)
125#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9)
126#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
127#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
128
129 120
130#ifdef __KERNEL__ 121#ifdef __KERNEL__
131
132extern void mce_register_decode_chain(struct notifier_block *nb); 122extern void mce_register_decode_chain(struct notifier_block *nb);
133extern void mce_unregister_decode_chain(struct notifier_block *nb); 123extern void mce_unregister_decode_chain(struct notifier_block *nb);
134 124
@@ -171,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
171#ifdef CONFIG_X86_MCE_INTEL 161#ifdef CONFIG_X86_MCE_INTEL
172extern int mce_cmci_disabled; 162extern int mce_cmci_disabled;
173extern int mce_ignore_ce; 163extern int mce_ignore_ce;
164extern int mce_bios_cmci_threshold;
174void mce_intel_feature_init(struct cpuinfo_x86 *c); 165void mce_intel_feature_init(struct cpuinfo_x86 *c);
175void cmci_clear(void); 166void cmci_clear(void);
176void cmci_reenable(void); 167void cmci_reenable(void);
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 4ebe157bf73d..43d921b4752c 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -15,8 +15,8 @@ struct microcode_ops {
15 enum ucode_state (*request_microcode_user) (int cpu, 15 enum ucode_state (*request_microcode_user) (int cpu,
16 const void __user *buf, size_t size); 16 const void __user *buf, size_t size);
17 17
18 enum ucode_state (*request_microcode_fw) (int cpu, 18 enum ucode_state (*request_microcode_fw) (int cpu, struct device *,
19 struct device *device); 19 bool refresh_fw);
20 20
21 void (*microcode_fini_cpu) (int cpu); 21 void (*microcode_fini_cpu) (int cpu);
22 22
@@ -49,12 +49,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
49#ifdef CONFIG_MICROCODE_AMD 49#ifdef CONFIG_MICROCODE_AMD
50extern struct microcode_ops * __init init_amd_microcode(void); 50extern struct microcode_ops * __init init_amd_microcode(void);
51extern void __exit exit_amd_microcode(void); 51extern void __exit exit_amd_microcode(void);
52
53static inline void get_ucode_data(void *to, const u8 *from, size_t n)
54{
55 memcpy(to, from, n);
56}
57
58#else 52#else
59static inline struct microcode_ops * __init init_amd_microcode(void) 53static inline struct microcode_ops * __init init_amd_microcode(void)
60{ 54{
diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h
index 64217ea16a36..d497bc425cae 100644
--- a/arch/x86/include/asm/mmzone.h
+++ b/arch/x86/include/asm/mmzone.h
@@ -1,5 +1,5 @@
1#ifdef CONFIG_X86_32 1#ifdef CONFIG_X86_32
2# include "mmzone_32.h" 2# include <asm/mmzone_32.h>
3#else 3#else
4# include "mmzone_64.h" 4# include <asm/mmzone_64.h>
5#endif 5#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 957ec87385af..fbee9714d9ab 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -248,6 +248,9 @@
248 248
249#define MSR_IA32_PERF_STATUS 0x00000198 249#define MSR_IA32_PERF_STATUS 0x00000198
250#define MSR_IA32_PERF_CTL 0x00000199 250#define MSR_IA32_PERF_CTL 0x00000199
251#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
252#define MSR_AMD_PERF_STATUS 0xc0010063
253#define MSR_AMD_PERF_CTL 0xc0010062
251 254
252#define MSR_IA32_MPERF 0x000000e7 255#define MSR_IA32_MPERF 0x000000e7
253#define MSR_IA32_APERF 0x000000e8 256#define MSR_IA32_APERF 0x000000e8
diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h
index a731b9c573a6..7d3a48275394 100644
--- a/arch/x86/include/asm/mutex.h
+++ b/arch/x86/include/asm/mutex.h
@@ -1,5 +1,5 @@
1#ifdef CONFIG_X86_32 1#ifdef CONFIG_X86_32
2# include "mutex_32.h" 2# include <asm/mutex_32.h>
3#else 3#else
4# include "mutex_64.h" 4# include <asm/mutex_64.h>
5#endif 5#endif
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index bfacd2ccf651..49119fcea2dc 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -53,9 +53,9 @@ static inline int numa_cpu_node(int cpu)
53#endif /* CONFIG_NUMA */ 53#endif /* CONFIG_NUMA */
54 54
55#ifdef CONFIG_X86_32 55#ifdef CONFIG_X86_32
56# include "numa_32.h" 56# include <asm/numa_32.h>
57#else 57#else
58# include "numa_64.h" 58# include <asm/numa_64.h>
59#endif 59#endif
60 60
61#ifdef CONFIG_NUMA 61#ifdef CONFIG_NUMA
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index df75d07571ce..6e41b9343928 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -141,7 +141,7 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq);
141#endif /* __KERNEL__ */ 141#endif /* __KERNEL__ */
142 142
143#ifdef CONFIG_X86_64 143#ifdef CONFIG_X86_64
144#include "pci_64.h" 144#include <asm/pci_64.h>
145#endif 145#endif
146 146
147/* implement the pci_ DMA API in terms of the generic device dma_ one */ 147/* implement the pci_ DMA API in terms of the generic device dma_ one */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index cb4e43bce98a..4fabcdf1cfa7 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { }
262 static inline void amd_pmu_disable_virt(void) { } 262 static inline void amd_pmu_disable_virt(void) { }
263#endif 263#endif
264 264
265#define arch_perf_out_copy_user copy_from_user_nmi
266
265#endif /* _ASM_X86_PERF_EVENT_H */ 267#endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h
new file mode 100644
index 000000000000..3f2207bfd17b
--- /dev/null
+++ b/arch/x86/include/asm/perf_regs.h
@@ -0,0 +1,33 @@
1#ifndef _ASM_X86_PERF_REGS_H
2#define _ASM_X86_PERF_REGS_H
3
4enum perf_event_x86_regs {
5 PERF_REG_X86_AX,
6 PERF_REG_X86_BX,
7 PERF_REG_X86_CX,
8 PERF_REG_X86_DX,
9 PERF_REG_X86_SI,
10 PERF_REG_X86_DI,
11 PERF_REG_X86_BP,
12 PERF_REG_X86_SP,
13 PERF_REG_X86_IP,
14 PERF_REG_X86_FLAGS,
15 PERF_REG_X86_CS,
16 PERF_REG_X86_SS,
17 PERF_REG_X86_DS,
18 PERF_REG_X86_ES,
19 PERF_REG_X86_FS,
20 PERF_REG_X86_GS,
21 PERF_REG_X86_R8,
22 PERF_REG_X86_R9,
23 PERF_REG_X86_R10,
24 PERF_REG_X86_R11,
25 PERF_REG_X86_R12,
26 PERF_REG_X86_R13,
27 PERF_REG_X86_R14,
28 PERF_REG_X86_R15,
29
30 PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
31 PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
32};
33#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 49afb3f41eb6..fc9948465293 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -384,9 +384,9 @@ pte_t *populate_extra_pte(unsigned long vaddr);
384#endif /* __ASSEMBLY__ */ 384#endif /* __ASSEMBLY__ */
385 385
386#ifdef CONFIG_X86_32 386#ifdef CONFIG_X86_32
387# include "pgtable_32.h" 387# include <asm/pgtable_32.h>
388#else 388#else
389# include "pgtable_64.h" 389# include <asm/pgtable_64.h>
390#endif 390#endif
391 391
392#ifndef __ASSEMBLY__ 392#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 013286a10c2c..ec8a1fc9505d 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -174,9 +174,9 @@
174#endif 174#endif
175 175
176#ifdef CONFIG_X86_32 176#ifdef CONFIG_X86_32
177# include "pgtable_32_types.h" 177# include <asm/pgtable_32_types.h>
178#else 178#else
179# include "pgtable_64_types.h" 179# include <asm/pgtable_64_types.h>
180#endif 180#endif
181 181
182#ifndef __ASSEMBLY__ 182#ifndef __ASSEMBLY__
@@ -303,11 +303,9 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
303 303
304extern void native_pagetable_reserve(u64 start, u64 end); 304extern void native_pagetable_reserve(u64 start, u64 end);
305#ifdef CONFIG_X86_32 305#ifdef CONFIG_X86_32
306extern void native_pagetable_setup_start(pgd_t *base); 306extern void native_pagetable_init(void);
307extern void native_pagetable_setup_done(pgd_t *base);
308#else 307#else
309#define native_pagetable_setup_start x86_init_pgd_noop 308#define native_pagetable_init paging_init
310#define native_pagetable_setup_done x86_init_pgd_noop
311#endif 309#endif
312 310
313struct seq_file; 311struct seq_file;
diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h
index 7ef7c3020e5c..bad3665c25fc 100644
--- a/arch/x86/include/asm/posix_types.h
+++ b/arch/x86/include/asm/posix_types.h
@@ -1,15 +1,15 @@
1#ifdef __KERNEL__ 1#ifdef __KERNEL__
2# ifdef CONFIG_X86_32 2# ifdef CONFIG_X86_32
3# include "posix_types_32.h" 3# include <asm/posix_types_32.h>
4# else 4# else
5# include "posix_types_64.h" 5# include <asm/posix_types_64.h>
6# endif 6# endif
7#else 7#else
8# ifdef __i386__ 8# ifdef __i386__
9# include "posix_types_32.h" 9# include <asm/posix_types_32.h>
10# elif defined(__ILP32__) 10# elif defined(__ILP32__)
11# include "posix_types_x32.h" 11# include <asm/posix_types_x32.h>
12# else 12# else
13# include "posix_types_64.h" 13# include <asm/posix_types_64.h>
14# endif 14# endif
15#endif 15#endif
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index aea1d1d848c7..680cf09ed100 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -65,6 +65,7 @@
65#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ 65#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
66#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ 66#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
67#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ 67#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
68#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */
68 69
69/* 70/*
70 * x86-64 Task Priority Register, CR8 71 * x86-64 Task Priority Register, CR8
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad9bcad..b98c0d958ebb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union);
423 423
424DECLARE_PER_CPU(char *, irq_stack_ptr); 424DECLARE_PER_CPU(char *, irq_stack_ptr);
425DECLARE_PER_CPU(unsigned int, irq_count); 425DECLARE_PER_CPU(unsigned int, irq_count);
426extern unsigned long kernel_eflags;
427extern asmlinkage void ignore_sysret(void); 426extern asmlinkage void ignore_sysret(void);
428#else /* X86_64 */ 427#else /* X86_64 */
429#ifdef CONFIG_CC_STACKPROTECTOR 428#ifdef CONFIG_CC_STACKPROTECTOR
@@ -759,6 +758,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
759 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); 758 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
760} 759}
761 760
761extern void set_task_blockstep(struct task_struct *task, bool on);
762
762/* 763/*
763 * from system description table in BIOS. Mostly for MCA use, but 764 * from system description table in BIOS. Mostly for MCA use, but
764 * others may find it useful: 765 * others may find it useful:
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
new file mode 100644
index 000000000000..d1ac07a23979
--- /dev/null
+++ b/arch/x86/include/asm/rcu.h
@@ -0,0 +1,32 @@
1#ifndef _ASM_X86_RCU_H
2#define _ASM_X86_RCU_H
3
4#ifndef __ASSEMBLY__
5
6#include <linux/rcupdate.h>
7#include <asm/ptrace.h>
8
9static inline void exception_enter(struct pt_regs *regs)
10{
11 rcu_user_exit();
12}
13
14static inline void exception_exit(struct pt_regs *regs)
15{
16#ifdef CONFIG_RCU_USER_QS
17 if (user_mode(regs))
18 rcu_user_enter();
19#endif
20}
21
22#else /* __ASSEMBLY__ */
23
24#ifdef CONFIG_RCU_USER_QS
25# define SCHEDULE_USER call schedule_user
26#else
27# define SCHEDULE_USER call schedule
28#endif
29
30#endif /* !__ASSEMBLY__ */
31
32#endif
diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h
index c62e58a5a90d..0f3d7f099224 100644
--- a/arch/x86/include/asm/seccomp.h
+++ b/arch/x86/include/asm/seccomp.h
@@ -1,5 +1,5 @@
1#ifdef CONFIG_X86_32 1#ifdef CONFIG_X86_32
2# include "seccomp_32.h" 2# include <asm/seccomp_32.h>
3#else 3#else
4# include "seccomp_64.h" 4# include <asm/seccomp_64.h>
5#endif 5#endif
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 598457cbd0f8..323973f4abf1 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,6 +31,10 @@ typedef struct {
31 unsigned long sig[_NSIG_WORDS]; 31 unsigned long sig[_NSIG_WORDS];
32} sigset_t; 32} sigset_t;
33 33
34#ifndef CONFIG_COMPAT
35typedef sigset_t compat_sigset_t;
36#endif
37
34#else 38#else
35/* Here we must cater to libcs that poke about in kernel headers. */ 39/* Here we must cater to libcs that poke about in kernel headers. */
36 40
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
new file mode 100644
index 000000000000..8d3120f4e270
--- /dev/null
+++ b/arch/x86/include/asm/smap.h
@@ -0,0 +1,91 @@
1/*
2 * Supervisor Mode Access Prevention support
3 *
4 * Copyright (C) 2012 Intel Corporation
5 * Author: H. Peter Anvin <hpa@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 */
12
13#ifndef _ASM_X86_SMAP_H
14#define _ASM_X86_SMAP_H
15
16#include <linux/stringify.h>
17#include <asm/nops.h>
18#include <asm/cpufeature.h>
19
20/* "Raw" instruction opcodes */
21#define __ASM_CLAC .byte 0x0f,0x01,0xca
22#define __ASM_STAC .byte 0x0f,0x01,0xcb
23
24#ifdef __ASSEMBLY__
25
26#include <asm/alternative-asm.h>
27
28#ifdef CONFIG_X86_SMAP
29
30#define ASM_CLAC \
31 661: ASM_NOP3 ; \
32 .pushsection .altinstr_replacement, "ax" ; \
33 662: __ASM_CLAC ; \
34 .popsection ; \
35 .pushsection .altinstructions, "a" ; \
36 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
37 .popsection
38
39#define ASM_STAC \
40 661: ASM_NOP3 ; \
41 .pushsection .altinstr_replacement, "ax" ; \
42 662: __ASM_STAC ; \
43 .popsection ; \
44 .pushsection .altinstructions, "a" ; \
45 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
46 .popsection
47
48#else /* CONFIG_X86_SMAP */
49
50#define ASM_CLAC
51#define ASM_STAC
52
53#endif /* CONFIG_X86_SMAP */
54
55#else /* __ASSEMBLY__ */
56
57#include <asm/alternative.h>
58
59#ifdef CONFIG_X86_SMAP
60
61static __always_inline void clac(void)
62{
63 /* Note: a barrier is implicit in alternative() */
64 alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
65}
66
67static __always_inline void stac(void)
68{
69 /* Note: a barrier is implicit in alternative() */
70 alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
71}
72
73/* These macros can be used in asm() statements */
74#define ASM_CLAC \
75 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
76#define ASM_STAC \
77 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
78
79#else /* CONFIG_X86_SMAP */
80
81static inline void clac(void) { }
82static inline void stac(void) { }
83
84#define ASM_CLAC
85#define ASM_STAC
86
87#endif /* CONFIG_X86_SMAP */
88
89#endif /* __ASSEMBLY__ */
90
91#endif /* _ASM_X86_SMAP_H */
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 6dfd6d9373a0..09224d7a5862 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -1,5 +1,5 @@
1#ifdef CONFIG_X86_32 1#ifdef CONFIG_X86_32
2# include "string_32.h" 2# include <asm/string_32.h>
3#else 3#else
4# include "string_64.h" 4# include <asm/string_64.h>
5#endif 5#endif
diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h
index 9bd521fe4570..2fab6c2c3575 100644
--- a/arch/x86/include/asm/suspend.h
+++ b/arch/x86/include/asm/suspend.h
@@ -1,5 +1,5 @@
1#ifdef CONFIG_X86_32 1#ifdef CONFIG_X86_32
2# include "suspend_32.h" 2# include <asm/suspend_32.h>
3#else 3#else
4# include "suspend_64.h" 4# include <asm/suspend_64.h>
5#endif 5#endif
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f2b83bc7d784..cdf5674dd23a 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -1,6 +1,135 @@
1#ifndef __SVM_H 1#ifndef __SVM_H
2#define __SVM_H 2#define __SVM_H
3 3
4#define SVM_EXIT_READ_CR0 0x000
5#define SVM_EXIT_READ_CR3 0x003
6#define SVM_EXIT_READ_CR4 0x004
7#define SVM_EXIT_READ_CR8 0x008
8#define SVM_EXIT_WRITE_CR0 0x010
9#define SVM_EXIT_WRITE_CR3 0x013
10#define SVM_EXIT_WRITE_CR4 0x014
11#define SVM_EXIT_WRITE_CR8 0x018
12#define SVM_EXIT_READ_DR0 0x020
13#define SVM_EXIT_READ_DR1 0x021
14#define SVM_EXIT_READ_DR2 0x022
15#define SVM_EXIT_READ_DR3 0x023
16#define SVM_EXIT_READ_DR4 0x024
17#define SVM_EXIT_READ_DR5 0x025
18#define SVM_EXIT_READ_DR6 0x026
19#define SVM_EXIT_READ_DR7 0x027
20#define SVM_EXIT_WRITE_DR0 0x030
21#define SVM_EXIT_WRITE_DR1 0x031
22#define SVM_EXIT_WRITE_DR2 0x032
23#define SVM_EXIT_WRITE_DR3 0x033
24#define SVM_EXIT_WRITE_DR4 0x034
25#define SVM_EXIT_WRITE_DR5 0x035
26#define SVM_EXIT_WRITE_DR6 0x036
27#define SVM_EXIT_WRITE_DR7 0x037
28#define SVM_EXIT_EXCP_BASE 0x040
29#define SVM_EXIT_INTR 0x060
30#define SVM_EXIT_NMI 0x061
31#define SVM_EXIT_SMI 0x062
32#define SVM_EXIT_INIT 0x063
33#define SVM_EXIT_VINTR 0x064
34#define SVM_EXIT_CR0_SEL_WRITE 0x065
35#define SVM_EXIT_IDTR_READ 0x066
36#define SVM_EXIT_GDTR_READ 0x067
37#define SVM_EXIT_LDTR_READ 0x068
38#define SVM_EXIT_TR_READ 0x069
39#define SVM_EXIT_IDTR_WRITE 0x06a
40#define SVM_EXIT_GDTR_WRITE 0x06b
41#define SVM_EXIT_LDTR_WRITE 0x06c
42#define SVM_EXIT_TR_WRITE 0x06d
43#define SVM_EXIT_RDTSC 0x06e
44#define SVM_EXIT_RDPMC 0x06f
45#define SVM_EXIT_PUSHF 0x070
46#define SVM_EXIT_POPF 0x071
47#define SVM_EXIT_CPUID 0x072
48#define SVM_EXIT_RSM 0x073
49#define SVM_EXIT_IRET 0x074
50#define SVM_EXIT_SWINT 0x075
51#define SVM_EXIT_INVD 0x076
52#define SVM_EXIT_PAUSE 0x077
53#define SVM_EXIT_HLT 0x078
54#define SVM_EXIT_INVLPG 0x079
55#define SVM_EXIT_INVLPGA 0x07a
56#define SVM_EXIT_IOIO 0x07b
57#define SVM_EXIT_MSR 0x07c
58#define SVM_EXIT_TASK_SWITCH 0x07d
59#define SVM_EXIT_FERR_FREEZE 0x07e
60#define SVM_EXIT_SHUTDOWN 0x07f
61#define SVM_EXIT_VMRUN 0x080
62#define SVM_EXIT_VMMCALL 0x081
63#define SVM_EXIT_VMLOAD 0x082
64#define SVM_EXIT_VMSAVE 0x083
65#define SVM_EXIT_STGI 0x084
66#define SVM_EXIT_CLGI 0x085
67#define SVM_EXIT_SKINIT 0x086
68#define SVM_EXIT_RDTSCP 0x087
69#define SVM_EXIT_ICEBP 0x088
70#define SVM_EXIT_WBINVD 0x089
71#define SVM_EXIT_MONITOR 0x08a
72#define SVM_EXIT_MWAIT 0x08b
73#define SVM_EXIT_MWAIT_COND 0x08c
74#define SVM_EXIT_XSETBV 0x08d
75#define SVM_EXIT_NPF 0x400
76
77#define SVM_EXIT_ERR -1
78
79#define SVM_EXIT_REASONS \
80 { SVM_EXIT_READ_CR0, "read_cr0" }, \
81 { SVM_EXIT_READ_CR3, "read_cr3" }, \
82 { SVM_EXIT_READ_CR4, "read_cr4" }, \
83 { SVM_EXIT_READ_CR8, "read_cr8" }, \
84 { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
85 { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
86 { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
87 { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
88 { SVM_EXIT_READ_DR0, "read_dr0" }, \
89 { SVM_EXIT_READ_DR1, "read_dr1" }, \
90 { SVM_EXIT_READ_DR2, "read_dr2" }, \
91 { SVM_EXIT_READ_DR3, "read_dr3" }, \
92 { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
93 { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
94 { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
95 { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
96 { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
97 { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
98 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
99 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
100 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
101 { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
102 { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
103 { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
104 { SVM_EXIT_INTR, "interrupt" }, \
105 { SVM_EXIT_NMI, "nmi" }, \
106 { SVM_EXIT_SMI, "smi" }, \
107 { SVM_EXIT_INIT, "init" }, \
108 { SVM_EXIT_VINTR, "vintr" }, \
109 { SVM_EXIT_CPUID, "cpuid" }, \
110 { SVM_EXIT_INVD, "invd" }, \
111 { SVM_EXIT_HLT, "hlt" }, \
112 { SVM_EXIT_INVLPG, "invlpg" }, \
113 { SVM_EXIT_INVLPGA, "invlpga" }, \
114 { SVM_EXIT_IOIO, "io" }, \
115 { SVM_EXIT_MSR, "msr" }, \
116 { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
117 { SVM_EXIT_SHUTDOWN, "shutdown" }, \
118 { SVM_EXIT_VMRUN, "vmrun" }, \
119 { SVM_EXIT_VMMCALL, "hypercall" }, \
120 { SVM_EXIT_VMLOAD, "vmload" }, \
121 { SVM_EXIT_VMSAVE, "vmsave" }, \
122 { SVM_EXIT_STGI, "stgi" }, \
123 { SVM_EXIT_CLGI, "clgi" }, \
124 { SVM_EXIT_SKINIT, "skinit" }, \
125 { SVM_EXIT_WBINVD, "wbinvd" }, \
126 { SVM_EXIT_MONITOR, "monitor" }, \
127 { SVM_EXIT_MWAIT, "mwait" }, \
128 { SVM_EXIT_XSETBV, "xsetbv" }, \
129 { SVM_EXIT_NPF, "npf" }
130
131#ifdef __KERNEL__
132
4enum { 133enum {
5 INTERCEPT_INTR, 134 INTERCEPT_INTR,
6 INTERCEPT_NMI, 135 INTERCEPT_NMI,
@@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb {
264 393
265#define SVM_EXITINFO_REG_MASK 0x0F 394#define SVM_EXITINFO_REG_MASK 0x0F
266 395
267#define SVM_EXIT_READ_CR0 0x000
268#define SVM_EXIT_READ_CR3 0x003
269#define SVM_EXIT_READ_CR4 0x004
270#define SVM_EXIT_READ_CR8 0x008
271#define SVM_EXIT_WRITE_CR0 0x010
272#define SVM_EXIT_WRITE_CR3 0x013
273#define SVM_EXIT_WRITE_CR4 0x014
274#define SVM_EXIT_WRITE_CR8 0x018
275#define SVM_EXIT_READ_DR0 0x020
276#define SVM_EXIT_READ_DR1 0x021
277#define SVM_EXIT_READ_DR2 0x022
278#define SVM_EXIT_READ_DR3 0x023
279#define SVM_EXIT_READ_DR4 0x024
280#define SVM_EXIT_READ_DR5 0x025
281#define SVM_EXIT_READ_DR6 0x026
282#define SVM_EXIT_READ_DR7 0x027
283#define SVM_EXIT_WRITE_DR0 0x030
284#define SVM_EXIT_WRITE_DR1 0x031
285#define SVM_EXIT_WRITE_DR2 0x032
286#define SVM_EXIT_WRITE_DR3 0x033
287#define SVM_EXIT_WRITE_DR4 0x034
288#define SVM_EXIT_WRITE_DR5 0x035
289#define SVM_EXIT_WRITE_DR6 0x036
290#define SVM_EXIT_WRITE_DR7 0x037
291#define SVM_EXIT_EXCP_BASE 0x040
292#define SVM_EXIT_INTR 0x060
293#define SVM_EXIT_NMI 0x061
294#define SVM_EXIT_SMI 0x062
295#define SVM_EXIT_INIT 0x063
296#define SVM_EXIT_VINTR 0x064
297#define SVM_EXIT_CR0_SEL_WRITE 0x065
298#define SVM_EXIT_IDTR_READ 0x066
299#define SVM_EXIT_GDTR_READ 0x067
300#define SVM_EXIT_LDTR_READ 0x068
301#define SVM_EXIT_TR_READ 0x069
302#define SVM_EXIT_IDTR_WRITE 0x06a
303#define SVM_EXIT_GDTR_WRITE 0x06b
304#define SVM_EXIT_LDTR_WRITE 0x06c
305#define SVM_EXIT_TR_WRITE 0x06d
306#define SVM_EXIT_RDTSC 0x06e
307#define SVM_EXIT_RDPMC 0x06f
308#define SVM_EXIT_PUSHF 0x070
309#define SVM_EXIT_POPF 0x071
310#define SVM_EXIT_CPUID 0x072
311#define SVM_EXIT_RSM 0x073
312#define SVM_EXIT_IRET 0x074
313#define SVM_EXIT_SWINT 0x075
314#define SVM_EXIT_INVD 0x076
315#define SVM_EXIT_PAUSE 0x077
316#define SVM_EXIT_HLT 0x078
317#define SVM_EXIT_INVLPG 0x079
318#define SVM_EXIT_INVLPGA 0x07a
319#define SVM_EXIT_IOIO 0x07b
320#define SVM_EXIT_MSR 0x07c
321#define SVM_EXIT_TASK_SWITCH 0x07d
322#define SVM_EXIT_FERR_FREEZE 0x07e
323#define SVM_EXIT_SHUTDOWN 0x07f
324#define SVM_EXIT_VMRUN 0x080
325#define SVM_EXIT_VMMCALL 0x081
326#define SVM_EXIT_VMLOAD 0x082
327#define SVM_EXIT_VMSAVE 0x083
328#define SVM_EXIT_STGI 0x084
329#define SVM_EXIT_CLGI 0x085
330#define SVM_EXIT_SKINIT 0x086
331#define SVM_EXIT_RDTSCP 0x087
332#define SVM_EXIT_ICEBP 0x088
333#define SVM_EXIT_WBINVD 0x089
334#define SVM_EXIT_MONITOR 0x08a
335#define SVM_EXIT_MWAIT 0x08b
336#define SVM_EXIT_MWAIT_COND 0x08c
337#define SVM_EXIT_XSETBV 0x08d
338#define SVM_EXIT_NPF 0x400
339
340#define SVM_EXIT_ERR -1
341
342#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) 396#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
343 397
344#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" 398#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
@@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb {
350 404
351#endif 405#endif
352 406
407#endif
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 3fda9db48819..4ca1c611b552 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -40,7 +40,7 @@ asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
40 struct old_sigaction32 __user *); 40 struct old_sigaction32 __user *);
41asmlinkage long sys32_alarm(unsigned int); 41asmlinkage long sys32_alarm(unsigned int);
42 42
43asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); 43asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
44asmlinkage long sys32_sysfs(int, u32, u32); 44asmlinkage long sys32_sysfs(int, u32, u32);
45 45
46asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, 46asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89f794f007ec..c535d847e3b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,6 +89,7 @@ struct thread_info {
89#define TIF_NOTSC 16 /* TSC is not accessible in userland */ 89#define TIF_NOTSC 16 /* TSC is not accessible in userland */
90#define TIF_IA32 17 /* IA32 compatibility process */ 90#define TIF_IA32 17 /* IA32 compatibility process */
91#define TIF_FORK 18 /* ret_from_fork */ 91#define TIF_FORK 18 /* ret_from_fork */
92#define TIF_NOHZ 19 /* in adaptive nohz mode */
92#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ 93#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
93#define TIF_DEBUG 21 /* uses debug registers */ 94#define TIF_DEBUG 21 /* uses debug registers */
94#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ 95#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -114,6 +115,7 @@ struct thread_info {
114#define _TIF_NOTSC (1 << TIF_NOTSC) 115#define _TIF_NOTSC (1 << TIF_NOTSC)
115#define _TIF_IA32 (1 << TIF_IA32) 116#define _TIF_IA32 (1 << TIF_IA32)
116#define _TIF_FORK (1 << TIF_FORK) 117#define _TIF_FORK (1 << TIF_FORK)
118#define _TIF_NOHZ (1 << TIF_NOHZ)
117#define _TIF_DEBUG (1 << TIF_DEBUG) 119#define _TIF_DEBUG (1 << TIF_DEBUG)
118#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) 120#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
119#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 121#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
@@ -126,12 +128,13 @@ struct thread_info {
126/* work to do in syscall_trace_enter() */ 128/* work to do in syscall_trace_enter() */
127#define _TIF_WORK_SYSCALL_ENTRY \ 129#define _TIF_WORK_SYSCALL_ENTRY \
128 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ 130 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
129 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) 131 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
132 _TIF_NOHZ)
130 133
131/* work to do in syscall_trace_leave() */ 134/* work to do in syscall_trace_leave() */
132#define _TIF_WORK_SYSCALL_EXIT \ 135#define _TIF_WORK_SYSCALL_EXIT \
133 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ 136 (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
134 _TIF_SYSCALL_TRACEPOINT) 137 _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
135 138
136/* work to do on interrupt/exception return */ 139/* work to do on interrupt/exception return */
137#define _TIF_WORK_MASK \ 140#define _TIF_WORK_MASK \
@@ -141,7 +144,8 @@ struct thread_info {
141 144
142/* work to do on any return to user space */ 145/* work to do on any return to user space */
143#define _TIF_ALLWORK_MASK \ 146#define _TIF_ALLWORK_MASK \
144 ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) 147 ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
148 _TIF_NOHZ)
145 149
146/* Only used for 64 bit */ 150/* Only used for 64 bit */
147#define _TIF_DO_NOTIFY_MASK \ 151#define _TIF_DO_NOTIFY_MASK \
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index e1f3a17034fc..7ccf8d131535 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -9,6 +9,7 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <asm/asm.h> 10#include <asm/asm.h>
11#include <asm/page.h> 11#include <asm/page.h>
12#include <asm/smap.h>
12 13
13#define VERIFY_READ 0 14#define VERIFY_READ 0
14#define VERIFY_WRITE 1 15#define VERIFY_WRITE 1
@@ -192,9 +193,10 @@ extern int __get_user_bad(void);
192 193
193#ifdef CONFIG_X86_32 194#ifdef CONFIG_X86_32
194#define __put_user_asm_u64(x, addr, err, errret) \ 195#define __put_user_asm_u64(x, addr, err, errret) \
195 asm volatile("1: movl %%eax,0(%2)\n" \ 196 asm volatile(ASM_STAC "\n" \
197 "1: movl %%eax,0(%2)\n" \
196 "2: movl %%edx,4(%2)\n" \ 198 "2: movl %%edx,4(%2)\n" \
197 "3:\n" \ 199 "3: " ASM_CLAC "\n" \
198 ".section .fixup,\"ax\"\n" \ 200 ".section .fixup,\"ax\"\n" \
199 "4: movl %3,%0\n" \ 201 "4: movl %3,%0\n" \
200 " jmp 3b\n" \ 202 " jmp 3b\n" \
@@ -205,9 +207,10 @@ extern int __get_user_bad(void);
205 : "A" (x), "r" (addr), "i" (errret), "0" (err)) 207 : "A" (x), "r" (addr), "i" (errret), "0" (err))
206 208
207#define __put_user_asm_ex_u64(x, addr) \ 209#define __put_user_asm_ex_u64(x, addr) \
208 asm volatile("1: movl %%eax,0(%1)\n" \ 210 asm volatile(ASM_STAC "\n" \
211 "1: movl %%eax,0(%1)\n" \
209 "2: movl %%edx,4(%1)\n" \ 212 "2: movl %%edx,4(%1)\n" \
210 "3:\n" \ 213 "3: " ASM_CLAC "\n" \
211 _ASM_EXTABLE_EX(1b, 2b) \ 214 _ASM_EXTABLE_EX(1b, 2b) \
212 _ASM_EXTABLE_EX(2b, 3b) \ 215 _ASM_EXTABLE_EX(2b, 3b) \
213 : : "A" (x), "r" (addr)) 216 : : "A" (x), "r" (addr))
@@ -379,8 +382,9 @@ do { \
379} while (0) 382} while (0)
380 383
381#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ 384#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
382 asm volatile("1: mov"itype" %2,%"rtype"1\n" \ 385 asm volatile(ASM_STAC "\n" \
383 "2:\n" \ 386 "1: mov"itype" %2,%"rtype"1\n" \
387 "2: " ASM_CLAC "\n" \
384 ".section .fixup,\"ax\"\n" \ 388 ".section .fixup,\"ax\"\n" \
385 "3: mov %3,%0\n" \ 389 "3: mov %3,%0\n" \
386 " xor"itype" %"rtype"1,%"rtype"1\n" \ 390 " xor"itype" %"rtype"1,%"rtype"1\n" \
@@ -443,8 +447,9 @@ struct __large_struct { unsigned long buf[100]; };
443 * aliasing issues. 447 * aliasing issues.
444 */ 448 */
445#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ 449#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
446 asm volatile("1: mov"itype" %"rtype"1,%2\n" \ 450 asm volatile(ASM_STAC "\n" \
447 "2:\n" \ 451 "1: mov"itype" %"rtype"1,%2\n" \
452 "2: " ASM_CLAC "\n" \
448 ".section .fixup,\"ax\"\n" \ 453 ".section .fixup,\"ax\"\n" \
449 "3: mov %3,%0\n" \ 454 "3: mov %3,%0\n" \
450 " jmp 2b\n" \ 455 " jmp 2b\n" \
@@ -463,13 +468,13 @@ struct __large_struct { unsigned long buf[100]; };
463 * uaccess_try and catch 468 * uaccess_try and catch
464 */ 469 */
465#define uaccess_try do { \ 470#define uaccess_try do { \
466 int prev_err = current_thread_info()->uaccess_err; \
467 current_thread_info()->uaccess_err = 0; \ 471 current_thread_info()->uaccess_err = 0; \
472 stac(); \
468 barrier(); 473 barrier();
469 474
470#define uaccess_catch(err) \ 475#define uaccess_catch(err) \
476 clac(); \
471 (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ 477 (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
472 current_thread_info()->uaccess_err = prev_err; \
473} while (0) 478} while (0)
474 479
475/** 480/**
@@ -569,6 +574,9 @@ strncpy_from_user(char *dst, const char __user *src, long count);
569extern __must_check long strlen_user(const char __user *str); 574extern __must_check long strlen_user(const char __user *str);
570extern __must_check long strnlen_user(const char __user *str, long n); 575extern __must_check long strnlen_user(const char __user *str, long n);
571 576
577unsigned long __must_check clear_user(void __user *mem, unsigned long len);
578unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
579
572/* 580/*
573 * movsl can be slow when source and dest are not both 8-byte aligned 581 * movsl can be slow when source and dest are not both 8-byte aligned
574 */ 582 */
@@ -581,9 +589,9 @@ extern struct movsl_mask {
581#define ARCH_HAS_NOCACHE_UACCESS 1 589#define ARCH_HAS_NOCACHE_UACCESS 1
582 590
583#ifdef CONFIG_X86_32 591#ifdef CONFIG_X86_32
584# include "uaccess_32.h" 592# include <asm/uaccess_32.h>
585#else 593#else
586# include "uaccess_64.h" 594# include <asm/uaccess_64.h>
587#endif 595#endif
588 596
589#endif /* _ASM_X86_UACCESS_H */ 597#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 576e39bca6ad..7f760a9f1f61 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -213,7 +213,4 @@ static inline unsigned long __must_check copy_from_user(void *to,
213 return n; 213 return n;
214} 214}
215 215
216unsigned long __must_check clear_user(void __user *mem, unsigned long len);
217unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
218
219#endif /* _ASM_X86_UACCESS_32_H */ 216#endif /* _ASM_X86_UACCESS_32_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index d8def8b3dba0..142810c457dc 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -217,9 +217,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
217 } 217 }
218} 218}
219 219
220__must_check unsigned long clear_user(void __user *mem, unsigned long len);
221__must_check unsigned long __clear_user(void __user *mem, unsigned long len);
222
223static __must_check __always_inline int 220static __must_check __always_inline int
224__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) 221__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)
225{ 222{
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index f3971bbcd1de..8ff8be7835ab 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -42,10 +42,11 @@ struct arch_uprobe {
42}; 42};
43 43
44struct arch_uprobe_task { 44struct arch_uprobe_task {
45 unsigned long saved_trap_nr;
46#ifdef CONFIG_X86_64 45#ifdef CONFIG_X86_64
47 unsigned long saved_scratch_register; 46 unsigned long saved_scratch_register;
48#endif 47#endif
48 unsigned int saved_trap_nr;
49 unsigned int saved_tf;
49}; 50};
50 51
51extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); 52extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index 24532c7da3d6..ccab4af1646d 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -2,9 +2,9 @@
2#define _ASM_X86_USER_H 2#define _ASM_X86_USER_H
3 3
4#ifdef CONFIG_X86_32 4#ifdef CONFIG_X86_32
5# include "user_32.h" 5# include <asm/user_32.h>
6#else 6#else
7# include "user_64.h" 7# include <asm/user_64.h>
8#endif 8#endif
9 9
10#include <asm/types.h> 10#include <asm/types.h>
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index bb0522850b74..fddb53d63915 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -11,7 +11,8 @@ extern const char VDSO32_PRELINK[];
11#define VDSO32_SYMBOL(base, name) \ 11#define VDSO32_SYMBOL(base, name) \
12({ \ 12({ \
13 extern const char VDSO32_##name[]; \ 13 extern const char VDSO32_##name[]; \
14 (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ 14 (void __user *)(VDSO32_##name - VDSO32_PRELINK + \
15 (unsigned long)(base)); \
15}) 16})
16#endif 17#endif
17 18
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 74fcb963595b..36ec21c36d68 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,88 @@
25 * 25 *
26 */ 26 */
27 27
28#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
29
30#define EXIT_REASON_EXCEPTION_NMI 0
31#define EXIT_REASON_EXTERNAL_INTERRUPT 1
32#define EXIT_REASON_TRIPLE_FAULT 2
33
34#define EXIT_REASON_PENDING_INTERRUPT 7
35#define EXIT_REASON_NMI_WINDOW 8
36#define EXIT_REASON_TASK_SWITCH 9
37#define EXIT_REASON_CPUID 10
38#define EXIT_REASON_HLT 12
39#define EXIT_REASON_INVD 13
40#define EXIT_REASON_INVLPG 14
41#define EXIT_REASON_RDPMC 15
42#define EXIT_REASON_RDTSC 16
43#define EXIT_REASON_VMCALL 18
44#define EXIT_REASON_VMCLEAR 19
45#define EXIT_REASON_VMLAUNCH 20
46#define EXIT_REASON_VMPTRLD 21
47#define EXIT_REASON_VMPTRST 22
48#define EXIT_REASON_VMREAD 23
49#define EXIT_REASON_VMRESUME 24
50#define EXIT_REASON_VMWRITE 25
51#define EXIT_REASON_VMOFF 26
52#define EXIT_REASON_VMON 27
53#define EXIT_REASON_CR_ACCESS 28
54#define EXIT_REASON_DR_ACCESS 29
55#define EXIT_REASON_IO_INSTRUCTION 30
56#define EXIT_REASON_MSR_READ 31
57#define EXIT_REASON_MSR_WRITE 32
58#define EXIT_REASON_INVALID_STATE 33
59#define EXIT_REASON_MWAIT_INSTRUCTION 36
60#define EXIT_REASON_MONITOR_INSTRUCTION 39
61#define EXIT_REASON_PAUSE_INSTRUCTION 40
62#define EXIT_REASON_MCE_DURING_VMENTRY 41
63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
64#define EXIT_REASON_APIC_ACCESS 44
65#define EXIT_REASON_EPT_VIOLATION 48
66#define EXIT_REASON_EPT_MISCONFIG 49
67#define EXIT_REASON_WBINVD 54
68#define EXIT_REASON_XSETBV 55
69#define EXIT_REASON_INVPCID 58
70
71#define VMX_EXIT_REASONS \
72 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
73 { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
74 { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
75 { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
76 { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
77 { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
78 { EXIT_REASON_CPUID, "CPUID" }, \
79 { EXIT_REASON_HLT, "HLT" }, \
80 { EXIT_REASON_INVLPG, "INVLPG" }, \
81 { EXIT_REASON_RDPMC, "RDPMC" }, \
82 { EXIT_REASON_RDTSC, "RDTSC" }, \
83 { EXIT_REASON_VMCALL, "VMCALL" }, \
84 { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
85 { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
86 { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
87 { EXIT_REASON_VMPTRST, "VMPTRST" }, \
88 { EXIT_REASON_VMREAD, "VMREAD" }, \
89 { EXIT_REASON_VMRESUME, "VMRESUME" }, \
90 { EXIT_REASON_VMWRITE, "VMWRITE" }, \
91 { EXIT_REASON_VMOFF, "VMOFF" }, \
92 { EXIT_REASON_VMON, "VMON" }, \
93 { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
94 { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
95 { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
96 { EXIT_REASON_MSR_READ, "MSR_READ" }, \
97 { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
98 { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
99 { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
100 { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
101 { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
102 { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
103 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
104 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
105 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
106 { EXIT_REASON_WBINVD, "WBINVD" }
107
108#ifdef __KERNEL__
109
28#include <linux/types.h> 110#include <linux/types.h>
29 111
30/* 112/*
@@ -241,49 +323,6 @@ enum vmcs_field {
241 HOST_RIP = 0x00006c16, 323 HOST_RIP = 0x00006c16,
242}; 324};
243 325
244#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
245
246#define EXIT_REASON_EXCEPTION_NMI 0
247#define EXIT_REASON_EXTERNAL_INTERRUPT 1
248#define EXIT_REASON_TRIPLE_FAULT 2
249
250#define EXIT_REASON_PENDING_INTERRUPT 7
251#define EXIT_REASON_NMI_WINDOW 8
252#define EXIT_REASON_TASK_SWITCH 9
253#define EXIT_REASON_CPUID 10
254#define EXIT_REASON_HLT 12
255#define EXIT_REASON_INVD 13
256#define EXIT_REASON_INVLPG 14
257#define EXIT_REASON_RDPMC 15
258#define EXIT_REASON_RDTSC 16
259#define EXIT_REASON_VMCALL 18
260#define EXIT_REASON_VMCLEAR 19
261#define EXIT_REASON_VMLAUNCH 20
262#define EXIT_REASON_VMPTRLD 21
263#define EXIT_REASON_VMPTRST 22
264#define EXIT_REASON_VMREAD 23
265#define EXIT_REASON_VMRESUME 24
266#define EXIT_REASON_VMWRITE 25
267#define EXIT_REASON_VMOFF 26
268#define EXIT_REASON_VMON 27
269#define EXIT_REASON_CR_ACCESS 28
270#define EXIT_REASON_DR_ACCESS 29
271#define EXIT_REASON_IO_INSTRUCTION 30
272#define EXIT_REASON_MSR_READ 31
273#define EXIT_REASON_MSR_WRITE 32
274#define EXIT_REASON_INVALID_STATE 33
275#define EXIT_REASON_MWAIT_INSTRUCTION 36
276#define EXIT_REASON_MONITOR_INSTRUCTION 39
277#define EXIT_REASON_PAUSE_INSTRUCTION 40
278#define EXIT_REASON_MCE_DURING_VMENTRY 41
279#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
280#define EXIT_REASON_APIC_ACCESS 44
281#define EXIT_REASON_EPT_VIOLATION 48
282#define EXIT_REASON_EPT_MISCONFIG 49
283#define EXIT_REASON_WBINVD 54
284#define EXIT_REASON_XSETBV 55
285#define EXIT_REASON_INVPCID 58
286
287/* 326/*
288 * Interruption-information format 327 * Interruption-information format
289 */ 328 */
@@ -488,3 +527,5 @@ enum vm_instruction_error_number {
488}; 527};
489 528
490#endif 529#endif
530
531#endif
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 38155f667144..57693498519c 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -81,12 +81,13 @@ struct x86_init_mapping {
81 81
82/** 82/**
83 * struct x86_init_paging - platform specific paging functions 83 * struct x86_init_paging - platform specific paging functions
84 * @pagetable_setup_start: platform specific pre paging_init() call 84 * @pagetable_init: platform specific paging initialization call to setup
85 * @pagetable_setup_done: platform specific post paging_init() call 85 * the kernel pagetables and prepare accessors functions.
86 * Callback must call paging_init(). Called once after the
87 * direct mapping for phys memory is available.
86 */ 88 */
87struct x86_init_paging { 89struct x86_init_paging {
88 void (*pagetable_setup_start)(pgd_t *base); 90 void (*pagetable_init)(void);
89 void (*pagetable_setup_done)(pgd_t *base);
90}; 91};
91 92
92/** 93/**
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index cbf0c9d50b92..1707cfa928fb 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -47,6 +47,10 @@
47#endif 47#endif
48 48
49#ifndef __ASSEMBLY__ 49#ifndef __ASSEMBLY__
50/* Explicitly size integers that represent pfns in the public interface
51 * with Xen so that on ARM we can have one ABI that works for 32 and 64
52 * bit guests. */
53typedef unsigned long xen_pfn_t;
50/* Guest handles for primitive C types. */ 54/* Guest handles for primitive C types. */
51__DEFINE_GUEST_HANDLE(uchar, unsigned char); 55__DEFINE_GUEST_HANDLE(uchar, unsigned char);
52__DEFINE_GUEST_HANDLE(uint, unsigned int); 56__DEFINE_GUEST_HANDLE(uint, unsigned int);
@@ -57,6 +61,7 @@ DEFINE_GUEST_HANDLE(long);
57DEFINE_GUEST_HANDLE(void); 61DEFINE_GUEST_HANDLE(void);
58DEFINE_GUEST_HANDLE(uint64_t); 62DEFINE_GUEST_HANDLE(uint64_t);
59DEFINE_GUEST_HANDLE(uint32_t); 63DEFINE_GUEST_HANDLE(uint32_t);
64DEFINE_GUEST_HANDLE(xen_pfn_t);
60#endif 65#endif
61 66
62#ifndef HYPERVISOR_VIRT_START 67#ifndef HYPERVISOR_VIRT_START
@@ -116,11 +121,13 @@ struct arch_shared_info {
116#endif /* !__ASSEMBLY__ */ 121#endif /* !__ASSEMBLY__ */
117 122
118#ifdef CONFIG_X86_32 123#ifdef CONFIG_X86_32
119#include "interface_32.h" 124#include <asm/xen/interface_32.h>
120#else 125#else
121#include "interface_64.h" 126#include <asm/xen/interface_64.h>
122#endif 127#endif
123 128
129#include <asm/pvclock-abi.h>
130
124#ifndef __ASSEMBLY__ 131#ifndef __ASSEMBLY__
125/* 132/*
126 * The following is all CPU context. Note that the fpu_ctxt block is filled 133 * The following is all CPU context. Note that the fpu_ctxt block is filled
diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
index 1be1ab7d6a41..ee52fcac6f72 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -5,10 +5,12 @@
5extern int xen_swiotlb; 5extern int xen_swiotlb;
6extern int __init pci_xen_swiotlb_detect(void); 6extern int __init pci_xen_swiotlb_detect(void);
7extern void __init pci_xen_swiotlb_init(void); 7extern void __init pci_xen_swiotlb_init(void);
8extern int pci_xen_swiotlb_init_late(void);
8#else 9#else
9#define xen_swiotlb (0) 10#define xen_swiotlb (0)
10static inline int __init pci_xen_swiotlb_detect(void) { return 0; } 11static inline int __init pci_xen_swiotlb_detect(void) { return 0; }
11static inline void __init pci_xen_swiotlb_init(void) { } 12static inline void __init pci_xen_swiotlb_init(void) { }
13static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
12#endif 14#endif
13 15
14#endif /* _ASM_X86_SWIOTLB_XEN_H */ 16#endif /* _ASM_X86_SWIOTLB_XEN_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 7fcf6f3dbcc3..f8fde90bc45e 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -3,8 +3,8 @@
3# include <asm-generic/xor.h> 3# include <asm-generic/xor.h>
4#else 4#else
5#ifdef CONFIG_X86_32 5#ifdef CONFIG_X86_32
6# include "xor_32.h" 6# include <asm/xor_32.h>
7#else 7#else
8# include "xor_64.h" 8# include <asm/xor_64.h>
9#endif 9#endif
10#endif 10#endif
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 454570891bdc..f79cb7ec0e06 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
534 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) 534 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
535 */ 535 */
536 536
537#define XMMS_SAVE \
538do { \
539 preempt_disable(); \
540 cr0 = read_cr0(); \
541 clts(); \
542 asm volatile( \
543 "movups %%xmm0,(%0) ;\n\t" \
544 "movups %%xmm1,0x10(%0) ;\n\t" \
545 "movups %%xmm2,0x20(%0) ;\n\t" \
546 "movups %%xmm3,0x30(%0) ;\n\t" \
547 : \
548 : "r" (xmm_save) \
549 : "memory"); \
550} while (0)
551
552#define XMMS_RESTORE \
553do { \
554 asm volatile( \
555 "sfence ;\n\t" \
556 "movups (%0),%%xmm0 ;\n\t" \
557 "movups 0x10(%0),%%xmm1 ;\n\t" \
558 "movups 0x20(%0),%%xmm2 ;\n\t" \
559 "movups 0x30(%0),%%xmm3 ;\n\t" \
560 : \
561 : "r" (xmm_save) \
562 : "memory"); \
563 write_cr0(cr0); \
564 preempt_enable(); \
565} while (0)
566
567#define ALIGN16 __attribute__((aligned(16)))
568
569#define OFFS(x) "16*("#x")" 537#define OFFS(x) "16*("#x")"
570#define PF_OFFS(x) "256+16*("#x")" 538#define PF_OFFS(x) "256+16*("#x")"
571#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" 539#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
@@ -587,10 +555,8 @@ static void
587xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 555xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
588{ 556{
589 unsigned long lines = bytes >> 8; 557 unsigned long lines = bytes >> 8;
590 char xmm_save[16*4] ALIGN16;
591 int cr0;
592 558
593 XMMS_SAVE; 559 kernel_fpu_begin();
594 560
595 asm volatile( 561 asm volatile(
596#undef BLOCK 562#undef BLOCK
@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
633 : 599 :
634 : "memory"); 600 : "memory");
635 601
636 XMMS_RESTORE; 602 kernel_fpu_end();
637} 603}
638 604
639static void 605static void
@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
641 unsigned long *p3) 607 unsigned long *p3)
642{ 608{
643 unsigned long lines = bytes >> 8; 609 unsigned long lines = bytes >> 8;
644 char xmm_save[16*4] ALIGN16;
645 int cr0;
646 610
647 XMMS_SAVE; 611 kernel_fpu_begin();
648 612
649 asm volatile( 613 asm volatile(
650#undef BLOCK 614#undef BLOCK
@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
694 : 658 :
695 : "memory" ); 659 : "memory" );
696 660
697 XMMS_RESTORE; 661 kernel_fpu_end();
698} 662}
699 663
700static void 664static void
@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
702 unsigned long *p3, unsigned long *p4) 666 unsigned long *p3, unsigned long *p4)
703{ 667{
704 unsigned long lines = bytes >> 8; 668 unsigned long lines = bytes >> 8;
705 char xmm_save[16*4] ALIGN16;
706 int cr0;
707 669
708 XMMS_SAVE; 670 kernel_fpu_begin();
709 671
710 asm volatile( 672 asm volatile(
711#undef BLOCK 673#undef BLOCK
@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
762 : 724 :
763 : "memory" ); 725 : "memory" );
764 726
765 XMMS_RESTORE; 727 kernel_fpu_end();
766} 728}
767 729
768static void 730static void
@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
770 unsigned long *p3, unsigned long *p4, unsigned long *p5) 732 unsigned long *p3, unsigned long *p4, unsigned long *p5)
771{ 733{
772 unsigned long lines = bytes >> 8; 734 unsigned long lines = bytes >> 8;
773 char xmm_save[16*4] ALIGN16;
774 int cr0;
775 735
776 XMMS_SAVE; 736 kernel_fpu_begin();
777 737
778 /* Make sure GCC forgets anything it knows about p4 or p5, 738 /* Make sure GCC forgets anything it knows about p4 or p5,
779 such that it won't pass to the asm volatile below a 739 such that it won't pass to the asm volatile below a
@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
850 like assuming they have some legal value. */ 810 like assuming they have some legal value. */
851 asm("" : "=r" (p4), "=r" (p5)); 811 asm("" : "=r" (p4), "=r" (p5));
852 812
853 XMMS_RESTORE; 813 kernel_fpu_end();
854} 814}
855 815
856static struct xor_block_template xor_block_pIII_sse = { 816static struct xor_block_template xor_block_pIII_sse = {
@@ -862,7 +822,7 @@ static struct xor_block_template xor_block_pIII_sse = {
862}; 822};
863 823
864/* Also try the AVX routines */ 824/* Also try the AVX routines */
865#include "xor_avx.h" 825#include <asm/xor_avx.h>
866 826
867/* Also try the generic routines. */ 827/* Also try the generic routines. */
868#include <asm-generic/xor.h> 828#include <asm-generic/xor.h>
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index b9b2323e90fe..87ac522c4af5 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -34,41 +34,7 @@
34 * no advantages to be gotten from x86-64 here anyways. 34 * no advantages to be gotten from x86-64 here anyways.
35 */ 35 */
36 36
37typedef struct { 37#include <asm/i387.h>
38 unsigned long a, b;
39} __attribute__((aligned(16))) xmm_store_t;
40
41/* Doesn't use gcc to save the XMM registers, because there is no easy way to
42 tell it to do a clts before the register saving. */
43#define XMMS_SAVE \
44do { \
45 preempt_disable(); \
46 asm volatile( \
47 "movq %%cr0,%0 ;\n\t" \
48 "clts ;\n\t" \
49 "movups %%xmm0,(%1) ;\n\t" \
50 "movups %%xmm1,0x10(%1) ;\n\t" \
51 "movups %%xmm2,0x20(%1) ;\n\t" \
52 "movups %%xmm3,0x30(%1) ;\n\t" \
53 : "=&r" (cr0) \
54 : "r" (xmm_save) \
55 : "memory"); \
56} while (0)
57
58#define XMMS_RESTORE \
59do { \
60 asm volatile( \
61 "sfence ;\n\t" \
62 "movups (%1),%%xmm0 ;\n\t" \
63 "movups 0x10(%1),%%xmm1 ;\n\t" \
64 "movups 0x20(%1),%%xmm2 ;\n\t" \
65 "movups 0x30(%1),%%xmm3 ;\n\t" \
66 "movq %0,%%cr0 ;\n\t" \
67 : \
68 : "r" (cr0), "r" (xmm_save) \
69 : "memory"); \
70 preempt_enable(); \
71} while (0)
72 38
73#define OFFS(x) "16*("#x")" 39#define OFFS(x) "16*("#x")"
74#define PF_OFFS(x) "256+16*("#x")" 40#define PF_OFFS(x) "256+16*("#x")"
@@ -91,10 +57,8 @@ static void
91xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 57xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
92{ 58{
93 unsigned int lines = bytes >> 8; 59 unsigned int lines = bytes >> 8;
94 unsigned long cr0;
95 xmm_store_t xmm_save[4];
96 60
97 XMMS_SAVE; 61 kernel_fpu_begin();
98 62
99 asm volatile( 63 asm volatile(
100#undef BLOCK 64#undef BLOCK
@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
135 : [inc] "r" (256UL) 99 : [inc] "r" (256UL)
136 : "memory"); 100 : "memory");
137 101
138 XMMS_RESTORE; 102 kernel_fpu_end();
139} 103}
140 104
141static void 105static void
@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
143 unsigned long *p3) 107 unsigned long *p3)
144{ 108{
145 unsigned int lines = bytes >> 8; 109 unsigned int lines = bytes >> 8;
146 xmm_store_t xmm_save[4];
147 unsigned long cr0;
148
149 XMMS_SAVE;
150 110
111 kernel_fpu_begin();
151 asm volatile( 112 asm volatile(
152#undef BLOCK 113#undef BLOCK
153#define BLOCK(i) \ 114#define BLOCK(i) \
@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
194 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) 155 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
195 : [inc] "r" (256UL) 156 : [inc] "r" (256UL)
196 : "memory"); 157 : "memory");
197 XMMS_RESTORE; 158 kernel_fpu_end();
198} 159}
199 160
200static void 161static void
@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
202 unsigned long *p3, unsigned long *p4) 163 unsigned long *p3, unsigned long *p4)
203{ 164{
204 unsigned int lines = bytes >> 8; 165 unsigned int lines = bytes >> 8;
205 xmm_store_t xmm_save[4];
206 unsigned long cr0;
207 166
208 XMMS_SAVE; 167 kernel_fpu_begin();
209 168
210 asm volatile( 169 asm volatile(
211#undef BLOCK 170#undef BLOCK
@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
261 : [inc] "r" (256UL) 220 : [inc] "r" (256UL)
262 : "memory" ); 221 : "memory" );
263 222
264 XMMS_RESTORE; 223 kernel_fpu_end();
265} 224}
266 225
267static void 226static void
@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
269 unsigned long *p3, unsigned long *p4, unsigned long *p5) 228 unsigned long *p3, unsigned long *p4, unsigned long *p5)
270{ 229{
271 unsigned int lines = bytes >> 8; 230 unsigned int lines = bytes >> 8;
272 xmm_store_t xmm_save[4];
273 unsigned long cr0;
274 231
275 XMMS_SAVE; 232 kernel_fpu_begin();
276 233
277 asm volatile( 234 asm volatile(
278#undef BLOCK 235#undef BLOCK
@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
336 : [inc] "r" (256UL) 293 : [inc] "r" (256UL)
337 : "memory"); 294 : "memory");
338 295
339 XMMS_RESTORE; 296 kernel_fpu_end();
340} 297}
341 298
342static struct xor_block_template xor_block_sse = { 299static struct xor_block_template xor_block_sse = {
@@ -349,7 +306,7 @@ static struct xor_block_template xor_block_sse = {
349 306
350 307
351/* Also try the AVX routines */ 308/* Also try the AVX routines */
352#include "xor_avx.h" 309#include <asm/xor_avx.h>
353 310
354#undef XOR_TRY_TEMPLATES 311#undef XOR_TRY_TEMPLATES
355#define XOR_TRY_TEMPLATES \ 312#define XOR_TRY_TEMPLATES \
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 2510d35f480e..7ea79c5fa1f2 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -20,32 +20,6 @@
20#include <linux/compiler.h> 20#include <linux/compiler.h>
21#include <asm/i387.h> 21#include <asm/i387.h>
22 22
23#define ALIGN32 __aligned(32)
24
25#define YMM_SAVED_REGS 4
26
27#define YMMS_SAVE \
28do { \
29 preempt_disable(); \
30 cr0 = read_cr0(); \
31 clts(); \
32 asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
33 asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
34 asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
35 asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
36} while (0);
37
38#define YMMS_RESTORE \
39do { \
40 asm volatile("sfence" : : : "memory"); \
41 asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
42 asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
43 asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
44 asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
45 write_cr0(cr0); \
46 preempt_enable(); \
47} while (0);
48
49#define BLOCK4(i) \ 23#define BLOCK4(i) \
50 BLOCK(32 * i, 0) \ 24 BLOCK(32 * i, 0) \
51 BLOCK(32 * (i + 1), 1) \ 25 BLOCK(32 * (i + 1), 1) \
@@ -60,10 +34,9 @@ do { \
60 34
61static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) 35static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
62{ 36{
63 unsigned long cr0, lines = bytes >> 9; 37 unsigned long lines = bytes >> 9;
64 char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
65 38
66 YMMS_SAVE 39 kernel_fpu_begin();
67 40
68 while (lines--) { 41 while (lines--) {
69#undef BLOCK 42#undef BLOCK
@@ -82,16 +55,15 @@ do { \
82 p1 = (unsigned long *)((uintptr_t)p1 + 512); 55 p1 = (unsigned long *)((uintptr_t)p1 + 512);
83 } 56 }
84 57
85 YMMS_RESTORE 58 kernel_fpu_end();
86} 59}
87 60
88static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, 61static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
89 unsigned long *p2) 62 unsigned long *p2)
90{ 63{
91 unsigned long cr0, lines = bytes >> 9; 64 unsigned long lines = bytes >> 9;
92 char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
93 65
94 YMMS_SAVE 66 kernel_fpu_begin();
95 67
96 while (lines--) { 68 while (lines--) {
97#undef BLOCK 69#undef BLOCK
@@ -113,16 +85,15 @@ do { \
113 p2 = (unsigned long *)((uintptr_t)p2 + 512); 85 p2 = (unsigned long *)((uintptr_t)p2 + 512);
114 } 86 }
115 87
116 YMMS_RESTORE 88 kernel_fpu_end();
117} 89}
118 90
119static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, 91static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
120 unsigned long *p2, unsigned long *p3) 92 unsigned long *p2, unsigned long *p3)
121{ 93{
122 unsigned long cr0, lines = bytes >> 9; 94 unsigned long lines = bytes >> 9;
123 char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
124 95
125 YMMS_SAVE 96 kernel_fpu_begin();
126 97
127 while (lines--) { 98 while (lines--) {
128#undef BLOCK 99#undef BLOCK
@@ -147,16 +118,15 @@ do { \
147 p3 = (unsigned long *)((uintptr_t)p3 + 512); 118 p3 = (unsigned long *)((uintptr_t)p3 + 512);
148 } 119 }
149 120
150 YMMS_RESTORE 121 kernel_fpu_end();
151} 122}
152 123
153static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, 124static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
154 unsigned long *p2, unsigned long *p3, unsigned long *p4) 125 unsigned long *p2, unsigned long *p3, unsigned long *p4)
155{ 126{
156 unsigned long cr0, lines = bytes >> 9; 127 unsigned long lines = bytes >> 9;
157 char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
158 128
159 YMMS_SAVE 129 kernel_fpu_begin();
160 130
161 while (lines--) { 131 while (lines--) {
162#undef BLOCK 132#undef BLOCK
@@ -184,7 +154,7 @@ do { \
184 p4 = (unsigned long *)((uintptr_t)p4 + 512); 154 p4 = (unsigned long *)((uintptr_t)p4 + 512);
185 } 155 }
186 156
187 YMMS_RESTORE 157 kernel_fpu_end();
188} 158}
189 159
190static struct xor_block_template xor_block_avx = { 160static struct xor_block_template xor_block_avx = {
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 8a1b6f9b594a..0415cdabb5a6 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -34,17 +34,14 @@
34extern unsigned int xstate_size; 34extern unsigned int xstate_size;
35extern u64 pcntxt_mask; 35extern u64 pcntxt_mask;
36extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; 36extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
37extern struct xsave_struct *init_xstate_buf;
37 38
38extern void xsave_init(void); 39extern void xsave_init(void);
39extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); 40extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
40extern int init_fpu(struct task_struct *child); 41extern int init_fpu(struct task_struct *child);
41extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
42 void __user *fpstate,
43 struct _fpx_sw_bytes *sw);
44 42
45static inline int fpu_xrstor_checking(struct fpu *fpu) 43static inline int fpu_xrstor_checking(struct xsave_struct *fx)
46{ 44{
47 struct xsave_struct *fx = &fpu->state->xsave;
48 int err; 45 int err;
49 46
50 asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" 47 asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
@@ -69,13 +66,13 @@ static inline int xsave_user(struct xsave_struct __user *buf)
69 * Clear the xsave header first, so that reserved fields are 66 * Clear the xsave header first, so that reserved fields are
70 * initialized to zero. 67 * initialized to zero.
71 */ 68 */
72 err = __clear_user(&buf->xsave_hdr, 69 err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
73 sizeof(struct xsave_hdr_struct));
74 if (unlikely(err)) 70 if (unlikely(err))
75 return -EFAULT; 71 return -EFAULT;
76 72
77 __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" 73 __asm__ __volatile__(ASM_STAC "\n"
78 "2:\n" 74 "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
75 "2: " ASM_CLAC "\n"
79 ".section .fixup,\"ax\"\n" 76 ".section .fixup,\"ax\"\n"
80 "3: movl $-1,%[err]\n" 77 "3: movl $-1,%[err]\n"
81 " jmp 2b\n" 78 " jmp 2b\n"
@@ -84,9 +81,6 @@ static inline int xsave_user(struct xsave_struct __user *buf)
84 : [err] "=r" (err) 81 : [err] "=r" (err)
85 : "D" (buf), "a" (-1), "d" (-1), "0" (0) 82 : "D" (buf), "a" (-1), "d" (-1), "0" (0)
86 : "memory"); 83 : "memory");
87 if (unlikely(err) && __clear_user(buf, xstate_size))
88 err = -EFAULT;
89 /* No need to clear here because the caller clears USED_MATH */
90 return err; 84 return err;
91} 85}
92 86
@@ -97,8 +91,9 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
97 u32 lmask = mask; 91 u32 lmask = mask;
98 u32 hmask = mask >> 32; 92 u32 hmask = mask >> 32;
99 93
100 __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" 94 __asm__ __volatile__(ASM_STAC "\n"
101 "2:\n" 95 "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
96 "2: " ASM_CLAC "\n"
102 ".section .fixup,\"ax\"\n" 97 ".section .fixup,\"ax\"\n"
103 "3: movl $-1,%[err]\n" 98 "3: movl $-1,%[err]\n"
104 " jmp 2b\n" 99 " jmp 2b\n"
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..83b6e9a0dce4
--- /dev/null
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -0,0 +1,6 @@
1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm
3
4genhdr-y += unistd_32.h
5genhdr-y += unistd_64.h
6genhdr-y += unistd_x32.h
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..8d7a619718b5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
100obj-$(CONFIG_OF) += devicetree.o 100obj-$(CONFIG_OF) += devicetree.o
101obj-$(CONFIG_UPROBES) += uprobes.o 101obj-$(CONFIG_UPROBES) += uprobes.o
102 102
103obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
104
103### 105###
104# 64 bit specific files 106# 64 bit specific files
105ifeq ($(CONFIG_X86_64),y) 107ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e58c6ed..e651f7a589ac 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
656 acpi_register_lapic(physid, ACPI_MADT_ENABLED); 656 acpi_register_lapic(physid, ACPI_MADT_ENABLED);
657 657
658 /* 658 /*
659 * If mp_register_lapic successfully generates a new logical cpu 659 * If acpi_register_lapic successfully generates a new logical cpu
660 * number, then the following will get us exactly what was mapped 660 * number, then the following will get us exactly what was mapped
661 */ 661 */
662 cpumask_andnot(new_map, cpu_present_mask, tmp_map); 662 cpumask_andnot(new_map, cpu_present_mask, tmp_map);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1b8e5a03d942..11676cf65aee 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void)
43 43
44 header->video_mode = saved_video_mode; 44 header->video_mode = saved_video_mode;
45 45
46 header->pmode_behavior = 0;
47
46#ifndef CONFIG_64BIT 48#ifndef CONFIG_64BIT
47 store_gdt((struct desc_ptr *)&header->pmode_gdt); 49 store_gdt((struct desc_ptr *)&header->pmode_gdt);
48 50
49 if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, 51 if (!rdmsr_safe(MSR_EFER,
50 &header->pmode_efer_high)) 52 &header->pmode_efer_low,
51 header->pmode_efer_low = header->pmode_efer_high = 0; 53 &header->pmode_efer_high))
54 header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
52#endif /* !CONFIG_64BIT */ 55#endif /* !CONFIG_64BIT */
53 56
54 header->pmode_cr0 = read_cr0(); 57 header->pmode_cr0 = read_cr0();
55 header->pmode_cr4 = read_cr4_safe(); 58 if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
56 header->pmode_behavior = 0; 59 header->pmode_cr4 = read_cr4();
60 header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
61 }
57 if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, 62 if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
58 &header->pmode_misc_en_low, 63 &header->pmode_misc_en_low,
59 &header->pmode_misc_en_high)) 64 &header->pmode_misc_en_high))
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ced4534baed5..ef5ccca79a6c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,19 +23,6 @@
23 23
24#define MAX_PATCH_LEN (255-1) 24#define MAX_PATCH_LEN (255-1)
25 25
26#ifdef CONFIG_HOTPLUG_CPU
27static int smp_alt_once;
28
29static int __init bootonly(char *str)
30{
31 smp_alt_once = 1;
32 return 1;
33}
34__setup("smp-alt-boot", bootonly);
35#else
36#define smp_alt_once 1
37#endif
38
39static int __initdata_or_module debug_alternative; 26static int __initdata_or_module debug_alternative;
40 27
41static int __init debug_alt(char *str) 28static int __init debug_alt(char *str)
@@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
317 /* turn DS segment override prefix into lock prefix */ 304 /* turn DS segment override prefix into lock prefix */
318 if (*ptr == 0x3e) 305 if (*ptr == 0x3e)
319 text_poke(ptr, ((unsigned char []){0xf0}), 1); 306 text_poke(ptr, ((unsigned char []){0xf0}), 1);
320 }; 307 }
321 mutex_unlock(&text_mutex); 308 mutex_unlock(&text_mutex);
322} 309}
323 310
@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
326{ 313{
327 const s32 *poff; 314 const s32 *poff;
328 315
329 if (noreplace_smp)
330 return;
331
332 mutex_lock(&text_mutex); 316 mutex_lock(&text_mutex);
333 for (poff = start; poff < end; poff++) { 317 for (poff = start; poff < end; poff++) {
334 u8 *ptr = (u8 *)poff + *poff; 318 u8 *ptr = (u8 *)poff + *poff;
@@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
338 /* turn lock prefix into DS segment override prefix */ 322 /* turn lock prefix into DS segment override prefix */
339 if (*ptr == 0xf0) 323 if (*ptr == 0xf0)
340 text_poke(ptr, ((unsigned char []){0x3E}), 1); 324 text_poke(ptr, ((unsigned char []){0x3E}), 1);
341 }; 325 }
342 mutex_unlock(&text_mutex); 326 mutex_unlock(&text_mutex);
343} 327}
344 328
@@ -359,7 +343,7 @@ struct smp_alt_module {
359}; 343};
360static LIST_HEAD(smp_alt_modules); 344static LIST_HEAD(smp_alt_modules);
361static DEFINE_MUTEX(smp_alt); 345static DEFINE_MUTEX(smp_alt);
362static int smp_mode = 1; /* protected by smp_alt */ 346static bool uniproc_patched = false; /* protected by smp_alt */
363 347
364void __init_or_module alternatives_smp_module_add(struct module *mod, 348void __init_or_module alternatives_smp_module_add(struct module *mod,
365 char *name, 349 char *name,
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
368{ 352{
369 struct smp_alt_module *smp; 353 struct smp_alt_module *smp;
370 354
371 if (noreplace_smp) 355 mutex_lock(&smp_alt);
372 return; 356 if (!uniproc_patched)
357 goto unlock;
373 358
374 if (smp_alt_once) { 359 if (num_possible_cpus() == 1)
375 if (boot_cpu_has(X86_FEATURE_UP)) 360 /* Don't bother remembering, we'll never have to undo it. */
376 alternatives_smp_unlock(locks, locks_end, 361 goto smp_unlock;
377 text, text_end);
378 return;
379 }
380 362
381 smp = kzalloc(sizeof(*smp), GFP_KERNEL); 363 smp = kzalloc(sizeof(*smp), GFP_KERNEL);
382 if (NULL == smp) 364 if (NULL == smp)
383 return; /* we'll run the (safe but slow) SMP code then ... */ 365 /* we'll run the (safe but slow) SMP code then ... */
366 goto unlock;
384 367
385 smp->mod = mod; 368 smp->mod = mod;
386 smp->name = name; 369 smp->name = name;
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
392 __func__, smp->locks, smp->locks_end, 375 __func__, smp->locks, smp->locks_end,
393 smp->text, smp->text_end, smp->name); 376 smp->text, smp->text_end, smp->name);
394 377
395 mutex_lock(&smp_alt);
396 list_add_tail(&smp->next, &smp_alt_modules); 378 list_add_tail(&smp->next, &smp_alt_modules);
397 if (boot_cpu_has(X86_FEATURE_UP)) 379smp_unlock:
398 alternatives_smp_unlock(smp->locks, smp->locks_end, 380 alternatives_smp_unlock(locks, locks_end, text, text_end);
399 smp->text, smp->text_end); 381unlock:
400 mutex_unlock(&smp_alt); 382 mutex_unlock(&smp_alt);
401} 383}
402 384
@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
404{ 386{
405 struct smp_alt_module *item; 387 struct smp_alt_module *item;
406 388
407 if (smp_alt_once || noreplace_smp)
408 return;
409
410 mutex_lock(&smp_alt); 389 mutex_lock(&smp_alt);
411 list_for_each_entry(item, &smp_alt_modules, next) { 390 list_for_each_entry(item, &smp_alt_modules, next) {
412 if (mod != item->mod) 391 if (mod != item->mod)
413 continue; 392 continue;
414 list_del(&item->next); 393 list_del(&item->next);
415 mutex_unlock(&smp_alt);
416 DPRINTK("%s: %s\n", __func__, item->name);
417 kfree(item); 394 kfree(item);
418 return; 395 break;
419 } 396 }
420 mutex_unlock(&smp_alt); 397 mutex_unlock(&smp_alt);
421} 398}
422 399
423bool skip_smp_alternatives; 400void alternatives_enable_smp(void)
424void alternatives_smp_switch(int smp)
425{ 401{
426 struct smp_alt_module *mod; 402 struct smp_alt_module *mod;
427 403
@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp)
436 pr_info("lockdep: fixing up alternatives\n"); 412 pr_info("lockdep: fixing up alternatives\n");
437#endif 413#endif
438 414
439 if (noreplace_smp || smp_alt_once || skip_smp_alternatives) 415 /* Why bother if there are no other CPUs? */
440 return; 416 BUG_ON(num_possible_cpus() == 1);
441 BUG_ON(!smp && (num_online_cpus() > 1));
442 417
443 mutex_lock(&smp_alt); 418 mutex_lock(&smp_alt);
444 419
445 /* 420 if (uniproc_patched) {
446 * Avoid unnecessary switches because it forces JIT based VMs to
447 * throw away all cached translations, which can be quite costly.
448 */
449 if (smp == smp_mode) {
450 /* nothing */
451 } else if (smp) {
452 pr_info("switching to SMP code\n"); 421 pr_info("switching to SMP code\n");
422 BUG_ON(num_online_cpus() != 1);
453 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); 423 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
454 clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); 424 clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
455 list_for_each_entry(mod, &smp_alt_modules, next) 425 list_for_each_entry(mod, &smp_alt_modules, next)
456 alternatives_smp_lock(mod->locks, mod->locks_end, 426 alternatives_smp_lock(mod->locks, mod->locks_end,
457 mod->text, mod->text_end); 427 mod->text, mod->text_end);
458 } else { 428 uniproc_patched = false;
459 pr_info("switching to UP code\n");
460 set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
461 set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
462 list_for_each_entry(mod, &smp_alt_modules, next)
463 alternatives_smp_unlock(mod->locks, mod->locks_end,
464 mod->text, mod->text_end);
465 } 429 }
466 smp_mode = smp;
467 mutex_unlock(&smp_alt); 430 mutex_unlock(&smp_alt);
468} 431}
469 432
@@ -540,40 +503,22 @@ void __init alternative_instructions(void)
540 503
541 apply_alternatives(__alt_instructions, __alt_instructions_end); 504 apply_alternatives(__alt_instructions, __alt_instructions_end);
542 505
543 /* switch to patch-once-at-boottime-only mode and free the
544 * tables in case we know the number of CPUs will never ever
545 * change */
546#ifdef CONFIG_HOTPLUG_CPU
547 if (num_possible_cpus() < 2)
548 smp_alt_once = 1;
549#endif
550
551#ifdef CONFIG_SMP 506#ifdef CONFIG_SMP
552 if (smp_alt_once) { 507 /* Patch to UP if other cpus not imminent. */
553 if (1 == num_possible_cpus()) { 508 if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
554 pr_info("switching to UP code\n"); 509 uniproc_patched = true;
555 set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
556 set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
557
558 alternatives_smp_unlock(__smp_locks, __smp_locks_end,
559 _text, _etext);
560 }
561 } else {
562 alternatives_smp_module_add(NULL, "core kernel", 510 alternatives_smp_module_add(NULL, "core kernel",
563 __smp_locks, __smp_locks_end, 511 __smp_locks, __smp_locks_end,
564 _text, _etext); 512 _text, _etext);
565
566 /* Only switch to UP mode if we don't immediately boot others */
567 if (num_present_cpus() == 1 || setup_max_cpus <= 1)
568 alternatives_smp_switch(0);
569 } 513 }
570#endif
571 apply_paravirt(__parainstructions, __parainstructions_end);
572 514
573 if (smp_alt_once) 515 if (!uniproc_patched || num_possible_cpus() == 1)
574 free_init_pages("SMP alternatives", 516 free_init_pages("SMP alternatives",
575 (unsigned long)__smp_locks, 517 (unsigned long)__smp_locks,
576 (unsigned long)__smp_locks_end); 518 (unsigned long)__smp_locks_end);
519#endif
520
521 apply_paravirt(__parainstructions, __parainstructions_end);
577 522
578 restart_nmi(); 523 restart_nmi();
579} 524}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24deb3082328..b17416e72fbd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs)
1934 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); 1934 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
1935 i++; 1935 i++;
1936 v1 >>= 1; 1936 v1 >>= 1;
1937 }; 1937 }
1938 1938
1939 apic_printk(APIC_DEBUG, KERN_CONT "\n"); 1939 apic_printk(APIC_DEBUG, KERN_CONT "\n");
1940 1940
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9d92e19039f0..f7e98a2c0d12 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
737} 737}
738#endif 738#endif
739 739
740static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
741{
742 if (!cpu_has_invlpg)
743 return;
744
745 tlb_flushall_shift = 5;
746
747 if (c->x86 <= 0x11)
748 tlb_flushall_shift = 4;
749}
750
751static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
752{
753 u32 ebx, eax, ecx, edx;
754 u16 mask = 0xfff;
755
756 if (c->x86 < 0xf)
757 return;
758
759 if (c->extended_cpuid_level < 0x80000006)
760 return;
761
762 cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
763
764 tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
765 tlb_lli_4k[ENTRIES] = ebx & mask;
766
767 /*
768 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
769 * characteristics from the CPUID function 0x80000005 instead.
770 */
771 if (c->x86 == 0xf) {
772 cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
773 mask = 0xff;
774 }
775
776 /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
777 if (!((eax >> 16) & mask)) {
778 u32 a, b, c, d;
779
780 cpuid(0x80000005, &a, &b, &c, &d);
781 tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
782 } else {
783 tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
784 }
785
786 /* a 4M entry uses two 2M entries */
787 tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
788
789 /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
790 if (!(eax & mask)) {
791 /* Erratum 658 */
792 if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
793 tlb_lli_2m[ENTRIES] = 1024;
794 } else {
795 cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
796 tlb_lli_2m[ENTRIES] = eax & 0xff;
797 }
798 } else
799 tlb_lli_2m[ENTRIES] = eax & mask;
800
801 tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
802
803 cpu_set_tlb_flushall_shift(c);
804}
805
740static const struct cpu_dev __cpuinitconst amd_cpu_dev = { 806static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
741 .c_vendor = "AMD", 807 .c_vendor = "AMD",
742 .c_ident = { "AuthenticAMD" }, 808 .c_ident = { "AuthenticAMD" },
@@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
756 .c_size_cache = amd_size_cache, 822 .c_size_cache = amd_size_cache,
757#endif 823#endif
758 .c_early_init = early_init_amd, 824 .c_early_init = early_init_amd,
825 .c_detect_tlb = cpu_detect_tlb_amd,
759 .c_bsp_init = bsp_init_amd, 826 .c_bsp_init = bsp_init_amd,
760 .c_init = init_amd, 827 .c_init = init_amd,
761 .c_x86_vendor = X86_VENDOR_AMD, 828 .c_x86_vendor = X86_VENDOR_AMD,
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b5a9f8..d0e910da16c5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void)
165 print_cpu_info(&boot_cpu_data); 165 print_cpu_info(&boot_cpu_data);
166#endif 166#endif
167 check_config(); 167 check_config();
168 check_fpu();
169 check_hlt(); 168 check_hlt();
170 check_popad(); 169 check_popad();
171 init_utsname()->machine[1] = 170 init_utsname()->machine[1] =
172 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); 171 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
173 alternative_instructions(); 172 alternative_instructions();
173
174 /*
175 * kernel_fpu_begin/end() in check_fpu() relies on the patched
176 * alternative instructions.
177 */
178 check_fpu();
174} 179}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fccc..7505f7b13e71 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -259,23 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
259} 259}
260#endif 260#endif
261 261
262static int disable_smep __cpuinitdata;
263static __init int setup_disable_smep(char *arg) 262static __init int setup_disable_smep(char *arg)
264{ 263{
265 disable_smep = 1; 264 setup_clear_cpu_cap(X86_FEATURE_SMEP);
266 return 1; 265 return 1;
267} 266}
268__setup("nosmep", setup_disable_smep); 267__setup("nosmep", setup_disable_smep);
269 268
270static __cpuinit void setup_smep(struct cpuinfo_x86 *c) 269static __always_inline void setup_smep(struct cpuinfo_x86 *c)
271{ 270{
272 if (cpu_has(c, X86_FEATURE_SMEP)) { 271 if (cpu_has(c, X86_FEATURE_SMEP))
273 if (unlikely(disable_smep)) { 272 set_in_cr4(X86_CR4_SMEP);
274 setup_clear_cpu_cap(X86_FEATURE_SMEP); 273}
275 clear_in_cr4(X86_CR4_SMEP); 274
276 } else 275static __init int setup_disable_smap(char *arg)
277 set_in_cr4(X86_CR4_SMEP); 276{
278 } 277 setup_clear_cpu_cap(X86_FEATURE_SMAP);
278 return 1;
279}
280__setup("nosmap", setup_disable_smap);
281
282static __always_inline void setup_smap(struct cpuinfo_x86 *c)
283{
284 unsigned long eflags;
285
286 /* This should have been cleared long ago */
287 raw_local_save_flags(eflags);
288 BUG_ON(eflags & X86_EFLAGS_AC);
289
290 if (cpu_has(c, X86_FEATURE_SMAP))
291 set_in_cr4(X86_CR4_SMAP);
279} 292}
280 293
281/* 294/*
@@ -476,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
476 489
477 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ 490 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
478 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ 491 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
479 "tlb_flushall_shift is 0x%x\n", 492 "tlb_flushall_shift: %d\n",
480 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], 493 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
481 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], 494 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
482 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], 495 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
@@ -712,8 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
712 c->cpu_index = 0; 725 c->cpu_index = 0;
713 filter_cpuid_features(c, false); 726 filter_cpuid_features(c, false);
714 727
715 setup_smep(c);
716
717 if (this_cpu->c_bsp_init) 728 if (this_cpu->c_bsp_init)
718 this_cpu->c_bsp_init(c); 729 this_cpu->c_bsp_init(c);
719} 730}
@@ -798,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
798 c->phys_proc_id = c->initial_apicid; 809 c->phys_proc_id = c->initial_apicid;
799 } 810 }
800 811
801 setup_smep(c);
802
803 get_model_name(c); /* Default name */ 812 get_model_name(c); /* Default name */
804 813
805 detect_nopl(c); 814 detect_nopl(c);
@@ -864,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
864 /* Disable the PN if appropriate */ 873 /* Disable the PN if appropriate */
865 squash_the_stupid_serial_number(c); 874 squash_the_stupid_serial_number(c);
866 875
876 /* Set up SMEP/SMAP */
877 setup_smep(c);
878 setup_smap(c);
879
867 /* 880 /*
868 * The vendor-specific functions might have changed features. 881 * The vendor-specific functions might have changed features.
869 * Now we do "generic changes." 882 * Now we do "generic changes."
@@ -942,8 +955,7 @@ void __init identify_boot_cpu(void)
942#else 955#else
943 vgetcpu_set_mode(); 956 vgetcpu_set_mode();
944#endif 957#endif
945 if (boot_cpu_data.cpuid_level >= 2) 958 cpu_detect_tlb(&boot_cpu_data);
946 cpu_detect_tlb(&boot_cpu_data);
947} 959}
948 960
949void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 961void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1023,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1023 printk(KERN_CONT "%s ", vendor); 1035 printk(KERN_CONT "%s ", vendor);
1024 1036
1025 if (c->x86_model_id[0]) 1037 if (c->x86_model_id[0])
1026 printk(KERN_CONT "%s", c->x86_model_id); 1038 printk(KERN_CONT "%s", strim(c->x86_model_id));
1027 else 1039 else
1028 printk(KERN_CONT "%d86", c->x86); 1040 printk(KERN_CONT "%d86", c->x86);
1029 1041
1042 printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
1043
1030 if (c->x86_mask || c->cpuid_level >= 0) 1044 if (c->x86_mask || c->cpuid_level >= 0)
1031 printk(KERN_CONT " stepping %02x\n", c->x86_mask); 1045 printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
1032 else 1046 else
1033 printk(KERN_CONT "\n"); 1047 printk(KERN_CONT ")\n");
1034 1048
1035 print_cpu_msr(c); 1049 print_cpu_msr(c);
1036} 1050}
@@ -1113,11 +1127,10 @@ void syscall_init(void)
1113 1127
1114 /* Flags to clear on syscall */ 1128 /* Flags to clear on syscall */
1115 wrmsrl(MSR_SYSCALL_MASK, 1129 wrmsrl(MSR_SYSCALL_MASK,
1116 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); 1130 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
1131 X86_EFLAGS_IOPL|X86_EFLAGS_AC);
1117} 1132}
1118 1133
1119unsigned long kernel_eflags;
1120
1121/* 1134/*
1122 * Copies of the original ist values from the tss are only accessed during 1135 * Copies of the original ist values from the tss are only accessed during
1123 * debugging, no special alignment required. 1136 * debugging, no special alignment required.
@@ -1297,9 +1310,6 @@ void __cpuinit cpu_init(void)
1297 dbg_restore_debug_regs(); 1310 dbg_restore_debug_regs();
1298 1311
1299 fpu_init(); 1312 fpu_init();
1300 xsave_init();
1301
1302 raw_local_save_flags(kernel_eflags);
1303 1313
1304 if (is_uv_system()) 1314 if (is_uv_system())
1305 uv_cpu_init(); 1315 uv_cpu_init();
@@ -1352,6 +1362,5 @@ void __cpuinit cpu_init(void)
1352 dbg_restore_debug_regs(); 1362 dbg_restore_debug_regs();
1353 1363
1354 fpu_init(); 1364 fpu_init();
1355 xsave_init();
1356} 1365}
1357#endif 1366#endif
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0a4ce2980a5a..198e019a531a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
648 int i, j, n; 648 int i, j, n;
649 unsigned int regs[4]; 649 unsigned int regs[4];
650 unsigned char *desc = (unsigned char *)regs; 650 unsigned char *desc = (unsigned char *)regs;
651
652 if (c->cpuid_level < 2)
653 return;
654
651 /* Number of times to iterate */ 655 /* Number of times to iterate */
652 n = cpuid_eax(2) & 0xFF; 656 n = cpuid_eax(2) & 0xFF;
653 657
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb393577..ddc72f839332 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
78} 78}
79 79
80static cpumask_var_t mce_inject_cpumask; 80static cpumask_var_t mce_inject_cpumask;
81static DEFINE_MUTEX(mce_inject_mutex);
81 82
82static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) 83static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
83{ 84{
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
194 put_online_cpus(); 195 put_online_cpus();
195 } else 196 } else
196#endif 197#endif
198 {
199 preempt_disable();
197 raise_local(); 200 raise_local();
201 preempt_enable();
202 }
198} 203}
199 204
200/* Error injection interface */ 205/* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
225 * so do it a jiffie or two later everywhere. 230 * so do it a jiffie or two later everywhere.
226 */ 231 */
227 schedule_timeout(2); 232 schedule_timeout(2);
233
234 mutex_lock(&mce_inject_mutex);
228 raise_mce(&m); 235 raise_mce(&m);
236 mutex_unlock(&mce_inject_mutex);
229 return usize; 237 return usize;
230} 238}
231 239
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a65858..6a05c1d327a9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
28 28
29extern struct mce_bank *mce_banks; 29extern struct mce_bank *mce_banks;
30 30
31#ifdef CONFIG_X86_MCE_INTEL
32unsigned long mce_intel_adjust_timer(unsigned long interval);
33void mce_intel_cmci_poll(void);
34void mce_intel_hcpu_update(unsigned long cpu);
35#else
36# define mce_intel_adjust_timer mce_adjust_timer_default
37static inline void mce_intel_cmci_poll(void) { }
38static inline void mce_intel_hcpu_update(unsigned long cpu) { }
39#endif
40
41void mce_timer_kick(unsigned long interval);
42
31#ifdef CONFIG_ACPI_APEI 43#ifdef CONFIG_ACPI_APEI
32int apei_write_mce(struct mce *m); 44int apei_write_mce(struct mce *m);
33ssize_t apei_read_mce(struct mce *m, u64 *record_id); 45ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d0258311c..29e87d3b2843 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
83int mce_cmci_disabled __read_mostly; 83int mce_cmci_disabled __read_mostly;
84int mce_ignore_ce __read_mostly; 84int mce_ignore_ce __read_mostly;
85int mce_ser __read_mostly; 85int mce_ser __read_mostly;
86int mce_bios_cmci_threshold __read_mostly;
86 87
87struct mce_bank *mce_banks __read_mostly; 88struct mce_bank *mce_banks __read_mostly;
88 89
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
1266static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ 1267static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
1267static DEFINE_PER_CPU(struct timer_list, mce_timer); 1268static DEFINE_PER_CPU(struct timer_list, mce_timer);
1268 1269
1270static unsigned long mce_adjust_timer_default(unsigned long interval)
1271{
1272 return interval;
1273}
1274
1275static unsigned long (*mce_adjust_timer)(unsigned long interval) =
1276 mce_adjust_timer_default;
1277
1269static void mce_timer_fn(unsigned long data) 1278static void mce_timer_fn(unsigned long data)
1270{ 1279{
1271 struct timer_list *t = &__get_cpu_var(mce_timer); 1280 struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
1276 if (mce_available(__this_cpu_ptr(&cpu_info))) { 1285 if (mce_available(__this_cpu_ptr(&cpu_info))) {
1277 machine_check_poll(MCP_TIMESTAMP, 1286 machine_check_poll(MCP_TIMESTAMP,
1278 &__get_cpu_var(mce_poll_banks)); 1287 &__get_cpu_var(mce_poll_banks));
1288 mce_intel_cmci_poll();
1279 } 1289 }
1280 1290
1281 /* 1291 /*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
1283 * polling interval, otherwise increase the polling interval. 1293 * polling interval, otherwise increase the polling interval.
1284 */ 1294 */
1285 iv = __this_cpu_read(mce_next_interval); 1295 iv = __this_cpu_read(mce_next_interval);
1286 if (mce_notify_irq()) 1296 if (mce_notify_irq()) {
1287 iv = max(iv / 2, (unsigned long) HZ/100); 1297 iv = max(iv / 2, (unsigned long) HZ/100);
1288 else 1298 } else {
1289 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); 1299 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
1300 iv = mce_adjust_timer(iv);
1301 }
1290 __this_cpu_write(mce_next_interval, iv); 1302 __this_cpu_write(mce_next_interval, iv);
1303 /* Might have become 0 after CMCI storm subsided */
1304 if (iv) {
1305 t->expires = jiffies + iv;
1306 add_timer_on(t, smp_processor_id());
1307 }
1308}
1291 1309
1292 t->expires = jiffies + iv; 1310/*
1293 add_timer_on(t, smp_processor_id()); 1311 * Ensure that the timer is firing in @interval from now.
1312 */
1313void mce_timer_kick(unsigned long interval)
1314{
1315 struct timer_list *t = &__get_cpu_var(mce_timer);
1316 unsigned long when = jiffies + interval;
1317 unsigned long iv = __this_cpu_read(mce_next_interval);
1318
1319 if (timer_pending(t)) {
1320 if (time_before(when, t->expires))
1321 mod_timer_pinned(t, when);
1322 } else {
1323 t->expires = round_jiffies(when);
1324 add_timer_on(t, smp_processor_id());
1325 }
1326 if (interval < iv)
1327 __this_cpu_write(mce_next_interval, interval);
1294} 1328}
1295 1329
1296/* Must not be called in IRQ context where del_timer_sync() can deadlock */ 1330/* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1585 switch (c->x86_vendor) { 1619 switch (c->x86_vendor) {
1586 case X86_VENDOR_INTEL: 1620 case X86_VENDOR_INTEL:
1587 mce_intel_feature_init(c); 1621 mce_intel_feature_init(c);
1622 mce_adjust_timer = mce_intel_adjust_timer;
1588 break; 1623 break;
1589 case X86_VENDOR_AMD: 1624 case X86_VENDOR_AMD:
1590 mce_amd_feature_init(c); 1625 mce_amd_feature_init(c);
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1594 } 1629 }
1595} 1630}
1596 1631
1597static void __mcheck_cpu_init_timer(void) 1632static void mce_start_timer(unsigned int cpu, struct timer_list *t)
1598{ 1633{
1599 struct timer_list *t = &__get_cpu_var(mce_timer); 1634 unsigned long iv = mce_adjust_timer(check_interval * HZ);
1600 unsigned long iv = check_interval * HZ;
1601 1635
1602 setup_timer(t, mce_timer_fn, smp_processor_id()); 1636 __this_cpu_write(mce_next_interval, iv);
1603 1637
1604 if (mce_ignore_ce) 1638 if (mce_ignore_ce || !iv)
1605 return; 1639 return;
1606 1640
1607 __this_cpu_write(mce_next_interval, iv);
1608 if (!iv)
1609 return;
1610 t->expires = round_jiffies(jiffies + iv); 1641 t->expires = round_jiffies(jiffies + iv);
1611 add_timer_on(t, smp_processor_id()); 1642 add_timer_on(t, smp_processor_id());
1612} 1643}
1613 1644
1645static void __mcheck_cpu_init_timer(void)
1646{
1647 struct timer_list *t = &__get_cpu_var(mce_timer);
1648 unsigned int cpu = smp_processor_id();
1649
1650 setup_timer(t, mce_timer_fn, cpu);
1651 mce_start_timer(cpu, t);
1652}
1653
1614/* Handle unconfigured int18 (should never happen) */ 1654/* Handle unconfigured int18 (should never happen) */
1615static void unexpected_machine_check(struct pt_regs *regs, long error_code) 1655static void unexpected_machine_check(struct pt_regs *regs, long error_code)
1616{ 1656{
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
1907 * check, or 0 to not wait 1947 * check, or 0 to not wait
1908 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 1948 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
1909 * mce=nobootlog Don't log MCEs from before booting. 1949 * mce=nobootlog Don't log MCEs from before booting.
1950 * mce=bios_cmci_threshold Don't program the CMCI threshold
1910 */ 1951 */
1911static int __init mcheck_enable(char *str) 1952static int __init mcheck_enable(char *str)
1912{ 1953{
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
1926 mce_ignore_ce = 1; 1967 mce_ignore_ce = 1;
1927 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) 1968 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1928 mce_bootlog = (str[0] == 'b'); 1969 mce_bootlog = (str[0] == 'b');
1970 else if (!strcmp(str, "bios_cmci_threshold"))
1971 mce_bios_cmci_threshold = 1;
1929 else if (isdigit(str[0])) { 1972 else if (isdigit(str[0])) {
1930 get_option(&str, &tolerant); 1973 get_option(&str, &tolerant);
1931 if (*str == ',') { 1974 if (*str == ',') {
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
2166 &mce_cmci_disabled 2209 &mce_cmci_disabled
2167}; 2210};
2168 2211
2212static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
2213 __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
2214 &mce_bios_cmci_threshold
2215};
2216
2169static struct device_attribute *mce_device_attrs[] = { 2217static struct device_attribute *mce_device_attrs[] = {
2170 &dev_attr_tolerant.attr, 2218 &dev_attr_tolerant.attr,
2171 &dev_attr_check_interval.attr, 2219 &dev_attr_check_interval.attr,
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
2174 &dev_attr_dont_log_ce.attr, 2222 &dev_attr_dont_log_ce.attr,
2175 &dev_attr_ignore_ce.attr, 2223 &dev_attr_ignore_ce.attr,
2176 &dev_attr_cmci_disabled.attr, 2224 &dev_attr_cmci_disabled.attr,
2225 &dev_attr_bios_cmci_threshold.attr,
2177 NULL 2226 NULL
2178}; 2227};
2179 2228
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2294 unsigned int cpu = (unsigned long)hcpu; 2343 unsigned int cpu = (unsigned long)hcpu;
2295 struct timer_list *t = &per_cpu(mce_timer, cpu); 2344 struct timer_list *t = &per_cpu(mce_timer, cpu);
2296 2345
2297 switch (action) { 2346 switch (action & ~CPU_TASKS_FROZEN) {
2298 case CPU_ONLINE: 2347 case CPU_ONLINE:
2299 case CPU_ONLINE_FROZEN:
2300 mce_device_create(cpu); 2348 mce_device_create(cpu);
2301 if (threshold_cpu_callback) 2349 if (threshold_cpu_callback)
2302 threshold_cpu_callback(action, cpu); 2350 threshold_cpu_callback(action, cpu);
2303 break; 2351 break;
2304 case CPU_DEAD: 2352 case CPU_DEAD:
2305 case CPU_DEAD_FROZEN:
2306 if (threshold_cpu_callback) 2353 if (threshold_cpu_callback)
2307 threshold_cpu_callback(action, cpu); 2354 threshold_cpu_callback(action, cpu);
2308 mce_device_remove(cpu); 2355 mce_device_remove(cpu);
2356 mce_intel_hcpu_update(cpu);
2309 break; 2357 break;
2310 case CPU_DOWN_PREPARE: 2358 case CPU_DOWN_PREPARE:
2311 case CPU_DOWN_PREPARE_FROZEN:
2312 del_timer_sync(t);
2313 smp_call_function_single(cpu, mce_disable_cpu, &action, 1); 2359 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
2360 del_timer_sync(t);
2314 break; 2361 break;
2315 case CPU_DOWN_FAILED: 2362 case CPU_DOWN_FAILED:
2316 case CPU_DOWN_FAILED_FROZEN:
2317 if (!mce_ignore_ce && check_interval) {
2318 t->expires = round_jiffies(jiffies +
2319 per_cpu(mce_next_interval, cpu));
2320 add_timer_on(t, cpu);
2321 }
2322 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 2363 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
2364 mce_start_timer(cpu, t);
2323 break; 2365 break;
2324 case CPU_POST_DEAD: 2366 }
2367
2368 if (action == CPU_POST_DEAD) {
2325 /* intentionally ignoring frozen here */ 2369 /* intentionally ignoring frozen here */
2326 cmci_rediscover(cpu); 2370 cmci_rediscover(cpu);
2327 break;
2328 } 2371 }
2372
2329 return NOTIFY_OK; 2373 return NOTIFY_OK;
2330} 2374}
2331 2375
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ffc..5f88abf07e9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/mce.h> 16#include <asm/mce.h>
17 17
18#include "mce-internal.h"
19
18/* 20/*
19 * Support for Intel Correct Machine Check Interrupts. This allows 21 * Support for Intel Correct Machine Check Interrupts. This allows
20 * the CPU to raise an interrupt when a corrected machine check happened. 22 * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
30 */ 32 */
31static DEFINE_RAW_SPINLOCK(cmci_discover_lock); 33static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
32 34
33#define CMCI_THRESHOLD 1 35#define CMCI_THRESHOLD 1
36#define CMCI_POLL_INTERVAL (30 * HZ)
37#define CMCI_STORM_INTERVAL (1 * HZ)
38#define CMCI_STORM_THRESHOLD 15
39
40static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
41static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
42static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
43
44enum {
45 CMCI_STORM_NONE,
46 CMCI_STORM_ACTIVE,
47 CMCI_STORM_SUBSIDED,
48};
49
50static atomic_t cmci_storm_on_cpus;
34 51
35static int cmci_supported(int *banks) 52static int cmci_supported(int *banks)
36{ 53{
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
53 return !!(cap & MCG_CMCI_P); 70 return !!(cap & MCG_CMCI_P);
54} 71}
55 72
73void mce_intel_cmci_poll(void)
74{
75 if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
76 return;
77 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
78}
79
80void mce_intel_hcpu_update(unsigned long cpu)
81{
82 if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
83 atomic_dec(&cmci_storm_on_cpus);
84
85 per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
86}
87
88unsigned long mce_intel_adjust_timer(unsigned long interval)
89{
90 int r;
91
92 if (interval < CMCI_POLL_INTERVAL)
93 return interval;
94
95 switch (__this_cpu_read(cmci_storm_state)) {
96 case CMCI_STORM_ACTIVE:
97 /*
98 * We switch back to interrupt mode once the poll timer has
99 * silenced itself. That means no events recorded and the
100 * timer interval is back to our poll interval.
101 */
102 __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
103 r = atomic_sub_return(1, &cmci_storm_on_cpus);
104 if (r == 0)
105 pr_notice("CMCI storm subsided: switching to interrupt mode\n");
106 /* FALLTHROUGH */
107
108 case CMCI_STORM_SUBSIDED:
109 /*
110 * We wait for all cpus to go back to SUBSIDED
111 * state. When that happens we switch back to
112 * interrupt mode.
113 */
114 if (!atomic_read(&cmci_storm_on_cpus)) {
115 __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
116 cmci_reenable();
117 cmci_recheck();
118 }
119 return CMCI_POLL_INTERVAL;
120 default:
121 /*
122 * We have shiny weather. Let the poll do whatever it
123 * thinks.
124 */
125 return interval;
126 }
127}
128
129static bool cmci_storm_detect(void)
130{
131 unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
132 unsigned long ts = __this_cpu_read(cmci_time_stamp);
133 unsigned long now = jiffies;
134 int r;
135
136 if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
137 return true;
138
139 if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
140 cnt++;
141 } else {
142 cnt = 1;
143 __this_cpu_write(cmci_time_stamp, now);
144 }
145 __this_cpu_write(cmci_storm_cnt, cnt);
146
147 if (cnt <= CMCI_STORM_THRESHOLD)
148 return false;
149
150 cmci_clear();
151 __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
152 r = atomic_add_return(1, &cmci_storm_on_cpus);
153 mce_timer_kick(CMCI_POLL_INTERVAL);
154
155 if (r == 1)
156 pr_notice("CMCI storm detected: switching to poll mode\n");
157 return true;
158}
159
56/* 160/*
57 * The interrupt handler. This is called on every event. 161 * The interrupt handler. This is called on every event.
58 * Just call the poller directly to log any events. 162 * Just call the poller directly to log any events.
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
61 */ 165 */
62static void intel_threshold_interrupt(void) 166static void intel_threshold_interrupt(void)
63{ 167{
168 if (cmci_storm_detect())
169 return;
64 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); 170 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
65 mce_notify_irq(); 171 mce_notify_irq();
66} 172}
67 173
68static void print_update(char *type, int *hdr, int num)
69{
70 if (*hdr == 0)
71 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
72 *hdr = 1;
73 printk(KERN_CONT " %s:%d", type, num);
74}
75
76/* 174/*
77 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks 175 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
78 * on this CPU. Use the algorithm recommended in the SDM to discover shared 176 * on this CPU. Use the algorithm recommended in the SDM to discover shared
79 * banks. 177 * banks.
80 */ 178 */
81static void cmci_discover(int banks, int boot) 179static void cmci_discover(int banks)
82{ 180{
83 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); 181 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
84 unsigned long flags; 182 unsigned long flags;
85 int hdr = 0;
86 int i; 183 int i;
184 int bios_wrong_thresh = 0;
87 185
88 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 186 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
89 for (i = 0; i < banks; i++) { 187 for (i = 0; i < banks; i++) {
90 u64 val; 188 u64 val;
189 int bios_zero_thresh = 0;
91 190
92 if (test_bit(i, owned)) 191 if (test_bit(i, owned))
93 continue; 192 continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot)
96 195
97 /* Already owned by someone else? */ 196 /* Already owned by someone else? */
98 if (val & MCI_CTL2_CMCI_EN) { 197 if (val & MCI_CTL2_CMCI_EN) {
99 if (test_and_clear_bit(i, owned) && !boot) 198 clear_bit(i, owned);
100 print_update("SHD", &hdr, i);
101 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 199 __clear_bit(i, __get_cpu_var(mce_poll_banks));
102 continue; 200 continue;
103 } 201 }
104 202
105 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; 203 if (!mce_bios_cmci_threshold) {
106 val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; 204 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
205 val |= CMCI_THRESHOLD;
206 } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
207 /*
208 * If bios_cmci_threshold boot option was specified
209 * but the threshold is zero, we'll try to initialize
210 * it to 1.
211 */
212 bios_zero_thresh = 1;
213 val |= CMCI_THRESHOLD;
214 }
215
216 val |= MCI_CTL2_CMCI_EN;
107 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 217 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
108 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 218 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
109 219
110 /* Did the enable bit stick? -- the bank supports CMCI */ 220 /* Did the enable bit stick? -- the bank supports CMCI */
111 if (val & MCI_CTL2_CMCI_EN) { 221 if (val & MCI_CTL2_CMCI_EN) {
112 if (!test_and_set_bit(i, owned) && !boot) 222 set_bit(i, owned);
113 print_update("CMCI", &hdr, i);
114 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 223 __clear_bit(i, __get_cpu_var(mce_poll_banks));
224 /*
225 * We are able to set thresholds for some banks that
226 * had a threshold of 0. This means the BIOS has not
227 * set the thresholds properly or does not work with
228 * this boot option. Note down now and report later.
229 */
230 if (mce_bios_cmci_threshold && bios_zero_thresh &&
231 (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
232 bios_wrong_thresh = 1;
115 } else { 233 } else {
116 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); 234 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
117 } 235 }
118 } 236 }
119 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 237 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
120 if (hdr) 238 if (mce_bios_cmci_threshold && bios_wrong_thresh) {
121 printk(KERN_CONT "\n"); 239 pr_info_once(
240 "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
241 pr_info_once(
242 "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
243 }
122} 244}
123 245
124/* 246/*
@@ -156,7 +278,7 @@ void cmci_clear(void)
156 continue; 278 continue;
157 /* Disable CMCI */ 279 /* Disable CMCI */
158 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 280 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
159 val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); 281 val &= ~MCI_CTL2_CMCI_EN;
160 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 282 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
161 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 283 __clear_bit(i, __get_cpu_var(mce_banks_owned));
162 } 284 }
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
186 continue; 308 continue;
187 /* Recheck banks in case CPUs don't all have the same */ 309 /* Recheck banks in case CPUs don't all have the same */
188 if (cmci_supported(&banks)) 310 if (cmci_supported(&banks))
189 cmci_discover(banks, 0); 311 cmci_discover(banks);
190 } 312 }
191 313
192 set_cpus_allowed_ptr(current, old); 314 set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
200{ 322{
201 int banks; 323 int banks;
202 if (cmci_supported(&banks)) 324 if (cmci_supported(&banks))
203 cmci_discover(banks, 0); 325 cmci_discover(banks);
204} 326}
205 327
206static void intel_init_cmci(void) 328static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
211 return; 333 return;
212 334
213 mce_threshold_vector = intel_threshold_interrupt; 335 mce_threshold_vector = intel_threshold_interrupt;
214 cmci_discover(banks, 1); 336 cmci_discover(banks);
215 /* 337 /*
216 * For CPU #0 this runs with still disabled APIC, but that's 338 * For CPU #0 this runs with still disabled APIC, but that's
217 * ok because only the vector is set up. We still do another 339 * ok because only the vector is set up. We still do another
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
index c7b3fe2d72e0..091972ef49de 100644
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -8,7 +8,10 @@
8open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; 8open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
9open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; 9open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
10 10
11print OUT "#include <asm/cpufeature.h>\n\n"; 11print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
12print OUT "#include <asm/cpufeature.h>\n";
13print OUT "#endif\n";
14print OUT "\n";
12print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; 15print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
13 16
14%features = (); 17%features = ();
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 38e4894165b9..99d96a4978b5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -1950,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
1950static struct intel_uncore_box * 1950static struct intel_uncore_box *
1951uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) 1951uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
1952{ 1952{
1953 static struct intel_uncore_box *box; 1953 struct intel_uncore_box *box;
1954 1954
1955 box = *per_cpu_ptr(pmu->box, cpu); 1955 box = *per_cpu_ptr(pmu->box, cpu);
1956 if (box) 1956 if (box)
@@ -2347,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event)
2347 return ret; 2347 return ret;
2348} 2348}
2349 2349
2350static ssize_t uncore_get_attr_cpumask(struct device *dev,
2351 struct device_attribute *attr, char *buf)
2352{
2353 int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
2354
2355 buf[n++] = '\n';
2356 buf[n] = '\0';
2357 return n;
2358}
2359
2360static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
2361
2362static struct attribute *uncore_pmu_attrs[] = {
2363 &dev_attr_cpumask.attr,
2364 NULL,
2365};
2366
2367static struct attribute_group uncore_pmu_attr_group = {
2368 .attrs = uncore_pmu_attrs,
2369};
2370
2350static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) 2371static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
2351{ 2372{
2352 int ret; 2373 int ret;
@@ -2384,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
2384 free_percpu(type->pmus[i].box); 2405 free_percpu(type->pmus[i].box);
2385 kfree(type->pmus); 2406 kfree(type->pmus);
2386 type->pmus = NULL; 2407 type->pmus = NULL;
2387 kfree(type->attr_groups[1]); 2408 kfree(type->events_group);
2388 type->attr_groups[1] = NULL; 2409 type->events_group = NULL;
2389} 2410}
2390 2411
2391static void __init uncore_types_exit(struct intel_uncore_type **types) 2412static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2437,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
2437 for (j = 0; j < i; j++) 2458 for (j = 0; j < i; j++)
2438 attrs[j] = &type->event_descs[j].attr.attr; 2459 attrs[j] = &type->event_descs[j].attr.attr;
2439 2460
2440 type->attr_groups[1] = events_group; 2461 type->events_group = events_group;
2441 } 2462 }
2442 2463
2464 type->pmu_group = &uncore_pmu_attr_group;
2443 type->pmus = pmus; 2465 type->pmus = pmus;
2444 return 0; 2466 return 0;
2445fail: 2467fail:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 5b81c1856aac..e68a4550e952 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@ struct intel_uncore_type {
369 struct intel_uncore_pmu *pmus; 369 struct intel_uncore_pmu *pmus;
370 struct intel_uncore_ops *ops; 370 struct intel_uncore_ops *ops;
371 struct uncore_event_desc *event_descs; 371 struct uncore_event_desc *event_descs;
372 const struct attribute_group *attr_groups[3]; 372 const struct attribute_group *attr_groups[4];
373}; 373};
374 374
375#define format_group attr_groups[0] 375#define pmu_group attr_groups[0]
376#define format_group attr_groups[1]
377#define events_group attr_groups[2]
376 378
377struct intel_uncore_ops { 379struct intel_uncore_ops {
378 void (*init_box)(struct intel_uncore_box *); 380 void (*init_box)(struct intel_uncore_box *);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 8022c6681485..fbd895562292 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
140 140
141static void *c_start(struct seq_file *m, loff_t *pos) 141static void *c_start(struct seq_file *m, loff_t *pos)
142{ 142{
143 if (*pos == 0) /* just in case, cpu 0 is not the first */ 143 *pos = cpumask_next(*pos - 1, cpu_online_mask);
144 *pos = cpumask_first(cpu_online_mask);
145 else
146 *pos = cpumask_next(*pos - 1, cpu_online_mask);
147 if ((*pos) < nr_cpu_ids) 144 if ((*pos) < nr_cpu_ids)
148 return &cpu_data(*pos); 145 return &cpu_data(*pos);
149 return NULL; 146 return NULL;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 39472dd2323f..60c78917190c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -199,12 +199,14 @@ static int __init cpuid_init(void)
199 goto out_chrdev; 199 goto out_chrdev;
200 } 200 }
201 cpuid_class->devnode = cpuid_devnode; 201 cpuid_class->devnode = cpuid_devnode;
202 get_online_cpus();
202 for_each_online_cpu(i) { 203 for_each_online_cpu(i) {
203 err = cpuid_device_create(i); 204 err = cpuid_device_create(i);
204 if (err != 0) 205 if (err != 0)
205 goto out_class; 206 goto out_class;
206 } 207 }
207 register_hotcpu_notifier(&cpuid_class_cpu_notifier); 208 register_hotcpu_notifier(&cpuid_class_cpu_notifier);
209 put_online_cpus();
208 210
209 err = 0; 211 err = 0;
210 goto out; 212 goto out;
@@ -214,6 +216,7 @@ out_class:
214 for_each_online_cpu(i) { 216 for_each_online_cpu(i) {
215 cpuid_device_destroy(i); 217 cpuid_device_destroy(i);
216 } 218 }
219 put_online_cpus();
217 class_destroy(cpuid_class); 220 class_destroy(cpuid_class);
218out_chrdev: 221out_chrdev:
219 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); 222 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void)
225{ 228{
226 int cpu = 0; 229 int cpu = 0;
227 230
231 get_online_cpus();
228 for_each_online_cpu(cpu) 232 for_each_online_cpu(cpu)
229 cpuid_device_destroy(cpu); 233 cpuid_device_destroy(cpu);
230 class_destroy(cpuid_class); 234 class_destroy(cpuid_class);
231 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); 235 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
232 unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); 236 unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
237 put_online_cpus();
233} 238}
234 239
235module_init(cpuid_init); 240module_init(cpuid_init);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3ae2ced4a874..b1581527a236 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = {
342 .xlate = ioapic_xlate, 342 .xlate = ioapic_xlate,
343}; 343};
344 344
345static void dt_add_ioapic_domain(unsigned int ioapic_num,
346 struct device_node *np)
347{
348 struct irq_domain *id;
349 struct mp_ioapic_gsi *gsi_cfg;
350 int ret;
351 int num;
352
353 gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
354 num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
355
356 id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
357 (void *)ioapic_num);
358 BUG_ON(!id);
359 if (gsi_cfg->gsi_base == 0) {
360 /*
361 * The first NR_IRQS_LEGACY irq descs are allocated in
362 * early_irq_init() and need just a mapping. The
363 * remaining irqs need both. All of them are preallocated
364 * and assigned so we can keep the 1:1 mapping which the ioapic
365 * is having.
366 */
367 ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
368 if (ret)
369 pr_err("Error mapping legacy IRQs: %d\n", ret);
370
371 if (num > NR_IRQS_LEGACY) {
372 ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
373 NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
374 if (ret)
375 pr_err("Error creating mapping for the "
376 "remaining IRQs: %d\n", ret);
377 }
378 irq_set_default_host(id);
379 } else {
380 ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
381 if (ret)
382 pr_err("Error creating IRQ mapping: %d\n", ret);
383 }
384}
385
345static void __init ioapic_add_ofnode(struct device_node *np) 386static void __init ioapic_add_ofnode(struct device_node *np)
346{ 387{
347 struct resource r; 388 struct resource r;
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np)
356 397
357 for (i = 0; i < nr_ioapics; i++) { 398 for (i = 0; i < nr_ioapics; i++) {
358 if (r.start == mpc_ioapic_addr(i)) { 399 if (r.start == mpc_ioapic_addr(i)) {
359 struct irq_domain *id; 400 dt_add_ioapic_domain(i, np);
360 struct mp_ioapic_gsi *gsi_cfg;
361
362 gsi_cfg = mp_ioapic_gsi_routing(i);
363
364 id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
365 &ioapic_irq_domain_ops,
366 (void*)i);
367 BUG_ON(!id);
368 return; 401 return;
369 } 402 }
370 } 403 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f28837476..0750e3ba87c0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -57,6 +57,7 @@
57#include <asm/cpufeature.h> 57#include <asm/cpufeature.h>
58#include <asm/alternative-asm.h> 58#include <asm/alternative-asm.h>
59#include <asm/asm.h> 59#include <asm/asm.h>
60#include <asm/smap.h>
60 61
61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 62/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
62#include <linux/elf-em.h> 63#include <linux/elf-em.h>
@@ -407,7 +408,9 @@ sysenter_past_esp:
407 */ 408 */
408 cmpl $__PAGE_OFFSET-3,%ebp 409 cmpl $__PAGE_OFFSET-3,%ebp
409 jae syscall_fault 410 jae syscall_fault
411 ASM_STAC
4101: movl (%ebp),%ebp 4121: movl (%ebp),%ebp
413 ASM_CLAC
411 movl %ebp,PT_EBP(%esp) 414 movl %ebp,PT_EBP(%esp)
412 _ASM_EXTABLE(1b,syscall_fault) 415 _ASM_EXTABLE(1b,syscall_fault)
413 416
@@ -488,6 +491,7 @@ ENDPROC(ia32_sysenter_target)
488 # system call handler stub 491 # system call handler stub
489ENTRY(system_call) 492ENTRY(system_call)
490 RING0_INT_FRAME # can't unwind into user space anyway 493 RING0_INT_FRAME # can't unwind into user space anyway
494 ASM_CLAC
491 pushl_cfi %eax # save orig_eax 495 pushl_cfi %eax # save orig_eax
492 SAVE_ALL 496 SAVE_ALL
493 GET_THREAD_INFO(%ebp) 497 GET_THREAD_INFO(%ebp)
@@ -670,6 +674,7 @@ END(syscall_exit_work)
670 674
671 RING0_INT_FRAME # can't unwind into user space anyway 675 RING0_INT_FRAME # can't unwind into user space anyway
672syscall_fault: 676syscall_fault:
677 ASM_CLAC
673 GET_THREAD_INFO(%ebp) 678 GET_THREAD_INFO(%ebp)
674 movl $-EFAULT,PT_EAX(%esp) 679 movl $-EFAULT,PT_EAX(%esp)
675 jmp resume_userspace 680 jmp resume_userspace
@@ -825,6 +830,7 @@ END(interrupt)
825 */ 830 */
826 .p2align CONFIG_X86_L1_CACHE_SHIFT 831 .p2align CONFIG_X86_L1_CACHE_SHIFT
827common_interrupt: 832common_interrupt:
833 ASM_CLAC
828 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ 834 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
829 SAVE_ALL 835 SAVE_ALL
830 TRACE_IRQS_OFF 836 TRACE_IRQS_OFF
@@ -841,6 +847,7 @@ ENDPROC(common_interrupt)
841#define BUILD_INTERRUPT3(name, nr, fn) \ 847#define BUILD_INTERRUPT3(name, nr, fn) \
842ENTRY(name) \ 848ENTRY(name) \
843 RING0_INT_FRAME; \ 849 RING0_INT_FRAME; \
850 ASM_CLAC; \
844 pushl_cfi $~(nr); \ 851 pushl_cfi $~(nr); \
845 SAVE_ALL; \ 852 SAVE_ALL; \
846 TRACE_IRQS_OFF \ 853 TRACE_IRQS_OFF \
@@ -857,6 +864,7 @@ ENDPROC(name)
857 864
858ENTRY(coprocessor_error) 865ENTRY(coprocessor_error)
859 RING0_INT_FRAME 866 RING0_INT_FRAME
867 ASM_CLAC
860 pushl_cfi $0 868 pushl_cfi $0
861 pushl_cfi $do_coprocessor_error 869 pushl_cfi $do_coprocessor_error
862 jmp error_code 870 jmp error_code
@@ -865,6 +873,7 @@ END(coprocessor_error)
865 873
866ENTRY(simd_coprocessor_error) 874ENTRY(simd_coprocessor_error)
867 RING0_INT_FRAME 875 RING0_INT_FRAME
876 ASM_CLAC
868 pushl_cfi $0 877 pushl_cfi $0
869#ifdef CONFIG_X86_INVD_BUG 878#ifdef CONFIG_X86_INVD_BUG
870 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ 879 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
@@ -886,6 +895,7 @@ END(simd_coprocessor_error)
886 895
887ENTRY(device_not_available) 896ENTRY(device_not_available)
888 RING0_INT_FRAME 897 RING0_INT_FRAME
898 ASM_CLAC
889 pushl_cfi $-1 # mark this as an int 899 pushl_cfi $-1 # mark this as an int
890 pushl_cfi $do_device_not_available 900 pushl_cfi $do_device_not_available
891 jmp error_code 901 jmp error_code
@@ -906,6 +916,7 @@ END(native_irq_enable_sysexit)
906 916
907ENTRY(overflow) 917ENTRY(overflow)
908 RING0_INT_FRAME 918 RING0_INT_FRAME
919 ASM_CLAC
909 pushl_cfi $0 920 pushl_cfi $0
910 pushl_cfi $do_overflow 921 pushl_cfi $do_overflow
911 jmp error_code 922 jmp error_code
@@ -914,6 +925,7 @@ END(overflow)
914 925
915ENTRY(bounds) 926ENTRY(bounds)
916 RING0_INT_FRAME 927 RING0_INT_FRAME
928 ASM_CLAC
917 pushl_cfi $0 929 pushl_cfi $0
918 pushl_cfi $do_bounds 930 pushl_cfi $do_bounds
919 jmp error_code 931 jmp error_code
@@ -922,6 +934,7 @@ END(bounds)
922 934
923ENTRY(invalid_op) 935ENTRY(invalid_op)
924 RING0_INT_FRAME 936 RING0_INT_FRAME
937 ASM_CLAC
925 pushl_cfi $0 938 pushl_cfi $0
926 pushl_cfi $do_invalid_op 939 pushl_cfi $do_invalid_op
927 jmp error_code 940 jmp error_code
@@ -930,6 +943,7 @@ END(invalid_op)
930 943
931ENTRY(coprocessor_segment_overrun) 944ENTRY(coprocessor_segment_overrun)
932 RING0_INT_FRAME 945 RING0_INT_FRAME
946 ASM_CLAC
933 pushl_cfi $0 947 pushl_cfi $0
934 pushl_cfi $do_coprocessor_segment_overrun 948 pushl_cfi $do_coprocessor_segment_overrun
935 jmp error_code 949 jmp error_code
@@ -938,6 +952,7 @@ END(coprocessor_segment_overrun)
938 952
939ENTRY(invalid_TSS) 953ENTRY(invalid_TSS)
940 RING0_EC_FRAME 954 RING0_EC_FRAME
955 ASM_CLAC
941 pushl_cfi $do_invalid_TSS 956 pushl_cfi $do_invalid_TSS
942 jmp error_code 957 jmp error_code
943 CFI_ENDPROC 958 CFI_ENDPROC
@@ -945,6 +960,7 @@ END(invalid_TSS)
945 960
946ENTRY(segment_not_present) 961ENTRY(segment_not_present)
947 RING0_EC_FRAME 962 RING0_EC_FRAME
963 ASM_CLAC
948 pushl_cfi $do_segment_not_present 964 pushl_cfi $do_segment_not_present
949 jmp error_code 965 jmp error_code
950 CFI_ENDPROC 966 CFI_ENDPROC
@@ -952,6 +968,7 @@ END(segment_not_present)
952 968
953ENTRY(stack_segment) 969ENTRY(stack_segment)
954 RING0_EC_FRAME 970 RING0_EC_FRAME
971 ASM_CLAC
955 pushl_cfi $do_stack_segment 972 pushl_cfi $do_stack_segment
956 jmp error_code 973 jmp error_code
957 CFI_ENDPROC 974 CFI_ENDPROC
@@ -959,6 +976,7 @@ END(stack_segment)
959 976
960ENTRY(alignment_check) 977ENTRY(alignment_check)
961 RING0_EC_FRAME 978 RING0_EC_FRAME
979 ASM_CLAC
962 pushl_cfi $do_alignment_check 980 pushl_cfi $do_alignment_check
963 jmp error_code 981 jmp error_code
964 CFI_ENDPROC 982 CFI_ENDPROC
@@ -966,6 +984,7 @@ END(alignment_check)
966 984
967ENTRY(divide_error) 985ENTRY(divide_error)
968 RING0_INT_FRAME 986 RING0_INT_FRAME
987 ASM_CLAC
969 pushl_cfi $0 # no error code 988 pushl_cfi $0 # no error code
970 pushl_cfi $do_divide_error 989 pushl_cfi $do_divide_error
971 jmp error_code 990 jmp error_code
@@ -975,6 +994,7 @@ END(divide_error)
975#ifdef CONFIG_X86_MCE 994#ifdef CONFIG_X86_MCE
976ENTRY(machine_check) 995ENTRY(machine_check)
977 RING0_INT_FRAME 996 RING0_INT_FRAME
997 ASM_CLAC
978 pushl_cfi $0 998 pushl_cfi $0
979 pushl_cfi machine_check_vector 999 pushl_cfi machine_check_vector
980 jmp error_code 1000 jmp error_code
@@ -984,6 +1004,7 @@ END(machine_check)
984 1004
985ENTRY(spurious_interrupt_bug) 1005ENTRY(spurious_interrupt_bug)
986 RING0_INT_FRAME 1006 RING0_INT_FRAME
1007 ASM_CLAC
987 pushl_cfi $0 1008 pushl_cfi $0
988 pushl_cfi $do_spurious_interrupt_bug 1009 pushl_cfi $do_spurious_interrupt_bug
989 jmp error_code 1010 jmp error_code
@@ -1109,17 +1130,21 @@ ENTRY(ftrace_caller)
1109 pushl %eax 1130 pushl %eax
1110 pushl %ecx 1131 pushl %ecx
1111 pushl %edx 1132 pushl %edx
1112 movl 0xc(%esp), %eax 1133 pushl $0 /* Pass NULL as regs pointer */
1134 movl 4*4(%esp), %eax
1113 movl 0x4(%ebp), %edx 1135 movl 0x4(%ebp), %edx
1136 leal function_trace_op, %ecx
1114 subl $MCOUNT_INSN_SIZE, %eax 1137 subl $MCOUNT_INSN_SIZE, %eax
1115 1138
1116.globl ftrace_call 1139.globl ftrace_call
1117ftrace_call: 1140ftrace_call:
1118 call ftrace_stub 1141 call ftrace_stub
1119 1142
1143 addl $4,%esp /* skip NULL pointer */
1120 popl %edx 1144 popl %edx
1121 popl %ecx 1145 popl %ecx
1122 popl %eax 1146 popl %eax
1147ftrace_ret:
1123#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1148#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1124.globl ftrace_graph_call 1149.globl ftrace_graph_call
1125ftrace_graph_call: 1150ftrace_graph_call:
@@ -1131,6 +1156,71 @@ ftrace_stub:
1131 ret 1156 ret
1132END(ftrace_caller) 1157END(ftrace_caller)
1133 1158
1159ENTRY(ftrace_regs_caller)
1160 pushf /* push flags before compare (in cs location) */
1161 cmpl $0, function_trace_stop
1162 jne ftrace_restore_flags
1163
1164 /*
1165 * i386 does not save SS and ESP when coming from kernel.
1166 * Instead, to get sp, &regs->sp is used (see ptrace.h).
1167 * Unfortunately, that means eflags must be at the same location
1168 * as the current return ip is. We move the return ip into the
1169 * ip location, and move flags into the return ip location.
1170 */
1171 pushl 4(%esp) /* save return ip into ip slot */
1172
1173 pushl $0 /* Load 0 into orig_ax */
1174 pushl %gs
1175 pushl %fs
1176 pushl %es
1177 pushl %ds
1178 pushl %eax
1179 pushl %ebp
1180 pushl %edi
1181 pushl %esi
1182 pushl %edx
1183 pushl %ecx
1184 pushl %ebx
1185
1186 movl 13*4(%esp), %eax /* Get the saved flags */
1187 movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
1188 /* clobbering return ip */
1189 movl $__KERNEL_CS,13*4(%esp)
1190
1191 movl 12*4(%esp), %eax /* Load ip (1st parameter) */
1192 subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
1193 movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
1194 leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
1195 pushl %esp /* Save pt_regs as 4th parameter */
1196
1197GLOBAL(ftrace_regs_call)
1198 call ftrace_stub
1199
1200 addl $4, %esp /* Skip pt_regs */
1201 movl 14*4(%esp), %eax /* Move flags back into cs */
1202 movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
1203 movl 12*4(%esp), %eax /* Get return ip from regs->ip */
1204 movl %eax, 14*4(%esp) /* Put return ip back for ret */
1205
1206 popl %ebx
1207 popl %ecx
1208 popl %edx
1209 popl %esi
1210 popl %edi
1211 popl %ebp
1212 popl %eax
1213 popl %ds
1214 popl %es
1215 popl %fs
1216 popl %gs
1217 addl $8, %esp /* Skip orig_ax and ip */
1218 popf /* Pop flags at end (no addl to corrupt flags) */
1219 jmp ftrace_ret
1220
1221ftrace_restore_flags:
1222 popf
1223 jmp ftrace_stub
1134#else /* ! CONFIG_DYNAMIC_FTRACE */ 1224#else /* ! CONFIG_DYNAMIC_FTRACE */
1135 1225
1136ENTRY(mcount) 1226ENTRY(mcount)
@@ -1171,9 +1261,6 @@ END(mcount)
1171 1261
1172#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1262#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1173ENTRY(ftrace_graph_caller) 1263ENTRY(ftrace_graph_caller)
1174 cmpl $0, function_trace_stop
1175 jne ftrace_stub
1176
1177 pushl %eax 1264 pushl %eax
1178 pushl %ecx 1265 pushl %ecx
1179 pushl %edx 1266 pushl %edx
@@ -1207,6 +1294,7 @@ return_to_handler:
1207 1294
1208ENTRY(page_fault) 1295ENTRY(page_fault)
1209 RING0_EC_FRAME 1296 RING0_EC_FRAME
1297 ASM_CLAC
1210 pushl_cfi $do_page_fault 1298 pushl_cfi $do_page_fault
1211 ALIGN 1299 ALIGN
1212error_code: 1300error_code:
@@ -1279,6 +1367,7 @@ END(page_fault)
1279 1367
1280ENTRY(debug) 1368ENTRY(debug)
1281 RING0_INT_FRAME 1369 RING0_INT_FRAME
1370 ASM_CLAC
1282 cmpl $ia32_sysenter_target,(%esp) 1371 cmpl $ia32_sysenter_target,(%esp)
1283 jne debug_stack_correct 1372 jne debug_stack_correct
1284 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn 1373 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
@@ -1303,6 +1392,7 @@ END(debug)
1303 */ 1392 */
1304ENTRY(nmi) 1393ENTRY(nmi)
1305 RING0_INT_FRAME 1394 RING0_INT_FRAME
1395 ASM_CLAC
1306 pushl_cfi %eax 1396 pushl_cfi %eax
1307 movl %ss, %eax 1397 movl %ss, %eax
1308 cmpw $__ESPFIX_SS, %ax 1398 cmpw $__ESPFIX_SS, %ax
@@ -1373,6 +1463,7 @@ END(nmi)
1373 1463
1374ENTRY(int3) 1464ENTRY(int3)
1375 RING0_INT_FRAME 1465 RING0_INT_FRAME
1466 ASM_CLAC
1376 pushl_cfi $-1 # mark this as an int 1467 pushl_cfi $-1 # mark this as an int
1377 SAVE_ALL 1468 SAVE_ALL
1378 TRACE_IRQS_OFF 1469 TRACE_IRQS_OFF
@@ -1393,6 +1484,7 @@ END(general_protection)
1393#ifdef CONFIG_KVM_GUEST 1484#ifdef CONFIG_KVM_GUEST
1394ENTRY(async_page_fault) 1485ENTRY(async_page_fault)
1395 RING0_EC_FRAME 1486 RING0_EC_FRAME
1487 ASM_CLAC
1396 pushl_cfi $do_async_page_fault 1488 pushl_cfi $do_async_page_fault
1397 jmp error_code 1489 jmp error_code
1398 CFI_ENDPROC 1490 CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834f..44531acd9a81 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,8 @@
56#include <asm/ftrace.h> 56#include <asm/ftrace.h>
57#include <asm/percpu.h> 57#include <asm/percpu.h>
58#include <asm/asm.h> 58#include <asm/asm.h>
59#include <asm/rcu.h>
60#include <asm/smap.h>
59#include <linux/err.h> 61#include <linux/err.h>
60 62
61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 63/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -68,25 +70,51 @@
68 .section .entry.text, "ax" 70 .section .entry.text, "ax"
69 71
70#ifdef CONFIG_FUNCTION_TRACER 72#ifdef CONFIG_FUNCTION_TRACER
73
74#ifdef CC_USING_FENTRY
75# define function_hook __fentry__
76#else
77# define function_hook mcount
78#endif
79
71#ifdef CONFIG_DYNAMIC_FTRACE 80#ifdef CONFIG_DYNAMIC_FTRACE
72ENTRY(mcount) 81
82ENTRY(function_hook)
73 retq 83 retq
74END(mcount) 84END(function_hook)
85
86/* skip is set if stack has been adjusted */
87.macro ftrace_caller_setup skip=0
88 MCOUNT_SAVE_FRAME \skip
89
90 /* Load the ftrace_ops into the 3rd parameter */
91 leaq function_trace_op, %rdx
92
93 /* Load ip into the first parameter */
94 movq RIP(%rsp), %rdi
95 subq $MCOUNT_INSN_SIZE, %rdi
96 /* Load the parent_ip into the second parameter */
97#ifdef CC_USING_FENTRY
98 movq SS+16(%rsp), %rsi
99#else
100 movq 8(%rbp), %rsi
101#endif
102.endm
75 103
76ENTRY(ftrace_caller) 104ENTRY(ftrace_caller)
105 /* Check if tracing was disabled (quick check) */
77 cmpl $0, function_trace_stop 106 cmpl $0, function_trace_stop
78 jne ftrace_stub 107 jne ftrace_stub
79 108
80 MCOUNT_SAVE_FRAME 109 ftrace_caller_setup
81 110 /* regs go into 4th parameter (but make it NULL) */
82 movq 0x38(%rsp), %rdi 111 movq $0, %rcx
83 movq 8(%rbp), %rsi
84 subq $MCOUNT_INSN_SIZE, %rdi
85 112
86GLOBAL(ftrace_call) 113GLOBAL(ftrace_call)
87 call ftrace_stub 114 call ftrace_stub
88 115
89 MCOUNT_RESTORE_FRAME 116 MCOUNT_RESTORE_FRAME
117ftrace_return:
90 118
91#ifdef CONFIG_FUNCTION_GRAPH_TRACER 119#ifdef CONFIG_FUNCTION_GRAPH_TRACER
92GLOBAL(ftrace_graph_call) 120GLOBAL(ftrace_graph_call)
@@ -97,8 +125,78 @@ GLOBAL(ftrace_stub)
97 retq 125 retq
98END(ftrace_caller) 126END(ftrace_caller)
99 127
128ENTRY(ftrace_regs_caller)
129 /* Save the current flags before compare (in SS location)*/
130 pushfq
131
132 /* Check if tracing was disabled (quick check) */
133 cmpl $0, function_trace_stop
134 jne ftrace_restore_flags
135
136 /* skip=8 to skip flags saved in SS */
137 ftrace_caller_setup 8
138
139 /* Save the rest of pt_regs */
140 movq %r15, R15(%rsp)
141 movq %r14, R14(%rsp)
142 movq %r13, R13(%rsp)
143 movq %r12, R12(%rsp)
144 movq %r11, R11(%rsp)
145 movq %r10, R10(%rsp)
146 movq %rbp, RBP(%rsp)
147 movq %rbx, RBX(%rsp)
148 /* Copy saved flags */
149 movq SS(%rsp), %rcx
150 movq %rcx, EFLAGS(%rsp)
151 /* Kernel segments */
152 movq $__KERNEL_DS, %rcx
153 movq %rcx, SS(%rsp)
154 movq $__KERNEL_CS, %rcx
155 movq %rcx, CS(%rsp)
156 /* Stack - skipping return address */
157 leaq SS+16(%rsp), %rcx
158 movq %rcx, RSP(%rsp)
159
160 /* regs go into 4th parameter */
161 leaq (%rsp), %rcx
162
163GLOBAL(ftrace_regs_call)
164 call ftrace_stub
165
166 /* Copy flags back to SS, to restore them */
167 movq EFLAGS(%rsp), %rax
168 movq %rax, SS(%rsp)
169
170 /* Handlers can change the RIP */
171 movq RIP(%rsp), %rax
172 movq %rax, SS+8(%rsp)
173
174 /* restore the rest of pt_regs */
175 movq R15(%rsp), %r15
176 movq R14(%rsp), %r14
177 movq R13(%rsp), %r13
178 movq R12(%rsp), %r12
179 movq R10(%rsp), %r10
180 movq RBP(%rsp), %rbp
181 movq RBX(%rsp), %rbx
182
183 /* skip=8 to skip flags saved in SS */
184 MCOUNT_RESTORE_FRAME 8
185
186 /* Restore flags */
187 popfq
188
189 jmp ftrace_return
190ftrace_restore_flags:
191 popfq
192 jmp ftrace_stub
193
194END(ftrace_regs_caller)
195
196
100#else /* ! CONFIG_DYNAMIC_FTRACE */ 197#else /* ! CONFIG_DYNAMIC_FTRACE */
101ENTRY(mcount) 198
199ENTRY(function_hook)
102 cmpl $0, function_trace_stop 200 cmpl $0, function_trace_stop
103 jne ftrace_stub 201 jne ftrace_stub
104 202
@@ -119,8 +217,12 @@ GLOBAL(ftrace_stub)
119trace: 217trace:
120 MCOUNT_SAVE_FRAME 218 MCOUNT_SAVE_FRAME
121 219
122 movq 0x38(%rsp), %rdi 220 movq RIP(%rsp), %rdi
221#ifdef CC_USING_FENTRY
222 movq SS+16(%rsp), %rsi
223#else
123 movq 8(%rbp), %rsi 224 movq 8(%rbp), %rsi
225#endif
124 subq $MCOUNT_INSN_SIZE, %rdi 226 subq $MCOUNT_INSN_SIZE, %rdi
125 227
126 call *ftrace_trace_function 228 call *ftrace_trace_function
@@ -128,20 +230,22 @@ trace:
128 MCOUNT_RESTORE_FRAME 230 MCOUNT_RESTORE_FRAME
129 231
130 jmp ftrace_stub 232 jmp ftrace_stub
131END(mcount) 233END(function_hook)
132#endif /* CONFIG_DYNAMIC_FTRACE */ 234#endif /* CONFIG_DYNAMIC_FTRACE */
133#endif /* CONFIG_FUNCTION_TRACER */ 235#endif /* CONFIG_FUNCTION_TRACER */
134 236
135#ifdef CONFIG_FUNCTION_GRAPH_TRACER 237#ifdef CONFIG_FUNCTION_GRAPH_TRACER
136ENTRY(ftrace_graph_caller) 238ENTRY(ftrace_graph_caller)
137 cmpl $0, function_trace_stop
138 jne ftrace_stub
139
140 MCOUNT_SAVE_FRAME 239 MCOUNT_SAVE_FRAME
141 240
241#ifdef CC_USING_FENTRY
242 leaq SS+16(%rsp), %rdi
243 movq $0, %rdx /* No framepointers needed */
244#else
142 leaq 8(%rbp), %rdi 245 leaq 8(%rbp), %rdi
143 movq 0x38(%rsp), %rsi
144 movq (%rbp), %rdx 246 movq (%rbp), %rdx
247#endif
248 movq RIP(%rsp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rsi 249 subq $MCOUNT_INSN_SIZE, %rsi
146 250
147 call prepare_ftrace_return 251 call prepare_ftrace_return
@@ -342,15 +446,15 @@ ENDPROC(native_usergs_sysret64)
342 .macro SAVE_ARGS_IRQ 446 .macro SAVE_ARGS_IRQ
343 cld 447 cld
344 /* start from rbp in pt_regs and jump over */ 448 /* start from rbp in pt_regs and jump over */
345 movq_cfi rdi, RDI-RBP 449 movq_cfi rdi, (RDI-RBP)
346 movq_cfi rsi, RSI-RBP 450 movq_cfi rsi, (RSI-RBP)
347 movq_cfi rdx, RDX-RBP 451 movq_cfi rdx, (RDX-RBP)
348 movq_cfi rcx, RCX-RBP 452 movq_cfi rcx, (RCX-RBP)
349 movq_cfi rax, RAX-RBP 453 movq_cfi rax, (RAX-RBP)
350 movq_cfi r8, R8-RBP 454 movq_cfi r8, (R8-RBP)
351 movq_cfi r9, R9-RBP 455 movq_cfi r9, (R9-RBP)
352 movq_cfi r10, R10-RBP 456 movq_cfi r10, (R10-RBP)
353 movq_cfi r11, R11-RBP 457 movq_cfi r11, (R11-RBP)
354 458
355 /* Save rbp so that we can unwind from get_irq_regs() */ 459 /* Save rbp so that we can unwind from get_irq_regs() */
356 movq_cfi rbp, 0 460 movq_cfi rbp, 0
@@ -384,7 +488,7 @@ ENDPROC(native_usergs_sysret64)
384 .endm 488 .endm
385 489
386ENTRY(save_rest) 490ENTRY(save_rest)
387 PARTIAL_FRAME 1 REST_SKIP+8 491 PARTIAL_FRAME 1 (REST_SKIP+8)
388 movq 5*8+16(%rsp), %r11 /* save return address */ 492 movq 5*8+16(%rsp), %r11 /* save return address */
389 movq_cfi rbx, RBX+16 493 movq_cfi rbx, RBX+16
390 movq_cfi rbp, RBP+16 494 movq_cfi rbp, RBP+16
@@ -440,7 +544,7 @@ ENTRY(ret_from_fork)
440 544
441 LOCK ; btr $TIF_FORK,TI_flags(%r8) 545 LOCK ; btr $TIF_FORK,TI_flags(%r8)
442 546
443 pushq_cfi kernel_eflags(%rip) 547 pushq_cfi $0x0002
444 popfq_cfi # reset kernel eflags 548 popfq_cfi # reset kernel eflags
445 549
446 call schedule_tail # rdi: 'prev' task parameter 550 call schedule_tail # rdi: 'prev' task parameter
@@ -465,7 +569,8 @@ END(ret_from_fork)
465 * System call entry. Up to 6 arguments in registers are supported. 569 * System call entry. Up to 6 arguments in registers are supported.
466 * 570 *
467 * SYSCALL does not save anything on the stack and does not change the 571 * SYSCALL does not save anything on the stack and does not change the
468 * stack pointer. 572 * stack pointer. However, it does mask the flags register for us, so
573 * CLD and CLAC are not needed.
469 */ 574 */
470 575
471/* 576/*
@@ -565,7 +670,7 @@ sysret_careful:
565 TRACE_IRQS_ON 670 TRACE_IRQS_ON
566 ENABLE_INTERRUPTS(CLBR_NONE) 671 ENABLE_INTERRUPTS(CLBR_NONE)
567 pushq_cfi %rdi 672 pushq_cfi %rdi
568 call schedule 673 SCHEDULE_USER
569 popq_cfi %rdi 674 popq_cfi %rdi
570 jmp sysret_check 675 jmp sysret_check
571 676
@@ -678,7 +783,7 @@ int_careful:
678 TRACE_IRQS_ON 783 TRACE_IRQS_ON
679 ENABLE_INTERRUPTS(CLBR_NONE) 784 ENABLE_INTERRUPTS(CLBR_NONE)
680 pushq_cfi %rdi 785 pushq_cfi %rdi
681 call schedule 786 SCHEDULE_USER
682 popq_cfi %rdi 787 popq_cfi %rdi
683 DISABLE_INTERRUPTS(CLBR_NONE) 788 DISABLE_INTERRUPTS(CLBR_NONE)
684 TRACE_IRQS_OFF 789 TRACE_IRQS_OFF
@@ -884,6 +989,7 @@ END(interrupt)
884 */ 989 */
885 .p2align CONFIG_X86_L1_CACHE_SHIFT 990 .p2align CONFIG_X86_L1_CACHE_SHIFT
886common_interrupt: 991common_interrupt:
992 ASM_CLAC
887 XCPT_FRAME 993 XCPT_FRAME
888 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 994 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
889 interrupt do_IRQ 995 interrupt do_IRQ
@@ -974,7 +1080,7 @@ retint_careful:
974 TRACE_IRQS_ON 1080 TRACE_IRQS_ON
975 ENABLE_INTERRUPTS(CLBR_NONE) 1081 ENABLE_INTERRUPTS(CLBR_NONE)
976 pushq_cfi %rdi 1082 pushq_cfi %rdi
977 call schedule 1083 SCHEDULE_USER
978 popq_cfi %rdi 1084 popq_cfi %rdi
979 GET_THREAD_INFO(%rcx) 1085 GET_THREAD_INFO(%rcx)
980 DISABLE_INTERRUPTS(CLBR_NONE) 1086 DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1023,6 +1129,7 @@ END(common_interrupt)
1023 */ 1129 */
1024.macro apicinterrupt num sym do_sym 1130.macro apicinterrupt num sym do_sym
1025ENTRY(\sym) 1131ENTRY(\sym)
1132 ASM_CLAC
1026 INTR_FRAME 1133 INTR_FRAME
1027 pushq_cfi $~(\num) 1134 pushq_cfi $~(\num)
1028.Lcommon_\sym: 1135.Lcommon_\sym:
@@ -1077,6 +1184,7 @@ apicinterrupt IRQ_WORK_VECTOR \
1077 */ 1184 */
1078.macro zeroentry sym do_sym 1185.macro zeroentry sym do_sym
1079ENTRY(\sym) 1186ENTRY(\sym)
1187 ASM_CLAC
1080 INTR_FRAME 1188 INTR_FRAME
1081 PARAVIRT_ADJUST_EXCEPTION_FRAME 1189 PARAVIRT_ADJUST_EXCEPTION_FRAME
1082 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1190 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1094,6 +1202,7 @@ END(\sym)
1094 1202
1095.macro paranoidzeroentry sym do_sym 1203.macro paranoidzeroentry sym do_sym
1096ENTRY(\sym) 1204ENTRY(\sym)
1205 ASM_CLAC
1097 INTR_FRAME 1206 INTR_FRAME
1098 PARAVIRT_ADJUST_EXCEPTION_FRAME 1207 PARAVIRT_ADJUST_EXCEPTION_FRAME
1099 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1208 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1112,6 +1221,7 @@ END(\sym)
1112#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) 1221#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
1113.macro paranoidzeroentry_ist sym do_sym ist 1222.macro paranoidzeroentry_ist sym do_sym ist
1114ENTRY(\sym) 1223ENTRY(\sym)
1224 ASM_CLAC
1115 INTR_FRAME 1225 INTR_FRAME
1116 PARAVIRT_ADJUST_EXCEPTION_FRAME 1226 PARAVIRT_ADJUST_EXCEPTION_FRAME
1117 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1227 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1131,6 +1241,7 @@ END(\sym)
1131 1241
1132.macro errorentry sym do_sym 1242.macro errorentry sym do_sym
1133ENTRY(\sym) 1243ENTRY(\sym)
1244 ASM_CLAC
1134 XCPT_FRAME 1245 XCPT_FRAME
1135 PARAVIRT_ADJUST_EXCEPTION_FRAME 1246 PARAVIRT_ADJUST_EXCEPTION_FRAME
1136 subq $ORIG_RAX-R15, %rsp 1247 subq $ORIG_RAX-R15, %rsp
@@ -1149,6 +1260,7 @@ END(\sym)
1149 /* error code is on the stack already */ 1260 /* error code is on the stack already */
1150.macro paranoiderrorentry sym do_sym 1261.macro paranoiderrorentry sym do_sym
1151ENTRY(\sym) 1262ENTRY(\sym)
1263 ASM_CLAC
1152 XCPT_FRAME 1264 XCPT_FRAME
1153 PARAVIRT_ADJUST_EXCEPTION_FRAME 1265 PARAVIRT_ADJUST_EXCEPTION_FRAME
1154 subq $ORIG_RAX-R15, %rsp 1266 subq $ORIG_RAX-R15, %rsp
@@ -1449,7 +1561,7 @@ paranoid_userspace:
1449paranoid_schedule: 1561paranoid_schedule:
1450 TRACE_IRQS_ON 1562 TRACE_IRQS_ON
1451 ENABLE_INTERRUPTS(CLBR_ANY) 1563 ENABLE_INTERRUPTS(CLBR_ANY)
1452 call schedule 1564 SCHEDULE_USER
1453 DISABLE_INTERRUPTS(CLBR_ANY) 1565 DISABLE_INTERRUPTS(CLBR_ANY)
1454 TRACE_IRQS_OFF 1566 TRACE_IRQS_OFF
1455 jmp paranoid_userspace 1567 jmp paranoid_userspace
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c3a7cb4bf6e6..1d414029f1d8 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -206,6 +206,21 @@ static int
206ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 206ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
207 unsigned const char *new_code); 207 unsigned const char *new_code);
208 208
209/*
210 * Should never be called:
211 * As it is only called by __ftrace_replace_code() which is called by
212 * ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
213 * which is called to turn mcount into nops or nops into function calls
214 * but not to convert a function from not using regs to one that uses
215 * regs, which ftrace_modify_call() is for.
216 */
217int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
218 unsigned long addr)
219{
220 WARN_ON(1);
221 return -EINVAL;
222}
223
209int ftrace_update_ftrace_func(ftrace_func_t func) 224int ftrace_update_ftrace_func(ftrace_func_t func)
210{ 225{
211 unsigned long ip = (unsigned long)(&ftrace_call); 226 unsigned long ip = (unsigned long)(&ftrace_call);
@@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
220 235
221 ret = ftrace_modify_code(ip, old, new); 236 ret = ftrace_modify_code(ip, old, new);
222 237
238 /* Also update the regs callback function */
239 if (!ret) {
240 ip = (unsigned long)(&ftrace_regs_call);
241 memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
242 new = ftrace_call_replace(ip, (unsigned long)func);
243 ret = ftrace_modify_code(ip, old, new);
244 }
245
223 atomic_dec(&modifying_ftrace_code); 246 atomic_dec(&modifying_ftrace_code);
224 247
225 return ret; 248 return ret;
@@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
299 return add_break(rec->ip, old); 322 return add_break(rec->ip, old);
300} 323}
301 324
325/*
326 * If the record has the FTRACE_FL_REGS set, that means that it
327 * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
328 * is not not set, then it wants to convert to the normal callback.
329 */
330static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
331{
332 if (rec->flags & FTRACE_FL_REGS)
333 return (unsigned long)FTRACE_REGS_ADDR;
334 else
335 return (unsigned long)FTRACE_ADDR;
336}
337
338/*
339 * The FTRACE_FL_REGS_EN is set when the record already points to
340 * a function that saves all the regs. Basically the '_EN' version
341 * represents the current state of the function.
342 */
343static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
344{
345 if (rec->flags & FTRACE_FL_REGS_EN)
346 return (unsigned long)FTRACE_REGS_ADDR;
347 else
348 return (unsigned long)FTRACE_ADDR;
349}
350
302static int add_breakpoints(struct dyn_ftrace *rec, int enable) 351static int add_breakpoints(struct dyn_ftrace *rec, int enable)
303{ 352{
304 unsigned long ftrace_addr; 353 unsigned long ftrace_addr;
@@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
306 355
307 ret = ftrace_test_record(rec, enable); 356 ret = ftrace_test_record(rec, enable);
308 357
309 ftrace_addr = (unsigned long)FTRACE_ADDR; 358 ftrace_addr = get_ftrace_addr(rec);
310 359
311 switch (ret) { 360 switch (ret) {
312 case FTRACE_UPDATE_IGNORE: 361 case FTRACE_UPDATE_IGNORE:
@@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
316 /* converting nop to call */ 365 /* converting nop to call */
317 return add_brk_on_nop(rec); 366 return add_brk_on_nop(rec);
318 367
368 case FTRACE_UPDATE_MODIFY_CALL_REGS:
369 case FTRACE_UPDATE_MODIFY_CALL:
370 ftrace_addr = get_ftrace_old_addr(rec);
371 /* fall through */
319 case FTRACE_UPDATE_MAKE_NOP: 372 case FTRACE_UPDATE_MAKE_NOP:
320 /* converting a call to a nop */ 373 /* converting a call to a nop */
321 return add_brk_on_call(rec, ftrace_addr); 374 return add_brk_on_call(rec, ftrace_addr);
@@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
360 * If not, don't touch the breakpoint, we make just create 413 * If not, don't touch the breakpoint, we make just create
361 * a disaster. 414 * a disaster.
362 */ 415 */
363 ftrace_addr = (unsigned long)FTRACE_ADDR; 416 ftrace_addr = get_ftrace_addr(rec);
417 nop = ftrace_call_replace(ip, ftrace_addr);
418
419 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
420 goto update;
421
422 /* Check both ftrace_addr and ftrace_old_addr */
423 ftrace_addr = get_ftrace_old_addr(rec);
364 nop = ftrace_call_replace(ip, ftrace_addr); 424 nop = ftrace_call_replace(ip, ftrace_addr);
365 425
366 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) 426 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
367 return -EINVAL; 427 return -EINVAL;
368 } 428 }
369 429
430 update:
370 return probe_kernel_write((void *)ip, &nop[0], 1); 431 return probe_kernel_write((void *)ip, &nop[0], 1);
371} 432}
372 433
@@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable)
405 466
406 ret = ftrace_test_record(rec, enable); 467 ret = ftrace_test_record(rec, enable);
407 468
408 ftrace_addr = (unsigned long)FTRACE_ADDR; 469 ftrace_addr = get_ftrace_addr(rec);
409 470
410 switch (ret) { 471 switch (ret) {
411 case FTRACE_UPDATE_IGNORE: 472 case FTRACE_UPDATE_IGNORE:
412 return 0; 473 return 0;
413 474
475 case FTRACE_UPDATE_MODIFY_CALL_REGS:
476 case FTRACE_UPDATE_MODIFY_CALL:
414 case FTRACE_UPDATE_MAKE_CALL: 477 case FTRACE_UPDATE_MAKE_CALL:
415 /* converting nop to call */ 478 /* converting nop to call */
416 return add_update_call(rec, ftrace_addr); 479 return add_update_call(rec, ftrace_addr);
@@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
455 518
456 ret = ftrace_update_record(rec, enable); 519 ret = ftrace_update_record(rec, enable);
457 520
458 ftrace_addr = (unsigned long)FTRACE_ADDR; 521 ftrace_addr = get_ftrace_addr(rec);
459 522
460 switch (ret) { 523 switch (ret) {
461 case FTRACE_UPDATE_IGNORE: 524 case FTRACE_UPDATE_IGNORE:
462 return 0; 525 return 0;
463 526
527 case FTRACE_UPDATE_MODIFY_CALL_REGS:
528 case FTRACE_UPDATE_MODIFY_CALL:
464 case FTRACE_UPDATE_MAKE_CALL: 529 case FTRACE_UPDATE_MAKE_CALL:
465 /* converting nop to call */ 530 /* converting nop to call */
466 return finish_update_call(rec, ftrace_addr); 531 return finish_update_call(rec, ftrace_addr);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d42ab17b7397..957a47aec64e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -287,27 +287,28 @@ ENTRY(startup_32_smp)
287 leal -__PAGE_OFFSET(%ecx),%esp 287 leal -__PAGE_OFFSET(%ecx),%esp
288 288
289default_entry: 289default_entry:
290
291/* 290/*
292 * New page tables may be in 4Mbyte page mode and may 291 * New page tables may be in 4Mbyte page mode and may
293 * be using the global pages. 292 * be using the global pages.
294 * 293 *
295 * NOTE! If we are on a 486 we may have no cr4 at all! 294 * NOTE! If we are on a 486 we may have no cr4 at all!
296 * So we do not try to touch it unless we really have 295 * Specifically, cr4 exists if and only if CPUID exists,
297 * some bits in it to set. This won't work if the BSP 296 * which in turn exists if and only if EFLAGS.ID exists.
298 * implements cr4 but this AP does not -- very unlikely
299 * but be warned! The same applies to the pse feature
300 * if not equally supported. --macro
301 *
302 * NOTE! We have to correct for the fact that we're
303 * not yet offset PAGE_OFFSET..
304 */ 297 */
305#define cr4_bits pa(mmu_cr4_features) 298 movl $X86_EFLAGS_ID,%ecx
306 movl cr4_bits,%edx 299 pushl %ecx
307 andl %edx,%edx 300 popfl
308 jz 6f 301 pushfl
309 movl %cr4,%eax # Turn on paging options (PSE,PAE,..) 302 popl %eax
310 orl %edx,%eax 303 pushl $0
304 popfl
305 pushfl
306 popl %edx
307 xorl %edx,%eax
308 testl %ecx,%eax
309 jz 6f # No ID flag = no CPUID = no CR4
310
311 movl pa(mmu_cr4_features),%eax
311 movl %eax,%cr4 312 movl %eax,%cr4
312 313
313 testb $X86_CR4_PAE, %al # check if PAE is enabled 314 testb $X86_CR4_PAE, %al # check if PAE is enabled
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f250431fb505..675a05012449 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -19,24 +19,17 @@
19#include <asm/fpu-internal.h> 19#include <asm/fpu-internal.h>
20#include <asm/user.h> 20#include <asm/user.h>
21 21
22#ifdef CONFIG_X86_64
23# include <asm/sigcontext32.h>
24# include <asm/user32.h>
25#else
26# define save_i387_xstate_ia32 save_i387_xstate
27# define restore_i387_xstate_ia32 restore_i387_xstate
28# define _fpstate_ia32 _fpstate
29# define _xstate_ia32 _xstate
30# define sig_xstate_ia32_size sig_xstate_size
31# define fx_sw_reserved_ia32 fx_sw_reserved
32# define user_i387_ia32_struct user_i387_struct
33# define user32_fxsr_struct user_fxsr_struct
34#endif
35
36/* 22/*
37 * Were we in an interrupt that interrupted kernel mode? 23 * Were we in an interrupt that interrupted kernel mode?
38 * 24 *
39 * We can do a kernel_fpu_begin/end() pair *ONLY* if that 25 * For now, with eagerfpu we will return interrupted kernel FPU
26 * state as not-idle. TBD: Ideally we can change the return value
27 * to something like __thread_has_fpu(current). But we need to
28 * be careful of doing __thread_clear_has_fpu() before saving
29 * the FPU etc for supporting nested uses etc. For now, take
30 * the simple route!
31 *
32 * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
40 * pair does nothing at all: the thread must not have fpu (so 33 * pair does nothing at all: the thread must not have fpu (so
41 * that we don't try to save the FPU state), and TS must 34 * that we don't try to save the FPU state), and TS must
42 * be set (so that the clts/stts pair does nothing that is 35 * be set (so that the clts/stts pair does nothing that is
@@ -44,6 +37,9 @@
44 */ 37 */
45static inline bool interrupted_kernel_fpu_idle(void) 38static inline bool interrupted_kernel_fpu_idle(void)
46{ 39{
40 if (use_eager_fpu())
41 return 0;
42
47 return !__thread_has_fpu(current) && 43 return !__thread_has_fpu(current) &&
48 (read_cr0() & X86_CR0_TS); 44 (read_cr0() & X86_CR0_TS);
49} 45}
@@ -77,29 +73,29 @@ bool irq_fpu_usable(void)
77} 73}
78EXPORT_SYMBOL(irq_fpu_usable); 74EXPORT_SYMBOL(irq_fpu_usable);
79 75
80void kernel_fpu_begin(void) 76void __kernel_fpu_begin(void)
81{ 77{
82 struct task_struct *me = current; 78 struct task_struct *me = current;
83 79
84 WARN_ON_ONCE(!irq_fpu_usable());
85 preempt_disable();
86 if (__thread_has_fpu(me)) { 80 if (__thread_has_fpu(me)) {
87 __save_init_fpu(me); 81 __save_init_fpu(me);
88 __thread_clear_has_fpu(me); 82 __thread_clear_has_fpu(me);
89 /* We do 'stts()' in kernel_fpu_end() */ 83 /* We do 'stts()' in __kernel_fpu_end() */
90 } else { 84 } else if (!use_eager_fpu()) {
91 this_cpu_write(fpu_owner_task, NULL); 85 this_cpu_write(fpu_owner_task, NULL);
92 clts(); 86 clts();
93 } 87 }
94} 88}
95EXPORT_SYMBOL(kernel_fpu_begin); 89EXPORT_SYMBOL(__kernel_fpu_begin);
96 90
97void kernel_fpu_end(void) 91void __kernel_fpu_end(void)
98{ 92{
99 stts(); 93 if (use_eager_fpu())
100 preempt_enable(); 94 math_state_restore();
95 else
96 stts();
101} 97}
102EXPORT_SYMBOL(kernel_fpu_end); 98EXPORT_SYMBOL(__kernel_fpu_end);
103 99
104void unlazy_fpu(struct task_struct *tsk) 100void unlazy_fpu(struct task_struct *tsk)
105{ 101{
@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk)
113} 109}
114EXPORT_SYMBOL(unlazy_fpu); 110EXPORT_SYMBOL(unlazy_fpu);
115 111
116#ifdef CONFIG_MATH_EMULATION 112unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
117# define HAVE_HWFP (boot_cpu_data.hard_math)
118#else
119# define HAVE_HWFP 1
120#endif
121
122static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
123unsigned int xstate_size; 113unsigned int xstate_size;
124EXPORT_SYMBOL_GPL(xstate_size); 114EXPORT_SYMBOL_GPL(xstate_size);
125unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
126static struct i387_fxsave_struct fx_scratch __cpuinitdata; 115static struct i387_fxsave_struct fx_scratch __cpuinitdata;
127 116
128static void __cpuinit mxcsr_feature_mask_init(void) 117static void __cpuinit mxcsr_feature_mask_init(void)
129{ 118{
130 unsigned long mask = 0; 119 unsigned long mask = 0;
131 120
132 clts();
133 if (cpu_has_fxsr) { 121 if (cpu_has_fxsr) {
134 memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); 122 memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
135 asm volatile("fxsave %0" : : "m" (fx_scratch)); 123 asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
138 mask = 0x0000ffbf; 126 mask = 0x0000ffbf;
139 } 127 }
140 mxcsr_feature_mask &= mask; 128 mxcsr_feature_mask &= mask;
141 stts();
142} 129}
143 130
144static void __cpuinit init_thread_xstate(void) 131static void __cpuinit init_thread_xstate(void)
@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void)
192 init_thread_xstate(); 179 init_thread_xstate();
193 180
194 mxcsr_feature_mask_init(); 181 mxcsr_feature_mask_init();
195 /* clean state in init */ 182 xsave_init();
196 current_thread_info()->status = 0; 183 eager_fpu_init();
197 clear_used_math();
198} 184}
199 185
200void fpu_finit(struct fpu *fpu) 186void fpu_finit(struct fpu *fpu)
@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu)
205 } 191 }
206 192
207 if (cpu_has_fxsr) { 193 if (cpu_has_fxsr) {
208 struct i387_fxsave_struct *fx = &fpu->state->fxsave; 194 fx_finit(&fpu->state->fxsave);
209
210 memset(fx, 0, xstate_size);
211 fx->cwd = 0x37f;
212 if (cpu_has_xmm)
213 fx->mxcsr = MXCSR_DEFAULT;
214 } else { 195 } else {
215 struct i387_fsave_struct *fp = &fpu->state->fsave; 196 struct i387_fsave_struct *fp = &fpu->state->fsave;
216 memset(fp, 0, xstate_size); 197 memset(fp, 0, xstate_size);
@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
454 * FXSR floating point environment conversions. 435 * FXSR floating point environment conversions.
455 */ 436 */
456 437
457static void 438void
458convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) 439convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
459{ 440{
460 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; 441 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
491 memcpy(&to[i], &from[i], sizeof(to[0])); 472 memcpy(&to[i], &from[i], sizeof(to[0]));
492} 473}
493 474
494static void convert_to_fxsr(struct task_struct *tsk, 475void convert_to_fxsr(struct task_struct *tsk,
495 const struct user_i387_ia32_struct *env) 476 const struct user_i387_ia32_struct *env)
496 477
497{ 478{
498 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; 479 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
589} 570}
590 571
591/* 572/*
592 * Signal frame handlers.
593 */
594
595static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
596{
597 struct task_struct *tsk = current;
598 struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave;
599
600 fp->status = fp->swd;
601 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
602 return -1;
603 return 1;
604}
605
606static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
607{
608 struct task_struct *tsk = current;
609 struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
610 struct user_i387_ia32_struct env;
611 int err = 0;
612
613 convert_from_fxsr(&env, tsk);
614 if (__copy_to_user(buf, &env, sizeof(env)))
615 return -1;
616
617 err |= __put_user(fx->swd, &buf->status);
618 err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
619 if (err)
620 return -1;
621
622 if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
623 return -1;
624 return 1;
625}
626
627static int save_i387_xsave(void __user *buf)
628{
629 struct task_struct *tsk = current;
630 struct _fpstate_ia32 __user *fx = buf;
631 int err = 0;
632
633
634 sanitize_i387_state(tsk);
635
636 /*
637 * For legacy compatible, we always set FP/SSE bits in the bit
638 * vector while saving the state to the user context.
639 * This will enable us capturing any changes(during sigreturn) to
640 * the FP/SSE bits by the legacy applications which don't touch
641 * xstate_bv in the xsave header.
642 *
643 * xsave aware applications can change the xstate_bv in the xsave
644 * header as well as change any contents in the memory layout.
645 * xrestore as part of sigreturn will capture all the changes.
646 */
647 tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
648
649 if (save_i387_fxsave(fx) < 0)
650 return -1;
651
652 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
653 sizeof(struct _fpx_sw_bytes));
654 err |= __put_user(FP_XSTATE_MAGIC2,
655 (__u32 __user *) (buf + sig_xstate_ia32_size
656 - FP_XSTATE_MAGIC2_SIZE));
657 if (err)
658 return -1;
659
660 return 1;
661}
662
663int save_i387_xstate_ia32(void __user *buf)
664{
665 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
666 struct task_struct *tsk = current;
667
668 if (!used_math())
669 return 0;
670
671 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
672 return -EACCES;
673 /*
674 * This will cause a "finit" to be triggered by the next
675 * attempted FPU operation by the 'current' process.
676 */
677 clear_used_math();
678
679 if (!HAVE_HWFP) {
680 return fpregs_soft_get(current, NULL,
681 0, sizeof(struct user_i387_ia32_struct),
682 NULL, fp) ? -1 : 1;
683 }
684
685 unlazy_fpu(tsk);
686
687 if (cpu_has_xsave)
688 return save_i387_xsave(fp);
689 if (cpu_has_fxsr)
690 return save_i387_fxsave(fp);
691 else
692 return save_i387_fsave(fp);
693}
694
695static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
696{
697 struct task_struct *tsk = current;
698
699 return __copy_from_user(&tsk->thread.fpu.state->fsave, buf,
700 sizeof(struct i387_fsave_struct));
701}
702
703static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
704 unsigned int size)
705{
706 struct task_struct *tsk = current;
707 struct user_i387_ia32_struct env;
708 int err;
709
710 err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0],
711 size);
712 /* mxcsr reserved bits must be masked to zero for security reasons */
713 tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
714 if (err || __copy_from_user(&env, buf, sizeof(env)))
715 return 1;
716 convert_to_fxsr(tsk, &env);
717
718 return 0;
719}
720
721static int restore_i387_xsave(void __user *buf)
722{
723 struct _fpx_sw_bytes fx_sw_user;
724 struct _fpstate_ia32 __user *fx_user =
725 ((struct _fpstate_ia32 __user *) buf);
726 struct i387_fxsave_struct __user *fx =
727 (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
728 struct xsave_hdr_struct *xsave_hdr =
729 &current->thread.fpu.state->xsave.xsave_hdr;
730 u64 mask;
731 int err;
732
733 if (check_for_xstate(fx, buf, &fx_sw_user))
734 goto fx_only;
735
736 mask = fx_sw_user.xstate_bv;
737
738 err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
739
740 xsave_hdr->xstate_bv &= pcntxt_mask;
741 /*
742 * These bits must be zero.
743 */
744 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
745
746 /*
747 * Init the state that is not present in the memory layout
748 * and enabled by the OS.
749 */
750 mask = ~(pcntxt_mask & ~mask);
751 xsave_hdr->xstate_bv &= mask;
752
753 return err;
754fx_only:
755 /*
756 * Couldn't find the extended state information in the memory
757 * layout. Restore the FP/SSE and init the other extended state
758 * enabled by the OS.
759 */
760 xsave_hdr->xstate_bv = XSTATE_FPSSE;
761 return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
762}
763
764int restore_i387_xstate_ia32(void __user *buf)
765{
766 int err;
767 struct task_struct *tsk = current;
768 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
769
770 if (HAVE_HWFP)
771 clear_fpu(tsk);
772
773 if (!buf) {
774 if (used_math()) {
775 clear_fpu(tsk);
776 clear_used_math();
777 }
778
779 return 0;
780 } else
781 if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
782 return -EACCES;
783
784 if (!used_math()) {
785 err = init_fpu(tsk);
786 if (err)
787 return err;
788 }
789
790 if (HAVE_HWFP) {
791 if (cpu_has_xsave)
792 err = restore_i387_xsave(buf);
793 else if (cpu_has_fxsr)
794 err = restore_i387_fxsave(fp, sizeof(struct
795 i387_fxsave_struct));
796 else
797 err = restore_i387_fsave(fp);
798 } else {
799 err = fpregs_soft_set(current, NULL,
800 0, sizeof(struct user_i387_ia32_struct),
801 NULL, fp) != 0;
802 }
803 set_used_math();
804
805 return err;
806}
807
808/*
809 * FPU state for core dumps. 573 * FPU state for core dumps.
810 * This is only used for a.out dumps now. 574 * This is only used for a.out dumps now.
811 * It is declared generically using elf_fpregset_t (which is 575 * It is declared generically using elf_fpregset_t (which is
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 36d1853e91af..9a5c460404dc 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -263,7 +263,7 @@ static void i8259A_shutdown(void)
263 * out of. 263 * out of.
264 */ 264 */
265 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 265 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
266 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ 266 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
267} 267}
268 268
269static struct syscore_ops i8259_syscore_ops = { 269static struct syscore_ops i8259_syscore_ops = {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d44f7829968e..e4595f105910 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
92 seq_printf(p, " Rescheduling interrupts\n"); 92 seq_printf(p, " Rescheduling interrupts\n");
93 seq_printf(p, "%*s: ", prec, "CAL"); 93 seq_printf(p, "%*s: ", prec, "CAL");
94 for_each_online_cpu(j) 94 for_each_online_cpu(j)
95 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); 95 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
96 irq_stats(j)->irq_tlb_count);
96 seq_printf(p, " Function call interrupts\n"); 97 seq_printf(p, " Function call interrupts\n");
97 seq_printf(p, "%*s: ", prec, "TLB"); 98 seq_printf(p, "%*s: ", prec, "TLB");
98 for_each_online_cpu(j) 99 for_each_online_cpu(j)
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
147#ifdef CONFIG_SMP 148#ifdef CONFIG_SMP
148 sum += irq_stats(cpu)->irq_resched_count; 149 sum += irq_stats(cpu)->irq_resched_count;
149 sum += irq_stats(cpu)->irq_call_count; 150 sum += irq_stats(cpu)->irq_call_count;
150 sum += irq_stats(cpu)->irq_tlb_count;
151#endif 151#endif
152#ifdef CONFIG_X86_THERMAL_VECTOR 152#ifdef CONFIG_X86_THERMAL_VECTOR
153 sum += irq_stats(cpu)->irq_thermal_count; 153 sum += irq_stats(cpu)->irq_thermal_count;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e2f751efb7b1..57916c0d3cf6 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
541 return 1; 541 return 1;
542} 542}
543 543
544#ifdef KPROBES_CAN_USE_FTRACE
545static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
546 struct kprobe_ctlblk *kcb)
547{
548 /*
549 * Emulate singlestep (and also recover regs->ip)
550 * as if there is a 5byte nop
551 */
552 regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
553 if (unlikely(p->post_handler)) {
554 kcb->kprobe_status = KPROBE_HIT_SSDONE;
555 p->post_handler(p, regs, 0);
556 }
557 __this_cpu_write(current_kprobe, NULL);
558}
559#endif
560
544/* 561/*
545 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 562 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
546 * remain disabled throughout this function. 563 * remain disabled throughout this function.
@@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
599 } else if (kprobe_running()) { 616 } else if (kprobe_running()) {
600 p = __this_cpu_read(current_kprobe); 617 p = __this_cpu_read(current_kprobe);
601 if (p->break_handler && p->break_handler(p, regs)) { 618 if (p->break_handler && p->break_handler(p, regs)) {
619#ifdef KPROBES_CAN_USE_FTRACE
620 if (kprobe_ftrace(p)) {
621 skip_singlestep(p, regs, kcb);
622 return 1;
623 }
624#endif
602 setup_singlestep(p, regs, kcb, 0); 625 setup_singlestep(p, regs, kcb, 0);
603 return 1; 626 return 1;
604 } 627 }
@@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1052 return 0; 1075 return 0;
1053} 1076}
1054 1077
1078#ifdef KPROBES_CAN_USE_FTRACE
1079/* Ftrace callback handler for kprobes */
1080void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
1081 struct ftrace_ops *ops, struct pt_regs *regs)
1082{
1083 struct kprobe *p;
1084 struct kprobe_ctlblk *kcb;
1085 unsigned long flags;
1086
1087 /* Disable irq for emulating a breakpoint and avoiding preempt */
1088 local_irq_save(flags);
1089
1090 p = get_kprobe((kprobe_opcode_t *)ip);
1091 if (unlikely(!p) || kprobe_disabled(p))
1092 goto end;
1093
1094 kcb = get_kprobe_ctlblk();
1095 if (kprobe_running()) {
1096 kprobes_inc_nmissed_count(p);
1097 } else {
1098 /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
1099 regs->ip = ip + sizeof(kprobe_opcode_t);
1100
1101 __this_cpu_write(current_kprobe, p);
1102 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1103 if (!p->pre_handler || !p->pre_handler(p, regs))
1104 skip_singlestep(p, regs, kcb);
1105 /*
1106 * If pre_handler returns !0, it sets regs->ip and
1107 * resets current kprobe.
1108 */
1109 }
1110end:
1111 local_irq_restore(flags);
1112}
1113
1114int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
1115{
1116 p->ainsn.insn = NULL;
1117 p->ainsn.boostable = -1;
1118 return 0;
1119}
1120#endif
1121
1055int __init arch_init_kprobes(void) 1122int __init arch_init_kprobes(void)
1056{ 1123{
1057 return arch_init_optprobes(); 1124 return arch_init_optprobes();
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 82746f942cd8..7720ff5a9ee2 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -75,20 +75,113 @@ struct microcode_amd {
75 75
76static struct equiv_cpu_entry *equiv_cpu_table; 76static struct equiv_cpu_entry *equiv_cpu_table;
77 77
78/* page-sized ucode patch buffer */ 78struct ucode_patch {
79void *patch; 79 struct list_head plist;
80 void *data;
81 u32 patch_id;
82 u16 equiv_cpu;
83};
84
85static LIST_HEAD(pcache);
86
87static u16 find_equiv_id(unsigned int cpu)
88{
89 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
90 int i = 0;
91
92 if (!equiv_cpu_table)
93 return 0;
94
95 while (equiv_cpu_table[i].installed_cpu != 0) {
96 if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
97 return equiv_cpu_table[i].equiv_cpu;
98
99 i++;
100 }
101 return 0;
102}
103
104static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
105{
106 int i = 0;
107
108 BUG_ON(!equiv_cpu_table);
109
110 while (equiv_cpu_table[i].equiv_cpu != 0) {
111 if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
112 return equiv_cpu_table[i].installed_cpu;
113 i++;
114 }
115 return 0;
116}
117
118/*
119 * a small, trivial cache of per-family ucode patches
120 */
121static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
122{
123 struct ucode_patch *p;
124
125 list_for_each_entry(p, &pcache, plist)
126 if (p->equiv_cpu == equiv_cpu)
127 return p;
128 return NULL;
129}
130
131static void update_cache(struct ucode_patch *new_patch)
132{
133 struct ucode_patch *p;
134
135 list_for_each_entry(p, &pcache, plist) {
136 if (p->equiv_cpu == new_patch->equiv_cpu) {
137 if (p->patch_id >= new_patch->patch_id)
138 /* we already have the latest patch */
139 return;
140
141 list_replace(&p->plist, &new_patch->plist);
142 kfree(p->data);
143 kfree(p);
144 return;
145 }
146 }
147 /* no patch found, add it */
148 list_add_tail(&new_patch->plist, &pcache);
149}
150
151static void free_cache(void)
152{
153 struct ucode_patch *p, *tmp;
154
155 list_for_each_entry_safe(p, tmp, &pcache, plist) {
156 __list_del(p->plist.prev, p->plist.next);
157 kfree(p->data);
158 kfree(p);
159 }
160}
161
162static struct ucode_patch *find_patch(unsigned int cpu)
163{
164 u16 equiv_id;
165
166 equiv_id = find_equiv_id(cpu);
167 if (!equiv_id)
168 return NULL;
169
170 return cache_find_patch(equiv_id);
171}
80 172
81static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 173static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
82{ 174{
83 struct cpuinfo_x86 *c = &cpu_data(cpu); 175 struct cpuinfo_x86 *c = &cpu_data(cpu);
84 176
177 csig->sig = cpuid_eax(0x00000001);
85 csig->rev = c->microcode; 178 csig->rev = c->microcode;
86 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); 179 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
87 180
88 return 0; 181 return 0;
89} 182}
90 183
91static unsigned int verify_ucode_size(int cpu, u32 patch_size, 184static unsigned int verify_patch_size(int cpu, u32 patch_size,
92 unsigned int size) 185 unsigned int size)
93{ 186{
94 struct cpuinfo_x86 *c = &cpu_data(cpu); 187 struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
118 return patch_size; 211 return patch_size;
119} 212}
120 213
121static u16 find_equiv_id(void) 214static int apply_microcode_amd(int cpu)
122{ 215{
123 unsigned int current_cpu_id, i = 0; 216 struct cpuinfo_x86 *c = &cpu_data(cpu);
124 217 struct microcode_amd *mc_amd;
125 BUG_ON(equiv_cpu_table == NULL); 218 struct ucode_cpu_info *uci;
126 219 struct ucode_patch *p;
127 current_cpu_id = cpuid_eax(0x00000001); 220 u32 rev, dummy;
128
129 while (equiv_cpu_table[i].installed_cpu != 0) {
130 if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
131 return equiv_cpu_table[i].equiv_cpu;
132
133 i++;
134 }
135 return 0;
136}
137 221
138/* 222 BUG_ON(raw_smp_processor_id() != cpu);
139 * we signal a good patch is found by returning its size > 0
140 */
141static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
142 unsigned int leftover_size, int rev,
143 unsigned int *current_size)
144{
145 struct microcode_header_amd *mc_hdr;
146 unsigned int actual_size, patch_size;
147 u16 equiv_cpu_id;
148 223
149 /* size of the current patch we're staring at */ 224 uci = ucode_cpu_info + cpu;
150 patch_size = *(u32 *)(ucode_ptr + 4);
151 *current_size = patch_size + SECTION_HDR_SIZE;
152 225
153 equiv_cpu_id = find_equiv_id(); 226 p = find_patch(cpu);
154 if (!equiv_cpu_id) 227 if (!p)
155 return 0; 228 return 0;
156 229
157 /* 230 mc_amd = p->data;
158 * let's look at the patch header itself now 231 uci->mc = p->data;
159 */
160 mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
161 232
162 if (mc_hdr->processor_rev_id != equiv_cpu_id) 233 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
163 return 0;
164 234
165 /* ucode might be chipset specific -- currently we don't support this */ 235 /* need to apply patch? */
166 if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { 236 if (rev >= mc_amd->hdr.patch_id) {
167 pr_err("CPU%d: chipset specific code not yet supported\n", 237 c->microcode = rev;
168 cpu);
169 return 0; 238 return 0;
170 } 239 }
171 240
172 if (mc_hdr->patch_id <= rev)
173 return 0;
174
175 /*
176 * now that the header looks sane, verify its size
177 */
178 actual_size = verify_ucode_size(cpu, patch_size, leftover_size);
179 if (!actual_size)
180 return 0;
181
182 /* clear the patch buffer */
183 memset(patch, 0, PAGE_SIZE);
184
185 /* all looks ok, get the binary patch */
186 get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
187
188 return actual_size;
189}
190
191static int apply_microcode_amd(int cpu)
192{
193 u32 rev, dummy;
194 int cpu_num = raw_smp_processor_id();
195 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
196 struct microcode_amd *mc_amd = uci->mc;
197 struct cpuinfo_x86 *c = &cpu_data(cpu);
198
199 /* We should bind the task to the CPU */
200 BUG_ON(cpu_num != cpu);
201
202 if (mc_amd == NULL)
203 return 0;
204
205 wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); 241 wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
206 /* get patch id after patching */
207 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
208 242
209 /* check current patch id and patch's id for match */ 243 /* verify patch application was successful */
244 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
210 if (rev != mc_amd->hdr.patch_id) { 245 if (rev != mc_amd->hdr.patch_id) {
211 pr_err("CPU%d: update failed for patch_level=0x%08x\n", 246 pr_err("CPU%d: update failed for patch_level=0x%08x\n",
212 cpu, mc_amd->hdr.patch_id); 247 cpu, mc_amd->hdr.patch_id);
@@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf)
238 return -ENOMEM; 273 return -ENOMEM;
239 } 274 }
240 275
241 get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); 276 memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
242 277
243 /* add header length */ 278 /* add header length */
244 return size + CONTAINER_HDR_SZ; 279 return size + CONTAINER_HDR_SZ;
@@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void)
250 equiv_cpu_table = NULL; 285 equiv_cpu_table = NULL;
251} 286}
252 287
253static enum ucode_state 288static void cleanup(void)
254generic_load_microcode(int cpu, const u8 *data, size_t size)
255{ 289{
256 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 290 free_equiv_cpu_table();
257 struct microcode_header_amd *mc_hdr = NULL; 291 free_cache();
258 unsigned int mc_size, leftover, current_size = 0; 292}
293
294/*
295 * We return the current size even if some of the checks failed so that
296 * we can skip over the next patch. If we return a negative value, we
297 * signal a grave error like a memory allocation has failed and the
298 * driver cannot continue functioning normally. In such cases, we tear
299 * down everything we've used up so far and exit.
300 */
301static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
302{
303 struct cpuinfo_x86 *c = &cpu_data(cpu);
304 struct microcode_header_amd *mc_hdr;
305 struct ucode_patch *patch;
306 unsigned int patch_size, crnt_size, ret;
307 u32 proc_fam;
308 u16 proc_id;
309
310 patch_size = *(u32 *)(fw + 4);
311 crnt_size = patch_size + SECTION_HDR_SIZE;
312 mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
313 proc_id = mc_hdr->processor_rev_id;
314
315 proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
316 if (!proc_fam) {
317 pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
318 return crnt_size;
319 }
320
321 /* check if patch is for the current family */
322 proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
323 if (proc_fam != c->x86)
324 return crnt_size;
325
326 if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
327 pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
328 mc_hdr->patch_id);
329 return crnt_size;
330 }
331
332 ret = verify_patch_size(cpu, patch_size, leftover);
333 if (!ret) {
334 pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
335 return crnt_size;
336 }
337
338 patch = kzalloc(sizeof(*patch), GFP_KERNEL);
339 if (!patch) {
340 pr_err("Patch allocation failure.\n");
341 return -EINVAL;
342 }
343
344 patch->data = kzalloc(patch_size, GFP_KERNEL);
345 if (!patch->data) {
346 pr_err("Patch data allocation failure.\n");
347 kfree(patch);
348 return -EINVAL;
349 }
350
351 /* All looks ok, copy patch... */
352 memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
353 INIT_LIST_HEAD(&patch->plist);
354 patch->patch_id = mc_hdr->patch_id;
355 patch->equiv_cpu = proc_id;
356
357 /* ... and add to cache. */
358 update_cache(patch);
359
360 return crnt_size;
361}
362
363static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
364{
365 enum ucode_state ret = UCODE_ERROR;
366 unsigned int leftover;
367 u8 *fw = (u8 *)data;
368 int crnt_size = 0;
259 int offset; 369 int offset;
260 const u8 *ucode_ptr = data;
261 void *new_mc = NULL;
262 unsigned int new_rev = uci->cpu_sig.rev;
263 enum ucode_state state = UCODE_ERROR;
264 370
265 offset = install_equiv_cpu_table(ucode_ptr); 371 offset = install_equiv_cpu_table(data);
266 if (offset < 0) { 372 if (offset < 0) {
267 pr_err("failed to create equivalent cpu table\n"); 373 pr_err("failed to create equivalent cpu table\n");
268 goto out; 374 return ret;
269 } 375 }
270 ucode_ptr += offset; 376 fw += offset;
271 leftover = size - offset; 377 leftover = size - offset;
272 378
273 if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { 379 if (*(u32 *)fw != UCODE_UCODE_TYPE) {
274 pr_err("invalid type field in container file section header\n"); 380 pr_err("invalid type field in container file section header\n");
275 goto free_table; 381 free_equiv_cpu_table();
382 return ret;
276 } 383 }
277 384
278 while (leftover) { 385 while (leftover) {
279 mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, 386 crnt_size = verify_and_add_patch(cpu, fw, leftover);
280 new_rev, &current_size); 387 if (crnt_size < 0)
281 if (mc_size) { 388 return ret;
282 mc_hdr = patch;
283 new_mc = patch;
284 new_rev = mc_hdr->patch_id;
285 goto out_ok;
286 }
287
288 ucode_ptr += current_size;
289 leftover -= current_size;
290 }
291 389
292 if (!new_mc) { 390 fw += crnt_size;
293 state = UCODE_NFOUND; 391 leftover -= crnt_size;
294 goto free_table;
295 } 392 }
296 393
297out_ok: 394 return UCODE_OK;
298 uci->mc = new_mc;
299 state = UCODE_OK;
300 pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
301 cpu, uci->cpu_sig.rev, new_rev);
302
303free_table:
304 free_equiv_cpu_table();
305
306out:
307 return state;
308} 395}
309 396
310/* 397/*
@@ -315,7 +402,7 @@ out:
315 * 402 *
316 * This legacy file is always smaller than 2K in size. 403 * This legacy file is always smaller than 2K in size.
317 * 404 *
318 * Starting at family 15h they are in family specific firmware files: 405 * Beginning with family 15h, they are in family-specific firmware files:
319 * 406 *
320 * amd-ucode/microcode_amd_fam15h.bin 407 * amd-ucode/microcode_amd_fam15h.bin
321 * amd-ucode/microcode_amd_fam16h.bin 408 * amd-ucode/microcode_amd_fam16h.bin
@@ -323,12 +410,17 @@ out:
323 * 410 *
324 * These might be larger than 2K. 411 * These might be larger than 2K.
325 */ 412 */
326static enum ucode_state request_microcode_amd(int cpu, struct device *device) 413static enum ucode_state request_microcode_amd(int cpu, struct device *device,
414 bool refresh_fw)
327{ 415{
328 char fw_name[36] = "amd-ucode/microcode_amd.bin"; 416 char fw_name[36] = "amd-ucode/microcode_amd.bin";
329 const struct firmware *fw;
330 enum ucode_state ret = UCODE_NFOUND;
331 struct cpuinfo_x86 *c = &cpu_data(cpu); 417 struct cpuinfo_x86 *c = &cpu_data(cpu);
418 enum ucode_state ret = UCODE_NFOUND;
419 const struct firmware *fw;
420
421 /* reload ucode container only on the boot cpu */
422 if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
423 return UCODE_OK;
332 424
333 if (c->x86 >= 0x15) 425 if (c->x86 >= 0x15)
334 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); 426 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
@@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device)
344 goto fw_release; 436 goto fw_release;
345 } 437 }
346 438
347 ret = generic_load_microcode(cpu, fw->data, fw->size); 439 /* free old equiv table */
440 free_equiv_cpu_table();
441
442 ret = load_microcode_amd(cpu, fw->data, fw->size);
443 if (ret != UCODE_OK)
444 cleanup();
348 445
349fw_release: 446 fw_release:
350 release_firmware(fw); 447 release_firmware(fw);
351 448
352out: 449 out:
353 return ret; 450 return ret;
354} 451}
355 452
@@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void)
383 return NULL; 480 return NULL;
384 } 481 }
385 482
386 patch = (void *)get_zeroed_page(GFP_KERNEL);
387 if (!patch)
388 return NULL;
389
390 return &microcode_amd_ops; 483 return &microcode_amd_ops;
391} 484}
392 485
393void __exit exit_amd_microcode(void) 486void __exit exit_amd_microcode(void)
394{ 487{
395 free_page((unsigned long)patch); 488 cleanup();
396} 489}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9e5bcf1e2376..3a04b224d0c0 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -279,19 +279,18 @@ static struct platform_device *microcode_pdev;
279static int reload_for_cpu(int cpu) 279static int reload_for_cpu(int cpu)
280{ 280{
281 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 281 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
282 enum ucode_state ustate;
282 int err = 0; 283 int err = 0;
283 284
284 if (uci->valid) { 285 if (!uci->valid)
285 enum ucode_state ustate; 286 return err;
286
287 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
288 if (ustate == UCODE_OK)
289 apply_microcode_on_target(cpu);
290 else
291 if (ustate == UCODE_ERROR)
292 err = -EINVAL;
293 }
294 287
288 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
289 if (ustate == UCODE_OK)
290 apply_microcode_on_target(cpu);
291 else
292 if (ustate == UCODE_ERROR)
293 err = -EINVAL;
295 return err; 294 return err;
296} 295}
297 296
@@ -373,18 +372,15 @@ static void microcode_fini_cpu(int cpu)
373 372
374static enum ucode_state microcode_resume_cpu(int cpu) 373static enum ucode_state microcode_resume_cpu(int cpu)
375{ 374{
376 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
377
378 if (!uci->mc)
379 return UCODE_NFOUND;
380
381 pr_debug("CPU%d updated upon resume\n", cpu); 375 pr_debug("CPU%d updated upon resume\n", cpu);
382 apply_microcode_on_target(cpu); 376
377 if (apply_microcode_on_target(cpu))
378 return UCODE_ERROR;
383 379
384 return UCODE_OK; 380 return UCODE_OK;
385} 381}
386 382
387static enum ucode_state microcode_init_cpu(int cpu) 383static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
388{ 384{
389 enum ucode_state ustate; 385 enum ucode_state ustate;
390 386
@@ -395,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu)
395 if (system_state != SYSTEM_RUNNING) 391 if (system_state != SYSTEM_RUNNING)
396 return UCODE_NFOUND; 392 return UCODE_NFOUND;
397 393
398 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev); 394 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
395 refresh_fw);
399 396
400 if (ustate == UCODE_OK) { 397 if (ustate == UCODE_OK) {
401 pr_debug("CPU%d updated upon init\n", cpu); 398 pr_debug("CPU%d updated upon init\n", cpu);
@@ -408,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu)
408static enum ucode_state microcode_update_cpu(int cpu) 405static enum ucode_state microcode_update_cpu(int cpu)
409{ 406{
410 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 407 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
411 enum ucode_state ustate;
412 408
413 if (uci->valid) 409 if (uci->valid)
414 ustate = microcode_resume_cpu(cpu); 410 return microcode_resume_cpu(cpu);
415 else
416 ustate = microcode_init_cpu(cpu);
417 411
418 return ustate; 412 return microcode_init_cpu(cpu, false);
419} 413}
420 414
421static int mc_device_add(struct device *dev, struct subsys_interface *sif) 415static int mc_device_add(struct device *dev, struct subsys_interface *sif)
@@ -431,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
431 if (err) 425 if (err)
432 return err; 426 return err;
433 427
434 if (microcode_init_cpu(cpu) == UCODE_ERROR) 428 if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
435 return -EINVAL; 429 return -EINVAL;
436 430
437 return err; 431 return err;
@@ -480,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
480 struct device *dev; 474 struct device *dev;
481 475
482 dev = get_cpu_device(cpu); 476 dev = get_cpu_device(cpu);
483 switch (action) { 477
478 switch (action & ~CPU_TASKS_FROZEN) {
484 case CPU_ONLINE: 479 case CPU_ONLINE:
485 case CPU_ONLINE_FROZEN:
486 microcode_update_cpu(cpu); 480 microcode_update_cpu(cpu);
487 case CPU_DOWN_FAILED:
488 case CPU_DOWN_FAILED_FROZEN:
489 pr_debug("CPU%d added\n", cpu); 481 pr_debug("CPU%d added\n", cpu);
482 /*
483 * "break" is missing on purpose here because we want to fall
484 * through in order to create the sysfs group.
485 */
486
487 case CPU_DOWN_FAILED:
490 if (sysfs_create_group(&dev->kobj, &mc_attr_group)) 488 if (sysfs_create_group(&dev->kobj, &mc_attr_group))
491 pr_err("Failed to create group for CPU%d\n", cpu); 489 pr_err("Failed to create group for CPU%d\n", cpu);
492 break; 490 break;
491
493 case CPU_DOWN_PREPARE: 492 case CPU_DOWN_PREPARE:
494 case CPU_DOWN_PREPARE_FROZEN:
495 /* Suspend is in progress, only remove the interface */ 493 /* Suspend is in progress, only remove the interface */
496 sysfs_remove_group(&dev->kobj, &mc_attr_group); 494 sysfs_remove_group(&dev->kobj, &mc_attr_group);
497 pr_debug("CPU%d removed\n", cpu); 495 pr_debug("CPU%d removed\n", cpu);
498 break; 496 break;
499 497
500 /* 498 /*
499 * case CPU_DEAD:
500 *
501 * When a CPU goes offline, don't free up or invalidate the copy of 501 * When a CPU goes offline, don't free up or invalidate the copy of
502 * the microcode in kernel memory, so that we can reuse it when the 502 * the microcode in kernel memory, so that we can reuse it when the
503 * CPU comes back online without unnecessarily requesting the userspace 503 * CPU comes back online without unnecessarily requesting the userspace
504 * for it again. 504 * for it again.
505 */ 505 */
506 case CPU_UP_CANCELED_FROZEN:
507 /* The CPU refused to come up during a system resume */
508 microcode_fini_cpu(cpu);
509 break;
510 } 506 }
507
508 /* The CPU refused to come up during a system resume */
509 if (action == CPU_UP_CANCELED_FROZEN)
510 microcode_fini_cpu(cpu);
511
511 return NOTIFY_OK; 512 return NOTIFY_OK;
512} 513}
513 514
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0327e2b3c408..3544aed39338 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
405 return 0; 405 return 0;
406} 406}
407 407
408static enum ucode_state request_microcode_fw(int cpu, struct device *device) 408static enum ucode_state request_microcode_fw(int cpu, struct device *device,
409 bool refresh_fw)
409{ 410{
410 char name[30]; 411 char name[30];
411 struct cpuinfo_x86 *c = &cpu_data(cpu); 412 struct cpuinfo_x86 *c = &cpu_data(cpu);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index eb113693f043..a7c5661f8496 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -257,12 +257,14 @@ static int __init msr_init(void)
257 goto out_chrdev; 257 goto out_chrdev;
258 } 258 }
259 msr_class->devnode = msr_devnode; 259 msr_class->devnode = msr_devnode;
260 get_online_cpus();
260 for_each_online_cpu(i) { 261 for_each_online_cpu(i) {
261 err = msr_device_create(i); 262 err = msr_device_create(i);
262 if (err != 0) 263 if (err != 0)
263 goto out_class; 264 goto out_class;
264 } 265 }
265 register_hotcpu_notifier(&msr_class_cpu_notifier); 266 register_hotcpu_notifier(&msr_class_cpu_notifier);
267 put_online_cpus();
266 268
267 err = 0; 269 err = 0;
268 goto out; 270 goto out;
@@ -271,6 +273,7 @@ out_class:
271 i = 0; 273 i = 0;
272 for_each_online_cpu(i) 274 for_each_online_cpu(i)
273 msr_device_destroy(i); 275 msr_device_destroy(i);
276 put_online_cpus();
274 class_destroy(msr_class); 277 class_destroy(msr_class);
275out_chrdev: 278out_chrdev:
276 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); 279 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -281,11 +284,13 @@ out:
281static void __exit msr_exit(void) 284static void __exit msr_exit(void)
282{ 285{
283 int cpu = 0; 286 int cpu = 0;
287 get_online_cpus();
284 for_each_online_cpu(cpu) 288 for_each_online_cpu(cpu)
285 msr_device_destroy(cpu); 289 msr_device_destroy(cpu);
286 class_destroy(msr_class); 290 class_destroy(msr_class);
287 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); 291 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
288 unregister_hotcpu_notifier(&msr_class_cpu_notifier); 292 unregister_hotcpu_notifier(&msr_class_cpu_notifier);
293 put_online_cpus();
289} 294}
290 295
291module_init(msr_init); 296module_init(msr_init);
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
new file mode 100644
index 000000000000..e309cc5c276e
--- /dev/null
+++ b/arch/x86/kernel/perf_regs.c
@@ -0,0 +1,105 @@
1#include <linux/errno.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/perf_event.h>
5#include <linux/bug.h>
6#include <linux/stddef.h>
7#include <asm/perf_regs.h>
8#include <asm/ptrace.h>
9
10#ifdef CONFIG_X86_32
11#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
12#else
13#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
14#endif
15
16#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
17
18static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
19 PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
20 PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
21 PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
22 PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
23 PT_REGS_OFFSET(PERF_REG_X86_SI, si),
24 PT_REGS_OFFSET(PERF_REG_X86_DI, di),
25 PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
26 PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
27 PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
28 PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
29 PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
30 PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
31#ifdef CONFIG_X86_32
32 PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
33 PT_REGS_OFFSET(PERF_REG_X86_ES, es),
34 PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
35 PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
36#else
37 /*
38 * The pt_regs struct does not store
39 * ds, es, fs, gs in 64 bit mode.
40 */
41 (unsigned int) -1,
42 (unsigned int) -1,
43 (unsigned int) -1,
44 (unsigned int) -1,
45#endif
46#ifdef CONFIG_X86_64
47 PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
48 PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
49 PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
50 PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
51 PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
52 PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
53 PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
54 PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
55#endif
56};
57
58u64 perf_reg_value(struct pt_regs *regs, int idx)
59{
60 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
61 return 0;
62
63 return regs_get_register(regs, pt_regs_offset[idx]);
64}
65
66#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
67
68#ifdef CONFIG_X86_32
69int perf_reg_validate(u64 mask)
70{
71 if (!mask || mask & REG_RESERVED)
72 return -EINVAL;
73
74 return 0;
75}
76
77u64 perf_reg_abi(struct task_struct *task)
78{
79 return PERF_SAMPLE_REGS_ABI_32;
80}
81#else /* CONFIG_X86_64 */
82#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
83 (1ULL << PERF_REG_X86_ES) | \
84 (1ULL << PERF_REG_X86_FS) | \
85 (1ULL << PERF_REG_X86_GS))
86
87int perf_reg_validate(u64 mask)
88{
89 if (!mask || mask & REG_RESERVED)
90 return -EINVAL;
91
92 if (mask & REG_NOSUPPORT)
93 return -EINVAL;
94
95 return 0;
96}
97
98u64 perf_reg_abi(struct task_struct *task)
99{
100 if (test_tsk_thread_flag(task, TIF_IA32))
101 return PERF_SAMPLE_REGS_ABI_32;
102 else
103 return PERF_SAMPLE_REGS_ABI_64;
104}
105#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 0bc72e2069e3..d5f15c3f7b25 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev)
150 return oprom; 150 return oprom;
151} 151}
152 152
153void *pci_map_biosrom(struct pci_dev *pdev) 153void __iomem *pci_map_biosrom(struct pci_dev *pdev)
154{ 154{
155 struct resource *oprom = find_oprom(pdev); 155 struct resource *oprom = find_oprom(pdev);
156 156
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ef6a8456f719..dc3567e083f9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
66{ 66{
67 int ret; 67 int ret;
68 68
69 unlazy_fpu(src);
70
71 *dst = *src; 69 *dst = *src;
72 if (fpu_allocated(&src->thread.fpu)) { 70 if (fpu_allocated(&src->thread.fpu)) {
73 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); 71 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
74 ret = fpu_alloc(&dst->thread.fpu); 72 ret = fpu_alloc(&dst->thread.fpu);
75 if (ret) 73 if (ret)
76 return ret; 74 return ret;
77 fpu_copy(&dst->thread.fpu, &src->thread.fpu); 75 fpu_copy(dst, src);
78 } 76 }
79 return 0; 77 return 0;
80} 78}
@@ -97,16 +95,6 @@ void arch_task_cache_init(void)
97 SLAB_PANIC | SLAB_NOTRACK, NULL); 95 SLAB_PANIC | SLAB_NOTRACK, NULL);
98} 96}
99 97
100static inline void drop_fpu(struct task_struct *tsk)
101{
102 /*
103 * Forget coprocessor state..
104 */
105 tsk->fpu_counter = 0;
106 clear_fpu(tsk);
107 clear_used_math();
108}
109
110/* 98/*
111 * Free current thread data structures etc.. 99 * Free current thread data structures etc..
112 */ 100 */
@@ -163,7 +151,13 @@ void flush_thread(void)
163 151
164 flush_ptrace_hw_breakpoint(tsk); 152 flush_ptrace_hw_breakpoint(tsk);
165 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 153 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
166 drop_fpu(tsk); 154 drop_init_fpu(tsk);
155 /*
156 * Free the FPU state for non xsave platforms. They get reallocated
157 * lazily at the first use.
158 */
159 if (!use_eager_fpu())
160 free_thread_xstate(tsk);
167} 161}
168 162
169static void hard_disable_TSC(void) 163static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 516fa186121b..b9ff83c7135b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
190 regs->cs = __USER_CS; 190 regs->cs = __USER_CS;
191 regs->ip = new_ip; 191 regs->ip = new_ip;
192 regs->sp = new_sp; 192 regs->sp = new_sp;
193 /*
194 * Free the old FP and other extended state
195 */
196 free_thread_xstate(current);
197} 193}
198EXPORT_SYMBOL_GPL(start_thread); 194EXPORT_SYMBOL_GPL(start_thread);
199 195
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9d7cb8..8a6d20ce1978 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
232 regs->cs = _cs; 232 regs->cs = _cs;
233 regs->ss = _ss; 233 regs->ss = _ss;
234 regs->flags = X86_EFLAGS_IF; 234 regs->flags = X86_EFLAGS_IF;
235 /*
236 * Free the old FP and other extended state
237 */
238 free_thread_xstate(current);
239} 235}
240 236
241void 237void
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4c6a5c2bf0f..b00b33a18390 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
21#include <linux/signal.h> 21#include <linux/signal.h>
22#include <linux/perf_event.h> 22#include <linux/perf_event.h>
23#include <linux/hw_breakpoint.h> 23#include <linux/hw_breakpoint.h>
24#include <linux/rcupdate.h>
24 25
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26#include <asm/pgtable.h> 27#include <asm/pgtable.h>
@@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = {
1332#define genregs32_get genregs_get 1333#define genregs32_get genregs_get
1333#define genregs32_set genregs_set 1334#define genregs32_set genregs_set
1334 1335
1335#define user_i387_ia32_struct user_i387_struct
1336#define user32_fxsr_struct user_fxsr_struct
1337
1338#endif /* CONFIG_X86_64 */ 1336#endif /* CONFIG_X86_64 */
1339 1337
1340#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 1338#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs)
1463{ 1461{
1464 long ret = 0; 1462 long ret = 0;
1465 1463
1464 rcu_user_exit();
1465
1466 /* 1466 /*
1467 * If we stepped into a sysenter/syscall insn, it trapped in 1467 * If we stepped into a sysenter/syscall insn, it trapped in
1468 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. 1468 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs)
1526 !test_thread_flag(TIF_SYSCALL_EMU); 1526 !test_thread_flag(TIF_SYSCALL_EMU);
1527 if (step || test_thread_flag(TIF_SYSCALL_TRACE)) 1527 if (step || test_thread_flag(TIF_SYSCALL_TRACE))
1528 tracehook_report_syscall_exit(regs, step); 1528 tracehook_report_syscall_exit(regs, step);
1529
1530 rcu_user_enter();
1529} 1531}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b95..4f165479c453 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -961,9 +961,7 @@ void __init setup_arch(char **cmdline_p)
961 kvmclock_init(); 961 kvmclock_init();
962#endif 962#endif
963 963
964 x86_init.paging.pagetable_setup_start(swapper_pg_dir); 964 x86_init.paging.pagetable_init();
965 paging_init();
966 x86_init.paging.pagetable_setup_done(swapper_pg_dir);
967 965
968 if (boot_cpu_data.cpuid_level >= 0) { 966 if (boot_cpu_data.cpuid_level >= 0) {
969 /* A CPU has %cr4 if and only if it has CPUID */ 967 /* A CPU has %cr4 if and only if it has CPUID */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376e..b33144c8b309 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
114 regs->orig_ax = -1; /* disable syscall checks */ 114 regs->orig_ax = -1; /* disable syscall checks */
115 115
116 get_user_ex(buf, &sc->fpstate); 116 get_user_ex(buf, &sc->fpstate);
117 err |= restore_i387_xstate(buf);
118 117
119 get_user_ex(*pax, &sc->ax); 118 get_user_ex(*pax, &sc->ax);
120 } get_user_catch(err); 119 } get_user_catch(err);
121 120
121 err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
122
122 return err; 123 return err;
123} 124}
124 125
@@ -206,35 +207,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
206 void __user **fpstate) 207 void __user **fpstate)
207{ 208{
208 /* Default to using normal stack */ 209 /* Default to using normal stack */
210 unsigned long math_size = 0;
209 unsigned long sp = regs->sp; 211 unsigned long sp = regs->sp;
212 unsigned long buf_fx = 0;
210 int onsigstack = on_sig_stack(sp); 213 int onsigstack = on_sig_stack(sp);
211 214
212#ifdef CONFIG_X86_64
213 /* redzone */ 215 /* redzone */
214 sp -= 128; 216 if (config_enabled(CONFIG_X86_64))
215#endif /* CONFIG_X86_64 */ 217 sp -= 128;
216 218
217 if (!onsigstack) { 219 if (!onsigstack) {
218 /* This is the X/Open sanctioned signal stack switching. */ 220 /* This is the X/Open sanctioned signal stack switching. */
219 if (ka->sa.sa_flags & SA_ONSTACK) { 221 if (ka->sa.sa_flags & SA_ONSTACK) {
220 if (current->sas_ss_size) 222 if (current->sas_ss_size)
221 sp = current->sas_ss_sp + current->sas_ss_size; 223 sp = current->sas_ss_sp + current->sas_ss_size;
222 } else { 224 } else if (config_enabled(CONFIG_X86_32) &&
223#ifdef CONFIG_X86_32 225 (regs->ss & 0xffff) != __USER_DS &&
224 /* This is the legacy signal stack switching. */ 226 !(ka->sa.sa_flags & SA_RESTORER) &&
225 if ((regs->ss & 0xffff) != __USER_DS && 227 ka->sa.sa_restorer) {
226 !(ka->sa.sa_flags & SA_RESTORER) && 228 /* This is the legacy signal stack switching. */
227 ka->sa.sa_restorer)
228 sp = (unsigned long) ka->sa.sa_restorer; 229 sp = (unsigned long) ka->sa.sa_restorer;
229#endif /* CONFIG_X86_32 */
230 } 230 }
231 } 231 }
232 232
233 if (used_math()) { 233 if (used_math()) {
234 sp -= sig_xstate_size; 234 sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
235#ifdef CONFIG_X86_64 235 &buf_fx, &math_size);
236 sp = round_down(sp, 64);
237#endif /* CONFIG_X86_64 */
238 *fpstate = (void __user *)sp; 236 *fpstate = (void __user *)sp;
239 } 237 }
240 238
@@ -247,8 +245,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
247 if (onsigstack && !likely(on_sig_stack(sp))) 245 if (onsigstack && !likely(on_sig_stack(sp)))
248 return (void __user *)-1L; 246 return (void __user *)-1L;
249 247
250 /* save i387 state */ 248 /* save i387 and extended state */
251 if (used_math() && save_i387_xstate(*fpstate) < 0) 249 if (used_math() &&
250 save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
252 return (void __user *)-1L; 251 return (void __user *)-1L;
253 252
254 return (void __user *)sp; 253 return (void __user *)sp;
@@ -357,7 +356,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
357 put_user_ex(sig, &frame->sig); 356 put_user_ex(sig, &frame->sig);
358 put_user_ex(&frame->info, &frame->pinfo); 357 put_user_ex(&frame->info, &frame->pinfo);
359 put_user_ex(&frame->uc, &frame->puc); 358 put_user_ex(&frame->uc, &frame->puc);
360 err |= copy_siginfo_to_user(&frame->info, info);
361 359
362 /* Create the ucontext. */ 360 /* Create the ucontext. */
363 if (cpu_has_xsave) 361 if (cpu_has_xsave)
@@ -369,9 +367,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
369 put_user_ex(sas_ss_flags(regs->sp), 367 put_user_ex(sas_ss_flags(regs->sp),
370 &frame->uc.uc_stack.ss_flags); 368 &frame->uc.uc_stack.ss_flags);
371 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 369 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
372 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
373 regs, set->sig[0]);
374 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
375 370
376 /* Set up to return from userspace. */ 371 /* Set up to return from userspace. */
377 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); 372 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -388,6 +383,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
388 */ 383 */
389 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); 384 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
390 } put_user_catch(err); 385 } put_user_catch(err);
386
387 err |= copy_siginfo_to_user(&frame->info, info);
388 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
389 regs, set->sig[0]);
390 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
391 391
392 if (err) 392 if (err)
393 return -EFAULT; 393 return -EFAULT;
@@ -436,8 +436,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
436 put_user_ex(sas_ss_flags(regs->sp), 436 put_user_ex(sas_ss_flags(regs->sp),
437 &frame->uc.uc_stack.ss_flags); 437 &frame->uc.uc_stack.ss_flags);
438 put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); 438 put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
439 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
440 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
441 439
442 /* Set up to return from userspace. If provided, use a stub 440 /* Set up to return from userspace. If provided, use a stub
443 already in userspace. */ 441 already in userspace. */
@@ -450,6 +448,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
450 } 448 }
451 } put_user_catch(err); 449 } put_user_catch(err);
452 450
451 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
452 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
453
453 if (err) 454 if (err)
454 return -EFAULT; 455 return -EFAULT;
455 456
@@ -474,6 +475,75 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
474} 475}
475#endif /* CONFIG_X86_32 */ 476#endif /* CONFIG_X86_32 */
476 477
478static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
479 siginfo_t *info, compat_sigset_t *set,
480 struct pt_regs *regs)
481{
482#ifdef CONFIG_X86_X32_ABI
483 struct rt_sigframe_x32 __user *frame;
484 void __user *restorer;
485 int err = 0;
486 void __user *fpstate = NULL;
487
488 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
489
490 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
491 return -EFAULT;
492
493 if (ka->sa.sa_flags & SA_SIGINFO) {
494 if (copy_siginfo_to_user32(&frame->info, info))
495 return -EFAULT;
496 }
497
498 put_user_try {
499 /* Create the ucontext. */
500 if (cpu_has_xsave)
501 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
502 else
503 put_user_ex(0, &frame->uc.uc_flags);
504 put_user_ex(0, &frame->uc.uc_link);
505 put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
506 put_user_ex(sas_ss_flags(regs->sp),
507 &frame->uc.uc_stack.ss_flags);
508 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
509 put_user_ex(0, &frame->uc.uc__pad0);
510
511 if (ka->sa.sa_flags & SA_RESTORER) {
512 restorer = ka->sa.sa_restorer;
513 } else {
514 /* could use a vstub here */
515 restorer = NULL;
516 err |= -EFAULT;
517 }
518 put_user_ex(restorer, &frame->pretcode);
519 } put_user_catch(err);
520
521 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
522 regs, set->sig[0]);
523 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
524
525 if (err)
526 return -EFAULT;
527
528 /* Set up registers for signal handler */
529 regs->sp = (unsigned long) frame;
530 regs->ip = (unsigned long) ka->sa.sa_handler;
531
532 /* We use the x32 calling convention here... */
533 regs->di = sig;
534 regs->si = (unsigned long) &frame->info;
535 regs->dx = (unsigned long) &frame->uc;
536
537 loadsegment(ds, __USER_DS);
538 loadsegment(es, __USER_DS);
539
540 regs->cs = __USER_CS;
541 regs->ss = __USER_DS;
542#endif /* CONFIG_X86_X32_ABI */
543
544 return 0;
545}
546
477#ifdef CONFIG_X86_32 547#ifdef CONFIG_X86_32
478/* 548/*
479 * Atomically swap in the new signal mask, and wait for a signal. 549 * Atomically swap in the new signal mask, and wait for a signal.
@@ -612,55 +682,22 @@ static int signr_convert(int sig)
612 return sig; 682 return sig;
613} 683}
614 684
615#ifdef CONFIG_X86_32
616
617#define is_ia32 1
618#define ia32_setup_frame __setup_frame
619#define ia32_setup_rt_frame __setup_rt_frame
620
621#else /* !CONFIG_X86_32 */
622
623#ifdef CONFIG_IA32_EMULATION
624#define is_ia32 test_thread_flag(TIF_IA32)
625#else /* !CONFIG_IA32_EMULATION */
626#define is_ia32 0
627#endif /* CONFIG_IA32_EMULATION */
628
629#ifdef CONFIG_X86_X32_ABI
630#define is_x32 test_thread_flag(TIF_X32)
631
632static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
633 siginfo_t *info, compat_sigset_t *set,
634 struct pt_regs *regs);
635#else /* !CONFIG_X86_X32_ABI */
636#define is_x32 0
637#endif /* CONFIG_X86_X32_ABI */
638
639int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
640 sigset_t *set, struct pt_regs *regs);
641int ia32_setup_frame(int sig, struct k_sigaction *ka,
642 sigset_t *set, struct pt_regs *regs);
643
644#endif /* CONFIG_X86_32 */
645
646static int 685static int
647setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 686setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
648 struct pt_regs *regs) 687 struct pt_regs *regs)
649{ 688{
650 int usig = signr_convert(sig); 689 int usig = signr_convert(sig);
651 sigset_t *set = sigmask_to_save(); 690 sigset_t *set = sigmask_to_save();
691 compat_sigset_t *cset = (compat_sigset_t *) set;
652 692
653 /* Set up the stack frame */ 693 /* Set up the stack frame */
654 if (is_ia32) { 694 if (is_ia32_frame()) {
655 if (ka->sa.sa_flags & SA_SIGINFO) 695 if (ka->sa.sa_flags & SA_SIGINFO)
656 return ia32_setup_rt_frame(usig, ka, info, set, regs); 696 return ia32_setup_rt_frame(usig, ka, info, cset, regs);
657 else 697 else
658 return ia32_setup_frame(usig, ka, set, regs); 698 return ia32_setup_frame(usig, ka, cset, regs);
659#ifdef CONFIG_X86_X32_ABI 699 } else if (is_x32_frame()) {
660 } else if (is_x32) { 700 return x32_setup_rt_frame(usig, ka, info, cset, regs);
661 return x32_setup_rt_frame(usig, ka, info,
662 (compat_sigset_t *)set, regs);
663#endif
664 } else { 701 } else {
665 return __setup_rt_frame(sig, ka, info, set, regs); 702 return __setup_rt_frame(sig, ka, info, set, regs);
666 } 703 }
@@ -779,6 +816,8 @@ static void do_signal(struct pt_regs *regs)
779void 816void
780do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 817do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
781{ 818{
819 rcu_user_exit();
820
782#ifdef CONFIG_X86_MCE 821#ifdef CONFIG_X86_MCE
783 /* notify userspace of pending MCEs */ 822 /* notify userspace of pending MCEs */
784 if (thread_info_flags & _TIF_MCE_NOTIFY) 823 if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -804,6 +843,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
804#ifdef CONFIG_X86_32 843#ifdef CONFIG_X86_32
805 clear_thread_flag(TIF_IRET); 844 clear_thread_flag(TIF_IRET);
806#endif /* CONFIG_X86_32 */ 845#endif /* CONFIG_X86_32 */
846
847 rcu_user_enter();
807} 848}
808 849
809void signal_fault(struct pt_regs *regs, void __user *frame, char *where) 850void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
@@ -824,72 +865,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
824} 865}
825 866
826#ifdef CONFIG_X86_X32_ABI 867#ifdef CONFIG_X86_X32_ABI
827static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
828 siginfo_t *info, compat_sigset_t *set,
829 struct pt_regs *regs)
830{
831 struct rt_sigframe_x32 __user *frame;
832 void __user *restorer;
833 int err = 0;
834 void __user *fpstate = NULL;
835
836 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
837
838 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
839 return -EFAULT;
840
841 if (ka->sa.sa_flags & SA_SIGINFO) {
842 if (copy_siginfo_to_user32(&frame->info, info))
843 return -EFAULT;
844 }
845
846 put_user_try {
847 /* Create the ucontext. */
848 if (cpu_has_xsave)
849 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
850 else
851 put_user_ex(0, &frame->uc.uc_flags);
852 put_user_ex(0, &frame->uc.uc_link);
853 put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
854 put_user_ex(sas_ss_flags(regs->sp),
855 &frame->uc.uc_stack.ss_flags);
856 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
857 put_user_ex(0, &frame->uc.uc__pad0);
858 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
859 regs, set->sig[0]);
860 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
861
862 if (ka->sa.sa_flags & SA_RESTORER) {
863 restorer = ka->sa.sa_restorer;
864 } else {
865 /* could use a vstub here */
866 restorer = NULL;
867 err |= -EFAULT;
868 }
869 put_user_ex(restorer, &frame->pretcode);
870 } put_user_catch(err);
871
872 if (err)
873 return -EFAULT;
874
875 /* Set up registers for signal handler */
876 regs->sp = (unsigned long) frame;
877 regs->ip = (unsigned long) ka->sa.sa_handler;
878
879 /* We use the x32 calling convention here... */
880 regs->di = sig;
881 regs->si = (unsigned long) &frame->info;
882 regs->dx = (unsigned long) &frame->uc;
883
884 loadsegment(ds, __USER_DS);
885 loadsegment(es, __USER_DS);
886
887 regs->cs = __USER_CS;
888 regs->ss = __USER_DS;
889
890 return 0;
891}
892
893asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) 868asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
894{ 869{
895 struct rt_sigframe_x32 __user *frame; 870 struct rt_sigframe_x32 __user *frame;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7c5a8c314c02..c80a33bc528b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
665 unsigned long boot_error = 0; 665 unsigned long boot_error = 0;
666 int timeout; 666 int timeout;
667 667
668 alternatives_smp_switch(1); 668 /* Just in case we booted with a single CPU. */
669 alternatives_enable_smp();
669 670
670 idle->thread.sp = (unsigned long) (((struct pt_regs *) 671 idle->thread.sp = (unsigned long) (((struct pt_regs *)
671 (THREAD_SIZE + task_stack_page(idle))) - 1); 672 (THREAD_SIZE + task_stack_page(idle))) - 1);
@@ -1053,20 +1054,6 @@ out:
1053 preempt_enable(); 1054 preempt_enable();
1054} 1055}
1055 1056
1056void arch_disable_nonboot_cpus_begin(void)
1057{
1058 /*
1059 * Avoid the smp alternatives switch during the disable_nonboot_cpus().
1060 * In the suspend path, we will be back in the SMP mode shortly anyways.
1061 */
1062 skip_smp_alternatives = true;
1063}
1064
1065void arch_disable_nonboot_cpus_end(void)
1066{
1067 skip_smp_alternatives = false;
1068}
1069
1070void arch_enable_nonboot_cpus_begin(void) 1057void arch_enable_nonboot_cpus_begin(void)
1071{ 1058{
1072 set_mtrr_aps_delayed_init(); 1059 set_mtrr_aps_delayed_init();
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu)
1256 if (per_cpu(cpu_state, cpu) == CPU_DEAD) { 1243 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
1257 if (system_state == SYSTEM_RUNNING) 1244 if (system_state == SYSTEM_RUNNING)
1258 pr_info("CPU %u is now offline\n", cpu); 1245 pr_info("CPU %u is now offline\n", cpu);
1259
1260 if (1 == num_online_cpus())
1261 alternatives_smp_switch(0);
1262 return; 1246 return;
1263 } 1247 }
1264 msleep(100); 1248 msleep(100);
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index c346d1161488..cd3b2438a980 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child)
157 return 1; 157 return 1;
158} 158}
159 159
160void set_task_blockstep(struct task_struct *task, bool on)
161{
162 unsigned long debugctl;
163
164 /*
165 * Ensure irq/preemption can't change debugctl in between.
166 * Note also that both TIF_BLOCKSTEP and debugctl should
167 * be changed atomically wrt preemption.
168 * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
169 * wrong if task != current, SIGKILL can wakeup the stopped
170 * tracee and set/clear can play with the running task, this
171 * can confuse the next __switch_to_xtra().
172 */
173 local_irq_disable();
174 debugctl = get_debugctlmsr();
175 if (on) {
176 debugctl |= DEBUGCTLMSR_BTF;
177 set_tsk_thread_flag(task, TIF_BLOCKSTEP);
178 } else {
179 debugctl &= ~DEBUGCTLMSR_BTF;
180 clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
181 }
182 if (task == current)
183 update_debugctlmsr(debugctl);
184 local_irq_enable();
185}
186
160/* 187/*
161 * Enable single or block step. 188 * Enable single or block step.
162 */ 189 */
@@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block)
169 * So no one should try to use debugger block stepping in a program 196 * So no one should try to use debugger block stepping in a program
170 * that uses user-mode single stepping itself. 197 * that uses user-mode single stepping itself.
171 */ 198 */
172 if (enable_single_step(child) && block) { 199 if (enable_single_step(child) && block)
173 unsigned long debugctl = get_debugctlmsr(); 200 set_task_blockstep(child, true);
174 201 else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
175 debugctl |= DEBUGCTLMSR_BTF; 202 set_task_blockstep(child, false);
176 update_debugctlmsr(debugctl);
177 set_tsk_thread_flag(child, TIF_BLOCKSTEP);
178 } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
179 unsigned long debugctl = get_debugctlmsr();
180
181 debugctl &= ~DEBUGCTLMSR_BTF;
182 update_debugctlmsr(debugctl);
183 clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
184 }
185} 203}
186 204
187void user_enable_single_step(struct task_struct *child) 205void user_enable_single_step(struct task_struct *child)
@@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child)
199 /* 217 /*
200 * Make sure block stepping (BTF) is disabled. 218 * Make sure block stepping (BTF) is disabled.
201 */ 219 */
202 if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { 220 if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
203 unsigned long debugctl = get_debugctlmsr(); 221 set_task_blockstep(child, false);
204
205 debugctl &= ~DEBUGCTLMSR_BTF;
206 update_debugctlmsr(debugctl);
207 clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
208 }
209 222
210 /* Always clear TIF_SINGLESTEP... */ 223 /* Always clear TIF_SINGLESTEP... */
211 clear_tsk_thread_flag(child, TIF_SINGLESTEP); 224 clear_tsk_thread_flag(child, TIF_SINGLESTEP);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c9369..8276dc6794cc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,6 +55,7 @@
55#include <asm/i387.h> 55#include <asm/i387.h>
56#include <asm/fpu-internal.h> 56#include <asm/fpu-internal.h>
57#include <asm/mce.h> 57#include <asm/mce.h>
58#include <asm/rcu.h>
58 59
59#include <asm/mach_traps.h> 60#include <asm/mach_traps.h>
60 61
@@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
107 dec_preempt_count(); 108 dec_preempt_count();
108} 109}
109 110
110static void __kprobes 111static int __kprobes
111do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, 112do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
112 long error_code, siginfo_t *info) 113 struct pt_regs *regs, long error_code)
113{ 114{
114 struct task_struct *tsk = current;
115
116#ifdef CONFIG_X86_32 115#ifdef CONFIG_X86_32
117 if (regs->flags & X86_VM_MASK) { 116 if (regs->flags & X86_VM_MASK) {
118 /* 117 /*
119 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. 118 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
120 * On nmi (interrupt 2), do_trap should not be called. 119 * On nmi (interrupt 2), do_trap should not be called.
121 */ 120 */
122 if (trapnr < X86_TRAP_UD) 121 if (trapnr < X86_TRAP_UD) {
123 goto vm86_trap; 122 if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
124 goto trap_signal; 123 error_code, trapnr))
124 return 0;
125 }
126 return -1;
125 } 127 }
126#endif 128#endif
129 if (!user_mode(regs)) {
130 if (!fixup_exception(regs)) {
131 tsk->thread.error_code = error_code;
132 tsk->thread.trap_nr = trapnr;
133 die(str, regs, error_code);
134 }
135 return 0;
136 }
127 137
128 if (!user_mode(regs)) 138 return -1;
129 goto kernel_trap; 139}
130 140
131#ifdef CONFIG_X86_32 141static void __kprobes
132trap_signal: 142do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
133#endif 143 long error_code, siginfo_t *info)
144{
145 struct task_struct *tsk = current;
146
147
148 if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
149 return;
134 /* 150 /*
135 * We want error_code and trap_nr set for userspace faults and 151 * We want error_code and trap_nr set for userspace faults and
136 * kernelspace faults which result in die(), but not 152 * kernelspace faults which result in die(), but not
@@ -158,33 +174,20 @@ trap_signal:
158 force_sig_info(signr, info, tsk); 174 force_sig_info(signr, info, tsk);
159 else 175 else
160 force_sig(signr, tsk); 176 force_sig(signr, tsk);
161 return;
162
163kernel_trap:
164 if (!fixup_exception(regs)) {
165 tsk->thread.error_code = error_code;
166 tsk->thread.trap_nr = trapnr;
167 die(str, regs, error_code);
168 }
169 return;
170
171#ifdef CONFIG_X86_32
172vm86_trap:
173 if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
174 error_code, trapnr))
175 goto trap_signal;
176 return;
177#endif
178} 177}
179 178
180#define DO_ERROR(trapnr, signr, str, name) \ 179#define DO_ERROR(trapnr, signr, str, name) \
181dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ 180dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
182{ \ 181{ \
183 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 182 exception_enter(regs); \
184 == NOTIFY_STOP) \ 183 if (notify_die(DIE_TRAP, str, regs, error_code, \
184 trapnr, signr) == NOTIFY_STOP) { \
185 exception_exit(regs); \
185 return; \ 186 return; \
187 } \
186 conditional_sti(regs); \ 188 conditional_sti(regs); \
187 do_trap(trapnr, signr, str, regs, error_code, NULL); \ 189 do_trap(trapnr, signr, str, regs, error_code, NULL); \
190 exception_exit(regs); \
188} 191}
189 192
190#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 193#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
@@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
195 info.si_errno = 0; \ 198 info.si_errno = 0; \
196 info.si_code = sicode; \ 199 info.si_code = sicode; \
197 info.si_addr = (void __user *)siaddr; \ 200 info.si_addr = (void __user *)siaddr; \
198 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 201 exception_enter(regs); \
199 == NOTIFY_STOP) \ 202 if (notify_die(DIE_TRAP, str, regs, error_code, \
203 trapnr, signr) == NOTIFY_STOP) { \
204 exception_exit(regs); \
200 return; \ 205 return; \
206 } \
201 conditional_sti(regs); \ 207 conditional_sti(regs); \
202 do_trap(trapnr, signr, str, regs, error_code, &info); \ 208 do_trap(trapnr, signr, str, regs, error_code, &info); \
209 exception_exit(regs); \
203} 210}
204 211
205DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, 212DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
222/* Runs on IST stack */ 229/* Runs on IST stack */
223dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) 230dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
224{ 231{
232 exception_enter(regs);
225 if (notify_die(DIE_TRAP, "stack segment", regs, error_code, 233 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
226 X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) 234 X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
227 return; 235 preempt_conditional_sti(regs);
228 preempt_conditional_sti(regs); 236 do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
229 do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); 237 preempt_conditional_cli(regs);
230 preempt_conditional_cli(regs); 238 }
239 exception_exit(regs);
231} 240}
232 241
233dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) 242dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
235 static const char str[] = "double fault"; 244 static const char str[] = "double fault";
236 struct task_struct *tsk = current; 245 struct task_struct *tsk = current;
237 246
247 exception_enter(regs);
238 /* Return not checked because double check cannot be ignored */ 248 /* Return not checked because double check cannot be ignored */
239 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); 249 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
240 250
@@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
255{ 265{
256 struct task_struct *tsk; 266 struct task_struct *tsk;
257 267
268 exception_enter(regs);
258 conditional_sti(regs); 269 conditional_sti(regs);
259 270
260#ifdef CONFIG_X86_32 271#ifdef CONFIG_X86_32
261 if (regs->flags & X86_VM_MASK) 272 if (regs->flags & X86_VM_MASK) {
262 goto gp_in_vm86; 273 local_irq_enable();
274 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
275 goto exit;
276 }
263#endif 277#endif
264 278
265 tsk = current; 279 tsk = current;
266 if (!user_mode(regs)) 280 if (!user_mode(regs)) {
267 goto gp_in_kernel; 281 if (fixup_exception(regs))
282 goto exit;
283
284 tsk->thread.error_code = error_code;
285 tsk->thread.trap_nr = X86_TRAP_GP;
286 if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
287 X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
288 die("general protection fault", regs, error_code);
289 goto exit;
290 }
268 291
269 tsk->thread.error_code = error_code; 292 tsk->thread.error_code = error_code;
270 tsk->thread.trap_nr = X86_TRAP_GP; 293 tsk->thread.trap_nr = X86_TRAP_GP;
@@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
279 } 302 }
280 303
281 force_sig(SIGSEGV, tsk); 304 force_sig(SIGSEGV, tsk);
282 return; 305exit:
283 306 exception_exit(regs);
284#ifdef CONFIG_X86_32
285gp_in_vm86:
286 local_irq_enable();
287 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
288 return;
289#endif
290
291gp_in_kernel:
292 if (fixup_exception(regs))
293 return;
294
295 tsk->thread.error_code = error_code;
296 tsk->thread.trap_nr = X86_TRAP_GP;
297 if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
298 X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
299 return;
300 die("general protection fault", regs, error_code);
301} 307}
302 308
303/* May run on IST stack. */ 309/* May run on IST stack. */
@@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
312 ftrace_int3_handler(regs)) 318 ftrace_int3_handler(regs))
313 return; 319 return;
314#endif 320#endif
321 exception_enter(regs);
315#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 322#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
316 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 323 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
317 SIGTRAP) == NOTIFY_STOP) 324 SIGTRAP) == NOTIFY_STOP)
318 return; 325 goto exit;
319#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ 326#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
320 327
321 if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 328 if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
322 SIGTRAP) == NOTIFY_STOP) 329 SIGTRAP) == NOTIFY_STOP)
323 return; 330 goto exit;
324 331
325 /* 332 /*
326 * Let others (NMI) know that the debug stack is in use 333 * Let others (NMI) know that the debug stack is in use
@@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
331 do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); 338 do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
332 preempt_conditional_cli(regs); 339 preempt_conditional_cli(regs);
333 debug_stack_usage_dec(); 340 debug_stack_usage_dec();
341exit:
342 exception_exit(regs);
334} 343}
335 344
336#ifdef CONFIG_X86_64 345#ifdef CONFIG_X86_64
@@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
391 unsigned long dr6; 400 unsigned long dr6;
392 int si_code; 401 int si_code;
393 402
403 exception_enter(regs);
404
394 get_debugreg(dr6, 6); 405 get_debugreg(dr6, 6);
395 406
396 /* Filter out all the reserved bits which are preset to 1 */ 407 /* Filter out all the reserved bits which are preset to 1 */
@@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
406 417
407 /* Catch kmemcheck conditions first of all! */ 418 /* Catch kmemcheck conditions first of all! */
408 if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) 419 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
409 return; 420 goto exit;
410 421
411 /* DR6 may or may not be cleared by the CPU */ 422 /* DR6 may or may not be cleared by the CPU */
412 set_debugreg(0, 6); 423 set_debugreg(0, 6);
@@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
421 432
422 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, 433 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
423 SIGTRAP) == NOTIFY_STOP) 434 SIGTRAP) == NOTIFY_STOP)
424 return; 435 goto exit;
425 436
426 /* 437 /*
427 * Let others (NMI) know that the debug stack is in use 438 * Let others (NMI) know that the debug stack is in use
@@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
437 X86_TRAP_DB); 448 X86_TRAP_DB);
438 preempt_conditional_cli(regs); 449 preempt_conditional_cli(regs);
439 debug_stack_usage_dec(); 450 debug_stack_usage_dec();
440 return; 451 goto exit;
441 } 452 }
442 453
443 /* 454 /*
@@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
458 preempt_conditional_cli(regs); 469 preempt_conditional_cli(regs);
459 debug_stack_usage_dec(); 470 debug_stack_usage_dec();
460 471
461 return; 472exit:
473 exception_exit(regs);
462} 474}
463 475
464/* 476/*
@@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
555#ifdef CONFIG_X86_32 567#ifdef CONFIG_X86_32
556 ignore_fpu_irq = 1; 568 ignore_fpu_irq = 1;
557#endif 569#endif
558 570 exception_enter(regs);
559 math_error(regs, error_code, X86_TRAP_MF); 571 math_error(regs, error_code, X86_TRAP_MF);
572 exception_exit(regs);
560} 573}
561 574
562dotraplinkage void 575dotraplinkage void
563do_simd_coprocessor_error(struct pt_regs *regs, long error_code) 576do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
564{ 577{
578 exception_enter(regs);
565 math_error(regs, error_code, X86_TRAP_XF); 579 math_error(regs, error_code, X86_TRAP_XF);
580 exception_exit(regs);
566} 581}
567 582
568dotraplinkage void 583dotraplinkage void
@@ -613,11 +628,12 @@ void math_state_restore(void)
613 } 628 }
614 629
615 __thread_fpu_begin(tsk); 630 __thread_fpu_begin(tsk);
631
616 /* 632 /*
617 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 633 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
618 */ 634 */
619 if (unlikely(restore_fpu_checking(tsk))) { 635 if (unlikely(restore_fpu_checking(tsk))) {
620 __thread_fpu_end(tsk); 636 drop_init_fpu(tsk);
621 force_sig(SIGSEGV, tsk); 637 force_sig(SIGSEGV, tsk);
622 return; 638 return;
623 } 639 }
@@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore);
629dotraplinkage void __kprobes 645dotraplinkage void __kprobes
630do_device_not_available(struct pt_regs *regs, long error_code) 646do_device_not_available(struct pt_regs *regs, long error_code)
631{ 647{
648 exception_enter(regs);
649 BUG_ON(use_eager_fpu());
650
632#ifdef CONFIG_MATH_EMULATION 651#ifdef CONFIG_MATH_EMULATION
633 if (read_cr0() & X86_CR0_EM) { 652 if (read_cr0() & X86_CR0_EM) {
634 struct math_emu_info info = { }; 653 struct math_emu_info info = { };
@@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
637 656
638 info.regs = regs; 657 info.regs = regs;
639 math_emulate(&info); 658 math_emulate(&info);
659 exception_exit(regs);
640 return; 660 return;
641 } 661 }
642#endif 662#endif
@@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
644#ifdef CONFIG_X86_32 664#ifdef CONFIG_X86_32
645 conditional_sti(regs); 665 conditional_sti(regs);
646#endif 666#endif
667 exception_exit(regs);
647} 668}
648 669
649#ifdef CONFIG_X86_32 670#ifdef CONFIG_X86_32
650dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) 671dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
651{ 672{
652 siginfo_t info; 673 siginfo_t info;
674
675 exception_enter(regs);
653 local_irq_enable(); 676 local_irq_enable();
654 677
655 info.si_signo = SIGILL; 678 info.si_signo = SIGILL;
@@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
657 info.si_code = ILL_BADSTK; 680 info.si_code = ILL_BADSTK;
658 info.si_addr = NULL; 681 info.si_addr = NULL;
659 if (notify_die(DIE_TRAP, "iret exception", regs, error_code, 682 if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
660 X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) 683 X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
661 return; 684 do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
662 do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, 685 &info);
663 &info); 686 }
687 exception_exit(regs);
664} 688}
665#endif 689#endif
666 690
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 36fd42091fa7..9538f00827a9 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,6 +41,9 @@
41/* Adjust the return address of a call insn */ 41/* Adjust the return address of a call insn */
42#define UPROBE_FIX_CALL 0x2 42#define UPROBE_FIX_CALL 0x2
43 43
44/* Instruction will modify TF, don't change it */
45#define UPROBE_FIX_SETF 0x4
46
44#define UPROBE_FIX_RIP_AX 0x8000 47#define UPROBE_FIX_RIP_AX 0x8000
45#define UPROBE_FIX_RIP_CX 0x4000 48#define UPROBE_FIX_RIP_CX 0x4000
46 49
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
239 insn_get_opcode(insn); /* should be a nop */ 242 insn_get_opcode(insn); /* should be a nop */
240 243
241 switch (OPCODE1(insn)) { 244 switch (OPCODE1(insn)) {
245 case 0x9d:
246 /* popf */
247 auprobe->fixups |= UPROBE_FIX_SETF;
248 break;
242 case 0xc3: /* ret/lret */ 249 case 0xc3: /* ret/lret */
243 case 0xcb: 250 case 0xcb:
244 case 0xc2: 251 case 0xc2:
@@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
646 * Skip these instructions as per the currently known x86 ISA. 653 * Skip these instructions as per the currently known x86 ISA.
647 * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } 654 * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
648 */ 655 */
649bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) 656static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
650{ 657{
651 int i; 658 int i;
652 659
@@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
673 } 680 }
674 return false; 681 return false;
675} 682}
683
684bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
685{
686 bool ret = __skip_sstep(auprobe, regs);
687 if (ret && (regs->flags & X86_EFLAGS_TF))
688 send_sig(SIGTRAP, current, 0);
689 return ret;
690}
691
692void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
693{
694 struct task_struct *task = current;
695 struct arch_uprobe_task *autask = &task->utask->autask;
696 struct pt_regs *regs = task_pt_regs(task);
697
698 autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
699
700 regs->flags |= X86_EFLAGS_TF;
701 if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
702 set_task_blockstep(task, false);
703}
704
705void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
706{
707 struct task_struct *task = current;
708 struct arch_uprobe_task *autask = &task->utask->autask;
709 bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
710 struct pt_regs *regs = task_pt_regs(task);
711 /*
712 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
713 * SIGTRAP if we do not clear TF. We need to examine the opcode to
714 * make it right.
715 */
716 if (unlikely(trapped)) {
717 if (!autask->saved_tf)
718 regs->flags &= ~X86_EFLAGS_TF;
719 } else {
720 if (autask->saved_tf)
721 send_sig(SIGTRAP, task, 0);
722 else if (!(auprobe->fixups & UPROBE_FIX_SETF))
723 regs->flags &= ~X86_EFLAGS_TF;
724 }
725}
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 6020f6f5927c..1330dd102950 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -13,9 +13,13 @@
13#include <asm/ftrace.h> 13#include <asm/ftrace.h>
14 14
15#ifdef CONFIG_FUNCTION_TRACER 15#ifdef CONFIG_FUNCTION_TRACER
16/* mcount is defined in assembly */ 16/* mcount and __fentry__ are defined in assembly */
17#ifdef CC_USING_FENTRY
18EXPORT_SYMBOL(__fentry__);
19#else
17EXPORT_SYMBOL(mcount); 20EXPORT_SYMBOL(mcount);
18#endif 21#endif
22#endif
19 23
20EXPORT_SYMBOL(__get_user_1); 24EXPORT_SYMBOL(__get_user_1);
21EXPORT_SYMBOL(__get_user_2); 25EXPORT_SYMBOL(__get_user_2);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 9f3167e891ef..7a3d075a814a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,6 @@
26 26
27void __cpuinit x86_init_noop(void) { } 27void __cpuinit x86_init_noop(void) { }
28void __init x86_init_uint_noop(unsigned int unused) { } 28void __init x86_init_uint_noop(unsigned int unused) { }
29void __init x86_init_pgd_noop(pgd_t *unused) { }
30int __init iommu_init_noop(void) { return 0; } 29int __init iommu_init_noop(void) { return 0; }
31void iommu_shutdown_noop(void) { } 30void iommu_shutdown_noop(void) { }
32 31
@@ -68,8 +67,7 @@ struct x86_init_ops x86_init __initdata = {
68 }, 67 },
69 68
70 .paging = { 69 .paging = {
71 .pagetable_setup_start = native_pagetable_setup_start, 70 .pagetable_init = native_pagetable_init,
72 .pagetable_setup_done = native_pagetable_setup_done,
73 }, 71 },
74 72
75 .timers = { 73 .timers = {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 3d3e20709119..ada87a329edc 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -10,9 +10,7 @@
10#include <linux/compat.h> 10#include <linux/compat.h>
11#include <asm/i387.h> 11#include <asm/i387.h>
12#include <asm/fpu-internal.h> 12#include <asm/fpu-internal.h>
13#ifdef CONFIG_IA32_EMULATION 13#include <asm/sigframe.h>
14#include <asm/sigcontext32.h>
15#endif
16#include <asm/xcr.h> 14#include <asm/xcr.h>
17 15
18/* 16/*
@@ -23,13 +21,9 @@ u64 pcntxt_mask;
23/* 21/*
24 * Represents init state for the supported extended state. 22 * Represents init state for the supported extended state.
25 */ 23 */
26static struct xsave_struct *init_xstate_buf; 24struct xsave_struct *init_xstate_buf;
27
28struct _fpx_sw_bytes fx_sw_reserved;
29#ifdef CONFIG_IA32_EMULATION
30struct _fpx_sw_bytes fx_sw_reserved_ia32;
31#endif
32 25
26static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
33static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; 27static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
34 28
35/* 29/*
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
44 */ 38 */
45void __sanitize_i387_state(struct task_struct *tsk) 39void __sanitize_i387_state(struct task_struct *tsk)
46{ 40{
47 u64 xstate_bv;
48 int feature_bit = 0x2;
49 struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; 41 struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
42 int feature_bit = 0x2;
43 u64 xstate_bv;
50 44
51 if (!fx) 45 if (!fx)
52 return; 46 return;
@@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk)
104 * Check for the presence of extended state information in the 98 * Check for the presence of extended state information in the
105 * user fpstate pointer in the sigcontext. 99 * user fpstate pointer in the sigcontext.
106 */ 100 */
107int check_for_xstate(struct i387_fxsave_struct __user *buf, 101static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
108 void __user *fpstate, 102 void __user *fpstate,
109 struct _fpx_sw_bytes *fx_sw_user) 103 struct _fpx_sw_bytes *fx_sw)
110{ 104{
111 int min_xstate_size = sizeof(struct i387_fxsave_struct) + 105 int min_xstate_size = sizeof(struct i387_fxsave_struct) +
112 sizeof(struct xsave_hdr_struct); 106 sizeof(struct xsave_hdr_struct);
113 unsigned int magic2; 107 unsigned int magic2;
114 int err;
115 108
116 err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], 109 if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
117 sizeof(struct _fpx_sw_bytes)); 110 return -1;
118 if (err)
119 return -EFAULT;
120 111
121 /* 112 /* Check for the first magic field and other error scenarios. */
122 * First Magic check failed. 113 if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
123 */ 114 fx_sw->xstate_size < min_xstate_size ||
124 if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) 115 fx_sw->xstate_size > xstate_size ||
125 return -EINVAL; 116 fx_sw->xstate_size > fx_sw->extended_size)
117 return -1;
126 118
127 /* 119 /*
128 * Check for error scenarios.
129 */
130 if (fx_sw_user->xstate_size < min_xstate_size ||
131 fx_sw_user->xstate_size > xstate_size ||
132 fx_sw_user->xstate_size > fx_sw_user->extended_size)
133 return -EINVAL;
134
135 err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
136 fx_sw_user->extended_size -
137 FP_XSTATE_MAGIC2_SIZE));
138 if (err)
139 return err;
140 /*
141 * Check for the presence of second magic word at the end of memory 120 * Check for the presence of second magic word at the end of memory
142 * layout. This detects the case where the user just copied the legacy 121 * layout. This detects the case where the user just copied the legacy
143 * fpstate layout with out copying the extended state information 122 * fpstate layout with out copying the extended state information
144 * in the memory layout. 123 * in the memory layout.
145 */ 124 */
146 if (magic2 != FP_XSTATE_MAGIC2) 125 if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
147 return -EFAULT; 126 || magic2 != FP_XSTATE_MAGIC2)
127 return -1;
148 128
149 return 0; 129 return 0;
150} 130}
151 131
152#ifdef CONFIG_X86_64
153/* 132/*
154 * Signal frame handlers. 133 * Signal frame handlers.
155 */ 134 */
156 135static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
157int save_i387_xstate(void __user *buf)
158{ 136{
159 struct task_struct *tsk = current; 137 if (use_fxsr()) {
160 int err = 0; 138 struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
161 139 struct user_i387_ia32_struct env;
162 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) 140 struct _fpstate_ia32 __user *fp = buf;
163 return -EACCES;
164 141
165 BUG_ON(sig_xstate_size < xstate_size); 142 convert_from_fxsr(&env, tsk);
166 143
167 if ((unsigned long)buf % 64) 144 if (__copy_to_user(buf, &env, sizeof(env)) ||
168 pr_err("%s: bad fpstate %p\n", __func__, buf); 145 __put_user(xsave->i387.swd, &fp->status) ||
169 146 __put_user(X86_FXSR_MAGIC, &fp->magic))
170 if (!used_math()) 147 return -1;
171 return 0;
172
173 if (user_has_fpu()) {
174 if (use_xsave())
175 err = xsave_user(buf);
176 else
177 err = fxsave_user(buf);
178
179 if (err)
180 return err;
181 user_fpu_end();
182 } else { 148 } else {
183 sanitize_i387_state(tsk); 149 struct i387_fsave_struct __user *fp = buf;
184 if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, 150 u32 swd;
185 xstate_size)) 151 if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
186 return -1; 152 return -1;
187 } 153 }
188 154
189 clear_used_math(); /* trigger finit */ 155 return 0;
156}
190 157
191 if (use_xsave()) { 158static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
192 struct _fpstate __user *fx = buf; 159{
193 struct _xstate __user *x = buf; 160 struct xsave_struct __user *x = buf;
194 u64 xstate_bv; 161 struct _fpx_sw_bytes *sw_bytes;
162 u32 xstate_bv;
163 int err;
195 164
196 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, 165 /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
197 sizeof(struct _fpx_sw_bytes)); 166 sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
167 err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
198 168
199 err |= __put_user(FP_XSTATE_MAGIC2, 169 if (!use_xsave())
200 (__u32 __user *) (buf + sig_xstate_size 170 return err;
201 - FP_XSTATE_MAGIC2_SIZE));
202 171
203 /* 172 err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
204 * Read the xstate_bv which we copied (directly from the cpu or
205 * from the state in task struct) to the user buffers and
206 * set the FP/SSE bits.
207 */
208 err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
209 173
210 /* 174 /*
211 * For legacy compatible, we always set FP/SSE bits in the bit 175 * Read the xstate_bv which we copied (directly from the cpu or
212 * vector while saving the state to the user context. This will 176 * from the state in task struct) to the user buffers.
213 * enable us capturing any changes(during sigreturn) to 177 */
214 * the FP/SSE bits by the legacy applications which don't touch 178 err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
215 * xstate_bv in the xsave header.
216 *
217 * xsave aware apps can change the xstate_bv in the xsave
218 * header as well as change any contents in the memory layout.
219 * xrestore as part of sigreturn will capture all the changes.
220 */
221 xstate_bv |= XSTATE_FPSSE;
222 179
223 err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); 180 /*
181 * For legacy compatible, we always set FP/SSE bits in the bit
182 * vector while saving the state to the user context. This will
183 * enable us capturing any changes(during sigreturn) to
184 * the FP/SSE bits by the legacy applications which don't touch
185 * xstate_bv in the xsave header.
186 *
187 * xsave aware apps can change the xstate_bv in the xsave
188 * header as well as change any contents in the memory layout.
189 * xrestore as part of sigreturn will capture all the changes.
190 */
191 xstate_bv |= XSTATE_FPSSE;
224 192
225 if (err) 193 err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
226 return err;
227 }
228 194
229 return 1; 195 return err;
196}
197
198static inline int save_user_xstate(struct xsave_struct __user *buf)
199{
200 int err;
201
202 if (use_xsave())
203 err = xsave_user(buf);
204 else if (use_fxsr())
205 err = fxsave_user((struct i387_fxsave_struct __user *) buf);
206 else
207 err = fsave_user((struct i387_fsave_struct __user *) buf);
208
209 if (unlikely(err) && __clear_user(buf, xstate_size))
210 err = -EFAULT;
211 return err;
230} 212}
231 213
232/* 214/*
233 * Restore the extended state if present. Otherwise, restore the FP/SSE 215 * Save the fpu, extended register state to the user signal frame.
234 * state. 216 *
217 * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
218 * state is copied.
219 * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
220 *
221 * buf == buf_fx for 64-bit frames and 32-bit fsave frame.
222 * buf != buf_fx for 32-bit frames with fxstate.
223 *
224 * If the fpu, extended register state is live, save the state directly
225 * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
226 * copy the thread's fpu state to the user frame starting at 'buf_fx'.
227 *
228 * If this is a 32-bit frame with fxstate, put a fsave header before
229 * the aligned state at 'buf_fx'.
230 *
231 * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
232 * indicating the absence/presence of the extended state to the user.
235 */ 233 */
236static int restore_user_xstate(void __user *buf) 234int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
237{ 235{
238 struct _fpx_sw_bytes fx_sw_user; 236 struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
239 u64 mask; 237 struct task_struct *tsk = current;
240 int err; 238 int ia32_fxstate = (buf != buf_fx);
241 239
242 if (((unsigned long)buf % 64) || 240 ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
243 check_for_xstate(buf, buf, &fx_sw_user)) 241 config_enabled(CONFIG_IA32_EMULATION));
244 goto fx_only;
245 242
246 mask = fx_sw_user.xstate_bv; 243 if (!access_ok(VERIFY_WRITE, buf, size))
244 return -EACCES;
247 245
248 /* 246 if (!HAVE_HWFP)
249 * restore the state passed by the user. 247 return fpregs_soft_get(current, NULL, 0,
250 */ 248 sizeof(struct user_i387_ia32_struct), NULL,
251 err = xrestore_user(buf, mask); 249 (struct _fpstate_ia32 __user *) buf) ? -1 : 1;
252 if (err)
253 return err;
254 250
255 /* 251 if (user_has_fpu()) {
256 * init the state skipped by the user. 252 /* Save the live register state to the user directly. */
257 */ 253 if (save_user_xstate(buf_fx))
258 mask = pcntxt_mask & ~mask; 254 return -1;
259 if (unlikely(mask)) 255 /* Update the thread's fxstate to save the fsave header. */
260 xrstor_state(init_xstate_buf, mask); 256 if (ia32_fxstate)
257 fpu_fxsave(&tsk->thread.fpu);
258 } else {
259 sanitize_i387_state(tsk);
260 if (__copy_to_user(buf_fx, xsave, xstate_size))
261 return -1;
262 }
263
264 /* Save the fsave header for the 32-bit frames. */
265 if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
266 return -1;
267
268 if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
269 return -1;
270
271 drop_init_fpu(tsk); /* trigger finit */
261 272
262 return 0; 273 return 0;
274}
263 275
264fx_only: 276static inline void
265 /* 277sanitize_restored_xstate(struct task_struct *tsk,
266 * couldn't find the extended state information in the 278 struct user_i387_ia32_struct *ia32_env,
267 * memory layout. Restore just the FP/SSE and init all 279 u64 xstate_bv, int fx_only)
268 * the other extended state. 280{
269 */ 281 struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
270 xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); 282 struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
271 return fxrstor_checking((__force struct i387_fxsave_struct *)buf); 283
284 if (use_xsave()) {
285 /* These bits must be zero. */
286 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
287
288 /*
289 * Init the state that is not present in the memory
290 * layout and not enabled by the OS.
291 */
292 if (fx_only)
293 xsave_hdr->xstate_bv = XSTATE_FPSSE;
294 else
295 xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
296 }
297
298 if (use_fxsr()) {
299 /*
300 * mscsr reserved bits must be masked to zero for security
301 * reasons.
302 */
303 xsave->i387.mxcsr &= mxcsr_feature_mask;
304
305 convert_to_fxsr(tsk, ia32_env);
306 }
272} 307}
273 308
274/* 309/*
275 * This restores directly out of user space. Exceptions are handled. 310 * Restore the extended state if present. Otherwise, restore the FP/SSE state.
276 */ 311 */
277int restore_i387_xstate(void __user *buf) 312static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
278{ 313{
314 if (use_xsave()) {
315 if ((unsigned long)buf % 64 || fx_only) {
316 u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
317 xrstor_state(init_xstate_buf, init_bv);
318 return fxrstor_user(buf);
319 } else {
320 u64 init_bv = pcntxt_mask & ~xbv;
321 if (unlikely(init_bv))
322 xrstor_state(init_xstate_buf, init_bv);
323 return xrestore_user(buf, xbv);
324 }
325 } else if (use_fxsr()) {
326 return fxrstor_user(buf);
327 } else
328 return frstor_user(buf);
329}
330
331int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
332{
333 int ia32_fxstate = (buf != buf_fx);
279 struct task_struct *tsk = current; 334 struct task_struct *tsk = current;
280 int err = 0; 335 int state_size = xstate_size;
336 u64 xstate_bv = 0;
337 int fx_only = 0;
338
339 ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
340 config_enabled(CONFIG_IA32_EMULATION));
281 341
282 if (!buf) { 342 if (!buf) {
283 if (used_math()) 343 drop_init_fpu(tsk);
284 goto clear;
285 return 0; 344 return 0;
286 } else 345 }
287 if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
288 return -EACCES;
289 346
290 if (!used_math()) { 347 if (!access_ok(VERIFY_READ, buf, size))
291 err = init_fpu(tsk); 348 return -EACCES;
292 if (err) 349
293 return err; 350 if (!used_math() && init_fpu(tsk))
351 return -1;
352
353 if (!HAVE_HWFP) {
354 return fpregs_soft_set(current, NULL,
355 0, sizeof(struct user_i387_ia32_struct),
356 NULL, buf) != 0;
294 } 357 }
295 358
296 user_fpu_begin(); 359 if (use_xsave()) {
297 if (use_xsave()) 360 struct _fpx_sw_bytes fx_sw_user;
298 err = restore_user_xstate(buf); 361 if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
299 else 362 /*
300 err = fxrstor_checking((__force struct i387_fxsave_struct *) 363 * Couldn't find the extended state information in the
301 buf); 364 * memory layout. Restore just the FP/SSE and init all
302 if (unlikely(err)) { 365 * the other extended state.
366 */
367 state_size = sizeof(struct i387_fxsave_struct);
368 fx_only = 1;
369 } else {
370 state_size = fx_sw_user.xstate_size;
371 xstate_bv = fx_sw_user.xstate_bv;
372 }
373 }
374
375 if (ia32_fxstate) {
376 /*
377 * For 32-bit frames with fxstate, copy the user state to the
378 * thread's fpu state, reconstruct fxstate from the fsave
379 * header. Sanitize the copied state etc.
380 */
381 struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
382 struct user_i387_ia32_struct env;
383 int err = 0;
384
385 /*
386 * Drop the current fpu which clears used_math(). This ensures
387 * that any context-switch during the copy of the new state,
388 * avoids the intermediate state from getting restored/saved.
389 * Thus avoiding the new restored state from getting corrupted.
390 * We will be ready to restore/save the state only after
391 * set_used_math() is again set.
392 */
393 drop_fpu(tsk);
394
395 if (__copy_from_user(xsave, buf_fx, state_size) ||
396 __copy_from_user(&env, buf, sizeof(env))) {
397 err = -1;
398 } else {
399 sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
400 set_used_math();
401 }
402
403 if (use_eager_fpu())
404 math_state_restore();
405
406 return err;
407 } else {
303 /* 408 /*
304 * Encountered an error while doing the restore from the 409 * For 64-bit frames and 32-bit fsave frames, restore the user
305 * user buffer, clear the fpu state. 410 * state to the registers directly (with exceptions handled).
306 */ 411 */
307clear: 412 user_fpu_begin();
308 clear_fpu(tsk); 413 if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
309 clear_used_math(); 414 drop_init_fpu(tsk);
415 return -1;
416 }
310 } 417 }
311 return err; 418
419 return 0;
312} 420}
313#endif
314 421
315/* 422/*
316 * Prepare the SW reserved portion of the fxsave memory layout, indicating 423 * Prepare the SW reserved portion of the fxsave memory layout, indicating
@@ -321,31 +428,22 @@ clear:
321 */ 428 */
322static void prepare_fx_sw_frame(void) 429static void prepare_fx_sw_frame(void)
323{ 430{
324 int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + 431 int fsave_header_size = sizeof(struct i387_fsave_struct);
325 FP_XSTATE_MAGIC2_SIZE; 432 int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
326 433
327 sig_xstate_size = sizeof(struct _fpstate) + size_extended; 434 if (config_enabled(CONFIG_X86_32))
328 435 size += fsave_header_size;
329#ifdef CONFIG_IA32_EMULATION
330 sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
331#endif
332
333 memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
334 436
335 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; 437 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
336 fx_sw_reserved.extended_size = sig_xstate_size; 438 fx_sw_reserved.extended_size = size;
337 fx_sw_reserved.xstate_bv = pcntxt_mask; 439 fx_sw_reserved.xstate_bv = pcntxt_mask;
338 fx_sw_reserved.xstate_size = xstate_size; 440 fx_sw_reserved.xstate_size = xstate_size;
339#ifdef CONFIG_IA32_EMULATION
340 memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
341 sizeof(struct _fpx_sw_bytes));
342 fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
343#endif
344}
345 441
346#ifdef CONFIG_X86_64 442 if (config_enabled(CONFIG_IA32_EMULATION)) {
347unsigned int sig_xstate_size = sizeof(struct _fpstate); 443 fx_sw_reserved_ia32 = fx_sw_reserved;
348#endif 444 fx_sw_reserved_ia32.extended_size += fsave_header_size;
445 }
446}
349 447
350/* 448/*
351 * Enable the extended processor state save/restore feature 449 * Enable the extended processor state save/restore feature
@@ -384,19 +482,21 @@ static void __init setup_xstate_features(void)
384/* 482/*
385 * setup the xstate image representing the init state 483 * setup the xstate image representing the init state
386 */ 484 */
387static void __init setup_xstate_init(void) 485static void __init setup_init_fpu_buf(void)
388{ 486{
389 setup_xstate_features();
390
391 /* 487 /*
392 * Setup init_xstate_buf to represent the init state of 488 * Setup init_xstate_buf to represent the init state of
393 * all the features managed by the xsave 489 * all the features managed by the xsave
394 */ 490 */
395 init_xstate_buf = alloc_bootmem_align(xstate_size, 491 init_xstate_buf = alloc_bootmem_align(xstate_size,
396 __alignof__(struct xsave_struct)); 492 __alignof__(struct xsave_struct));
397 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; 493 fx_finit(&init_xstate_buf->i387);
494
495 if (!cpu_has_xsave)
496 return;
497
498 setup_xstate_features();
398 499
399 clts();
400 /* 500 /*
401 * Init all the features state with header_bv being 0x0 501 * Init all the features state with header_bv being 0x0
402 */ 502 */
@@ -406,9 +506,21 @@ static void __init setup_xstate_init(void)
406 * of any feature which is not represented by all zero's. 506 * of any feature which is not represented by all zero's.
407 */ 507 */
408 xsave_state(init_xstate_buf, -1); 508 xsave_state(init_xstate_buf, -1);
409 stts();
410} 509}
411 510
511static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
512static int __init eager_fpu_setup(char *s)
513{
514 if (!strcmp(s, "on"))
515 eagerfpu = ENABLE;
516 else if (!strcmp(s, "off"))
517 eagerfpu = DISABLE;
518 else if (!strcmp(s, "auto"))
519 eagerfpu = AUTO;
520 return 1;
521}
522__setup("eagerfpu=", eager_fpu_setup);
523
412/* 524/*
413 * Enable and initialize the xsave feature. 525 * Enable and initialize the xsave feature.
414 */ 526 */
@@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void)
445 557
446 update_regset_xstate_info(xstate_size, pcntxt_mask); 558 update_regset_xstate_info(xstate_size, pcntxt_mask);
447 prepare_fx_sw_frame(); 559 prepare_fx_sw_frame();
560 setup_init_fpu_buf();
448 561
449 setup_xstate_init(); 562 /* Auto enable eagerfpu for xsaveopt */
563 if (cpu_has_xsaveopt && eagerfpu != DISABLE)
564 eagerfpu = ENABLE;
450 565
451 pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", 566 pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
452 pcntxt_mask, xstate_size); 567 pcntxt_mask, xstate_size);
@@ -471,3 +586,43 @@ void __cpuinit xsave_init(void)
471 next_func = xstate_enable; 586 next_func = xstate_enable;
472 this_func(); 587 this_func();
473} 588}
589
590static inline void __init eager_fpu_init_bp(void)
591{
592 current->thread.fpu.state =
593 alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
594 if (!init_xstate_buf)
595 setup_init_fpu_buf();
596}
597
598void __cpuinit eager_fpu_init(void)
599{
600 static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
601
602 clear_used_math();
603 current_thread_info()->status = 0;
604
605 if (eagerfpu == ENABLE)
606 setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
607
608 if (!cpu_has_eager_fpu) {
609 stts();
610 return;
611 }
612
613 if (boot_func) {
614 boot_func();
615 boot_func = NULL;
616 }
617
618 /*
619 * This is same as math_state_restore(). But use_xsave() is
620 * not yet patched to use math_state_restore().
621 */
622 init_fpu(current);
623 __thread_fpu_begin(current);
624 if (cpu_has_xsave)
625 xrstor_state(init_xstate_buf, -1);
626 else
627 fxrstor_checking(&init_xstate_buf->i387);
628}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a71faf727ff3..bca63f04dccb 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic,
183#define KVM_ISA_VMX 1 183#define KVM_ISA_VMX 1
184#define KVM_ISA_SVM 2 184#define KVM_ISA_SVM 2
185 185
186#define VMX_EXIT_REASONS \
187 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
188 { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
189 { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
190 { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
191 { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
192 { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
193 { EXIT_REASON_CPUID, "CPUID" }, \
194 { EXIT_REASON_HLT, "HLT" }, \
195 { EXIT_REASON_INVLPG, "INVLPG" }, \
196 { EXIT_REASON_RDPMC, "RDPMC" }, \
197 { EXIT_REASON_RDTSC, "RDTSC" }, \
198 { EXIT_REASON_VMCALL, "VMCALL" }, \
199 { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
200 { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
201 { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
202 { EXIT_REASON_VMPTRST, "VMPTRST" }, \
203 { EXIT_REASON_VMREAD, "VMREAD" }, \
204 { EXIT_REASON_VMRESUME, "VMRESUME" }, \
205 { EXIT_REASON_VMWRITE, "VMWRITE" }, \
206 { EXIT_REASON_VMOFF, "VMOFF" }, \
207 { EXIT_REASON_VMON, "VMON" }, \
208 { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
209 { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
210 { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
211 { EXIT_REASON_MSR_READ, "MSR_READ" }, \
212 { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
213 { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
214 { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
215 { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
216 { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
217 { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
218 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
219 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
220 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
221 { EXIT_REASON_WBINVD, "WBINVD" }
222
223#define SVM_EXIT_REASONS \
224 { SVM_EXIT_READ_CR0, "read_cr0" }, \
225 { SVM_EXIT_READ_CR3, "read_cr3" }, \
226 { SVM_EXIT_READ_CR4, "read_cr4" }, \
227 { SVM_EXIT_READ_CR8, "read_cr8" }, \
228 { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
229 { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
230 { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
231 { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
232 { SVM_EXIT_READ_DR0, "read_dr0" }, \
233 { SVM_EXIT_READ_DR1, "read_dr1" }, \
234 { SVM_EXIT_READ_DR2, "read_dr2" }, \
235 { SVM_EXIT_READ_DR3, "read_dr3" }, \
236 { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
237 { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
238 { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
239 { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
240 { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
241 { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
242 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
243 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
244 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
245 { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
246 { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
247 { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
248 { SVM_EXIT_INTR, "interrupt" }, \
249 { SVM_EXIT_NMI, "nmi" }, \
250 { SVM_EXIT_SMI, "smi" }, \
251 { SVM_EXIT_INIT, "init" }, \
252 { SVM_EXIT_VINTR, "vintr" }, \
253 { SVM_EXIT_CPUID, "cpuid" }, \
254 { SVM_EXIT_INVD, "invd" }, \
255 { SVM_EXIT_HLT, "hlt" }, \
256 { SVM_EXIT_INVLPG, "invlpg" }, \
257 { SVM_EXIT_INVLPGA, "invlpga" }, \
258 { SVM_EXIT_IOIO, "io" }, \
259 { SVM_EXIT_MSR, "msr" }, \
260 { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
261 { SVM_EXIT_SHUTDOWN, "shutdown" }, \
262 { SVM_EXIT_VMRUN, "vmrun" }, \
263 { SVM_EXIT_VMMCALL, "hypercall" }, \
264 { SVM_EXIT_VMLOAD, "vmload" }, \
265 { SVM_EXIT_VMSAVE, "vmsave" }, \
266 { SVM_EXIT_STGI, "stgi" }, \
267 { SVM_EXIT_CLGI, "clgi" }, \
268 { SVM_EXIT_SKINIT, "skinit" }, \
269 { SVM_EXIT_WBINVD, "wbinvd" }, \
270 { SVM_EXIT_MONITOR, "monitor" }, \
271 { SVM_EXIT_MWAIT, "mwait" }, \
272 { SVM_EXIT_XSETBV, "xsetbv" }, \
273 { SVM_EXIT_NPF, "npf" }
274
275/* 186/*
276 * Tracepoint for kvm guest exit: 187 * Tracepoint for kvm guest exit:
277 */ 188 */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b1eb202ee76a..851aa7c3b890 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
1493#ifdef CONFIG_X86_64 1493#ifdef CONFIG_X86_64
1494 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 1494 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
1495#endif 1495#endif
1496 if (user_has_fpu()) 1496 /*
1497 clts(); 1497 * If the FPU is not active (through the host task or
1498 * the guest vcpu), then restore the cr0.TS bit.
1499 */
1500 if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
1501 stts();
1498 load_gdt(&__get_cpu_var(host_gdt)); 1502 load_gdt(&__get_cpu_var(host_gdt));
1499} 1503}
1500 1504
@@ -3743,7 +3747,7 @@ static void vmx_set_constant_host_state(void)
3743 unsigned long tmpl; 3747 unsigned long tmpl;
3744 struct desc_ptr dt; 3748 struct desc_ptr dt;
3745 3749
3746 vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ 3750 vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */
3747 vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ 3751 vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */
3748 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ 3752 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
3749 3753
@@ -4543,7 +4547,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
4543 vcpu->run->exit_reason = KVM_EXIT_SET_TPR; 4547 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
4544 return 0; 4548 return 0;
4545 } 4549 }
4546 }; 4550 }
4547 break; 4551 break;
4548 case 2: /* clts */ 4552 case 2: /* clts */
4549 handle_clts(vcpu); 4553 handle_clts(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2966c847d489..1f09552572fa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5979,7 +5979,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
5979 */ 5979 */
5980 kvm_put_guest_xcr0(vcpu); 5980 kvm_put_guest_xcr0(vcpu);
5981 vcpu->guest_fpu_loaded = 1; 5981 vcpu->guest_fpu_loaded = 1;
5982 unlazy_fpu(current); 5982 __kernel_fpu_begin();
5983 fpu_restore_checking(&vcpu->arch.guest_fpu); 5983 fpu_restore_checking(&vcpu->arch.guest_fpu);
5984 trace_kvm_fpu(1); 5984 trace_kvm_fpu(1);
5985} 5985}
@@ -5993,6 +5993,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
5993 5993
5994 vcpu->guest_fpu_loaded = 0; 5994 vcpu->guest_fpu_loaded = 0;
5995 fpu_save_init(&vcpu->arch.guest_fpu); 5995 fpu_save_init(&vcpu->arch.guest_fpu);
5996 __kernel_fpu_end();
5996 ++vcpu->stat.fpu_reload; 5997 ++vcpu->stat.fpu_reload;
5997 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); 5998 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
5998 trace_kvm_fpu(0); 5999 trace_kvm_fpu(0);
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 5b2995f4557a..a30ca15be21c 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -17,6 +17,7 @@
17#include <asm/cpufeature.h> 17#include <asm/cpufeature.h>
18#include <asm/alternative-asm.h> 18#include <asm/alternative-asm.h>
19#include <asm/asm.h> 19#include <asm/asm.h>
20#include <asm/smap.h>
20 21
21/* 22/*
22 * By placing feature2 after feature1 in altinstructions section, we logically 23 * By placing feature2 after feature1 in altinstructions section, we logically
@@ -130,6 +131,7 @@ ENDPROC(bad_from_user)
130 */ 131 */
131ENTRY(copy_user_generic_unrolled) 132ENTRY(copy_user_generic_unrolled)
132 CFI_STARTPROC 133 CFI_STARTPROC
134 ASM_STAC
133 cmpl $8,%edx 135 cmpl $8,%edx
134 jb 20f /* less then 8 bytes, go to byte copy loop */ 136 jb 20f /* less then 8 bytes, go to byte copy loop */
135 ALIGN_DESTINATION 137 ALIGN_DESTINATION
@@ -177,6 +179,7 @@ ENTRY(copy_user_generic_unrolled)
177 decl %ecx 179 decl %ecx
178 jnz 21b 180 jnz 21b
17923: xor %eax,%eax 18123: xor %eax,%eax
182 ASM_CLAC
180 ret 183 ret
181 184
182 .section .fixup,"ax" 185 .section .fixup,"ax"
@@ -232,6 +235,7 @@ ENDPROC(copy_user_generic_unrolled)
232 */ 235 */
233ENTRY(copy_user_generic_string) 236ENTRY(copy_user_generic_string)
234 CFI_STARTPROC 237 CFI_STARTPROC
238 ASM_STAC
235 andl %edx,%edx 239 andl %edx,%edx
236 jz 4f 240 jz 4f
237 cmpl $8,%edx 241 cmpl $8,%edx
@@ -246,6 +250,7 @@ ENTRY(copy_user_generic_string)
2463: rep 2503: rep
247 movsb 251 movsb
2484: xorl %eax,%eax 2524: xorl %eax,%eax
253 ASM_CLAC
249 ret 254 ret
250 255
251 .section .fixup,"ax" 256 .section .fixup,"ax"
@@ -273,12 +278,14 @@ ENDPROC(copy_user_generic_string)
273 */ 278 */
274ENTRY(copy_user_enhanced_fast_string) 279ENTRY(copy_user_enhanced_fast_string)
275 CFI_STARTPROC 280 CFI_STARTPROC
281 ASM_STAC
276 andl %edx,%edx 282 andl %edx,%edx
277 jz 2f 283 jz 2f
278 movl %edx,%ecx 284 movl %edx,%ecx
2791: rep 2851: rep
280 movsb 286 movsb
2812: xorl %eax,%eax 2872: xorl %eax,%eax
288 ASM_CLAC
282 ret 289 ret
283 290
284 .section .fixup,"ax" 291 .section .fixup,"ax"
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index cacddc7163eb..6a4f43c2d9e6 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -15,6 +15,7 @@
15#include <asm/asm-offsets.h> 15#include <asm/asm-offsets.h>
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/asm.h> 17#include <asm/asm.h>
18#include <asm/smap.h>
18 19
19 .macro ALIGN_DESTINATION 20 .macro ALIGN_DESTINATION
20#ifdef FIX_ALIGNMENT 21#ifdef FIX_ALIGNMENT
@@ -48,6 +49,7 @@
48 */ 49 */
49ENTRY(__copy_user_nocache) 50ENTRY(__copy_user_nocache)
50 CFI_STARTPROC 51 CFI_STARTPROC
52 ASM_STAC
51 cmpl $8,%edx 53 cmpl $8,%edx
52 jb 20f /* less then 8 bytes, go to byte copy loop */ 54 jb 20f /* less then 8 bytes, go to byte copy loop */
53 ALIGN_DESTINATION 55 ALIGN_DESTINATION
@@ -95,6 +97,7 @@ ENTRY(__copy_user_nocache)
95 decl %ecx 97 decl %ecx
96 jnz 21b 98 jnz 21b
9723: xorl %eax,%eax 9923: xorl %eax,%eax
100 ASM_CLAC
98 sfence 101 sfence
99 ret 102 ret
100 103
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index b33b1fb1e6d4..156b9c804670 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -33,6 +33,7 @@
33#include <asm/asm-offsets.h> 33#include <asm/asm-offsets.h>
34#include <asm/thread_info.h> 34#include <asm/thread_info.h>
35#include <asm/asm.h> 35#include <asm/asm.h>
36#include <asm/smap.h>
36 37
37 .text 38 .text
38ENTRY(__get_user_1) 39ENTRY(__get_user_1)
@@ -40,8 +41,10 @@ ENTRY(__get_user_1)
40 GET_THREAD_INFO(%_ASM_DX) 41 GET_THREAD_INFO(%_ASM_DX)
41 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 42 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
42 jae bad_get_user 43 jae bad_get_user
44 ASM_STAC
431: movzb (%_ASM_AX),%edx 451: movzb (%_ASM_AX),%edx
44 xor %eax,%eax 46 xor %eax,%eax
47 ASM_CLAC
45 ret 48 ret
46 CFI_ENDPROC 49 CFI_ENDPROC
47ENDPROC(__get_user_1) 50ENDPROC(__get_user_1)
@@ -53,8 +56,10 @@ ENTRY(__get_user_2)
53 GET_THREAD_INFO(%_ASM_DX) 56 GET_THREAD_INFO(%_ASM_DX)
54 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 57 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
55 jae bad_get_user 58 jae bad_get_user
59 ASM_STAC
562: movzwl -1(%_ASM_AX),%edx 602: movzwl -1(%_ASM_AX),%edx
57 xor %eax,%eax 61 xor %eax,%eax
62 ASM_CLAC
58 ret 63 ret
59 CFI_ENDPROC 64 CFI_ENDPROC
60ENDPROC(__get_user_2) 65ENDPROC(__get_user_2)
@@ -66,8 +71,10 @@ ENTRY(__get_user_4)
66 GET_THREAD_INFO(%_ASM_DX) 71 GET_THREAD_INFO(%_ASM_DX)
67 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 72 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
68 jae bad_get_user 73 jae bad_get_user
74 ASM_STAC
693: mov -3(%_ASM_AX),%edx 753: mov -3(%_ASM_AX),%edx
70 xor %eax,%eax 76 xor %eax,%eax
77 ASM_CLAC
71 ret 78 ret
72 CFI_ENDPROC 79 CFI_ENDPROC
73ENDPROC(__get_user_4) 80ENDPROC(__get_user_4)
@@ -80,8 +87,10 @@ ENTRY(__get_user_8)
80 GET_THREAD_INFO(%_ASM_DX) 87 GET_THREAD_INFO(%_ASM_DX)
81 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 88 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
82 jae bad_get_user 89 jae bad_get_user
90 ASM_STAC
834: movq -7(%_ASM_AX),%_ASM_DX 914: movq -7(%_ASM_AX),%_ASM_DX
84 xor %eax,%eax 92 xor %eax,%eax
93 ASM_CLAC
85 ret 94 ret
86 CFI_ENDPROC 95 CFI_ENDPROC
87ENDPROC(__get_user_8) 96ENDPROC(__get_user_8)
@@ -91,6 +100,7 @@ bad_get_user:
91 CFI_STARTPROC 100 CFI_STARTPROC
92 xor %edx,%edx 101 xor %edx,%edx
93 mov $(-EFAULT),%_ASM_AX 102 mov $(-EFAULT),%_ASM_AX
103 ASM_CLAC
94 ret 104 ret
95 CFI_ENDPROC 105 CFI_ENDPROC
96END(bad_get_user) 106END(bad_get_user)
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index b1e6c4b2e8eb..54fcffed28ed 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -18,7 +18,11 @@
18 * Copyright (C) IBM Corporation, 2002, 2004, 2009 18 * Copyright (C) IBM Corporation, 2002, 2004, 2009
19 */ 19 */
20 20
21#ifdef __KERNEL__
21#include <linux/string.h> 22#include <linux/string.h>
23#else
24#include <string.h>
25#endif
22#include <asm/inat.h> 26#include <asm/inat.h>
23#include <asm/insn.h> 27#include <asm/insn.h>
24 28
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 7f951c8f76c4..fc6ba17a7eec 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -15,6 +15,7 @@
15#include <asm/thread_info.h> 15#include <asm/thread_info.h>
16#include <asm/errno.h> 16#include <asm/errno.h>
17#include <asm/asm.h> 17#include <asm/asm.h>
18#include <asm/smap.h>
18 19
19 20
20/* 21/*
@@ -31,7 +32,8 @@
31 32
32#define ENTER CFI_STARTPROC ; \ 33#define ENTER CFI_STARTPROC ; \
33 GET_THREAD_INFO(%_ASM_BX) 34 GET_THREAD_INFO(%_ASM_BX)
34#define EXIT ret ; \ 35#define EXIT ASM_CLAC ; \
36 ret ; \
35 CFI_ENDPROC 37 CFI_ENDPROC
36 38
37.text 39.text
@@ -39,6 +41,7 @@ ENTRY(__put_user_1)
39 ENTER 41 ENTER
40 cmp TI_addr_limit(%_ASM_BX),%_ASM_CX 42 cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
41 jae bad_put_user 43 jae bad_put_user
44 ASM_STAC
421: movb %al,(%_ASM_CX) 451: movb %al,(%_ASM_CX)
43 xor %eax,%eax 46 xor %eax,%eax
44 EXIT 47 EXIT
@@ -50,6 +53,7 @@ ENTRY(__put_user_2)
50 sub $1,%_ASM_BX 53 sub $1,%_ASM_BX
51 cmp %_ASM_BX,%_ASM_CX 54 cmp %_ASM_BX,%_ASM_CX
52 jae bad_put_user 55 jae bad_put_user
56 ASM_STAC
532: movw %ax,(%_ASM_CX) 572: movw %ax,(%_ASM_CX)
54 xor %eax,%eax 58 xor %eax,%eax
55 EXIT 59 EXIT
@@ -61,6 +65,7 @@ ENTRY(__put_user_4)
61 sub $3,%_ASM_BX 65 sub $3,%_ASM_BX
62 cmp %_ASM_BX,%_ASM_CX 66 cmp %_ASM_BX,%_ASM_CX
63 jae bad_put_user 67 jae bad_put_user
68 ASM_STAC
643: movl %eax,(%_ASM_CX) 693: movl %eax,(%_ASM_CX)
65 xor %eax,%eax 70 xor %eax,%eax
66 EXIT 71 EXIT
@@ -72,6 +77,7 @@ ENTRY(__put_user_8)
72 sub $7,%_ASM_BX 77 sub $7,%_ASM_BX
73 cmp %_ASM_BX,%_ASM_CX 78 cmp %_ASM_BX,%_ASM_CX
74 jae bad_put_user 79 jae bad_put_user
80 ASM_STAC
754: mov %_ASM_AX,(%_ASM_CX) 814: mov %_ASM_AX,(%_ASM_CX)
76#ifdef CONFIG_X86_32 82#ifdef CONFIG_X86_32
775: movl %edx,4(%_ASM_CX) 835: movl %edx,4(%_ASM_CX)
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 1781b2f950e2..98f6d6b68f5a 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -42,10 +42,11 @@ do { \
42 int __d0; \ 42 int __d0; \
43 might_fault(); \ 43 might_fault(); \
44 __asm__ __volatile__( \ 44 __asm__ __volatile__( \
45 ASM_STAC "\n" \
45 "0: rep; stosl\n" \ 46 "0: rep; stosl\n" \
46 " movl %2,%0\n" \ 47 " movl %2,%0\n" \
47 "1: rep; stosb\n" \ 48 "1: rep; stosb\n" \
48 "2:\n" \ 49 "2: " ASM_CLAC "\n" \
49 ".section .fixup,\"ax\"\n" \ 50 ".section .fixup,\"ax\"\n" \
50 "3: lea 0(%2,%0,4),%0\n" \ 51 "3: lea 0(%2,%0,4),%0\n" \
51 " jmp 2b\n" \ 52 " jmp 2b\n" \
@@ -626,10 +627,12 @@ survive:
626 return n; 627 return n;
627 } 628 }
628#endif 629#endif
630 stac();
629 if (movsl_is_ok(to, from, n)) 631 if (movsl_is_ok(to, from, n))
630 __copy_user(to, from, n); 632 __copy_user(to, from, n);
631 else 633 else
632 n = __copy_user_intel(to, from, n); 634 n = __copy_user_intel(to, from, n);
635 clac();
633 return n; 636 return n;
634} 637}
635EXPORT_SYMBOL(__copy_to_user_ll); 638EXPORT_SYMBOL(__copy_to_user_ll);
@@ -637,10 +640,12 @@ EXPORT_SYMBOL(__copy_to_user_ll);
637unsigned long __copy_from_user_ll(void *to, const void __user *from, 640unsigned long __copy_from_user_ll(void *to, const void __user *from,
638 unsigned long n) 641 unsigned long n)
639{ 642{
643 stac();
640 if (movsl_is_ok(to, from, n)) 644 if (movsl_is_ok(to, from, n))
641 __copy_user_zeroing(to, from, n); 645 __copy_user_zeroing(to, from, n);
642 else 646 else
643 n = __copy_user_zeroing_intel(to, from, n); 647 n = __copy_user_zeroing_intel(to, from, n);
648 clac();
644 return n; 649 return n;
645} 650}
646EXPORT_SYMBOL(__copy_from_user_ll); 651EXPORT_SYMBOL(__copy_from_user_ll);
@@ -648,11 +653,13 @@ EXPORT_SYMBOL(__copy_from_user_ll);
648unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, 653unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
649 unsigned long n) 654 unsigned long n)
650{ 655{
656 stac();
651 if (movsl_is_ok(to, from, n)) 657 if (movsl_is_ok(to, from, n))
652 __copy_user(to, from, n); 658 __copy_user(to, from, n);
653 else 659 else
654 n = __copy_user_intel((void __user *)to, 660 n = __copy_user_intel((void __user *)to,
655 (const void *)from, n); 661 (const void *)from, n);
662 clac();
656 return n; 663 return n;
657} 664}
658EXPORT_SYMBOL(__copy_from_user_ll_nozero); 665EXPORT_SYMBOL(__copy_from_user_ll_nozero);
@@ -660,6 +667,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero);
660unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, 667unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
661 unsigned long n) 668 unsigned long n)
662{ 669{
670 stac();
663#ifdef CONFIG_X86_INTEL_USERCOPY 671#ifdef CONFIG_X86_INTEL_USERCOPY
664 if (n > 64 && cpu_has_xmm2) 672 if (n > 64 && cpu_has_xmm2)
665 n = __copy_user_zeroing_intel_nocache(to, from, n); 673 n = __copy_user_zeroing_intel_nocache(to, from, n);
@@ -668,6 +676,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
668#else 676#else
669 __copy_user_zeroing(to, from, n); 677 __copy_user_zeroing(to, from, n);
670#endif 678#endif
679 clac();
671 return n; 680 return n;
672} 681}
673EXPORT_SYMBOL(__copy_from_user_ll_nocache); 682EXPORT_SYMBOL(__copy_from_user_ll_nocache);
@@ -675,6 +684,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache);
675unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, 684unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
676 unsigned long n) 685 unsigned long n)
677{ 686{
687 stac();
678#ifdef CONFIG_X86_INTEL_USERCOPY 688#ifdef CONFIG_X86_INTEL_USERCOPY
679 if (n > 64 && cpu_has_xmm2) 689 if (n > 64 && cpu_has_xmm2)
680 n = __copy_user_intel_nocache(to, from, n); 690 n = __copy_user_intel_nocache(to, from, n);
@@ -683,6 +693,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
683#else 693#else
684 __copy_user(to, from, n); 694 __copy_user(to, from, n);
685#endif 695#endif
696 clac();
686 return n; 697 return n;
687} 698}
688EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); 699EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index e5b130bc2d0e..05928aae911e 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -18,6 +18,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
18 might_fault(); 18 might_fault();
19 /* no memory constraint because it doesn't change any memory gcc knows 19 /* no memory constraint because it doesn't change any memory gcc knows
20 about */ 20 about */
21 stac();
21 asm volatile( 22 asm volatile(
22 " testq %[size8],%[size8]\n" 23 " testq %[size8],%[size8]\n"
23 " jz 4f\n" 24 " jz 4f\n"
@@ -40,6 +41,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
40 : [size8] "=&c"(size), [dst] "=&D" (__d0) 41 : [size8] "=&c"(size), [dst] "=&D" (__d0)
41 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), 42 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
42 [zero] "r" (0UL), [eight] "r" (8UL)); 43 [zero] "r" (0UL), [eight] "r" (8UL));
44 clac();
43 return size; 45 return size;
44} 46}
45EXPORT_SYMBOL(__clear_user); 47EXPORT_SYMBOL(__clear_user);
@@ -82,5 +84,6 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
82 for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) 84 for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
83 if (__put_user_nocheck(c, to++, sizeof(char))) 85 if (__put_user_nocheck(c, to++, sizeof(char)))
84 break; 86 break;
87 clac();
85 return len; 88 return len;
86} 89}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 76dcd9d8e0bc..a530b230e7d7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
18#include <asm/pgalloc.h> /* pgd_*(), ... */ 18#include <asm/pgalloc.h> /* pgd_*(), ... */
19#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ 19#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
20#include <asm/fixmap.h> /* VSYSCALL_START */ 20#include <asm/fixmap.h> /* VSYSCALL_START */
21#include <asm/rcu.h> /* exception_enter(), ... */
21 22
22/* 23/*
23 * Page fault error code bits: 24 * Page fault error code bits:
@@ -995,13 +996,24 @@ static int fault_in_kernel_space(unsigned long address)
995 return address >= TASK_SIZE_MAX; 996 return address >= TASK_SIZE_MAX;
996} 997}
997 998
999static inline bool smap_violation(int error_code, struct pt_regs *regs)
1000{
1001 if (error_code & PF_USER)
1002 return false;
1003
1004 if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
1005 return false;
1006
1007 return true;
1008}
1009
998/* 1010/*
999 * This routine handles page faults. It determines the address, 1011 * This routine handles page faults. It determines the address,
1000 * and the problem, and then passes it off to one of the appropriate 1012 * and the problem, and then passes it off to one of the appropriate
1001 * routines. 1013 * routines.
1002 */ 1014 */
1003dotraplinkage void __kprobes 1015static void __kprobes
1004do_page_fault(struct pt_regs *regs, unsigned long error_code) 1016__do_page_fault(struct pt_regs *regs, unsigned long error_code)
1005{ 1017{
1006 struct vm_area_struct *vma; 1018 struct vm_area_struct *vma;
1007 struct task_struct *tsk; 1019 struct task_struct *tsk;
@@ -1088,6 +1100,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1088 if (unlikely(error_code & PF_RSVD)) 1100 if (unlikely(error_code & PF_RSVD))
1089 pgtable_bad(regs, error_code, address); 1101 pgtable_bad(regs, error_code, address);
1090 1102
1103 if (static_cpu_has(X86_FEATURE_SMAP)) {
1104 if (unlikely(smap_violation(error_code, regs))) {
1105 bad_area_nosemaphore(regs, error_code, address);
1106 return;
1107 }
1108 }
1109
1091 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 1110 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1092 1111
1093 /* 1112 /*
@@ -1209,3 +1228,11 @@ good_area:
1209 1228
1210 up_read(&mm->mmap_sem); 1229 up_read(&mm->mmap_sem);
1211} 1230}
1231
1232dotraplinkage void __kprobes
1233do_page_fault(struct pt_regs *regs, unsigned long error_code)
1234{
1235 exception_enter(regs);
1236 __do_page_fault(regs, error_code);
1237 exception_exit(regs);
1238}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 575d86f85ce4..11a58001b4ce 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -445,10 +445,10 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
445} 445}
446#endif /* CONFIG_HIGHMEM */ 446#endif /* CONFIG_HIGHMEM */
447 447
448void __init native_pagetable_setup_start(pgd_t *base) 448void __init native_pagetable_init(void)
449{ 449{
450 unsigned long pfn, va; 450 unsigned long pfn, va;
451 pgd_t *pgd; 451 pgd_t *pgd, *base = swapper_pg_dir;
452 pud_t *pud; 452 pud_t *pud;
453 pmd_t *pmd; 453 pmd_t *pmd;
454 pte_t *pte; 454 pte_t *pte;
@@ -475,10 +475,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
475 pte_clear(NULL, va, pte); 475 pte_clear(NULL, va, pte);
476 } 476 }
477 paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); 477 paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
478} 478 paging_init();
479
480void __init native_pagetable_setup_done(pgd_t *base)
481{
482} 479}
483 480
484/* 481/*
@@ -493,7 +490,7 @@ void __init native_pagetable_setup_done(pgd_t *base)
493 * If we're booting paravirtualized under a hypervisor, then there are 490 * If we're booting paravirtualized under a hypervisor, then there are
494 * more options: we may already be running PAE, and the pagetable may 491 * more options: we may already be running PAE, and the pagetable may
495 * or may not be based in swapper_pg_dir. In any case, 492 * or may not be based in swapper_pg_dir. In any case,
496 * paravirt_pagetable_setup_start() will set up swapper_pg_dir 493 * paravirt_pagetable_init() will set up swapper_pg_dir
497 * appropriately for the rest of the initialization to work. 494 * appropriately for the rest of the initialization to work.
498 * 495 *
499 * In general, pagetable_init() assumes that the pagetable may already 496 * In general, pagetable_init() assumes that the pagetable may already
@@ -712,7 +709,7 @@ static void __init test_wp_bit(void)
712 "Checking if this processor honours the WP bit even in supervisor mode..."); 709 "Checking if this processor honours the WP bit even in supervisor mode...");
713 710
714 /* Any page-aligned address will do, the test is non-destructive */ 711 /* Any page-aligned address will do, the test is non-destructive */
715 __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); 712 __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_KERNEL_RO);
716 boot_cpu_data.wp_works_ok = do_test_wp_bit(); 713 boot_cpu_data.wp_works_ok = do_test_wp_bit();
717 clear_fixmap(FIX_WP_TEST); 714 clear_fixmap(FIX_WP_TEST);
718 715
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 613cd83e8c0c..0777f042e400 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -98,6 +98,8 @@ static void flush_tlb_func(void *info)
98{ 98{
99 struct flush_tlb_info *f = info; 99 struct flush_tlb_info *f = info;
100 100
101 inc_irq_stat(irq_tlb_count);
102
101 if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) 103 if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
102 return; 104 return;
103 105
@@ -320,7 +322,7 @@ static ssize_t tlbflush_write_file(struct file *file,
320 if (kstrtos8(buf, 0, &shift)) 322 if (kstrtos8(buf, 0, &shift))
321 return -EINVAL; 323 return -EINVAL;
322 324
323 if (shift > 64) 325 if (shift < -1 || shift >= BITS_PER_LONG)
324 return -EINVAL; 326 return -EINVAL;
325 327
326 tlb_flushall_shift = shift; 328 tlb_flushall_shift = shift;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 33643a8bcbbb..520d2bd0b9c5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -280,6 +280,31 @@ void bpf_jit_compile(struct sk_filter *fp)
280 } 280 }
281 EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ 281 EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
282 break; 282 break;
283 case BPF_S_ALU_MOD_X: /* A %= X; */
284 seen |= SEEN_XREG;
285 EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
286 if (pc_ret0 > 0) {
287 /* addrs[pc_ret0 - 1] is start address of target
288 * (addrs[i] - 6) is the address following this jmp
289 * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
290 */
291 EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
292 (addrs[i] - 6));
293 } else {
294 EMIT_COND_JMP(X86_JNE, 2 + 5);
295 CLEAR_A();
296 EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
297 }
298 EMIT2(0x31, 0xd2); /* xor %edx,%edx */
299 EMIT2(0xf7, 0xf3); /* div %ebx */
300 EMIT2(0x89, 0xd0); /* mov %edx,%eax */
301 break;
302 case BPF_S_ALU_MOD_K: /* A %= K; */
303 EMIT2(0x31, 0xd2); /* xor %edx,%edx */
304 EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
305 EMIT2(0xf7, 0xf1); /* div %ecx */
306 EMIT2(0x89, 0xd0); /* mov %edx,%eax */
307 break;
283 case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ 308 case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
284 EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ 309 EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
285 EMIT(K, 4); 310 EMIT(K, 4);
@@ -310,9 +335,18 @@ void bpf_jit_compile(struct sk_filter *fp)
310 EMIT1_off32(0x0d, K); /* or imm32,%eax */ 335 EMIT1_off32(0x0d, K); /* or imm32,%eax */
311 break; 336 break;
312 case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ 337 case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
338 case BPF_S_ALU_XOR_X:
313 seen |= SEEN_XREG; 339 seen |= SEEN_XREG;
314 EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ 340 EMIT2(0x31, 0xd8); /* xor %ebx,%eax */
315 break; 341 break;
342 case BPF_S_ALU_XOR_K: /* A ^= K; */
343 if (K == 0)
344 break;
345 if (is_imm8(K))
346 EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */
347 else
348 EMIT1_off32(0x35, K); /* xor imm32,%eax */
349 break;
316 case BPF_S_ALU_LSH_X: /* A <<= X; */ 350 case BPF_S_ALU_LSH_X: /* A <<= X; */
317 seen |= SEEN_XREG; 351 seen |= SEEN_XREG;
318 EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ 352 EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 505acdd6d600..192397c98606 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -305,7 +305,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
305 res->flags = flags; 305 res->flags = flags;
306 res->start = start; 306 res->start = start;
307 res->end = end; 307 res->end = end;
308 res->child = NULL;
309 308
310 if (!pci_use_crs) { 309 if (!pci_use_crs) {
311 dev_printk(KERN_DEBUG, &info->bridge->dev, 310 dev_printk(KERN_DEBUG, &info->bridge->dev,
@@ -434,7 +433,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
434 433
435 size = sizeof(*info->res) * info->res_num; 434 size = sizeof(*info->res) * info->res_num;
436 info->res_num = 0; 435 info->res_num = 0;
437 info->res = kmalloc(size, GFP_KERNEL); 436 info->res = kzalloc(size, GFP_KERNEL);
438 if (!info->res) 437 if (!info->res)
439 return; 438 return;
440 439
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 937bcece7006..704b9ec043d7 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
585 while (i >= sizeof(struct acpi_mcfg_allocation)) { 585 while (i >= sizeof(struct acpi_mcfg_allocation)) {
586 entries++; 586 entries++;
587 i -= sizeof(struct acpi_mcfg_allocation); 587 i -= sizeof(struct acpi_mcfg_allocation);
588 }; 588 }
589 if (entries == 0) { 589 if (entries == 0) {
590 pr_err(PREFIX "MMCONFIG has no entries\n"); 590 pr_err(PREFIX "MMCONFIG has no entries\n");
591 return -ENODEV; 591 return -ENODEV;
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 6f2f8eeed171..3e6d2a6db866 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -62,11 +62,6 @@ out:
62 return irq; 62 return irq;
63} 63}
64 64
65void __init pcibios_update_irq(struct pci_dev *dev, int irq)
66{
67 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
68}
69
70int __init pci_visws_init(void) 65int __init pci_visws_init(void)
71{ 66{
72 pcibios_enable_irq = &pci_visws_enable_irq; 67 pcibios_enable_irq = &pci_visws_enable_irq;
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 73b8be0f3675..6db1cc4c7534 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1 +1,2 @@
1obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o 1obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
2obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
new file mode 100644
index 000000000000..f6a0c1b8e518
--- /dev/null
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -0,0 +1,76 @@
1/*
2 * Copyright 2012 Intel Corporation
3 * Author: Josh Triplett <josh@joshtriplett.org>
4 *
5 * Based on the bgrt driver:
6 * Copyright 2012 Red Hat, Inc <mjg@redhat.com>
7 * Author: Matthew Garrett
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13#include <linux/kernel.h>
14#include <linux/acpi.h>
15#include <linux/efi.h>
16#include <linux/efi-bgrt.h>
17
18struct acpi_table_bgrt *bgrt_tab;
19void *bgrt_image;
20size_t bgrt_image_size;
21
22struct bmp_header {
23 u16 id;
24 u32 size;
25} __packed;
26
27void efi_bgrt_init(void)
28{
29 acpi_status status;
30 void __iomem *image;
31 bool ioremapped = false;
32 struct bmp_header bmp_header;
33
34 if (acpi_disabled)
35 return;
36
37 status = acpi_get_table("BGRT", 0,
38 (struct acpi_table_header **)&bgrt_tab);
39 if (ACPI_FAILURE(status))
40 return;
41
42 if (bgrt_tab->version != 1)
43 return;
44 if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
45 return;
46
47 image = efi_lookup_mapped_addr(bgrt_tab->image_address);
48 if (!image) {
49 image = ioremap(bgrt_tab->image_address, sizeof(bmp_header));
50 ioremapped = true;
51 if (!image)
52 return;
53 }
54
55 memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
56 if (ioremapped)
57 iounmap(image);
58 bgrt_image_size = bmp_header.size;
59
60 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
61 if (!bgrt_image)
62 return;
63
64 if (ioremapped) {
65 image = ioremap(bgrt_tab->image_address, bmp_header.size);
66 if (!image) {
67 kfree(bgrt_image);
68 bgrt_image = NULL;
69 return;
70 }
71 }
72
73 memcpy_fromio(bgrt_image, image, bgrt_image_size);
74 if (ioremapped)
75 iounmap(image);
76}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 92660edaa1e7..aded2a91162a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -31,6 +31,7 @@
31#include <linux/kernel.h> 31#include <linux/kernel.h>
32#include <linux/init.h> 32#include <linux/init.h>
33#include <linux/efi.h> 33#include <linux/efi.h>
34#include <linux/efi-bgrt.h>
34#include <linux/export.h> 35#include <linux/export.h>
35#include <linux/bootmem.h> 36#include <linux/bootmem.h>
36#include <linux/memblock.h> 37#include <linux/memblock.h>
@@ -419,10 +420,21 @@ void __init efi_reserve_boot_services(void)
419 } 420 }
420} 421}
421 422
422static void __init efi_free_boot_services(void) 423static void __init efi_unmap_memmap(void)
424{
425 if (memmap.map) {
426 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
427 memmap.map = NULL;
428 }
429}
430
431void __init efi_free_boot_services(void)
423{ 432{
424 void *p; 433 void *p;
425 434
435 if (!efi_native)
436 return;
437
426 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 438 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
427 efi_memory_desc_t *md = p; 439 efi_memory_desc_t *md = p;
428 unsigned long long start = md->phys_addr; 440 unsigned long long start = md->phys_addr;
@@ -438,6 +450,8 @@ static void __init efi_free_boot_services(void)
438 450
439 free_bootmem_late(start, size); 451 free_bootmem_late(start, size);
440 } 452 }
453
454 efi_unmap_memmap();
441} 455}
442 456
443static int __init efi_systab_init(void *phys) 457static int __init efi_systab_init(void *phys)
@@ -732,6 +746,11 @@ void __init efi_init(void)
732#endif 746#endif
733} 747}
734 748
749void __init efi_late_init(void)
750{
751 efi_bgrt_init();
752}
753
735void __init efi_set_executable(efi_memory_desc_t *md, bool executable) 754void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
736{ 755{
737 u64 addr, npages; 756 u64 addr, npages;
@@ -764,6 +783,34 @@ static void __init runtime_code_page_mkexec(void)
764} 783}
765 784
766/* 785/*
786 * We can't ioremap data in EFI boot services RAM, because we've already mapped
787 * it as RAM. So, look it up in the existing EFI memory map instead. Only
788 * callable after efi_enter_virtual_mode and before efi_free_boot_services.
789 */
790void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
791{
792 void *p;
793 if (WARN_ON(!memmap.map))
794 return NULL;
795 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
796 efi_memory_desc_t *md = p;
797 u64 size = md->num_pages << EFI_PAGE_SHIFT;
798 u64 end = md->phys_addr + size;
799 if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
800 md->type != EFI_BOOT_SERVICES_CODE &&
801 md->type != EFI_BOOT_SERVICES_DATA)
802 continue;
803 if (!md->virt_addr)
804 continue;
805 if (phys_addr >= md->phys_addr && phys_addr < end) {
806 phys_addr += md->virt_addr - md->phys_addr;
807 return (__force void __iomem *)(unsigned long)phys_addr;
808 }
809 }
810 return NULL;
811}
812
813/*
767 * This function will switch the EFI runtime services to virtual mode. 814 * This function will switch the EFI runtime services to virtual mode.
768 * Essentially, look through the EFI memmap and map every region that 815 * Essentially, look through the EFI memmap and map every region that
769 * has the runtime attribute bit set in its memory descriptor and update 816 * has the runtime attribute bit set in its memory descriptor and update
@@ -787,8 +834,10 @@ void __init efi_enter_virtual_mode(void)
787 * non-native EFI 834 * non-native EFI
788 */ 835 */
789 836
790 if (!efi_native) 837 if (!efi_native) {
791 goto out; 838 efi_unmap_memmap();
839 return;
840 }
792 841
793 /* Merge contiguous regions of the same type and attribute */ 842 /* Merge contiguous regions of the same type and attribute */
794 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 843 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -878,18 +927,12 @@ void __init efi_enter_virtual_mode(void)
878 } 927 }
879 928
880 /* 929 /*
881 * Thankfully, it does seem that no runtime services other than
882 * SetVirtualAddressMap() will touch boot services code, so we can
883 * get rid of it all at this point
884 */
885 efi_free_boot_services();
886
887 /*
888 * Now that EFI is in virtual mode, update the function 930 * Now that EFI is in virtual mode, update the function
889 * pointers in the runtime service table to the new virtual addresses. 931 * pointers in the runtime service table to the new virtual addresses.
890 * 932 *
891 * Call EFI services through wrapper functions. 933 * Call EFI services through wrapper functions.
892 */ 934 */
935 efi.runtime_version = efi_systab.fw_revision;
893 efi.get_time = virt_efi_get_time; 936 efi.get_time = virt_efi_get_time;
894 efi.set_time = virt_efi_set_time; 937 efi.set_time = virt_efi_set_time;
895 efi.get_wakeup_time = virt_efi_get_wakeup_time; 938 efi.get_wakeup_time = virt_efi_get_wakeup_time;
@@ -906,9 +949,6 @@ void __init efi_enter_virtual_mode(void)
906 if (__supported_pte_mask & _PAGE_NX) 949 if (__supported_pte_mask & _PAGE_NX)
907 runtime_code_page_mkexec(); 950 runtime_code_page_mkexec();
908 951
909out:
910 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
911 memmap.map = NULL;
912 kfree(new_memmap); 952 kfree(new_memmap);
913} 953}
914 954
diff --git a/arch/x86/realmode/rm/wakeup.h b/arch/x86/realmode/rm/wakeup.h
index 9317e0042f24..7dd86a419f5d 100644
--- a/arch/x86/realmode/rm/wakeup.h
+++ b/arch/x86/realmode/rm/wakeup.h
@@ -36,5 +36,7 @@ extern struct wakeup_header wakeup_header;
36 36
37/* Wakeup behavior bits */ 37/* Wakeup behavior bits */
38#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 38#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0
39#define WAKEUP_BEHAVIOR_RESTORE_CR4 1
40#define WAKEUP_BEHAVIOR_RESTORE_EFER 2
39 41
40#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ 42#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */
diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S
index 8905166b0bbb..e56479e58053 100644
--- a/arch/x86/realmode/rm/wakeup_asm.S
+++ b/arch/x86/realmode/rm/wakeup_asm.S
@@ -74,9 +74,18 @@ ENTRY(wakeup_start)
74 74
75 lidtl wakeup_idt 75 lidtl wakeup_idt
76 76
77 /* Clear the EFLAGS */ 77 /* Clear the EFLAGS but remember if we have EFLAGS.ID */
78 pushl $0 78 movl $X86_EFLAGS_ID, %ecx
79 pushl %ecx
79 popfl 80 popfl
81 pushfl
82 popl %edi
83 pushl $0
84 popfl
85 pushfl
86 popl %edx
87 xorl %edx, %edi
88 andl %ecx, %edi /* %edi is zero iff CPUID & %cr4 are missing */
80 89
81 /* Check header signature... */ 90 /* Check header signature... */
82 movl signature, %eax 91 movl signature, %eax
@@ -93,8 +102,8 @@ ENTRY(wakeup_start)
93 102
94 /* Restore MISC_ENABLE before entering protected mode, in case 103 /* Restore MISC_ENABLE before entering protected mode, in case
95 BIOS decided to clear XD_DISABLE during S3. */ 104 BIOS decided to clear XD_DISABLE during S3. */
96 movl pmode_behavior, %eax 105 movl pmode_behavior, %edi
97 btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax 106 btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %edi
98 jnc 1f 107 jnc 1f
99 108
100 movl pmode_misc_en, %eax 109 movl pmode_misc_en, %eax
@@ -110,15 +119,15 @@ ENTRY(wakeup_start)
110 movl pmode_cr3, %eax 119 movl pmode_cr3, %eax
111 movl %eax, %cr3 120 movl %eax, %cr3
112 121
113 movl pmode_cr4, %ecx 122 btl $WAKEUP_BEHAVIOR_RESTORE_CR4, %edi
114 jecxz 1f 123 jz 1f
115 movl %ecx, %cr4 124 movl pmode_cr4, %eax
125 movl %eax, %cr4
1161: 1261:
127 btl $WAKEUP_BEHAVIOR_RESTORE_EFER, %edi
128 jz 1f
117 movl pmode_efer, %eax 129 movl pmode_efer, %eax
118 movl pmode_efer + 4, %edx 130 movl pmode_efer + 4, %edx
119 movl %eax, %ecx
120 orl %edx, %ecx
121 jz 1f
122 movl $MSR_EFER, %ecx 131 movl $MSR_EFER, %ecx
123 wrmsr 132 wrmsr
1241: 1331:
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index 3236aebc828d..f325af26107c 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -1,7 +1,9 @@
1out := $(obj)/../include/generated/asm 1out := $(obj)/../include/generated/asm
2uapi := $(obj)/../include/generated/uapi/asm
2 3
3# Create output directory if not already present 4# Create output directory if not already present
4_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') 5_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
6 $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')
5 7
6syscall32 := $(srctree)/$(src)/syscall_32.tbl 8syscall32 := $(srctree)/$(src)/syscall_32.tbl
7syscall64 := $(srctree)/$(src)/syscall_64.tbl 9syscall64 := $(srctree)/$(src)/syscall_64.tbl
@@ -18,7 +20,7 @@ quiet_cmd_systbl = SYSTBL $@
18 cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ 20 cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
19 21
20syshdr_abi_unistd_32 := i386 22syshdr_abi_unistd_32 := i386
21$(out)/unistd_32.h: $(syscall32) $(syshdr) 23$(uapi)/unistd_32.h: $(syscall32) $(syshdr)
22 $(call if_changed,syshdr) 24 $(call if_changed,syshdr)
23 25
24syshdr_abi_unistd_32_ia32 := i386 26syshdr_abi_unistd_32_ia32 := i386
@@ -28,11 +30,11 @@ $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr)
28 30
29syshdr_abi_unistd_x32 := common,x32 31syshdr_abi_unistd_x32 := common,x32
30syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT 32syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
31$(out)/unistd_x32.h: $(syscall64) $(syshdr) 33$(uapi)/unistd_x32.h: $(syscall64) $(syshdr)
32 $(call if_changed,syshdr) 34 $(call if_changed,syshdr)
33 35
34syshdr_abi_unistd_64 := common,64 36syshdr_abi_unistd_64 := common,64
35$(out)/unistd_64.h: $(syscall64) $(syshdr) 37$(uapi)/unistd_64.h: $(syscall64) $(syshdr)
36 $(call if_changed,syshdr) 38 $(call if_changed,syshdr)
37 39
38syshdr_abi_unistd_64_x32 := x32 40syshdr_abi_unistd_64_x32 := x32
@@ -45,11 +47,12 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl)
45$(out)/syscalls_64.h: $(syscall64) $(systbl) 47$(out)/syscalls_64.h: $(syscall64) $(systbl)
46 $(call if_changed,systbl) 48 $(call if_changed,systbl)
47 49
48syshdr-y += unistd_32.h unistd_64.h unistd_x32.h 50uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h
49syshdr-y += syscalls_32.h 51syshdr-y += syscalls_32.h
50syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h 52syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h
51syshdr-$(CONFIG_X86_64) += syscalls_64.h 53syshdr-$(CONFIG_X86_64) += syscalls_64.h
52 54
53targets += $(syshdr-y) 55targets += $(uapisyshdr-y) $(syshdr-y)
54 56
55all: $(addprefix $(out)/,$(targets)) 57all: $(addprefix $(uapi)/,$(uapisyshdr-y))
58all: $(addprefix $(out)/,$(syshdr-y))
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index 733057b435b0..bae601f900ef 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -28,7 +28,7 @@ posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
28hostprogs-y += test_get_len insn_sanity 28hostprogs-y += test_get_len insn_sanity
29 29
30# -I needed for generated C source and C source which in the kernel tree. 30# -I needed for generated C source and C source which in the kernel tree.
31HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ 31HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/
32 32
33HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ 33HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
34 34
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index ec57bd3818a4..7005ced5d1ad 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -6,8 +6,9 @@
6 6
7#include <xen/xen.h> 7#include <xen/xen.h>
8#include <xen/interface/physdev.h> 8#include <xen/interface/physdev.h>
9#include "xen-ops.h"
9 10
10unsigned int xen_io_apic_read(unsigned apic, unsigned reg) 11static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
11{ 12{
12 struct physdev_apic apic_op; 13 struct physdev_apic apic_op;
13 int ret; 14 int ret;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1fbe75a95f15..2d932c351f91 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -80,6 +80,8 @@
80#include "smp.h" 80#include "smp.h"
81#include "multicalls.h" 81#include "multicalls.h"
82 82
83#include <xen/events.h>
84
83EXPORT_SYMBOL_GPL(hypercall_page); 85EXPORT_SYMBOL_GPL(hypercall_page);
84 86
85DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); 87DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
@@ -1288,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void)
1288{ 1290{
1289 struct physdev_set_iopl set_iopl; 1291 struct physdev_set_iopl set_iopl;
1290 int rc; 1292 int rc;
1291 pgd_t *pgd;
1292 1293
1293 if (!xen_start_info) 1294 if (!xen_start_info)
1294 return; 1295 return;
@@ -1380,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void)
1380 acpi_numa = -1; 1381 acpi_numa = -1;
1381#endif 1382#endif
1382 1383
1383 pgd = (pgd_t *)xen_start_info->pt_base;
1384
1385 /* Don't do the full vcpu_info placement stuff until we have a 1384 /* Don't do the full vcpu_info placement stuff until we have a
1386 possible map and a non-dummy shared_info. */ 1385 possible map and a non-dummy shared_info. */
1387 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1386 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1390,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void)
1390 early_boot_irqs_disabled = true; 1389 early_boot_irqs_disabled = true;
1391 1390
1392 xen_raw_console_write("mapping kernel into physical memory\n"); 1391 xen_raw_console_write("mapping kernel into physical memory\n");
1393 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1392 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
1394 1393
1395 /* Allocate and initialize top and mid mfn levels for p2m structure */ 1394 /* Allocate and initialize top and mid mfn levels for p2m structure */
1396 xen_build_mfn_list_list(); 1395 xen_build_mfn_list_list();
@@ -1441,11 +1440,19 @@ asmlinkage void __init xen_start_kernel(void)
1441 const struct dom0_vga_console_info *info = 1440 const struct dom0_vga_console_info *info =
1442 (void *)((char *)xen_start_info + 1441 (void *)((char *)xen_start_info +
1443 xen_start_info->console.dom0.info_off); 1442 xen_start_info->console.dom0.info_off);
1443 struct xen_platform_op op = {
1444 .cmd = XENPF_firmware_info,
1445 .interface_version = XENPF_INTERFACE_VERSION,
1446 .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
1447 };
1444 1448
1445 xen_init_vga(info, xen_start_info->console.dom0.info_size); 1449 xen_init_vga(info, xen_start_info->console.dom0.info_size);
1446 xen_start_info->console.domU.mfn = 0; 1450 xen_start_info->console.domU.mfn = 0;
1447 xen_start_info->console.domU.evtchn = 0; 1451 xen_start_info->console.domU.evtchn = 0;
1448 1452
1453 if (HYPERVISOR_dom0_op(&op) == 0)
1454 boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;
1455
1449 xen_init_apic(); 1456 xen_init_apic();
1450 1457
1451 /* Make sure ACS will be enabled */ 1458 /* Make sure ACS will be enabled */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5141d808e751..5a16824cc2b3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -84,6 +84,7 @@
84 */ 84 */
85DEFINE_SPINLOCK(xen_reservation_lock); 85DEFINE_SPINLOCK(xen_reservation_lock);
86 86
87#ifdef CONFIG_X86_32
87/* 88/*
88 * Identity map, in addition to plain kernel map. This needs to be 89 * Identity map, in addition to plain kernel map. This needs to be
89 * large enough to allocate page table pages to allocate the rest. 90 * large enough to allocate page table pages to allocate the rest.
@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
91 */ 92 */
92#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) 93#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
93static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); 94static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
94 95#endif
95#ifdef CONFIG_X86_64 96#ifdef CONFIG_X86_64
96/* l3 pud for userspace vsyscall mapping */ 97/* l3 pud for userspace vsyscall mapping */
97static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; 98static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
@@ -1174,9 +1175,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
1174 spin_unlock(&mm->page_table_lock); 1175 spin_unlock(&mm->page_table_lock);
1175} 1176}
1176 1177
1177static void __init xen_pagetable_setup_start(pgd_t *base) 1178static void xen_post_allocator_init(void);
1178{
1179}
1180 1179
1181static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) 1180static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1182{ 1181{
@@ -1192,14 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1192 } 1191 }
1193} 1192}
1194 1193
1195static void xen_post_allocator_init(void); 1194#ifdef CONFIG_X86_64
1195static void __init xen_cleanhighmap(unsigned long vaddr,
1196 unsigned long vaddr_end)
1197{
1198 unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
1199 pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
1196 1200
1197static void __init xen_pagetable_setup_done(pgd_t *base) 1201 /* NOTE: The loop is more greedy than the cleanup_highmap variant.
1202 * We include the PMD passed in on _both_ boundaries. */
1203 for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
1204 pmd++, vaddr += PMD_SIZE) {
1205 if (pmd_none(*pmd))
1206 continue;
1207 if (vaddr < (unsigned long) _text || vaddr > kernel_end)
1208 set_pmd(pmd, __pmd(0));
1209 }
1210 /* In case we did something silly, we should crash in this function
1211 * instead of somewhere later and be confusing. */
1212 xen_mc_flush();
1213}
1214#endif
1215static void __init xen_pagetable_init(void)
1198{ 1216{
1217#ifdef CONFIG_X86_64
1218 unsigned long size;
1219 unsigned long addr;
1220#endif
1221 paging_init();
1199 xen_setup_shared_info(); 1222 xen_setup_shared_info();
1223#ifdef CONFIG_X86_64
1224 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1225 unsigned long new_mfn_list;
1226
1227 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1228
1229 /* On 32-bit, we get zero so this never gets executed. */
1230 new_mfn_list = xen_revector_p2m_tree();
1231 if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
1232 /* using __ka address and sticking INVALID_P2M_ENTRY! */
1233 memset((void *)xen_start_info->mfn_list, 0xff, size);
1234
1235 /* We should be in __ka space. */
1236 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
1237 addr = xen_start_info->mfn_list;
1238 /* We roundup to the PMD, which means that if anybody at this stage is
1239 * using the __ka address of xen_start_info or xen_start_info->shared_info
1240 * they are in going to crash. Fortunatly we have already revectored
1241 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
1242 size = roundup(size, PMD_SIZE);
1243 xen_cleanhighmap(addr, addr + size);
1244
1245 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1246 memblock_free(__pa(xen_start_info->mfn_list), size);
1247 /* And revector! Bye bye old array */
1248 xen_start_info->mfn_list = new_mfn_list;
1249 } else
1250 goto skip;
1251 }
1252 /* At this stage, cleanup_highmap has already cleaned __ka space
1253 * from _brk_limit way up to the max_pfn_mapped (which is the end of
1254 * the ramdisk). We continue on, erasing PMD entries that point to page
1255 * tables - do note that they are accessible at this stage via __va.
1256 * For good measure we also round up to the PMD - which means that if
1257 * anybody is using __ka address to the initial boot-stack - and try
1258 * to use it - they are going to crash. The xen_start_info has been
1259 * taken care of already in xen_setup_kernel_pagetable. */
1260 addr = xen_start_info->pt_base;
1261 size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
1262
1263 xen_cleanhighmap(addr, addr + size);
1264 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
1265#ifdef DEBUG
1266 /* This is superflous and is not neccessary, but you know what
1267 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
1268 * anything at this stage. */
1269 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1270#endif
1271skip:
1272#endif
1200 xen_post_allocator_init(); 1273 xen_post_allocator_init();
1201} 1274}
1202
1203static void xen_write_cr2(unsigned long cr2) 1275static void xen_write_cr2(unsigned long cr2)
1204{ 1276{
1205 this_cpu_read(xen_vcpu)->arch.cr2 = cr2; 1277 this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1655,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
1655 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) 1727 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
1656 BUG(); 1728 BUG();
1657} 1729}
1658 1730#ifdef CONFIG_X86_32
1659static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) 1731static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1660{ 1732{
1661 unsigned pmdidx, pteidx; 1733 unsigned pmdidx, pteidx;
@@ -1706,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1706 1778
1707 set_page_prot(pmd, PAGE_KERNEL_RO); 1779 set_page_prot(pmd, PAGE_KERNEL_RO);
1708} 1780}
1709 1781#endif
1710void __init xen_setup_machphys_mapping(void) 1782void __init xen_setup_machphys_mapping(void)
1711{ 1783{
1712 struct xen_machphys_mapping mapping; 1784 struct xen_machphys_mapping mapping;
@@ -1734,7 +1806,20 @@ static void convert_pfn_mfn(void *v)
1734 for (i = 0; i < PTRS_PER_PTE; i++) 1806 for (i = 0; i < PTRS_PER_PTE; i++)
1735 pte[i] = xen_make_pte(pte[i].pte); 1807 pte[i] = xen_make_pte(pte[i].pte);
1736} 1808}
1737 1809static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1810 unsigned long addr)
1811{
1812 if (*pt_base == PFN_DOWN(__pa(addr))) {
1813 set_page_prot((void *)addr, PAGE_KERNEL);
1814 clear_page((void *)addr);
1815 (*pt_base)++;
1816 }
1817 if (*pt_end == PFN_DOWN(__pa(addr))) {
1818 set_page_prot((void *)addr, PAGE_KERNEL);
1819 clear_page((void *)addr);
1820 (*pt_end)--;
1821 }
1822}
1738/* 1823/*
1739 * Set up the initial kernel pagetable. 1824 * Set up the initial kernel pagetable.
1740 * 1825 *
@@ -1746,11 +1831,13 @@ static void convert_pfn_mfn(void *v)
1746 * of the physical mapping once some sort of allocator has been set 1831 * of the physical mapping once some sort of allocator has been set
1747 * up. 1832 * up.
1748 */ 1833 */
1749pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 1834void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1750 unsigned long max_pfn)
1751{ 1835{
1752 pud_t *l3; 1836 pud_t *l3;
1753 pmd_t *l2; 1837 pmd_t *l2;
1838 unsigned long addr[3];
1839 unsigned long pt_base, pt_end;
1840 unsigned i;
1754 1841
1755 /* max_pfn_mapped is the last pfn mapped in the initial memory 1842 /* max_pfn_mapped is the last pfn mapped in the initial memory
1756 * mappings. Considering that on Xen after the kernel mappings we 1843 * mappings. Considering that on Xen after the kernel mappings we
@@ -1758,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1758 * set max_pfn_mapped to the last real pfn mapped. */ 1845 * set max_pfn_mapped to the last real pfn mapped. */
1759 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); 1846 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
1760 1847
1848 pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
1849 pt_end = pt_base + xen_start_info->nr_pt_frames;
1850
1761 /* Zap identity mapping */ 1851 /* Zap identity mapping */
1762 init_level4_pgt[0] = __pgd(0); 1852 init_level4_pgt[0] = __pgd(0);
1763 1853
1764 /* Pre-constructed entries are in pfn, so convert to mfn */ 1854 /* Pre-constructed entries are in pfn, so convert to mfn */
1855 /* L4[272] -> level3_ident_pgt
1856 * L4[511] -> level3_kernel_pgt */
1765 convert_pfn_mfn(init_level4_pgt); 1857 convert_pfn_mfn(init_level4_pgt);
1858
1859 /* L3_i[0] -> level2_ident_pgt */
1766 convert_pfn_mfn(level3_ident_pgt); 1860 convert_pfn_mfn(level3_ident_pgt);
1861 /* L3_k[510] -> level2_kernel_pgt
1862 * L3_i[511] -> level2_fixmap_pgt */
1767 convert_pfn_mfn(level3_kernel_pgt); 1863 convert_pfn_mfn(level3_kernel_pgt);
1768 1864
1865 /* We get [511][511] and have Xen's version of level2_kernel_pgt */
1769 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); 1866 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1770 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); 1867 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1771 1868
1772 memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1869 addr[0] = (unsigned long)pgd;
1773 memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1870 addr[1] = (unsigned long)l3;
1774 1871 addr[2] = (unsigned long)l2;
1872 /* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
1873 * Both L4[272][0] and L4[511][511] have entries that point to the same
1874 * L2 (PMD) tables. Meaning that if you modify it in __va space
1875 * it will be also modified in the __ka space! (But if you just
1876 * modify the PMD table to point to other PTE's or none, then you
1877 * are OK - which is what cleanup_highmap does) */
1878 copy_page(level2_ident_pgt, l2);
1879 /* Graft it onto L4[511][511] */
1880 copy_page(level2_kernel_pgt, l2);
1881
1882 /* Get [511][510] and graft that in level2_fixmap_pgt */
1775 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); 1883 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
1776 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); 1884 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
1777 memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1885 copy_page(level2_fixmap_pgt, l2);
1778 1886 /* Note that we don't do anything with level1_fixmap_pgt which
1779 /* Set up identity map */ 1887 * we don't need. */
1780 xen_map_identity_early(level2_ident_pgt, max_pfn);
1781 1888
1782 /* Make pagetable pieces RO */ 1889 /* Make pagetable pieces RO */
1783 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); 1890 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1784 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); 1891 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1785 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); 1892 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1786 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); 1893 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1894 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1787 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1895 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1788 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1896 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1789 1897
@@ -1794,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1794 /* Unpin Xen-provided one */ 1902 /* Unpin Xen-provided one */
1795 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 1903 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1796 1904
1797 /* Switch over */
1798 pgd = init_level4_pgt;
1799
1800 /* 1905 /*
1801 * At this stage there can be no user pgd, and no page 1906 * At this stage there can be no user pgd, and no page
1802 * structure to attach it to, so make sure we just set kernel 1907 * structure to attach it to, so make sure we just set kernel
1803 * pgd. 1908 * pgd.
1804 */ 1909 */
1805 xen_mc_batch(); 1910 xen_mc_batch();
1806 __xen_write_cr3(true, __pa(pgd)); 1911 __xen_write_cr3(true, __pa(init_level4_pgt));
1807 xen_mc_issue(PARAVIRT_LAZY_CPU); 1912 xen_mc_issue(PARAVIRT_LAZY_CPU);
1808 1913
1809 memblock_reserve(__pa(xen_start_info->pt_base), 1914 /* We can't that easily rip out L3 and L2, as the Xen pagetables are
1810 xen_start_info->nr_pt_frames * PAGE_SIZE); 1915 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
1916 * the initial domain. For guests using the toolstack, they are in:
1917 * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
1918 * rip out the [L4] (pgd), but for guests we shave off three pages.
1919 */
1920 for (i = 0; i < ARRAY_SIZE(addr); i++)
1921 check_pt_base(&pt_base, &pt_end, addr[i]);
1811 1922
1812 return pgd; 1923 /* Our (by three pages) smaller Xen pagetable that we are using */
1924 memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
1925 /* Revector the xen_start_info */
1926 xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
1813} 1927}
1814#else /* !CONFIG_X86_64 */ 1928#else /* !CONFIG_X86_64 */
1815static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); 1929static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
@@ -1834,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
1834 */ 1948 */
1835 swapper_kernel_pmd = 1949 swapper_kernel_pmd =
1836 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); 1950 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
1837 memcpy(swapper_kernel_pmd, initial_kernel_pmd, 1951 copy_page(swapper_kernel_pmd, initial_kernel_pmd);
1838 sizeof(pmd_t) * PTRS_PER_PMD);
1839 swapper_pg_dir[KERNEL_PGD_BOUNDARY] = 1952 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
1840 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); 1953 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
1841 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); 1954 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
@@ -1852,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
1852 pv_mmu_ops.write_cr3 = &xen_write_cr3; 1965 pv_mmu_ops.write_cr3 = &xen_write_cr3;
1853} 1966}
1854 1967
1855pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 1968void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1856 unsigned long max_pfn)
1857{ 1969{
1858 pmd_t *kernel_pmd; 1970 pmd_t *kernel_pmd;
1859 1971
@@ -1865,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1865 512*1024); 1977 512*1024);
1866 1978
1867 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 1979 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1868 memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 1980 copy_page(initial_kernel_pmd, kernel_pmd);
1869 1981
1870 xen_map_identity_early(initial_kernel_pmd, max_pfn); 1982 xen_map_identity_early(initial_kernel_pmd, max_pfn);
1871 1983
1872 memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); 1984 copy_page(initial_page_table, pgd);
1873 initial_page_table[KERNEL_PGD_BOUNDARY] = 1985 initial_page_table[KERNEL_PGD_BOUNDARY] =
1874 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); 1986 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
1875 1987
@@ -1885,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1885 1997
1886 memblock_reserve(__pa(xen_start_info->pt_base), 1998 memblock_reserve(__pa(xen_start_info->pt_base),
1887 xen_start_info->nr_pt_frames * PAGE_SIZE); 1999 xen_start_info->nr_pt_frames * PAGE_SIZE);
1888
1889 return initial_page_table;
1890} 2000}
1891#endif /* CONFIG_X86_64 */ 2001#endif /* CONFIG_X86_64 */
1892 2002
@@ -2068,8 +2178,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2068void __init xen_init_mmu_ops(void) 2178void __init xen_init_mmu_ops(void)
2069{ 2179{
2070 x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; 2180 x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
2071 x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; 2181 x86_init.paging.pagetable_init = xen_pagetable_init;
2072 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
2073 pv_mmu_ops = xen_mmu_ops; 2182 pv_mmu_ops = xen_mmu_ops;
2074 2183
2075 memset(dummy_mapping, 0xff, PAGE_SIZE); 2184 memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2337,6 +2446,9 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
2337 unsigned long range; 2446 unsigned long range;
2338 int err = 0; 2447 int err = 0;
2339 2448
2449 if (xen_feature(XENFEAT_auto_translated_physmap))
2450 return -EINVAL;
2451
2340 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); 2452 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
2341 2453
2342 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == 2454 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
@@ -2355,8 +2467,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
2355 if (err) 2467 if (err)
2356 goto out; 2468 goto out;
2357 2469
2358 err = -EFAULT; 2470 err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
2359 if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) 2471 if (err < 0)
2360 goto out; 2472 goto out;
2361 2473
2362 nr -= batch; 2474 nr -= batch;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 72213da605f5..95fb2aa5927e 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -22,7 +22,7 @@
22 * 22 *
23 * P2M_PER_PAGE depends on the architecture, as a mfn is always 23 * P2M_PER_PAGE depends on the architecture, as a mfn is always
24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to 24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
25 * 512 and 1024 entries respectively. 25 * 512 and 1024 entries respectively.
26 * 26 *
27 * In short, these structures contain the Machine Frame Number (MFN) of the PFN. 27 * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
28 * 28 *
@@ -139,11 +139,11 @@
139 * / | ~0, ~0, .... | 139 * / | ~0, ~0, .... |
140 * | \---------------/ 140 * | \---------------/
141 * | 141 * |
142 * p2m_missing p2m_missing 142 * p2m_mid_missing p2m_missing
143 * /------------------\ /------------\ 143 * /-----------------\ /------------\
144 * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | 144 * | [p2m_missing] +---->| ~0, ~0, ~0 |
145 * | [p2m_mid_missing]+---->| ..., ~0 | 145 * | [p2m_missing] +---->| ..., ~0 |
146 * \------------------/ \------------/ 146 * \-----------------/ \------------/
147 * 147 *
148 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) 148 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
149 */ 149 */
@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void)
396 396
397 m2p_override_init(); 397 m2p_override_init();
398} 398}
399#ifdef CONFIG_X86_64
400#include <linux/bootmem.h>
401unsigned long __init xen_revector_p2m_tree(void)
402{
403 unsigned long va_start;
404 unsigned long va_end;
405 unsigned long pfn;
406 unsigned long pfn_free = 0;
407 unsigned long *mfn_list = NULL;
408 unsigned long size;
409
410 va_start = xen_start_info->mfn_list;
411 /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
412 * so make sure it is rounded up to that */
413 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
414 va_end = va_start + size;
415
416 /* If we were revectored already, don't do it again. */
417 if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
418 return 0;
419
420 mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
421 if (!mfn_list) {
422 pr_warn("Could not allocate space for a new P2M tree!\n");
423 return xen_start_info->mfn_list;
424 }
425 /* Fill it out with INVALID_P2M_ENTRY value */
426 memset(mfn_list, 0xFF, size);
427
428 for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
429 unsigned topidx = p2m_top_index(pfn);
430 unsigned mididx;
431 unsigned long *mid_p;
432
433 if (!p2m_top[topidx])
434 continue;
435
436 if (p2m_top[topidx] == p2m_mid_missing)
437 continue;
438
439 mididx = p2m_mid_index(pfn);
440 mid_p = p2m_top[topidx][mididx];
441 if (!mid_p)
442 continue;
443 if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
444 continue;
445
446 if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
447 continue;
448
449 /* The old va. Rebase it on mfn_list */
450 if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
451 unsigned long *new;
452
453 if (pfn_free > (size / sizeof(unsigned long))) {
454 WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
455 size / sizeof(unsigned long), pfn_free);
456 return 0;
457 }
458 new = &mfn_list[pfn_free];
459
460 copy_page(new, mid_p);
461 p2m_top[topidx][mididx] = &mfn_list[pfn_free];
462 p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
463
464 pfn_free += P2M_PER_PAGE;
399 465
466 }
467 /* This should be the leafs allocated for identity from _brk. */
468 }
469 return (unsigned long)mfn_list;
470
471}
472#else
473unsigned long __init xen_revector_p2m_tree(void)
474{
475 return 0;
476}
477#endif
400unsigned long get_phys_to_machine(unsigned long pfn) 478unsigned long get_phys_to_machine(unsigned long pfn)
401{ 479{
402 unsigned topidx, mididx, idx; 480 unsigned topidx, mididx, idx;
@@ -430,7 +508,7 @@ static void free_p2m_page(void *p)
430 free_page((unsigned long)p); 508 free_page((unsigned long)p);
431} 509}
432 510
433/* 511/*
434 * Fully allocate the p2m structure for a given pfn. We need to check 512 * Fully allocate the p2m structure for a given pfn. We need to check
435 * that both the top and mid levels are allocated, and make sure the 513 * that both the top and mid levels are allocated, and make sure the
436 * parallel mfn tree is kept in sync. We may race with other cpus, so 514 * parallel mfn tree is kept in sync. We may race with other cpus, so
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 967633ad98c4..969570491c39 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -8,6 +8,14 @@
8#include <xen/xen.h> 8#include <xen/xen.h>
9#include <asm/iommu_table.h> 9#include <asm/iommu_table.h>
10 10
11
12#include <asm/xen/swiotlb-xen.h>
13#ifdef CONFIG_X86_64
14#include <asm/iommu.h>
15#include <asm/dma.h>
16#endif
17#include <linux/export.h>
18
11int xen_swiotlb __read_mostly; 19int xen_swiotlb __read_mostly;
12 20
13static struct dma_map_ops xen_swiotlb_dma_ops = { 21static struct dma_map_ops xen_swiotlb_dma_ops = {
@@ -34,34 +42,64 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
34int __init pci_xen_swiotlb_detect(void) 42int __init pci_xen_swiotlb_detect(void)
35{ 43{
36 44
45 if (!xen_pv_domain())
46 return 0;
47
37 /* If running as PV guest, either iommu=soft, or swiotlb=force will 48 /* If running as PV guest, either iommu=soft, or swiotlb=force will
38 * activate this IOMMU. If running as PV privileged, activate it 49 * activate this IOMMU. If running as PV privileged, activate it
39 * irregardless. 50 * irregardless.
40 */ 51 */
41 if ((xen_initial_domain() || swiotlb || swiotlb_force) && 52 if ((xen_initial_domain() || swiotlb || swiotlb_force))
42 (xen_pv_domain()))
43 xen_swiotlb = 1; 53 xen_swiotlb = 1;
44 54
45 /* If we are running under Xen, we MUST disable the native SWIOTLB. 55 /* If we are running under Xen, we MUST disable the native SWIOTLB.
46 * Don't worry about swiotlb_force flag activating the native, as 56 * Don't worry about swiotlb_force flag activating the native, as
47 * the 'swiotlb' flag is the only one turning it on. */ 57 * the 'swiotlb' flag is the only one turning it on. */
48 if (xen_pv_domain()) 58 swiotlb = 0;
49 swiotlb = 0;
50 59
60#ifdef CONFIG_X86_64
61 /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0
62 * (so no iommu=X command line over-writes).
63 * Considering that PV guests do not want the *native SWIOTLB* but
64 * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here.
65 */
66 if (max_pfn > MAX_DMA32_PFN)
67 no_iommu = 1;
68#endif
51 return xen_swiotlb; 69 return xen_swiotlb;
52} 70}
53 71
54void __init pci_xen_swiotlb_init(void) 72void __init pci_xen_swiotlb_init(void)
55{ 73{
56 if (xen_swiotlb) { 74 if (xen_swiotlb) {
57 xen_swiotlb_init(1); 75 xen_swiotlb_init(1, true /* early */);
58 dma_ops = &xen_swiotlb_dma_ops; 76 dma_ops = &xen_swiotlb_dma_ops;
59 77
60 /* Make sure ACS will be enabled */ 78 /* Make sure ACS will be enabled */
61 pci_request_acs(); 79 pci_request_acs();
62 } 80 }
63} 81}
82
83int pci_xen_swiotlb_init_late(void)
84{
85 int rc;
86
87 if (xen_swiotlb)
88 return 0;
89
90 rc = xen_swiotlb_init(1, false /* late */);
91 if (rc)
92 return rc;
93
94 dma_ops = &xen_swiotlb_dma_ops;
95 /* Make sure ACS will be enabled */
96 pci_request_acs();
97
98 return 0;
99}
100EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
101
64IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, 102IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
65 0, 103 NULL,
66 pci_xen_swiotlb_init, 104 pci_xen_swiotlb_init,
67 0); 105 NULL);
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index ffcf2615640b..0a7852483ffe 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -24,6 +24,7 @@
24#include <linux/module.h> 24#include <linux/module.h>
25 25
26#include <xen/platform_pci.h> 26#include <xen/platform_pci.h>
27#include "xen-ops.h"
27 28
28#define XEN_PLATFORM_ERR_MAGIC -1 29#define XEN_PLATFORM_ERR_MAGIC -1
29#define XEN_PLATFORM_ERR_PROTOCOL -2 30#define XEN_PLATFORM_ERR_PROTOCOL -2
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e2d62d697b5d..8971a26d21ab 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -432,6 +432,24 @@ char * __init xen_memory_setup(void)
432 * - mfn_list 432 * - mfn_list
433 * - xen_start_info 433 * - xen_start_info
434 * See comment above "struct start_info" in <xen/interface/xen.h> 434 * See comment above "struct start_info" in <xen/interface/xen.h>
435 * We tried to make the the memblock_reserve more selective so
436 * that it would be clear what region is reserved. Sadly we ran
437 * in the problem wherein on a 64-bit hypervisor with a 32-bit
438 * initial domain, the pt_base has the cr3 value which is not
439 * neccessarily where the pagetable starts! As Jan put it: "
440 * Actually, the adjustment turns out to be correct: The page
441 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
442 * "first L2", "first L3", so the offset to the page table base is
443 * indeed 2. When reading xen/include/public/xen.h's comment
444 * very strictly, this is not a violation (since there nothing is said
445 * that the first thing in the page table space is pointed to by
446 * pt_base; I admit that this seems to be implied though, namely
447 * do I think that it is implied that the page table space is the
448 * range [pt_base, pt_base + nt_pt_frames), whereas that
449 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
450 * which - without a priori knowledge - the kernel would have
451 * difficulty to figure out)." - so lets just fall back to the
452 * easy way and reserve the whole region.
435 */ 453 */
436 memblock_reserve(__pa(xen_start_info->mfn_list), 454 memblock_reserve(__pa(xen_start_info->mfn_list),
437 xen_start_info->pt_base - xen_start_info->mfn_list); 455 xen_start_info->pt_base - xen_start_info->mfn_list);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index f58dca7a6e52..353c50f18702 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
377 return rc; 377 return rc;
378 378
379 if (num_online_cpus() == 1) 379 if (num_online_cpus() == 1)
380 alternatives_smp_switch(1); 380 /* Just in case we booted with a single CPU. */
381 alternatives_enable_smp();
381 382
382 rc = xen_smp_intr_init(cpu); 383 rc = xen_smp_intr_init(cpu);
383 if (rc) 384 if (rc)
@@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu)
424 unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); 425 unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
425 xen_uninit_lock_cpu(cpu); 426 xen_uninit_lock_cpu(cpu);
426 xen_teardown_timer(cpu); 427 xen_teardown_timer(cpu);
427
428 if (num_online_cpus() == 1)
429 alternatives_smp_switch(0);
430} 428}
431 429
432static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ 430static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 1cd7f4d11e29..6722e3733f02 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
35 info->u.text_mode_3.font_height; 35 info->u.text_mode_3.font_height;
36 break; 36 break;
37 37
38 case XEN_VGATYPE_EFI_LFB:
38 case XEN_VGATYPE_VESA_LFB: 39 case XEN_VGATYPE_VESA_LFB:
39 if (size < offsetof(struct dom0_vga_console_info, 40 if (size < offsetof(struct dom0_vga_console_info,
40 u.vesa_lfb.gbl_caps)) 41 u.vesa_lfb.gbl_caps))
@@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
54 screen_info->blue_pos = info->u.vesa_lfb.blue_pos; 55 screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
55 screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; 56 screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
56 screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; 57 screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
58
59 if (info->video_type == XEN_VGATYPE_EFI_LFB) {
60 screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
61 break;
62 }
63
57 if (size >= offsetof(struct dom0_vga_console_info, 64 if (size >= offsetof(struct dom0_vga_console_info,
58 u.vesa_lfb.gbl_caps) 65 u.vesa_lfb.gbl_caps)
59 + sizeof(info->u.vesa_lfb.gbl_caps)) 66 + sizeof(info->u.vesa_lfb.gbl_caps))
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index aaa7291c9259..7faed5869e5b 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,9 +28,61 @@ ENTRY(startup_xen)
28 __FINIT 28 __FINIT
29 29
30.pushsection .text 30.pushsection .text
31 .align PAGE_SIZE 31 .balign PAGE_SIZE
32ENTRY(hypercall_page) 32ENTRY(hypercall_page)
33 .skip PAGE_SIZE 33#define NEXT_HYPERCALL(x) \
34 ENTRY(xen_hypercall_##x) \
35 .skip 32
36
37NEXT_HYPERCALL(set_trap_table)
38NEXT_HYPERCALL(mmu_update)
39NEXT_HYPERCALL(set_gdt)
40NEXT_HYPERCALL(stack_switch)
41NEXT_HYPERCALL(set_callbacks)
42NEXT_HYPERCALL(fpu_taskswitch)
43NEXT_HYPERCALL(sched_op_compat)
44NEXT_HYPERCALL(platform_op)
45NEXT_HYPERCALL(set_debugreg)
46NEXT_HYPERCALL(get_debugreg)
47NEXT_HYPERCALL(update_descriptor)
48NEXT_HYPERCALL(ni)
49NEXT_HYPERCALL(memory_op)
50NEXT_HYPERCALL(multicall)
51NEXT_HYPERCALL(update_va_mapping)
52NEXT_HYPERCALL(set_timer_op)
53NEXT_HYPERCALL(event_channel_op_compat)
54NEXT_HYPERCALL(xen_version)
55NEXT_HYPERCALL(console_io)
56NEXT_HYPERCALL(physdev_op_compat)
57NEXT_HYPERCALL(grant_table_op)
58NEXT_HYPERCALL(vm_assist)
59NEXT_HYPERCALL(update_va_mapping_otherdomain)
60NEXT_HYPERCALL(iret)
61NEXT_HYPERCALL(vcpu_op)
62NEXT_HYPERCALL(set_segment_base)
63NEXT_HYPERCALL(mmuext_op)
64NEXT_HYPERCALL(xsm_op)
65NEXT_HYPERCALL(nmi_op)
66NEXT_HYPERCALL(sched_op)
67NEXT_HYPERCALL(callback_op)
68NEXT_HYPERCALL(xenoprof_op)
69NEXT_HYPERCALL(event_channel_op)
70NEXT_HYPERCALL(physdev_op)
71NEXT_HYPERCALL(hvm_op)
72NEXT_HYPERCALL(sysctl)
73NEXT_HYPERCALL(domctl)
74NEXT_HYPERCALL(kexec_op)
75NEXT_HYPERCALL(tmem_op) /* 38 */
76ENTRY(xen_hypercall_rsvr)
77 .skip 320
78NEXT_HYPERCALL(mca) /* 48 */
79NEXT_HYPERCALL(arch_1)
80NEXT_HYPERCALL(arch_2)
81NEXT_HYPERCALL(arch_3)
82NEXT_HYPERCALL(arch_4)
83NEXT_HYPERCALL(arch_5)
84NEXT_HYPERCALL(arch_6)
85 .balign PAGE_SIZE
34.popsection 86.popsection
35 87
36 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") 88 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 202d4c150154..bb5a8105ea86 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
27void xen_setup_shared_info(void); 27void xen_setup_shared_info(void);
28void xen_build_mfn_list_list(void); 28void xen_build_mfn_list_list(void);
29void xen_setup_machphys_mapping(void); 29void xen_setup_machphys_mapping(void);
30pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); 30void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
31void xen_reserve_top(void); 31void xen_reserve_top(void);
32extern unsigned long xen_max_p2m_pfn; 32extern unsigned long xen_max_p2m_pfn;
33 33
@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
45void xen_unplug_emulated_devices(void); 45void xen_unplug_emulated_devices(void);
46 46
47void __init xen_build_dynamic_phys_to_machine(void); 47void __init xen_build_dynamic_phys_to_machine(void);
48unsigned long __init xen_revector_p2m_tree(void);
48 49
49void xen_init_irq_ops(void); 50void xen_init_irq_ops(void);
50void xen_setup_timer(int cpu); 51void xen_setup_timer(int cpu);