author    Sage Weil <sage@inktank.com>  2013-08-15 14:11:45 -0400
committer Sage Weil <sage@inktank.com>  2013-08-15 14:11:45 -0400
commit    ee3e542fec6e69bc9fb668698889a37d93950ddf (patch)
tree      e74ee766a4764769ef1d3d45d266b4dea64101d3 /arch/x86
parent    fe2a801b50c0bb8039d627e5ae1fec249d10ff39 (diff)
parent    f1d6e17f540af37bb1891480143669ba7636c4cf (diff)
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 31
-rw-r--r--  arch/x86/Kconfig.debug | 21
-rw-r--r--  arch/x86/Makefile | 7
-rw-r--r--  arch/x86/boot/compressed/Makefile | 6
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 22
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 2
-rw-r--r--  arch/x86/boot/compressed/misc.c | 4
-rw-r--r--  arch/x86/boot/tools/build.c | 1
-rw-r--r--  arch/x86/configs/kvm_guest.config | 28
-rw-r--r--  arch/x86/crypto/Makefile | 6
-rw-r--r--  arch/x86/crypto/blowfish-avx2-asm_64.S | 449
-rw-r--r--  arch/x86/crypto/blowfish_avx2_glue.c | 585
-rw-r--r--  arch/x86/crypto/blowfish_glue.c | 32
-rw-r--r--  arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 160
-rw-r--r--  arch/x86/crypto/sha256_ssse3_glue.c | 57
-rw-r--r--  arch/x86/crypto/sha512_ssse3_glue.c | 58
-rw-r--r--  arch/x86/crypto/twofish-avx2-asm_64.S | 600
-rw-r--r--  arch/x86/crypto/twofish_avx2_glue.c | 584
-rw-r--r--  arch/x86/crypto/twofish_avx_glue.c | 14
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 2
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 2
-rw-r--r--  arch/x86/include/asm/acpi.h | 2
-rw-r--r--  arch/x86/include/asm/apic.h | 27
-rw-r--r--  arch/x86/include/asm/cpu.h | 2
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 118
-rw-r--r--  arch/x86/include/asm/crypto/blowfish.h | 43
-rw-r--r--  arch/x86/include/asm/crypto/twofish.h | 18
-rw-r--r--  arch/x86/include/asm/desc.h | 117
-rw-r--r--  arch/x86/include/asm/efi.h | 28
-rw-r--r--  arch/x86/include/asm/emergency-restart.h | 12
-rw-r--r--  arch/x86/include/asm/entry_arch.h | 8
-rw-r--r--  arch/x86/include/asm/fixmap.h | 2
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 4
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 17
-rw-r--r--  arch/x86/include/asm/io.h | 7
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 17
-rw-r--r--  arch/x86/include/asm/mc146818rtc.h | 4
-rw-r--r--  arch/x86/include/asm/mce.h | 9
-rw-r--r--  arch/x86/include/asm/microcode.h | 4
-rw-r--r--  arch/x86/include/asm/microcode_amd.h | 78
-rw-r--r--  arch/x86/include/asm/microcode_intel.h | 6
-rw-r--r--  arch/x86/include/asm/mmconfig.h | 4
-rw-r--r--  arch/x86/include/asm/mpspec.h | 2
-rw-r--r--  arch/x86/include/asm/mrst-vrtc.h | 4
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 3
-rw-r--r--  arch/x86/include/asm/mtrr.h | 10
-rw-r--r--  arch/x86/include/asm/mutex_32.h | 11
-rw-r--r--  arch/x86/include/asm/mutex_64.h | 11
-rw-r--r--  arch/x86/include/asm/numa.h | 6
-rw-r--r--  arch/x86/include/asm/perf_event.h | 3
-rw-r--r--  arch/x86/include/asm/pgtable-2level.h | 48
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h | 3
-rw-r--r--  arch/x86/include/asm/pgtable.h | 57
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 27
-rw-r--r--  arch/x86/include/asm/processor.h | 5
-rw-r--r--  arch/x86/include/asm/prom.h | 2
-rw-r--r--  arch/x86/include/asm/sighandling.h | 4
-rw-r--r--  arch/x86/include/asm/smp.h | 2
-rw-r--r--  arch/x86/include/asm/special_insns.h | 2
-rw-r--r--  arch/x86/include/asm/spinlock.h | 4
-rw-r--r--  arch/x86/include/asm/thread_info.h | 4
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 2
-rw-r--r--  arch/x86/include/asm/trace/irq_vectors.h | 104
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 2
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h | 3
-rw-r--r--  arch/x86/include/asm/x86_init.h | 6
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 3
-rw-r--r--  arch/x86/include/uapi/asm/processor-flags.h | 154
-rw-r--r--  arch/x86/kernel/Makefile | 6
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 13
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 22
-rw-r--r--  arch/x86/kernel/acpi/sleep.h | 2
-rw-r--r--  arch/x86/kernel/apic/apic.c | 101
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 2
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 82
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 1
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 4
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 35
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 21
-rw-r--r--  arch/x86/kernel/cpu/centaur.c | 26
-rw-r--r--  arch/x86/kernel/cpu/common.c | 103
-rw-r--r--  arch/x86/kernel/cpu/cyrix.c | 42
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 30
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 107
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c | 15
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 28
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 14
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c | 12
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 139
-rw-r--r--  arch/x86/kernel/cpu/mcheck/threshold.c | 24
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cyrix.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 23
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 87
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 71
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 24
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 34
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_iommu.c | 502
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_iommu.h | 40
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_uncore.c | 31
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 137
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 178
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 69
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 40
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h | 4
-rw-r--r--  arch/x86/kernel/cpu/powerflags.c | 8
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 4
-rw-r--r--  arch/x86/kernel/cpu/rdrand.c | 2
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 4
-rw-r--r--  arch/x86/kernel/cpu/topology.c | 2
-rw-r--r--  arch/x86/kernel/cpu/transmeta.c | 6
-rw-r--r--  arch/x86/kernel/cpu/umc.c | 2
-rw-r--r--  arch/x86/kernel/cpu/vmware.c | 2
-rw-r--r--  arch/x86/kernel/cpuid.c | 7
-rw-r--r--  arch/x86/kernel/devicetree.c | 6
-rw-r--r--  arch/x86/kernel/doublefault.c (renamed from arch/x86/kernel/doublefault_32.c) | 15
-rw-r--r--  arch/x86/kernel/early-quirks.c | 14
-rw-r--r--  arch/x86/kernel/entry_32.S | 12
-rw-r--r--  arch/x86/kernel/entry_64.S | 38
-rw-r--r--  arch/x86/kernel/head_32.S | 22
-rw-r--r--  arch/x86/kernel/head_64.S | 9
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 3
-rw-r--r--  arch/x86/kernel/i387.c | 69
-rw-r--r--  arch/x86/kernel/irq.c | 33
-rw-r--r--  arch/x86/kernel/irq_32.c | 2
-rw-r--r--  arch/x86/kernel/irq_work.c | 24
-rw-r--r--  arch/x86/kernel/kvm.c | 10
-rw-r--r--  arch/x86/kernel/kvmclock.c | 11
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 130
-rw-r--r--  arch/x86/kernel/microcode_amd_early.c | 302
-rw-r--r--  arch/x86/kernel/microcode_core.c | 2
-rw-r--r--  arch/x86/kernel/microcode_core_early.c | 55
-rw-r--r--  arch/x86/kernel/microcode_intel_early.c | 32
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 12
-rw-r--r--  arch/x86/kernel/msr.c | 6
-rw-r--r--  arch/x86/kernel/nmi.c | 37
-rw-r--r--  arch/x86/kernel/process.c | 2
-rw-r--r--  arch/x86/kernel/process_32.c | 13
-rw-r--r--  arch/x86/kernel/process_64.c | 11
-rw-r--r--  arch/x86/kernel/ptrace.c | 204
-rw-r--r--  arch/x86/kernel/reboot.c | 117
-rw-r--r--  arch/x86/kernel/relocate_kernel_32.S | 2
-rw-r--r--  arch/x86/kernel/relocate_kernel_64.S | 34
-rw-r--r--  arch/x86/kernel/rtc.c | 17
-rw-r--r--  arch/x86/kernel/setup.c | 4
-rw-r--r--  arch/x86/kernel/signal.c | 16
-rw-r--r--  arch/x86/kernel/smp.c | 72
-rw-r--r--  arch/x86/kernel/smpboot.c | 28
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 2
-rw-r--r--  arch/x86/kernel/tboot.c | 79
-rw-r--r--  arch/x86/kernel/tracepoint.c | 59
-rw-r--r--  arch/x86/kernel/traps.c | 19
-rw-r--r--  arch/x86/kernel/tsc.c | 4
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 18
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 6
-rw-r--r--  arch/x86/kernel/x86_init.c | 4
-rw-r--r--  arch/x86/kernel/xsave.c | 9
-rw-r--r--  arch/x86/kvm/Makefile | 13
-rw-r--r--  arch/x86/kvm/emulate.c | 391
-rw-r--r--  arch/x86/kvm/lapic.c | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 308
-rw-r--r--  arch/x86/kvm/mmu.h | 18
-rw-r--r--  arch/x86/kvm/mmutrace.h | 76
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 10
-rw-r--r--  arch/x86/kvm/svm.c | 10
-rw-r--r--  arch/x86/kvm/trace.h | 21
-rw-r--r--  arch/x86/kvm/vmx.c | 32
-rw-r--r--  arch/x86/kvm/x86.c | 82
-rw-r--r--  arch/x86/lguest/Makefile | 2
-rw-r--r--  arch/x86/lguest/boot.c | 6
-rw-r--r--  arch/x86/lguest/head_32.S (renamed from arch/x86/lguest/i386_head.S) | 0
-rw-r--r--  arch/x86/mm/highmem_32.c | 6
-rw-r--r--  arch/x86/mm/hugetlbpage.c | 187
-rw-r--r--  arch/x86/mm/init.c | 14
-rw-r--r--  arch/x86/mm/init_32.c | 32
-rw-r--r--  arch/x86/mm/init_64.c | 47
-rw-r--r--  arch/x86/mm/ioremap.c | 8
-rw-r--r--  arch/x86/mm/mmap.c | 4
-rw-r--r--  arch/x86/mm/mmio-mod.c | 4
-rw-r--r--  arch/x86/mm/numa.c | 12
-rw-r--r--  arch/x86/mm/numa_32.c | 2
-rw-r--r--  arch/x86/mm/numa_emulation.c | 12
-rw-r--r--  arch/x86/mm/pgtable.c | 4
-rw-r--r--  arch/x86/mm/setup_nx.c | 4
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 61
-rw-r--r--  arch/x86/pci/acpi.c | 7
-rw-r--r--  arch/x86/pci/amd_bus.c | 8
-rw-r--r--  arch/x86/platform/ce4100/ce4100.c | 4
-rw-r--r--  arch/x86/platform/efi/efi.c | 10
-rw-r--r--  arch/x86/platform/mrst/mrst.c | 4
-rw-r--r--  arch/x86/platform/mrst/vrtc.c | 11
-rw-r--r--  arch/x86/um/signal.c | 1
-rw-r--r--  arch/x86/vdso/vdso32-setup.c | 4
-rw-r--r--  arch/x86/xen/enlighten.c | 8
-rw-r--r--  arch/x86/xen/setup.c | 6
-rw-r--r--  arch/x86/xen/smp.c | 103
-rw-r--r--  arch/x86/xen/spinlock.c | 9
-rw-r--r--  arch/x86/xen/time.c | 116
-rw-r--r--  arch/x86/xen/xen-ops.h | 2
204 files changed, 4636 insertions, 4393 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fe120da25625..b32ebf92b0ce 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -65,6 +65,7 @@ config X86
65 select HAVE_KERNEL_LZMA 65 select HAVE_KERNEL_LZMA
66 select HAVE_KERNEL_XZ 66 select HAVE_KERNEL_XZ
67 select HAVE_KERNEL_LZO 67 select HAVE_KERNEL_LZO
68 select HAVE_KERNEL_LZ4
68 select HAVE_HW_BREAKPOINT 69 select HAVE_HW_BREAKPOINT
69 select HAVE_MIXED_BREAKPOINTS_REGS 70 select HAVE_MIXED_BREAKPOINTS_REGS
70 select PERF_EVENTS 71 select PERF_EVENTS
@@ -102,6 +103,7 @@ config X86
102 select HAVE_ARCH_SECCOMP_FILTER 103 select HAVE_ARCH_SECCOMP_FILTER
103 select BUILDTIME_EXTABLE_SORT 104 select BUILDTIME_EXTABLE_SORT
104 select GENERIC_CMOS_UPDATE 105 select GENERIC_CMOS_UPDATE
106 select HAVE_ARCH_SOFT_DIRTY
105 select CLOCKSOURCE_WATCHDOG 107 select CLOCKSOURCE_WATCHDOG
106 select GENERIC_CLOCKEVENTS 108 select GENERIC_CLOCKEVENTS
107 select ARCH_CLOCKSOURCE_DATA if X86_64 109 select ARCH_CLOCKSOURCE_DATA if X86_64
@@ -121,6 +123,7 @@ config X86
121 select OLD_SIGACTION if X86_32 123 select OLD_SIGACTION if X86_32
122 select COMPAT_OLD_SIGACTION if IA32_EMULATION 124 select COMPAT_OLD_SIGACTION if IA32_EMULATION
123 select RTC_LIB 125 select RTC_LIB
126 select HAVE_DEBUG_STACKOVERFLOW
124 127
125config INSTRUCTION_DECODER 128config INSTRUCTION_DECODER
126 def_bool y 129 def_bool y
@@ -207,6 +210,12 @@ config ARCH_HIBERNATION_POSSIBLE
207config ARCH_SUSPEND_POSSIBLE 210config ARCH_SUSPEND_POSSIBLE
208 def_bool y 211 def_bool y
209 212
213config ARCH_WANT_HUGE_PMD_SHARE
214 def_bool y
215
216config ARCH_WANT_GENERAL_HUGETLB
217 def_bool y
218
210config ZONE_DMA32 219config ZONE_DMA32
211 bool 220 bool
212 default X86_64 221 default X86_64
@@ -336,6 +345,7 @@ config X86_EXTENDED_PLATFORM
336 345
337 If you enable this option then you'll be able to select support 346 If you enable this option then you'll be able to select support
338 for the following (non-PC) 32 bit x86 platforms: 347 for the following (non-PC) 32 bit x86 platforms:
348 Goldfish (Android emulator)
339 AMD Elan 349 AMD Elan
340 NUMAQ (IBM/Sequent) 350 NUMAQ (IBM/Sequent)
341 RDC R-321x SoC 351 RDC R-321x SoC
@@ -410,6 +420,7 @@ config X86_UV
410config X86_GOLDFISH 420config X86_GOLDFISH
411 bool "Goldfish (Virtual Platform)" 421 bool "Goldfish (Virtual Platform)"
412 depends on X86_32 422 depends on X86_32
423 depends on X86_EXTENDED_PLATFORM
413 ---help--- 424 ---help---
414 Enable support for the Goldfish virtual platform used primarily 425 Enable support for the Goldfish virtual platform used primarily
415 for Android development. Unless you are building for the Android 426 for Android development. Unless you are building for the Android
@@ -1058,8 +1069,16 @@ config MICROCODE_INTEL_LIB
1058 depends on MICROCODE_INTEL 1069 depends on MICROCODE_INTEL
1059 1070
1060config MICROCODE_INTEL_EARLY 1071config MICROCODE_INTEL_EARLY
1072 def_bool n
1073
1074config MICROCODE_AMD_EARLY
1075 def_bool n
1076
1077config MICROCODE_EARLY
1061 bool "Early load microcode" 1078 bool "Early load microcode"
1062 depends on MICROCODE_INTEL && BLK_DEV_INITRD 1079 depends on MICROCODE=y && BLK_DEV_INITRD
1080 select MICROCODE_INTEL_EARLY if MICROCODE_INTEL
1081 select MICROCODE_AMD_EARLY if MICROCODE_AMD
1063 default y 1082 default y
1064 help 1083 help
1065 This option provides functionality to read additional microcode data 1084 This option provides functionality to read additional microcode data
@@ -1067,10 +1086,6 @@ config MICROCODE_INTEL_EARLY
1067 microcode to CPU's as early as possible. No functional change if no 1086 microcode to CPU's as early as possible. No functional change if no
1068 microcode data is glued to the initrd, therefore it's safe to say Y. 1087 microcode data is glued to the initrd, therefore it's safe to say Y.
1069 1088
1070config MICROCODE_EARLY
1071 def_bool y
1072 depends on MICROCODE_INTEL_EARLY
1073
1074config X86_MSR 1089config X86_MSR
1075 tristate "/dev/cpu/*/msr - Model-specific register support" 1090 tristate "/dev/cpu/*/msr - Model-specific register support"
1076 ---help--- 1091 ---help---
@@ -1725,7 +1740,7 @@ config PHYSICAL_ALIGN
1725 1740
1726config HOTPLUG_CPU 1741config HOTPLUG_CPU
1727 bool "Support for hot-pluggable CPUs" 1742 bool "Support for hot-pluggable CPUs"
1728 depends on SMP && HOTPLUG 1743 depends on SMP
1729 ---help--- 1744 ---help---
1730 Say Y here to allow turning CPUs off and on. CPUs can be 1745 Say Y here to allow turning CPUs off and on. CPUs can be
1731 controlled through /sys/devices/system/cpu. 1746 controlled through /sys/devices/system/cpu.
@@ -2246,11 +2261,11 @@ source "drivers/pcmcia/Kconfig"
2246source "drivers/pci/hotplug/Kconfig" 2261source "drivers/pci/hotplug/Kconfig"
2247 2262
2248config RAPIDIO 2263config RAPIDIO
2249 bool "RapidIO support" 2264 tristate "RapidIO support"
2250 depends on PCI 2265 depends on PCI
2251 default n 2266 default n
2252 help 2267 help
2253 If you say Y here, the kernel will include drivers and 2268 If enabled this option will include drivers and the core
2254 infrastructure code to support RapidIO interconnect devices. 2269 infrastructure code to support RapidIO interconnect devices.
2255 2270
2256source "drivers/rapidio/Kconfig" 2271source "drivers/rapidio/Kconfig"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c198b7e13e7b..78d91afb8e50 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -59,16 +59,6 @@ config EARLY_PRINTK_DBGP
59 with klogd/syslogd or the X server. You should normally N here, 59 with klogd/syslogd or the X server. You should normally N here,
60 unless you want to debug such a crash. You need usb debug device. 60 unless you want to debug such a crash. You need usb debug device.
61 61
62config DEBUG_STACKOVERFLOW
63 bool "Check for stack overflows"
64 depends on DEBUG_KERNEL
65 ---help---
66 Say Y here if you want to check the overflows of kernel, IRQ
67 and exception stacks. This option will cause messages of the
68 stacks in detail when free stack space drops below a certain
69 limit.
70 If in doubt, say "N".
71
72config X86_PTDUMP 62config X86_PTDUMP
73 bool "Export kernel pagetable layout to userspace via debugfs" 63 bool "Export kernel pagetable layout to userspace via debugfs"
74 depends on DEBUG_KERNEL 64 depends on DEBUG_KERNEL
@@ -122,7 +112,6 @@ config DEBUG_NX_TEST
122config DOUBLEFAULT 112config DOUBLEFAULT
123 default y 113 default y
124 bool "Enable doublefault exception handler" if EXPERT 114 bool "Enable doublefault exception handler" if EXPERT
125 depends on X86_32
126 ---help--- 115 ---help---
127 This option allows trapping of rare doublefault exceptions that 116 This option allows trapping of rare doublefault exceptions that
128 would otherwise cause a system to silently reboot. Disabling this 117 would otherwise cause a system to silently reboot. Disabling this
@@ -304,4 +293,14 @@ config DEBUG_NMI_SELFTEST
304 293
305 If unsure, say N. 294 If unsure, say N.
306 295
296config X86_DEBUG_STATIC_CPU_HAS
297 bool "Debug alternatives"
298 depends on DEBUG_KERNEL
299 ---help---
300 This option causes additional code to be generated which
301 fails if static_cpu_has() is used before alternatives have
302 run.
303
304 If unsure, say N.
305
307endmenu 306endmenu
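
The new X86_DEBUG_STATIC_CPU_HAS option above generates an extra check that trips when static_cpu_has() is used before the alternatives framework has patched the kernel text. A minimal sketch of the kind of caller that check is meant to police, assuming the 3.11-era <asm/cpufeature.h> API; the function name and the feature bit chosen here are illustrative only:

#include <linux/init.h>
#include <linux/kernel.h>
#include <asm/cpufeature.h>

/*
 * Illustrative only.  static_cpu_has() is resolved by the alternatives
 * framework into a constant branch; if a caller like this ran before
 * alternative_instructions() had patched the kernel, the extra code
 * generated under X86_DEBUG_STATIC_CPU_HAS would trip instead of the
 * unresolved test quietly returning a stale answer.
 */
static void __init example_pick_code_path(void)
{
	if (static_cpu_has(X86_FEATURE_AVX2))
		pr_info("using the AVX2 code path\n");
	else
		pr_info("using the generic code path\n");
}
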
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5c477260294f..07639c656fcd 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,6 +220,12 @@ archclean:
220 $(Q)$(MAKE) $(clean)=$(boot) 220 $(Q)$(MAKE) $(clean)=$(boot)
221 $(Q)$(MAKE) $(clean)=arch/x86/tools 221 $(Q)$(MAKE) $(clean)=arch/x86/tools
222 222
223PHONY += kvmconfig
224kvmconfig:
225 $(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
226 $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config arch/x86/configs/kvm_guest.config
227 $(Q)yes "" | $(MAKE) oldconfig
228
223define archhelp 229define archhelp
224 echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' 230 echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
225 echo ' install - Install kernel using' 231 echo ' install - Install kernel using'
@@ -233,4 +239,5 @@ define archhelp
233 echo ' bzdisk/fdimage*/isoimage also accept:' 239 echo ' bzdisk/fdimage*/isoimage also accept:'
234 echo ' FDARGS="..." arguments for the booted kernel' 240 echo ' FDARGS="..." arguments for the booted kernel'
235 echo ' FDINITRD=file initrd for the booted kernel' 241 echo ' FDINITRD=file initrd for the booted kernel'
242 echo ' kvmconfig - Enable additional options for guest kernel support'
236endef 243endef
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 5ef205c5f37b..dcd90df10ab4 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,8 @@
4# create a compressed vmlinux image from the original vmlinux 4# create a compressed vmlinux image from the original vmlinux
5# 5#
6 6
7targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo 7targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
8 vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
8 9
9KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 10KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
10KBUILD_CFLAGS += -fno-strict-aliasing -fPIC 11KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -63,12 +64,15 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
63 $(call if_changed,xzkern) 64 $(call if_changed,xzkern)
64$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE 65$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
65 $(call if_changed,lzo) 66 $(call if_changed,lzo)
67$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
68 $(call if_changed,lz4)
66 69
67suffix-$(CONFIG_KERNEL_GZIP) := gz 70suffix-$(CONFIG_KERNEL_GZIP) := gz
68suffix-$(CONFIG_KERNEL_BZIP2) := bz2 71suffix-$(CONFIG_KERNEL_BZIP2) := bz2
69suffix-$(CONFIG_KERNEL_LZMA) := lzma 72suffix-$(CONFIG_KERNEL_LZMA) := lzma
70suffix-$(CONFIG_KERNEL_XZ) := xz 73suffix-$(CONFIG_KERNEL_XZ) := xz
71suffix-$(CONFIG_KERNEL_LZO) := lzo 74suffix-$(CONFIG_KERNEL_LZO) := lzo
75suffix-$(CONFIG_KERNEL_LZ4) := lz4
72 76
73quiet_cmd_mkpiggy = MKPIGGY $@ 77quiet_cmd_mkpiggy = MKPIGGY $@
74 cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) 78 cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c205035a6b96..b7388a425f09 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -225,7 +225,7 @@ static void low_free(unsigned long size, unsigned long addr)
225 unsigned long nr_pages; 225 unsigned long nr_pages;
226 226
227 nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; 227 nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
228 efi_call_phys2(sys_table->boottime->free_pages, addr, size); 228 efi_call_phys2(sys_table->boottime->free_pages, addr, nr_pages);
229} 229}
230 230
231static void find_bits(unsigned long mask, u8 *pos, u8 *size) 231static void find_bits(unsigned long mask, u8 *pos, u8 *size)
@@ -992,18 +992,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
992 efi_memory_desc_t *mem_map; 992 efi_memory_desc_t *mem_map;
993 efi_status_t status; 993 efi_status_t status;
994 __u32 desc_version; 994 __u32 desc_version;
995 bool called_exit = false;
995 u8 nr_entries; 996 u8 nr_entries;
996 int i; 997 int i;
997 998
998 size = sizeof(*mem_map) * 32; 999 size = sizeof(*mem_map) * 32;
999 1000
1000again: 1001again:
1001 size += sizeof(*mem_map); 1002 size += sizeof(*mem_map) * 2;
1002 _size = size; 1003 _size = size;
1003 status = low_alloc(size, 1, (unsigned long *)&mem_map); 1004 status = low_alloc(size, 1, (unsigned long *)&mem_map);
1004 if (status != EFI_SUCCESS) 1005 if (status != EFI_SUCCESS)
1005 return status; 1006 return status;
1006 1007
1008get_map:
1007 status = efi_call_phys5(sys_table->boottime->get_memory_map, &size, 1009 status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
1008 mem_map, &key, &desc_size, &desc_version); 1010 mem_map, &key, &desc_size, &desc_version);
1009 if (status == EFI_BUFFER_TOO_SMALL) { 1011 if (status == EFI_BUFFER_TOO_SMALL) {
@@ -1029,8 +1031,20 @@ again:
1029 /* Might as well exit boot services now */ 1031 /* Might as well exit boot services now */
1030 status = efi_call_phys2(sys_table->boottime->exit_boot_services, 1032 status = efi_call_phys2(sys_table->boottime->exit_boot_services,
1031 handle, key); 1033 handle, key);
1032 if (status != EFI_SUCCESS) 1034 if (status != EFI_SUCCESS) {
1033 goto free_mem_map; 1035 /*
1036 * ExitBootServices() will fail if any of the event
1037 * handlers change the memory map. In which case, we
1038 * must be prepared to retry, but only once so that
1039 * we're guaranteed to exit on repeated failures instead
1040 * of spinning forever.
1041 */
1042 if (called_exit)
1043 goto free_mem_map;
1044
1045 called_exit = true;
1046 goto get_map;
1047 }
1034 1048
1035 /* Historic? */ 1049 /* Historic? */
1036 boot_params->alt_mem_k = 32 * 1024; 1050 boot_params->alt_mem_k = 32 * 1024;
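
The exit_boot() change above adopts a retry-once policy around ExitBootServices(): the call is allowed to fail a single time, because a firmware event handler may have changed the memory map (and so invalidated the map key) between GetMemoryMap() and the exit call, but a second failure aborts so the stub cannot spin forever. A stripped-down sketch of that control flow; the stub_* helpers declared here are stand-ins for the real boot-stub calls, not a usable firmware interface:

#include <stdbool.h>

typedef unsigned long efi_status_t;
#define EFI_SUCCESS 0UL

/* Stand-in declarations; the real stub goes through the EFI boot
 * services table (GetMemoryMap / ExitBootServices).
 */
efi_status_t stub_get_memory_map(void *map, unsigned long *key);
efi_status_t stub_exit_boot_services(void *handle, unsigned long key);

static efi_status_t exit_boot_sketch(void *handle, void *map)
{
	bool called_exit = false;
	unsigned long key;
	efi_status_t status;

get_map:
	status = stub_get_memory_map(map, &key);
	if (status != EFI_SUCCESS)
		return status;

	status = stub_exit_boot_services(handle, key);
	if (status != EFI_SUCCESS) {
		/*
		 * An event handler may have modified the memory map and
		 * invalidated the key.  Refresh the map and retry, but
		 * only once, so repeated failures still exit.
		 */
		if (called_exit)
			return status;
		called_exit = true;
		goto get_map;
	}

	return EFI_SUCCESS;
}

The real code also reallocates a larger buffer when GetMemoryMap() reports EFI_BUFFER_TOO_SMALL (and this merge doubles the extra slack added per retry); the sketch leaves that allocation handling out.
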
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 16f24e6dad79..06e71c2c16bf 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -27,8 +27,6 @@
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/linkage.h> 28#include <linux/linkage.h>
29#include <asm/segment.h> 29#include <asm/segment.h>
30#include <asm/pgtable_types.h>
31#include <asm/page_types.h>
32#include <asm/boot.h> 30#include <asm/boot.h>
33#include <asm/msr.h> 31#include <asm/msr.h>
34#include <asm/processor-flags.h> 32#include <asm/processor-flags.h>
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7cb56c6ca351..0319c88290a5 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -145,6 +145,10 @@ static int lines, cols;
145#include "../../../../lib/decompress_unlzo.c" 145#include "../../../../lib/decompress_unlzo.c"
146#endif 146#endif
147 147
148#ifdef CONFIG_KERNEL_LZ4
149#include "../../../../lib/decompress_unlz4.c"
150#endif
151
148static void scroll(void) 152static void scroll(void)
149{ 153{
150 int i; 154 int i;
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 94c544650020..c941d6a8887f 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -243,6 +243,7 @@ static void parse_zoffset(char *fname)
243 c = fread(buf, 1, sizeof(buf) - 1, file); 243 c = fread(buf, 1, sizeof(buf) - 1, file);
244 if (ferror(file)) 244 if (ferror(file))
245 die("read-error on `zoffset.h'"); 245 die("read-error on `zoffset.h'");
246 fclose(file);
246 buf[c] = 0; 247 buf[c] = 0;
247 248
248 p = (char *)buf; 249 p = (char *)buf;
diff --git a/arch/x86/configs/kvm_guest.config b/arch/x86/configs/kvm_guest.config
new file mode 100644
index 000000000000..f9affcc3b9f1
--- /dev/null
+++ b/arch/x86/configs/kvm_guest.config
@@ -0,0 +1,28 @@
1CONFIG_NET=y
2CONFIG_NET_CORE=y
3CONFIG_NETDEVICES=y
4CONFIG_BLOCK=y
5CONFIG_BLK_DEV=y
6CONFIG_NETWORK_FILESYSTEMS=y
7CONFIG_INET=y
8CONFIG_TTY=y
9CONFIG_SERIAL_8250=y
10CONFIG_SERIAL_8250_CONSOLE=y
11CONFIG_IP_PNP=y
12CONFIG_IP_PNP_DHCP=y
13CONFIG_BINFMT_ELF=y
14CONFIG_PCI=y
15CONFIG_PCI_MSI=y
16CONFIG_DEBUG_KERNEL=y
17CONFIG_VIRTUALIZATION=y
18CONFIG_HYPERVISOR_GUEST=y
19CONFIG_PARAVIRT=y
20CONFIG_KVM_GUEST=y
21CONFIG_VIRTIO=y
22CONFIG_VIRTIO_PCI=y
23CONFIG_VIRTIO_BLK=y
24CONFIG_VIRTIO_CONSOLE=y
25CONFIG_VIRTIO_NET=y
26CONFIG_9P_FS=y
27CONFIG_NET_9P=y
28CONFIG_NET_9P_VIRTIO=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a3a0ed80f17c..6c63c358a7e6 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -3,8 +3,6 @@
3# 3#
4 4
5avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) 5avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
6avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
7 $(comma)4)$(comma)%ymm2,yes,no)
8 6
9obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o 7obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
10obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o 8obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -42,10 +40,8 @@ endif
42 40
43# These modules require assembler to support AVX2. 41# These modules require assembler to support AVX2.
44ifeq ($(avx2_supported),yes) 42ifeq ($(avx2_supported),yes)
45 obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
46 obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o 43 obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
47 obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o 44 obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
48 obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
49endif 45endif
50 46
51aes-i586-y := aes-i586-asm_32.o aes_glue.o 47aes-i586-y := aes-i586-asm_32.o aes_glue.o
@@ -73,10 +69,8 @@ ifeq ($(avx_supported),yes)
73endif 69endif
74 70
75ifeq ($(avx2_supported),yes) 71ifeq ($(avx2_supported),yes)
76 blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
77 camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o 72 camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
78 serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o 73 serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
79 twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
80endif 74endif
81 75
82aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o 76aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S
deleted file mode 100644
index 784452e0d05d..000000000000
--- a/arch/x86/crypto/blowfish-avx2-asm_64.S
+++ /dev/null
@@ -1,449 +0,0 @@
1/*
2 * x86_64/AVX2 assembler optimized version of Blowfish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/linkage.h>
14
15.file "blowfish-avx2-asm_64.S"
16
17.data
18.align 32
19
20.Lprefetch_mask:
21.long 0*64
22.long 1*64
23.long 2*64
24.long 3*64
25.long 4*64
26.long 5*64
27.long 6*64
28.long 7*64
29
30.Lbswap32_mask:
31.long 0x00010203
32.long 0x04050607
33.long 0x08090a0b
34.long 0x0c0d0e0f
35
36.Lbswap128_mask:
37 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
38.Lbswap_iv_mask:
39 .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
40
41.text
42/* structure of crypto context */
43#define p 0
44#define s0 ((16 + 2) * 4)
45#define s1 ((16 + 2 + (1 * 256)) * 4)
46#define s2 ((16 + 2 + (2 * 256)) * 4)
47#define s3 ((16 + 2 + (3 * 256)) * 4)
48
49/* register macros */
50#define CTX %rdi
51#define RIO %rdx
52
53#define RS0 %rax
54#define RS1 %r8
55#define RS2 %r9
56#define RS3 %r10
57
58#define RLOOP %r11
59#define RLOOPd %r11d
60
61#define RXr0 %ymm8
62#define RXr1 %ymm9
63#define RXr2 %ymm10
64#define RXr3 %ymm11
65#define RXl0 %ymm12
66#define RXl1 %ymm13
67#define RXl2 %ymm14
68#define RXl3 %ymm15
69
70/* temp regs */
71#define RT0 %ymm0
72#define RT0x %xmm0
73#define RT1 %ymm1
74#define RT1x %xmm1
75#define RIDX0 %ymm2
76#define RIDX1 %ymm3
77#define RIDX1x %xmm3
78#define RIDX2 %ymm4
79#define RIDX3 %ymm5
80
81/* vpgatherdd mask and '-1' */
82#define RNOT %ymm6
83
84/* byte mask, (-1 >> 24) */
85#define RBYTE %ymm7
86
87/***********************************************************************
88 * 32-way AVX2 blowfish
89 ***********************************************************************/
90#define F(xl, xr) \
91 vpsrld $24, xl, RIDX0; \
92 vpsrld $16, xl, RIDX1; \
93 vpsrld $8, xl, RIDX2; \
94 vpand RBYTE, RIDX1, RIDX1; \
95 vpand RBYTE, RIDX2, RIDX2; \
96 vpand RBYTE, xl, RIDX3; \
97 \
98 vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
99 vpcmpeqd RNOT, RNOT, RNOT; \
100 vpcmpeqd RIDX0, RIDX0, RIDX0; \
101 \
102 vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
103 vpcmpeqd RIDX1, RIDX1, RIDX1; \
104 vpaddd RT0, RT1, RT0; \
105 \
106 vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
107 vpxor RT0, RT1, RT0; \
108 \
109 vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
110 vpcmpeqd RNOT, RNOT, RNOT; \
111 vpaddd RT0, RT1, RT0; \
112 \
113 vpxor RT0, xr, xr;
114
115#define add_roundkey(xl, nmem) \
116 vpbroadcastd nmem, RT0; \
117 vpxor RT0, xl ## 0, xl ## 0; \
118 vpxor RT0, xl ## 1, xl ## 1; \
119 vpxor RT0, xl ## 2, xl ## 2; \
120 vpxor RT0, xl ## 3, xl ## 3;
121
122#define round_enc() \
123 add_roundkey(RXr, p(CTX,RLOOP,4)); \
124 F(RXl0, RXr0); \
125 F(RXl1, RXr1); \
126 F(RXl2, RXr2); \
127 F(RXl3, RXr3); \
128 \
129 add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
130 F(RXr0, RXl0); \
131 F(RXr1, RXl1); \
132 F(RXr2, RXl2); \
133 F(RXr3, RXl3);
134
135#define round_dec() \
136 add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
137 F(RXl0, RXr0); \
138 F(RXl1, RXr1); \
139 F(RXl2, RXr2); \
140 F(RXl3, RXr3); \
141 \
142 add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
143 F(RXr0, RXl0); \
144 F(RXr1, RXl1); \
145 F(RXr2, RXl2); \
146 F(RXr3, RXl3);
147
148#define init_round_constants() \
149 vpcmpeqd RNOT, RNOT, RNOT; \
150 leaq s0(CTX), RS0; \
151 leaq s1(CTX), RS1; \
152 leaq s2(CTX), RS2; \
153 leaq s3(CTX), RS3; \
154 vpsrld $24, RNOT, RBYTE;
155
156#define transpose_2x2(x0, x1, t0) \
157 vpunpckldq x0, x1, t0; \
158 vpunpckhdq x0, x1, x1; \
159 \
160 vpunpcklqdq t0, x1, x0; \
161 vpunpckhqdq t0, x1, x1;
162
163#define read_block(xl, xr) \
164 vbroadcasti128 .Lbswap32_mask, RT1; \
165 \
166 vpshufb RT1, xl ## 0, xl ## 0; \
167 vpshufb RT1, xr ## 0, xr ## 0; \
168 vpshufb RT1, xl ## 1, xl ## 1; \
169 vpshufb RT1, xr ## 1, xr ## 1; \
170 vpshufb RT1, xl ## 2, xl ## 2; \
171 vpshufb RT1, xr ## 2, xr ## 2; \
172 vpshufb RT1, xl ## 3, xl ## 3; \
173 vpshufb RT1, xr ## 3, xr ## 3; \
174 \
175 transpose_2x2(xl ## 0, xr ## 0, RT0); \
176 transpose_2x2(xl ## 1, xr ## 1, RT0); \
177 transpose_2x2(xl ## 2, xr ## 2, RT0); \
178 transpose_2x2(xl ## 3, xr ## 3, RT0);
179
180#define write_block(xl, xr) \
181 vbroadcasti128 .Lbswap32_mask, RT1; \
182 \
183 transpose_2x2(xl ## 0, xr ## 0, RT0); \
184 transpose_2x2(xl ## 1, xr ## 1, RT0); \
185 transpose_2x2(xl ## 2, xr ## 2, RT0); \
186 transpose_2x2(xl ## 3, xr ## 3, RT0); \
187 \
188 vpshufb RT1, xl ## 0, xl ## 0; \
189 vpshufb RT1, xr ## 0, xr ## 0; \
190 vpshufb RT1, xl ## 1, xl ## 1; \
191 vpshufb RT1, xr ## 1, xr ## 1; \
192 vpshufb RT1, xl ## 2, xl ## 2; \
193 vpshufb RT1, xr ## 2, xr ## 2; \
194 vpshufb RT1, xl ## 3, xl ## 3; \
195 vpshufb RT1, xr ## 3, xr ## 3;
196
197.align 8
198__blowfish_enc_blk32:
199 /* input:
200 * %rdi: ctx, CTX
201 * RXl0..4, RXr0..4: plaintext
202 * output:
203 * RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped)
204 */
205 init_round_constants();
206
207 read_block(RXl, RXr);
208
209 movl $1, RLOOPd;
210 add_roundkey(RXl, p+4*(0)(CTX));
211
212.align 4
213.L__enc_loop:
214 round_enc();
215
216 leal 2(RLOOPd), RLOOPd;
217 cmpl $17, RLOOPd;
218 jne .L__enc_loop;
219
220 add_roundkey(RXr, p+4*(17)(CTX));
221
222 write_block(RXl, RXr);
223
224 ret;
225ENDPROC(__blowfish_enc_blk32)
226
227.align 8
228__blowfish_dec_blk32:
229 /* input:
230 * %rdi: ctx, CTX
231 * RXl0..4, RXr0..4: ciphertext
232 * output:
233 * RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped)
234 */
235 init_round_constants();
236
237 read_block(RXl, RXr);
238
239 movl $14, RLOOPd;
240 add_roundkey(RXl, p+4*(17)(CTX));
241
242.align 4
243.L__dec_loop:
244 round_dec();
245
246 addl $-2, RLOOPd;
247 jns .L__dec_loop;
248
249 add_roundkey(RXr, p+4*(0)(CTX));
250
251 write_block(RXl, RXr);
252
253 ret;
254ENDPROC(__blowfish_dec_blk32)
255
256ENTRY(blowfish_ecb_enc_32way)
257 /* input:
258 * %rdi: ctx, CTX
259 * %rsi: dst
260 * %rdx: src
261 */
262
263 vzeroupper;
264
265 vmovdqu 0*32(%rdx), RXl0;
266 vmovdqu 1*32(%rdx), RXr0;
267 vmovdqu 2*32(%rdx), RXl1;
268 vmovdqu 3*32(%rdx), RXr1;
269 vmovdqu 4*32(%rdx), RXl2;
270 vmovdqu 5*32(%rdx), RXr2;
271 vmovdqu 6*32(%rdx), RXl3;
272 vmovdqu 7*32(%rdx), RXr3;
273
274 call __blowfish_enc_blk32;
275
276 vmovdqu RXr0, 0*32(%rsi);
277 vmovdqu RXl0, 1*32(%rsi);
278 vmovdqu RXr1, 2*32(%rsi);
279 vmovdqu RXl1, 3*32(%rsi);
280 vmovdqu RXr2, 4*32(%rsi);
281 vmovdqu RXl2, 5*32(%rsi);
282 vmovdqu RXr3, 6*32(%rsi);
283 vmovdqu RXl3, 7*32(%rsi);
284
285 vzeroupper;
286
287 ret;
288ENDPROC(blowfish_ecb_enc_32way)
289
290ENTRY(blowfish_ecb_dec_32way)
291 /* input:
292 * %rdi: ctx, CTX
293 * %rsi: dst
294 * %rdx: src
295 */
296
297 vzeroupper;
298
299 vmovdqu 0*32(%rdx), RXl0;
300 vmovdqu 1*32(%rdx), RXr0;
301 vmovdqu 2*32(%rdx), RXl1;
302 vmovdqu 3*32(%rdx), RXr1;
303 vmovdqu 4*32(%rdx), RXl2;
304 vmovdqu 5*32(%rdx), RXr2;
305 vmovdqu 6*32(%rdx), RXl3;
306 vmovdqu 7*32(%rdx), RXr3;
307
308 call __blowfish_dec_blk32;
309
310 vmovdqu RXr0, 0*32(%rsi);
311 vmovdqu RXl0, 1*32(%rsi);
312 vmovdqu RXr1, 2*32(%rsi);
313 vmovdqu RXl1, 3*32(%rsi);
314 vmovdqu RXr2, 4*32(%rsi);
315 vmovdqu RXl2, 5*32(%rsi);
316 vmovdqu RXr3, 6*32(%rsi);
317 vmovdqu RXl3, 7*32(%rsi);
318
319 vzeroupper;
320
321 ret;
322ENDPROC(blowfish_ecb_dec_32way)
323
324ENTRY(blowfish_cbc_dec_32way)
325 /* input:
326 * %rdi: ctx, CTX
327 * %rsi: dst
328 * %rdx: src
329 */
330
331 vzeroupper;
332
333 vmovdqu 0*32(%rdx), RXl0;
334 vmovdqu 1*32(%rdx), RXr0;
335 vmovdqu 2*32(%rdx), RXl1;
336 vmovdqu 3*32(%rdx), RXr1;
337 vmovdqu 4*32(%rdx), RXl2;
338 vmovdqu 5*32(%rdx), RXr2;
339 vmovdqu 6*32(%rdx), RXl3;
340 vmovdqu 7*32(%rdx), RXr3;
341
342 call __blowfish_dec_blk32;
343
344 /* xor with src */
345 vmovq (%rdx), RT0x;
346 vpshufd $0x4f, RT0x, RT0x;
347 vinserti128 $1, 8(%rdx), RT0, RT0;
348 vpxor RT0, RXr0, RXr0;
349 vpxor 0*32+24(%rdx), RXl0, RXl0;
350 vpxor 1*32+24(%rdx), RXr1, RXr1;
351 vpxor 2*32+24(%rdx), RXl1, RXl1;
352 vpxor 3*32+24(%rdx), RXr2, RXr2;
353 vpxor 4*32+24(%rdx), RXl2, RXl2;
354 vpxor 5*32+24(%rdx), RXr3, RXr3;
355 vpxor 6*32+24(%rdx), RXl3, RXl3;
356
357 vmovdqu RXr0, (0*32)(%rsi);
358 vmovdqu RXl0, (1*32)(%rsi);
359 vmovdqu RXr1, (2*32)(%rsi);
360 vmovdqu RXl1, (3*32)(%rsi);
361 vmovdqu RXr2, (4*32)(%rsi);
362 vmovdqu RXl2, (5*32)(%rsi);
363 vmovdqu RXr3, (6*32)(%rsi);
364 vmovdqu RXl3, (7*32)(%rsi);
365
366 vzeroupper;
367
368 ret;
369ENDPROC(blowfish_cbc_dec_32way)
370
371ENTRY(blowfish_ctr_32way)
372 /* input:
373 * %rdi: ctx, CTX
374 * %rsi: dst
375 * %rdx: src
376 * %rcx: iv (big endian, 64bit)
377 */
378
379 vzeroupper;
380
381 vpcmpeqd RT0, RT0, RT0;
382 vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */
383
384 vpcmpeqd RT1x, RT1x, RT1x;
385 vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */
386 vpxor RIDX0, RIDX0, RIDX0;
387 vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */
388
389 vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */
390
391 vpcmpeqd RT1, RT1, RT1;
392 vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */
393 vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */
394
395 vbroadcasti128 .Lbswap_iv_mask, RIDX0;
396 vbroadcasti128 .Lbswap128_mask, RIDX1;
397
398 /* load IV and byteswap */
399 vmovq (%rcx), RT1x;
400 vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */
401 vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */
402
403 /* construct IVs */
404 vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */
405 vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */
406 vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */
407 vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */
408 vpsubq RIDX2, RT1, RT1;
409 vpshufb RIDX1, RT1, RXl1;
410 vpsubq RIDX2, RT1, RT1;
411 vpshufb RIDX1, RT1, RXr1;
412 vpsubq RIDX2, RT1, RT1;
413 vpshufb RIDX1, RT1, RXl2;
414 vpsubq RIDX2, RT1, RT1;
415 vpshufb RIDX1, RT1, RXr2;
416 vpsubq RIDX2, RT1, RT1;
417 vpshufb RIDX1, RT1, RXl3;
418 vpsubq RIDX2, RT1, RT1;
419 vpshufb RIDX1, RT1, RXr3;
420
421 /* store last IV */
422 vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */
423 vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */
424 vmovq RT1x, (%rcx);
425
426 call __blowfish_enc_blk32;
427
428 /* dst = src ^ iv */
429 vpxor 0*32(%rdx), RXr0, RXr0;
430 vpxor 1*32(%rdx), RXl0, RXl0;
431 vpxor 2*32(%rdx), RXr1, RXr1;
432 vpxor 3*32(%rdx), RXl1, RXl1;
433 vpxor 4*32(%rdx), RXr2, RXr2;
434 vpxor 5*32(%rdx), RXl2, RXl2;
435 vpxor 6*32(%rdx), RXr3, RXr3;
436 vpxor 7*32(%rdx), RXl3, RXl3;
437 vmovdqu RXr0, (0*32)(%rsi);
438 vmovdqu RXl0, (1*32)(%rsi);
439 vmovdqu RXr1, (2*32)(%rsi);
440 vmovdqu RXl1, (3*32)(%rsi);
441 vmovdqu RXr2, (4*32)(%rsi);
442 vmovdqu RXl2, (5*32)(%rsi);
443 vmovdqu RXr3, (6*32)(%rsi);
444 vmovdqu RXl3, (7*32)(%rsi);
445
446 vzeroupper;
447
448 ret;
449ENDPROC(blowfish_ctr_32way)
diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c
deleted file mode 100644
index 4417e9aea78d..000000000000
--- a/arch/x86/crypto/blowfish_avx2_glue.c
+++ /dev/null
@@ -1,585 +0,0 @@
1/*
2 * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/types.h>
25#include <linux/crypto.h>
26#include <linux/err.h>
27#include <crypto/algapi.h>
28#include <crypto/blowfish.h>
29#include <crypto/cryptd.h>
30#include <crypto/ctr.h>
31#include <asm/i387.h>
32#include <asm/xcr.h>
33#include <asm/xsave.h>
34#include <asm/crypto/blowfish.h>
35#include <asm/crypto/ablk_helper.h>
36#include <crypto/scatterwalk.h>
37
38#define BF_AVX2_PARALLEL_BLOCKS 32
39
40/* 32-way AVX2 parallel cipher functions */
41asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
42 const u8 *src);
43asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
44 const u8 *src);
45asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
46 const u8 *src);
47asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
48 __be64 *iv);
49
50static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
51{
52 if (fpu_enabled)
53 return true;
54
55 /* FPU is only used when chunk to be processed is large enough, so
56 * do not enable FPU until it is necessary.
57 */
58 if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS)
59 return false;
60
61 kernel_fpu_begin();
62 return true;
63}
64
65static inline void bf_fpu_end(bool fpu_enabled)
66{
67 if (fpu_enabled)
68 kernel_fpu_end();
69}
70
71static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
72 bool enc)
73{
74 bool fpu_enabled = false;
75 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
76 const unsigned int bsize = BF_BLOCK_SIZE;
77 unsigned int nbytes;
78 int err;
79
80 err = blkcipher_walk_virt(desc, walk);
81 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
82
83 while ((nbytes = walk->nbytes)) {
84 u8 *wsrc = walk->src.virt.addr;
85 u8 *wdst = walk->dst.virt.addr;
86
87 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
88
89 /* Process multi-block AVX2 batch */
90 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
91 do {
92 if (enc)
93 blowfish_ecb_enc_32way(ctx, wdst, wsrc);
94 else
95 blowfish_ecb_dec_32way(ctx, wdst, wsrc);
96
97 wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
98 wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
99 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
100 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
101
102 if (nbytes < bsize)
103 goto done;
104 }
105
106 /* Process multi-block batch */
107 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
108 do {
109 if (enc)
110 blowfish_enc_blk_4way(ctx, wdst, wsrc);
111 else
112 blowfish_dec_blk_4way(ctx, wdst, wsrc);
113
114 wsrc += bsize * BF_PARALLEL_BLOCKS;
115 wdst += bsize * BF_PARALLEL_BLOCKS;
116 nbytes -= bsize * BF_PARALLEL_BLOCKS;
117 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
118
119 if (nbytes < bsize)
120 goto done;
121 }
122
123 /* Handle leftovers */
124 do {
125 if (enc)
126 blowfish_enc_blk(ctx, wdst, wsrc);
127 else
128 blowfish_dec_blk(ctx, wdst, wsrc);
129
130 wsrc += bsize;
131 wdst += bsize;
132 nbytes -= bsize;
133 } while (nbytes >= bsize);
134
135done:
136 err = blkcipher_walk_done(desc, walk, nbytes);
137 }
138
139 bf_fpu_end(fpu_enabled);
140 return err;
141}
142
143static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
144 struct scatterlist *src, unsigned int nbytes)
145{
146 struct blkcipher_walk walk;
147
148 blkcipher_walk_init(&walk, dst, src, nbytes);
149 return ecb_crypt(desc, &walk, true);
150}
151
152static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
153 struct scatterlist *src, unsigned int nbytes)
154{
155 struct blkcipher_walk walk;
156
157 blkcipher_walk_init(&walk, dst, src, nbytes);
158 return ecb_crypt(desc, &walk, false);
159}
160
161static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
162 struct blkcipher_walk *walk)
163{
164 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
165 unsigned int bsize = BF_BLOCK_SIZE;
166 unsigned int nbytes = walk->nbytes;
167 u64 *src = (u64 *)walk->src.virt.addr;
168 u64 *dst = (u64 *)walk->dst.virt.addr;
169 u64 *iv = (u64 *)walk->iv;
170
171 do {
172 *dst = *src ^ *iv;
173 blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
174 iv = dst;
175
176 src += 1;
177 dst += 1;
178 nbytes -= bsize;
179 } while (nbytes >= bsize);
180
181 *(u64 *)walk->iv = *iv;
182 return nbytes;
183}
184
185static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
186 struct scatterlist *src, unsigned int nbytes)
187{
188 struct blkcipher_walk walk;
189 int err;
190
191 blkcipher_walk_init(&walk, dst, src, nbytes);
192 err = blkcipher_walk_virt(desc, &walk);
193
194 while ((nbytes = walk.nbytes)) {
195 nbytes = __cbc_encrypt(desc, &walk);
196 err = blkcipher_walk_done(desc, &walk, nbytes);
197 }
198
199 return err;
200}
201
202static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
203 struct blkcipher_walk *walk)
204{
205 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
206 const unsigned int bsize = BF_BLOCK_SIZE;
207 unsigned int nbytes = walk->nbytes;
208 u64 *src = (u64 *)walk->src.virt.addr;
209 u64 *dst = (u64 *)walk->dst.virt.addr;
210 u64 last_iv;
211 int i;
212
213 /* Start of the last block. */
214 src += nbytes / bsize - 1;
215 dst += nbytes / bsize - 1;
216
217 last_iv = *src;
218
219 /* Process multi-block AVX2 batch */
220 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
221 do {
222 nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
223 src -= BF_AVX2_PARALLEL_BLOCKS - 1;
224 dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
225
226 blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
227
228 nbytes -= bsize;
229 if (nbytes < bsize)
230 goto done;
231
232 *dst ^= *(src - 1);
233 src -= 1;
234 dst -= 1;
235 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
236
237 if (nbytes < bsize)
238 goto done;
239 }
240
241 /* Process multi-block batch */
242 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
243 u64 ivs[BF_PARALLEL_BLOCKS - 1];
244
245 do {
246 nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
247 src -= BF_PARALLEL_BLOCKS - 1;
248 dst -= BF_PARALLEL_BLOCKS - 1;
249
250 for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
251 ivs[i] = src[i];
252
253 blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
254
255 for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
256 dst[i + 1] ^= ivs[i];
257
258 nbytes -= bsize;
259 if (nbytes < bsize)
260 goto done;
261
262 *dst ^= *(src - 1);
263 src -= 1;
264 dst -= 1;
265 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
266
267 if (nbytes < bsize)
268 goto done;
269 }
270
271 /* Handle leftovers */
272 for (;;) {
273 blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
274
275 nbytes -= bsize;
276 if (nbytes < bsize)
277 break;
278
279 *dst ^= *(src - 1);
280 src -= 1;
281 dst -= 1;
282 }
283
284done:
285 *dst ^= *(u64 *)walk->iv;
286 *(u64 *)walk->iv = last_iv;
287
288 return nbytes;
289}
290
291static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292 struct scatterlist *src, unsigned int nbytes)
293{
294 bool fpu_enabled = false;
295 struct blkcipher_walk walk;
296 int err;
297
298 blkcipher_walk_init(&walk, dst, src, nbytes);
299 err = blkcipher_walk_virt(desc, &walk);
300 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
301
302 while ((nbytes = walk.nbytes)) {
303 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
304 nbytes = __cbc_decrypt(desc, &walk);
305 err = blkcipher_walk_done(desc, &walk, nbytes);
306 }
307
308 bf_fpu_end(fpu_enabled);
309 return err;
310}
311
312static void ctr_crypt_final(struct blkcipher_desc *desc,
313 struct blkcipher_walk *walk)
314{
315 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
316 u8 *ctrblk = walk->iv;
317 u8 keystream[BF_BLOCK_SIZE];
318 u8 *src = walk->src.virt.addr;
319 u8 *dst = walk->dst.virt.addr;
320 unsigned int nbytes = walk->nbytes;
321
322 blowfish_enc_blk(ctx, keystream, ctrblk);
323 crypto_xor(keystream, src, nbytes);
324 memcpy(dst, keystream, nbytes);
325
326 crypto_inc(ctrblk, BF_BLOCK_SIZE);
327}
328
329static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
330 struct blkcipher_walk *walk)
331{
332 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
333 unsigned int bsize = BF_BLOCK_SIZE;
334 unsigned int nbytes = walk->nbytes;
335 u64 *src = (u64 *)walk->src.virt.addr;
336 u64 *dst = (u64 *)walk->dst.virt.addr;
337 int i;
338
339 /* Process multi-block AVX2 batch */
340 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
341 do {
342 blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
343 (__be64 *)walk->iv);
344
345 src += BF_AVX2_PARALLEL_BLOCKS;
346 dst += BF_AVX2_PARALLEL_BLOCKS;
347 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
348 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
349
350 if (nbytes < bsize)
351 goto done;
352 }
353
354 /* Process four block batch */
355 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
356 __be64 ctrblocks[BF_PARALLEL_BLOCKS];
357 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
358
359 do {
360 /* create ctrblks for parallel encrypt */
361 for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
362 if (dst != src)
363 dst[i] = src[i];
364
365 ctrblocks[i] = cpu_to_be64(ctrblk++);
366 }
367
368 blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
369 (u8 *)ctrblocks);
370
371 src += BF_PARALLEL_BLOCKS;
372 dst += BF_PARALLEL_BLOCKS;
373 nbytes -= bsize * BF_PARALLEL_BLOCKS;
374 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
375
376 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
377
378 if (nbytes < bsize)
379 goto done;
380 }
381
382 /* Handle leftovers */
383 do {
384 u64 ctrblk;
385
386 if (dst != src)
387 *dst = *src;
388
389 ctrblk = *(u64 *)walk->iv;
390 be64_add_cpu((__be64 *)walk->iv, 1);
391
392 blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
393
394 src += 1;
395 dst += 1;
396 } while ((nbytes -= bsize) >= bsize);
397
398done:
399 return nbytes;
400}
401
402static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
403 struct scatterlist *src, unsigned int nbytes)
404{
405 bool fpu_enabled = false;
406 struct blkcipher_walk walk;
407 int err;
408
409 blkcipher_walk_init(&walk, dst, src, nbytes);
410 err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
411 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
412
413 while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
414 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
415 nbytes = __ctr_crypt(desc, &walk);
416 err = blkcipher_walk_done(desc, &walk, nbytes);
417 }
418
419 bf_fpu_end(fpu_enabled);
420
421 if (walk.nbytes) {
422 ctr_crypt_final(desc, &walk);
423 err = blkcipher_walk_done(desc, &walk, 0);
424 }
425
426 return err;
427}
428
429static struct crypto_alg bf_algs[6] = { {
430 .cra_name = "__ecb-blowfish-avx2",
431 .cra_driver_name = "__driver-ecb-blowfish-avx2",
432 .cra_priority = 0,
433 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
434 .cra_blocksize = BF_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct bf_ctx),
436 .cra_alignmask = 0,
437 .cra_type = &crypto_blkcipher_type,
438 .cra_module = THIS_MODULE,
439 .cra_u = {
440 .blkcipher = {
441 .min_keysize = BF_MIN_KEY_SIZE,
442 .max_keysize = BF_MAX_KEY_SIZE,
443 .setkey = blowfish_setkey,
444 .encrypt = ecb_encrypt,
445 .decrypt = ecb_decrypt,
446 },
447 },
448}, {
449 .cra_name = "__cbc-blowfish-avx2",
450 .cra_driver_name = "__driver-cbc-blowfish-avx2",
451 .cra_priority = 0,
452 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
453 .cra_blocksize = BF_BLOCK_SIZE,
454 .cra_ctxsize = sizeof(struct bf_ctx),
455 .cra_alignmask = 0,
456 .cra_type = &crypto_blkcipher_type,
457 .cra_module = THIS_MODULE,
458 .cra_u = {
459 .blkcipher = {
460 .min_keysize = BF_MIN_KEY_SIZE,
461 .max_keysize = BF_MAX_KEY_SIZE,
462 .setkey = blowfish_setkey,
463 .encrypt = cbc_encrypt,
464 .decrypt = cbc_decrypt,
465 },
466 },
467}, {
468 .cra_name = "__ctr-blowfish-avx2",
469 .cra_driver_name = "__driver-ctr-blowfish-avx2",
470 .cra_priority = 0,
471 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
472 .cra_blocksize = 1,
473 .cra_ctxsize = sizeof(struct bf_ctx),
474 .cra_alignmask = 0,
475 .cra_type = &crypto_blkcipher_type,
476 .cra_module = THIS_MODULE,
477 .cra_u = {
478 .blkcipher = {
479 .min_keysize = BF_MIN_KEY_SIZE,
480 .max_keysize = BF_MAX_KEY_SIZE,
481 .ivsize = BF_BLOCK_SIZE,
482 .setkey = blowfish_setkey,
483 .encrypt = ctr_crypt,
484 .decrypt = ctr_crypt,
485 },
486 },
487}, {
488 .cra_name = "ecb(blowfish)",
489 .cra_driver_name = "ecb-blowfish-avx2",
490 .cra_priority = 400,
491 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
492 .cra_blocksize = BF_BLOCK_SIZE,
493 .cra_ctxsize = sizeof(struct async_helper_ctx),
494 .cra_alignmask = 0,
495 .cra_type = &crypto_ablkcipher_type,
496 .cra_module = THIS_MODULE,
497 .cra_init = ablk_init,
498 .cra_exit = ablk_exit,
499 .cra_u = {
500 .ablkcipher = {
501 .min_keysize = BF_MIN_KEY_SIZE,
502 .max_keysize = BF_MAX_KEY_SIZE,
503 .setkey = ablk_set_key,
504 .encrypt = ablk_encrypt,
505 .decrypt = ablk_decrypt,
506 },
507 },
508}, {
509 .cra_name = "cbc(blowfish)",
510 .cra_driver_name = "cbc-blowfish-avx2",
511 .cra_priority = 400,
512 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
513 .cra_blocksize = BF_BLOCK_SIZE,
514 .cra_ctxsize = sizeof(struct async_helper_ctx),
515 .cra_alignmask = 0,
516 .cra_type = &crypto_ablkcipher_type,
517 .cra_module = THIS_MODULE,
518 .cra_init = ablk_init,
519 .cra_exit = ablk_exit,
520 .cra_u = {
521 .ablkcipher = {
522 .min_keysize = BF_MIN_KEY_SIZE,
523 .max_keysize = BF_MAX_KEY_SIZE,
524 .ivsize = BF_BLOCK_SIZE,
525 .setkey = ablk_set_key,
526 .encrypt = __ablk_encrypt,
527 .decrypt = ablk_decrypt,
528 },
529 },
530}, {
531 .cra_name = "ctr(blowfish)",
532 .cra_driver_name = "ctr-blowfish-avx2",
533 .cra_priority = 400,
534 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
535 .cra_blocksize = 1,
536 .cra_ctxsize = sizeof(struct async_helper_ctx),
537 .cra_alignmask = 0,
538 .cra_type = &crypto_ablkcipher_type,
539 .cra_module = THIS_MODULE,
540 .cra_init = ablk_init,
541 .cra_exit = ablk_exit,
542 .cra_u = {
543 .ablkcipher = {
544 .min_keysize = BF_MIN_KEY_SIZE,
545 .max_keysize = BF_MAX_KEY_SIZE,
546 .ivsize = BF_BLOCK_SIZE,
547 .setkey = ablk_set_key,
548 .encrypt = ablk_encrypt,
549 .decrypt = ablk_encrypt,
550 .geniv = "chainiv",
551 },
552 },
553} };
554
555
556static int __init init(void)
557{
558 u64 xcr0;
559
560 if (!cpu_has_avx2 || !cpu_has_osxsave) {
561 pr_info("AVX2 instructions are not detected.\n");
562 return -ENODEV;
563 }
564
565 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
566 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
567 pr_info("AVX detected but unusable.\n");
568 return -ENODEV;
569 }
570
571 return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
572}
573
574static void __exit fini(void)
575{
576 crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
577}
578
579module_init(init);
580module_exit(fini);
581
582MODULE_LICENSE("GPL");
583MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
584MODULE_ALIAS("blowfish");
585MODULE_ALIAS("blowfish-asm");
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 3548d76dbaa9..50ec333b70e6 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Glue Code for assembler optimized version of Blowfish 2 * Glue Code for assembler optimized version of Blowfish
3 * 3 *
4 * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> 4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 * 5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: 6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> 7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -32,24 +32,40 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/types.h> 33#include <linux/types.h>
34#include <crypto/algapi.h> 34#include <crypto/algapi.h>
35#include <asm/crypto/blowfish.h>
36 35
37/* regular block cipher functions */ 36/* regular block cipher functions */
38asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, 37asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
39 bool xor); 38 bool xor);
40EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
41
42asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); 39asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
43EXPORT_SYMBOL_GPL(blowfish_dec_blk);
44 40
45/* 4-way parallel cipher functions */ 41/* 4-way parallel cipher functions */
46asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, 42asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
47 const u8 *src, bool xor); 43 const u8 *src, bool xor);
48EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
49
50asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, 44asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
51 const u8 *src); 45 const u8 *src);
52EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way); 46
47static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
48{
49 __blowfish_enc_blk(ctx, dst, src, false);
50}
51
52static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
53 const u8 *src)
54{
55 __blowfish_enc_blk(ctx, dst, src, true);
56}
57
58static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
59 const u8 *src)
60{
61 __blowfish_enc_blk_4way(ctx, dst, src, false);
62}
63
64static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
65 const u8 *src)
66{
67 __blowfish_enc_blk_4way(ctx, dst, src, true);
68}
53 69
54static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) 70static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
55{ 71{
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 91a1878fcc3e..0e0b8863a34b 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -51,16 +51,6 @@
51#define ymm14_x xmm14 51#define ymm14_x xmm14
52#define ymm15_x xmm15 52#define ymm15_x xmm15
53 53
54/*
55 * AES-NI instructions do not support ymmX registers, so we need splitting and
56 * merging.
57 */
58#define vaesenclast256(zero, yreg, tmp) \
59 vextracti128 $1, yreg, tmp##_x; \
60 vaesenclast zero##_x, yreg##_x, yreg##_x; \
61 vaesenclast zero##_x, tmp##_x, tmp##_x; \
62 vinserti128 $1, tmp##_x, yreg, yreg;
63
64/********************************************************************** 54/**********************************************************************
65 32-way camellia 55 32-way camellia
66 **********************************************************************/ 56 **********************************************************************/
@@ -79,46 +69,70 @@
79 * S-function with AES subbytes \ 69 * S-function with AES subbytes \
80 */ \ 70 */ \
81 vbroadcasti128 .Linv_shift_row, t4; \ 71 vbroadcasti128 .Linv_shift_row, t4; \
82 vpbroadcastb .L0f0f0f0f, t7; \ 72 vpbroadcastd .L0f0f0f0f, t7; \
83 vbroadcasti128 .Lpre_tf_lo_s1, t0; \ 73 vbroadcasti128 .Lpre_tf_lo_s1, t5; \
84 vbroadcasti128 .Lpre_tf_hi_s1, t1; \ 74 vbroadcasti128 .Lpre_tf_hi_s1, t6; \
75 vbroadcasti128 .Lpre_tf_lo_s4, t2; \
76 vbroadcasti128 .Lpre_tf_hi_s4, t3; \
85 \ 77 \
86 /* AES inverse shift rows */ \ 78 /* AES inverse shift rows */ \
87 vpshufb t4, x0, x0; \ 79 vpshufb t4, x0, x0; \
88 vpshufb t4, x7, x7; \ 80 vpshufb t4, x7, x7; \
89 vpshufb t4, x1, x1; \
90 vpshufb t4, x4, x4; \
91 vpshufb t4, x2, x2; \
92 vpshufb t4, x5, x5; \
93 vpshufb t4, x3, x3; \ 81 vpshufb t4, x3, x3; \
94 vpshufb t4, x6, x6; \ 82 vpshufb t4, x6, x6; \
83 vpshufb t4, x2, x2; \
84 vpshufb t4, x5, x5; \
85 vpshufb t4, x1, x1; \
86 vpshufb t4, x4, x4; \
95 \ 87 \
96 /* prefilter sboxes 1, 2 and 3 */ \ 88 /* prefilter sboxes 1, 2 and 3 */ \
97 vbroadcasti128 .Lpre_tf_lo_s4, t2; \
98 vbroadcasti128 .Lpre_tf_hi_s4, t3; \
99 filter_8bit(x0, t0, t1, t7, t6); \
100 filter_8bit(x7, t0, t1, t7, t6); \
101 filter_8bit(x1, t0, t1, t7, t6); \
102 filter_8bit(x4, t0, t1, t7, t6); \
103 filter_8bit(x2, t0, t1, t7, t6); \
104 filter_8bit(x5, t0, t1, t7, t6); \
105 \
106 /* prefilter sbox 4 */ \ 89 /* prefilter sbox 4 */ \
90 filter_8bit(x0, t5, t6, t7, t4); \
91 filter_8bit(x7, t5, t6, t7, t4); \
92 vextracti128 $1, x0, t0##_x; \
93 vextracti128 $1, x7, t1##_x; \
94 filter_8bit(x3, t2, t3, t7, t4); \
95 filter_8bit(x6, t2, t3, t7, t4); \
96 vextracti128 $1, x3, t3##_x; \
97 vextracti128 $1, x6, t2##_x; \
98 filter_8bit(x2, t5, t6, t7, t4); \
99 filter_8bit(x5, t5, t6, t7, t4); \
100 filter_8bit(x1, t5, t6, t7, t4); \
101 filter_8bit(x4, t5, t6, t7, t4); \
102 \
107 vpxor t4##_x, t4##_x, t4##_x; \ 103 vpxor t4##_x, t4##_x, t4##_x; \
108 filter_8bit(x3, t2, t3, t7, t6); \
109 filter_8bit(x6, t2, t3, t7, t6); \
110 \ 104 \
111 /* AES subbytes + AES shift rows */ \ 105 /* AES subbytes + AES shift rows */ \
106 vextracti128 $1, x2, t6##_x; \
107 vextracti128 $1, x5, t5##_x; \
108 vaesenclast t4##_x, x0##_x, x0##_x; \
109 vaesenclast t4##_x, t0##_x, t0##_x; \
110 vinserti128 $1, t0##_x, x0, x0; \
111 vaesenclast t4##_x, x7##_x, x7##_x; \
112 vaesenclast t4##_x, t1##_x, t1##_x; \
113 vinserti128 $1, t1##_x, x7, x7; \
114 vaesenclast t4##_x, x3##_x, x3##_x; \
115 vaesenclast t4##_x, t3##_x, t3##_x; \
116 vinserti128 $1, t3##_x, x3, x3; \
117 vaesenclast t4##_x, x6##_x, x6##_x; \
118 vaesenclast t4##_x, t2##_x, t2##_x; \
119 vinserti128 $1, t2##_x, x6, x6; \
120 vextracti128 $1, x1, t3##_x; \
121 vextracti128 $1, x4, t2##_x; \
112 vbroadcasti128 .Lpost_tf_lo_s1, t0; \ 122 vbroadcasti128 .Lpost_tf_lo_s1, t0; \
113 vbroadcasti128 .Lpost_tf_hi_s1, t1; \ 123 vbroadcasti128 .Lpost_tf_hi_s1, t1; \
114 vaesenclast256(t4, x0, t5); \ 124 vaesenclast t4##_x, x2##_x, x2##_x; \
115 vaesenclast256(t4, x7, t5); \ 125 vaesenclast t4##_x, t6##_x, t6##_x; \
116 vaesenclast256(t4, x1, t5); \ 126 vinserti128 $1, t6##_x, x2, x2; \
117 vaesenclast256(t4, x4, t5); \ 127 vaesenclast t4##_x, x5##_x, x5##_x; \
118 vaesenclast256(t4, x2, t5); \ 128 vaesenclast t4##_x, t5##_x, t5##_x; \
119 vaesenclast256(t4, x5, t5); \ 129 vinserti128 $1, t5##_x, x5, x5; \
120 vaesenclast256(t4, x3, t5); \ 130 vaesenclast t4##_x, x1##_x, x1##_x; \
121 vaesenclast256(t4, x6, t5); \ 131 vaesenclast t4##_x, t3##_x, t3##_x; \
132 vinserti128 $1, t3##_x, x1, x1; \
133 vaesenclast t4##_x, x4##_x, x4##_x; \
134 vaesenclast t4##_x, t2##_x, t2##_x; \
135 vinserti128 $1, t2##_x, x4, x4; \
122 \ 136 \
123 /* postfilter sboxes 1 and 4 */ \ 137 /* postfilter sboxes 1 and 4 */ \
124 vbroadcasti128 .Lpost_tf_lo_s3, t2; \ 138 vbroadcasti128 .Lpost_tf_lo_s3, t2; \
@@ -139,22 +153,12 @@
139 /* postfilter sbox 2 */ \ 153 /* postfilter sbox 2 */ \
140 filter_8bit(x1, t4, t5, t7, t2); \ 154 filter_8bit(x1, t4, t5, t7, t2); \
141 filter_8bit(x4, t4, t5, t7, t2); \ 155 filter_8bit(x4, t4, t5, t7, t2); \
156 vpxor t7, t7, t7; \
142 \ 157 \
143 vpsrldq $1, t0, t1; \ 158 vpsrldq $1, t0, t1; \
144 vpsrldq $2, t0, t2; \ 159 vpsrldq $2, t0, t2; \
160 vpshufb t7, t1, t1; \
145 vpsrldq $3, t0, t3; \ 161 vpsrldq $3, t0, t3; \
146 vpsrldq $4, t0, t4; \
147 vpsrldq $5, t0, t5; \
148 vpsrldq $6, t0, t6; \
149 vpsrldq $7, t0, t7; \
150 vpbroadcastb t0##_x, t0; \
151 vpbroadcastb t1##_x, t1; \
152 vpbroadcastb t2##_x, t2; \
153 vpbroadcastb t3##_x, t3; \
154 vpbroadcastb t4##_x, t4; \
155 vpbroadcastb t6##_x, t6; \
156 vpbroadcastb t5##_x, t5; \
157 vpbroadcastb t7##_x, t7; \
158 \ 162 \
159 /* P-function */ \ 163 /* P-function */ \
160 vpxor x5, x0, x0; \ 164 vpxor x5, x0, x0; \
@@ -162,11 +166,21 @@
162 vpxor x7, x2, x2; \ 166 vpxor x7, x2, x2; \
163 vpxor x4, x3, x3; \ 167 vpxor x4, x3, x3; \
164 \ 168 \
169 vpshufb t7, t2, t2; \
170 vpsrldq $4, t0, t4; \
171 vpshufb t7, t3, t3; \
172 vpsrldq $5, t0, t5; \
173 vpshufb t7, t4, t4; \
174 \
165 vpxor x2, x4, x4; \ 175 vpxor x2, x4, x4; \
166 vpxor x3, x5, x5; \ 176 vpxor x3, x5, x5; \
167 vpxor x0, x6, x6; \ 177 vpxor x0, x6, x6; \
168 vpxor x1, x7, x7; \ 178 vpxor x1, x7, x7; \
169 \ 179 \
180 vpsrldq $6, t0, t6; \
181 vpshufb t7, t5, t5; \
182 vpshufb t7, t6, t6; \
183 \
170 vpxor x7, x0, x0; \ 184 vpxor x7, x0, x0; \
171 vpxor x4, x1, x1; \ 185 vpxor x4, x1, x1; \
172 vpxor x5, x2, x2; \ 186 vpxor x5, x2, x2; \
@@ -179,12 +193,16 @@
179 \ 193 \
180 /* Add key material and result to CD (x becomes new CD) */ \ 194 /* Add key material and result to CD (x becomes new CD) */ \
181 \ 195 \
182 vpxor t7, x0, x0; \
183 vpxor 4 * 32(mem_cd), x0, x0; \
184 \
185 vpxor t6, x1, x1; \ 196 vpxor t6, x1, x1; \
186 vpxor 5 * 32(mem_cd), x1, x1; \ 197 vpxor 5 * 32(mem_cd), x1, x1; \
187 \ 198 \
199 vpsrldq $7, t0, t6; \
200 vpshufb t7, t0, t0; \
201 vpshufb t7, t6, t7; \
202 \
203 vpxor t7, x0, x0; \
204 vpxor 4 * 32(mem_cd), x0, x0; \
205 \
188 vpxor t5, x2, x2; \ 206 vpxor t5, x2, x2; \
189 vpxor 6 * 32(mem_cd), x2, x2; \ 207 vpxor 6 * 32(mem_cd), x2, x2; \
190 \ 208 \
@@ -204,7 +222,7 @@
204 vpxor 3 * 32(mem_cd), x7, x7; 222 vpxor 3 * 32(mem_cd), x7, x7;
205 223
206/* 224/*
207 * Size optimization... with inlined roundsm16 binary would be over 5 times 225 * Size optimization... with inlined roundsm32 binary would be over 5 times
208 * larger and would be only marginally faster. 226 * larger and would be only marginally faster.
209 */ 227 */
210.align 8 228.align 8
@@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
324 */ \ 342 */ \
325 vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ 343 vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
326 vpxor tt0, tt0, tt0; \ 344 vpxor tt0, tt0, tt0; \
327 vpbroadcastb t0##_x, t3; \ 345 vpshufb tt0, t0, t3; \
328 vpsrldq $1, t0, t0; \ 346 vpsrldq $1, t0, t0; \
329 vpbroadcastb t0##_x, t2; \ 347 vpshufb tt0, t0, t2; \
330 vpsrldq $1, t0, t0; \ 348 vpsrldq $1, t0, t0; \
331 vpbroadcastb t0##_x, t1; \ 349 vpshufb tt0, t0, t1; \
332 vpsrldq $1, t0, t0; \ 350 vpsrldq $1, t0, t0; \
333 vpbroadcastb t0##_x, t0; \ 351 vpshufb tt0, t0, t0; \
334 \ 352 \
335 vpand l0, t0, t0; \ 353 vpand l0, t0, t0; \
336 vpand l1, t1, t1; \ 354 vpand l1, t1, t1; \
@@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
340 rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ 358 rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
341 \ 359 \
342 vpxor l4, t0, l4; \ 360 vpxor l4, t0, l4; \
361 vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
343 vmovdqu l4, 4 * 32(l); \ 362 vmovdqu l4, 4 * 32(l); \
344 vpxor l5, t1, l5; \ 363 vpxor l5, t1, l5; \
345 vmovdqu l5, 5 * 32(l); \ 364 vmovdqu l5, 5 * 32(l); \
@@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
354 * rl ^= t2; \ 373 * rl ^= t2; \
355 */ \ 374 */ \
356 \ 375 \
357 vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ 376 vpshufb tt0, t0, t3; \
358 vpbroadcastb t0##_x, t3; \
359 vpsrldq $1, t0, t0; \ 377 vpsrldq $1, t0, t0; \
360 vpbroadcastb t0##_x, t2; \ 378 vpshufb tt0, t0, t2; \
361 vpsrldq $1, t0, t0; \ 379 vpsrldq $1, t0, t0; \
362 vpbroadcastb t0##_x, t1; \ 380 vpshufb tt0, t0, t1; \
363 vpsrldq $1, t0, t0; \ 381 vpsrldq $1, t0, t0; \
364 vpbroadcastb t0##_x, t0; \ 382 vpshufb tt0, t0, t0; \
365 \ 383 \
366 vpor 4 * 32(r), t0, t0; \ 384 vpor 4 * 32(r), t0, t0; \
367 vpor 5 * 32(r), t1, t1; \ 385 vpor 5 * 32(r), t1, t1; \
@@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
373 vpxor 2 * 32(r), t2, t2; \ 391 vpxor 2 * 32(r), t2, t2; \
374 vpxor 3 * 32(r), t3, t3; \ 392 vpxor 3 * 32(r), t3, t3; \
375 vmovdqu t0, 0 * 32(r); \ 393 vmovdqu t0, 0 * 32(r); \
394 vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
376 vmovdqu t1, 1 * 32(r); \ 395 vmovdqu t1, 1 * 32(r); \
377 vmovdqu t2, 2 * 32(r); \ 396 vmovdqu t2, 2 * 32(r); \
378 vmovdqu t3, 3 * 32(r); \ 397 vmovdqu t3, 3 * 32(r); \
@@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
382 * t2 &= rl; \ 401 * t2 &= rl; \
383 * rr ^= rol32(t2, 1); \ 402 * rr ^= rol32(t2, 1); \
384 */ \ 403 */ \
385 vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ 404 vpshufb tt0, t0, t3; \
386 vpbroadcastb t0##_x, t3; \
387 vpsrldq $1, t0, t0; \ 405 vpsrldq $1, t0, t0; \
388 vpbroadcastb t0##_x, t2; \ 406 vpshufb tt0, t0, t2; \
389 vpsrldq $1, t0, t0; \ 407 vpsrldq $1, t0, t0; \
390 vpbroadcastb t0##_x, t1; \ 408 vpshufb tt0, t0, t1; \
391 vpsrldq $1, t0, t0; \ 409 vpsrldq $1, t0, t0; \
392 vpbroadcastb t0##_x, t0; \ 410 vpshufb tt0, t0, t0; \
393 \ 411 \
394 vpand 0 * 32(r), t0, t0; \ 412 vpand 0 * 32(r), t0, t0; \
395 vpand 1 * 32(r), t1, t1; \ 413 vpand 1 * 32(r), t1, t1; \
@@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
403 vpxor 6 * 32(r), t2, t2; \ 421 vpxor 6 * 32(r), t2, t2; \
404 vpxor 7 * 32(r), t3, t3; \ 422 vpxor 7 * 32(r), t3, t3; \
405 vmovdqu t0, 4 * 32(r); \ 423 vmovdqu t0, 4 * 32(r); \
424 vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
406 vmovdqu t1, 5 * 32(r); \ 425 vmovdqu t1, 5 * 32(r); \
407 vmovdqu t2, 6 * 32(r); \ 426 vmovdqu t2, 6 * 32(r); \
408 vmovdqu t3, 7 * 32(r); \ 427 vmovdqu t3, 7 * 32(r); \
@@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
413 * ll ^= t0; \ 432 * ll ^= t0; \
414 */ \ 433 */ \
415 \ 434 \
416 vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ 435 vpshufb tt0, t0, t3; \
417 vpbroadcastb t0##_x, t3; \
418 vpsrldq $1, t0, t0; \ 436 vpsrldq $1, t0, t0; \
419 vpbroadcastb t0##_x, t2; \ 437 vpshufb tt0, t0, t2; \
420 vpsrldq $1, t0, t0; \ 438 vpsrldq $1, t0, t0; \
421 vpbroadcastb t0##_x, t1; \ 439 vpshufb tt0, t0, t1; \
422 vpsrldq $1, t0, t0; \ 440 vpsrldq $1, t0, t0; \
423 vpbroadcastb t0##_x, t0; \ 441 vpshufb tt0, t0, t0; \
424 \ 442 \
425 vpor l4, t0, t0; \ 443 vpor l4, t0, t0; \
426 vpor l5, t1, t1; \ 444 vpor l5, t1, t1; \
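The rewritten rounds above hand-schedule the ymm split and merge because AES-NI only operates on xmm registers: each 256-bit value is vextracti128'd, both halves go through vaesenclast with a zero key, and the high half is vinserti128'd back, with unrelated work interleaved to hide latency. The same split/encrypt/merge step written as C intrinsics, purely for illustration (this is not the kernel code; build with -mavx2 -maes):

#include <immintrin.h>

/* Apply AESENCLAST with a zero round key to both 128-bit lanes of a
 * 256-bit vector: extract the high lane, run both halves through the
 * xmm-only AES-NI instruction, then merge the high lane back in. */
static __m256i aesenclast256_zero(__m256i y)
{
	__m128i zero = _mm_setzero_si128();
	__m128i lo = _mm256_castsi256_si128(y);
	__m128i hi = _mm256_extracti128_si256(y, 1);

	lo = _mm_aesenclast_si128(lo, zero);
	hi = _mm_aesenclast_si128(hi, zero);

	return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
}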
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 597d4da69656..50226c4b86ed 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
187 return 0; 187 return 0;
188} 188}
189 189
190static struct shash_alg alg = { 190static int sha224_ssse3_init(struct shash_desc *desc)
191{
192 struct sha256_state *sctx = shash_desc_ctx(desc);
193
194 sctx->state[0] = SHA224_H0;
195 sctx->state[1] = SHA224_H1;
196 sctx->state[2] = SHA224_H2;
197 sctx->state[3] = SHA224_H3;
198 sctx->state[4] = SHA224_H4;
199 sctx->state[5] = SHA224_H5;
200 sctx->state[6] = SHA224_H6;
201 sctx->state[7] = SHA224_H7;
202 sctx->count = 0;
203
204 return 0;
205}
206
207static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
208{
209 u8 D[SHA256_DIGEST_SIZE];
210
211 sha256_ssse3_final(desc, D);
212
213 memcpy(hash, D, SHA224_DIGEST_SIZE);
214 memset(D, 0, SHA256_DIGEST_SIZE);
215
216 return 0;
217}
218
219static struct shash_alg algs[] = { {
191 .digestsize = SHA256_DIGEST_SIZE, 220 .digestsize = SHA256_DIGEST_SIZE,
192 .init = sha256_ssse3_init, 221 .init = sha256_ssse3_init,
193 .update = sha256_ssse3_update, 222 .update = sha256_ssse3_update,
@@ -204,7 +233,24 @@ static struct shash_alg alg = {
204 .cra_blocksize = SHA256_BLOCK_SIZE, 233 .cra_blocksize = SHA256_BLOCK_SIZE,
205 .cra_module = THIS_MODULE, 234 .cra_module = THIS_MODULE,
206 } 235 }
207}; 236}, {
237 .digestsize = SHA224_DIGEST_SIZE,
238 .init = sha224_ssse3_init,
239 .update = sha256_ssse3_update,
240 .final = sha224_ssse3_final,
241 .export = sha256_ssse3_export,
242 .import = sha256_ssse3_import,
243 .descsize = sizeof(struct sha256_state),
244 .statesize = sizeof(struct sha256_state),
245 .base = {
246 .cra_name = "sha224",
247 .cra_driver_name = "sha224-ssse3",
248 .cra_priority = 150,
249 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
250 .cra_blocksize = SHA224_BLOCK_SIZE,
251 .cra_module = THIS_MODULE,
252 }
253} };
208 254
209#ifdef CONFIG_AS_AVX 255#ifdef CONFIG_AS_AVX
210static bool __init avx_usable(void) 256static bool __init avx_usable(void)
@@ -227,7 +273,7 @@ static bool __init avx_usable(void)
227 273
228static int __init sha256_ssse3_mod_init(void) 274static int __init sha256_ssse3_mod_init(void)
229{ 275{
230 /* test for SSE3 first */ 276 /* test for SSSE3 first */
231 if (cpu_has_ssse3) 277 if (cpu_has_ssse3)
232 sha256_transform_asm = sha256_transform_ssse3; 278 sha256_transform_asm = sha256_transform_ssse3;
233 279
@@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void)
254 else 300 else
255#endif 301#endif
256 pr_info("Using SSSE3 optimized SHA-256 implementation\n"); 302 pr_info("Using SSSE3 optimized SHA-256 implementation\n");
257 return crypto_register_shash(&alg); 303 return crypto_register_shashes(algs, ARRAY_SIZE(algs));
258 } 304 }
259 pr_info("Neither AVX nor SSSE3 is available/usable.\n"); 305 pr_info("Neither AVX nor SSSE3 is available/usable.\n");
260 306
@@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void)
263 309
264static void __exit sha256_ssse3_mod_fini(void) 310static void __exit sha256_ssse3_mod_fini(void)
265{ 311{
266 crypto_unregister_shash(&alg); 312 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
267} 313}
268 314
269module_init(sha256_ssse3_mod_init); 315module_init(sha256_ssse3_mod_init);
@@ -273,3 +319,4 @@ MODULE_LICENSE("GPL");
273MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); 319MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
274 320
275MODULE_ALIAS("sha256"); 321MODULE_ALIAS("sha256");
322MODULE_ALIAS("sha384");
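With the second array entry above, SHA-224 becomes just another shash: callers request it by name and the crypto API picks the highest-priority registered implementation, so sha224-ssse3 wins over the generic code when this module is loaded. A hedged kernel-side usage sketch for kernels of this vintage (error handling trimmed; not part of the patch):

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

static int sha224_digest_example(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int ret;

	tfm = crypto_alloc_shash("sha224", 0, 0);	/* picks sha224-ssse3 if available */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;
	desc->flags = 0;

	ret = crypto_shash_digest(desc, data, len, out);	/* out: 28 bytes */

	kfree(desc);
	crypto_free_shash(tfm);
	return ret;
}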
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 6cbd8df348d2..f30cd10293f0 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
194 return 0; 194 return 0;
195} 195}
196 196
197static struct shash_alg alg = { 197static int sha384_ssse3_init(struct shash_desc *desc)
198{
199 struct sha512_state *sctx = shash_desc_ctx(desc);
200
201 sctx->state[0] = SHA384_H0;
202 sctx->state[1] = SHA384_H1;
203 sctx->state[2] = SHA384_H2;
204 sctx->state[3] = SHA384_H3;
205 sctx->state[4] = SHA384_H4;
206 sctx->state[5] = SHA384_H5;
207 sctx->state[6] = SHA384_H6;
208 sctx->state[7] = SHA384_H7;
209
210 sctx->count[0] = sctx->count[1] = 0;
211
212 return 0;
213}
214
215static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
216{
217 u8 D[SHA512_DIGEST_SIZE];
218
219 sha512_ssse3_final(desc, D);
220
221 memcpy(hash, D, SHA384_DIGEST_SIZE);
222 memset(D, 0, SHA512_DIGEST_SIZE);
223
224 return 0;
225}
226
227static struct shash_alg algs[] = { {
198 .digestsize = SHA512_DIGEST_SIZE, 228 .digestsize = SHA512_DIGEST_SIZE,
199 .init = sha512_ssse3_init, 229 .init = sha512_ssse3_init,
200 .update = sha512_ssse3_update, 230 .update = sha512_ssse3_update,
@@ -211,7 +241,24 @@ static struct shash_alg alg = {
211 .cra_blocksize = SHA512_BLOCK_SIZE, 241 .cra_blocksize = SHA512_BLOCK_SIZE,
212 .cra_module = THIS_MODULE, 242 .cra_module = THIS_MODULE,
213 } 243 }
214}; 244}, {
245 .digestsize = SHA384_DIGEST_SIZE,
246 .init = sha384_ssse3_init,
247 .update = sha512_ssse3_update,
248 .final = sha384_ssse3_final,
249 .export = sha512_ssse3_export,
250 .import = sha512_ssse3_import,
251 .descsize = sizeof(struct sha512_state),
252 .statesize = sizeof(struct sha512_state),
253 .base = {
254 .cra_name = "sha384",
255 .cra_driver_name = "sha384-ssse3",
256 .cra_priority = 150,
257 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
258 .cra_blocksize = SHA384_BLOCK_SIZE,
259 .cra_module = THIS_MODULE,
260 }
261} };
215 262
216#ifdef CONFIG_AS_AVX 263#ifdef CONFIG_AS_AVX
217static bool __init avx_usable(void) 264static bool __init avx_usable(void)
@@ -234,7 +281,7 @@ static bool __init avx_usable(void)
234 281
235static int __init sha512_ssse3_mod_init(void) 282static int __init sha512_ssse3_mod_init(void)
236{ 283{
237 /* test for SSE3 first */ 284 /* test for SSSE3 first */
238 if (cpu_has_ssse3) 285 if (cpu_has_ssse3)
239 sha512_transform_asm = sha512_transform_ssse3; 286 sha512_transform_asm = sha512_transform_ssse3;
240 287
@@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void)
261 else 308 else
262#endif 309#endif
263 pr_info("Using SSSE3 optimized SHA-512 implementation\n"); 310 pr_info("Using SSSE3 optimized SHA-512 implementation\n");
264 return crypto_register_shash(&alg); 311 return crypto_register_shashes(algs, ARRAY_SIZE(algs));
265 } 312 }
266 pr_info("Neither AVX nor SSSE3 is available/usable.\n"); 313 pr_info("Neither AVX nor SSSE3 is available/usable.\n");
267 314
@@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void)
270 317
271static void __exit sha512_ssse3_mod_fini(void) 318static void __exit sha512_ssse3_mod_fini(void)
272{ 319{
273 crypto_unregister_shash(&alg); 320 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
274} 321}
275 322
276module_init(sha512_ssse3_mod_init); 323module_init(sha512_ssse3_mod_init);
@@ -280,3 +327,4 @@ MODULE_LICENSE("GPL");
280MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); 327MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
281 328
282MODULE_ALIAS("sha512"); 329MODULE_ALIAS("sha512");
330MODULE_ALIAS("sha384");
diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S
deleted file mode 100644
index e1a83b9cd389..000000000000
--- a/arch/x86/crypto/twofish-avx2-asm_64.S
+++ /dev/null
@@ -1,600 +0,0 @@
1/*
2 * x86_64/AVX2 assembler optimized version of Twofish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/linkage.h>
14#include "glue_helper-asm-avx2.S"
15
16.file "twofish-avx2-asm_64.S"
17
18.data
19.align 16
20
21.Lvpshufb_mask0:
22.long 0x80808000
23.long 0x80808004
24.long 0x80808008
25.long 0x8080800c
26
27.Lbswap128_mask:
28 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
29.Lxts_gf128mul_and_shl1_mask_0:
30 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
31.Lxts_gf128mul_and_shl1_mask_1:
32 .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
33
34.text
35
36/* structure of crypto context */
37#define s0 0
38#define s1 1024
39#define s2 2048
40#define s3 3072
41#define w 4096
42#define k 4128
43
44/* register macros */
45#define CTX %rdi
46
47#define RS0 CTX
48#define RS1 %r8
49#define RS2 %r9
50#define RS3 %r10
51#define RK %r11
52#define RW %rax
53#define RROUND %r12
54#define RROUNDd %r12d
55
56#define RA0 %ymm8
57#define RB0 %ymm9
58#define RC0 %ymm10
59#define RD0 %ymm11
60#define RA1 %ymm12
61#define RB1 %ymm13
62#define RC1 %ymm14
63#define RD1 %ymm15
64
65/* temp regs */
66#define RX0 %ymm0
67#define RY0 %ymm1
68#define RX1 %ymm2
69#define RY1 %ymm3
70#define RT0 %ymm4
71#define RIDX %ymm5
72
73#define RX0x %xmm0
74#define RY0x %xmm1
75#define RX1x %xmm2
76#define RY1x %xmm3
77#define RT0x %xmm4
78
79/* vpgatherdd mask and '-1' */
80#define RNOT %ymm6
81
82/* byte mask, (-1 >> 24) */
83#define RBYTE %ymm7
84
85/**********************************************************************
86 16-way AVX2 twofish
87 **********************************************************************/
88#define init_round_constants() \
89 vpcmpeqd RNOT, RNOT, RNOT; \
90 vpsrld $24, RNOT, RBYTE; \
91 leaq k(CTX), RK; \
92 leaq w(CTX), RW; \
93 leaq s1(CTX), RS1; \
94 leaq s2(CTX), RS2; \
95 leaq s3(CTX), RS3; \
96
97#define g16(ab, rs0, rs1, rs2, rs3, xy) \
98 vpand RBYTE, ab ## 0, RIDX; \
99 vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \
100 vpcmpeqd RNOT, RNOT, RNOT; \
101 \
102 vpand RBYTE, ab ## 1, RIDX; \
103 vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \
104 vpcmpeqd RNOT, RNOT, RNOT; \
105 \
106 vpsrld $8, ab ## 0, RIDX; \
107 vpand RBYTE, RIDX, RIDX; \
108 vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
109 vpcmpeqd RNOT, RNOT, RNOT; \
110 vpxor RT0, xy ## 0, xy ## 0; \
111 \
112 vpsrld $8, ab ## 1, RIDX; \
113 vpand RBYTE, RIDX, RIDX; \
114 vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
115 vpcmpeqd RNOT, RNOT, RNOT; \
116 vpxor RT0, xy ## 1, xy ## 1; \
117 \
118 vpsrld $16, ab ## 0, RIDX; \
119 vpand RBYTE, RIDX, RIDX; \
120 vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
121 vpcmpeqd RNOT, RNOT, RNOT; \
122 vpxor RT0, xy ## 0, xy ## 0; \
123 \
124 vpsrld $16, ab ## 1, RIDX; \
125 vpand RBYTE, RIDX, RIDX; \
126 vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
127 vpcmpeqd RNOT, RNOT, RNOT; \
128 vpxor RT0, xy ## 1, xy ## 1; \
129 \
130 vpsrld $24, ab ## 0, RIDX; \
131 vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
132 vpcmpeqd RNOT, RNOT, RNOT; \
133 vpxor RT0, xy ## 0, xy ## 0; \
134 \
135 vpsrld $24, ab ## 1, RIDX; \
136 vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
137 vpcmpeqd RNOT, RNOT, RNOT; \
138 vpxor RT0, xy ## 1, xy ## 1;
139
140#define g1_16(a, x) \
141 g16(a, RS0, RS1, RS2, RS3, x);
142
143#define g2_16(b, y) \
144 g16(b, RS1, RS2, RS3, RS0, y);
145
146#define encrypt_round_end16(a, b, c, d, nk) \
147 vpaddd RY0, RX0, RX0; \
148 vpaddd RX0, RY0, RY0; \
149 vpbroadcastd nk(RK,RROUND,8), RT0; \
150 vpaddd RT0, RX0, RX0; \
151 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
152 vpaddd RT0, RY0, RY0; \
153 \
154 vpxor RY0, d ## 0, d ## 0; \
155 \
156 vpxor RX0, c ## 0, c ## 0; \
157 vpsrld $1, c ## 0, RT0; \
158 vpslld $31, c ## 0, c ## 0; \
159 vpor RT0, c ## 0, c ## 0; \
160 \
161 vpaddd RY1, RX1, RX1; \
162 vpaddd RX1, RY1, RY1; \
163 vpbroadcastd nk(RK,RROUND,8), RT0; \
164 vpaddd RT0, RX1, RX1; \
165 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
166 vpaddd RT0, RY1, RY1; \
167 \
168 vpxor RY1, d ## 1, d ## 1; \
169 \
170 vpxor RX1, c ## 1, c ## 1; \
171 vpsrld $1, c ## 1, RT0; \
172 vpslld $31, c ## 1, c ## 1; \
173 vpor RT0, c ## 1, c ## 1; \
174
175#define encrypt_round16(a, b, c, d, nk) \
176 g2_16(b, RY); \
177 \
178 vpslld $1, b ## 0, RT0; \
179 vpsrld $31, b ## 0, b ## 0; \
180 vpor RT0, b ## 0, b ## 0; \
181 \
182 vpslld $1, b ## 1, RT0; \
183 vpsrld $31, b ## 1, b ## 1; \
184 vpor RT0, b ## 1, b ## 1; \
185 \
186 g1_16(a, RX); \
187 \
188 encrypt_round_end16(a, b, c, d, nk);
189
190#define encrypt_round_first16(a, b, c, d, nk) \
191 vpslld $1, d ## 0, RT0; \
192 vpsrld $31, d ## 0, d ## 0; \
193 vpor RT0, d ## 0, d ## 0; \
194 \
195 vpslld $1, d ## 1, RT0; \
196 vpsrld $31, d ## 1, d ## 1; \
197 vpor RT0, d ## 1, d ## 1; \
198 \
199 encrypt_round16(a, b, c, d, nk);
200
201#define encrypt_round_last16(a, b, c, d, nk) \
202 g2_16(b, RY); \
203 \
204 g1_16(a, RX); \
205 \
206 encrypt_round_end16(a, b, c, d, nk);
207
208#define decrypt_round_end16(a, b, c, d, nk) \
209 vpaddd RY0, RX0, RX0; \
210 vpaddd RX0, RY0, RY0; \
211 vpbroadcastd nk(RK,RROUND,8), RT0; \
212 vpaddd RT0, RX0, RX0; \
213 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
214 vpaddd RT0, RY0, RY0; \
215 \
216 vpxor RX0, c ## 0, c ## 0; \
217 \
218 vpxor RY0, d ## 0, d ## 0; \
219 vpsrld $1, d ## 0, RT0; \
220 vpslld $31, d ## 0, d ## 0; \
221 vpor RT0, d ## 0, d ## 0; \
222 \
223 vpaddd RY1, RX1, RX1; \
224 vpaddd RX1, RY1, RY1; \
225 vpbroadcastd nk(RK,RROUND,8), RT0; \
226 vpaddd RT0, RX1, RX1; \
227 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
228 vpaddd RT0, RY1, RY1; \
229 \
230 vpxor RX1, c ## 1, c ## 1; \
231 \
232 vpxor RY1, d ## 1, d ## 1; \
233 vpsrld $1, d ## 1, RT0; \
234 vpslld $31, d ## 1, d ## 1; \
235 vpor RT0, d ## 1, d ## 1;
236
237#define decrypt_round16(a, b, c, d, nk) \
238 g1_16(a, RX); \
239 \
240 vpslld $1, a ## 0, RT0; \
241 vpsrld $31, a ## 0, a ## 0; \
242 vpor RT0, a ## 0, a ## 0; \
243 \
244 vpslld $1, a ## 1, RT0; \
245 vpsrld $31, a ## 1, a ## 1; \
246 vpor RT0, a ## 1, a ## 1; \
247 \
248 g2_16(b, RY); \
249 \
250 decrypt_round_end16(a, b, c, d, nk);
251
252#define decrypt_round_first16(a, b, c, d, nk) \
253 vpslld $1, c ## 0, RT0; \
254 vpsrld $31, c ## 0, c ## 0; \
255 vpor RT0, c ## 0, c ## 0; \
256 \
257 vpslld $1, c ## 1, RT0; \
258 vpsrld $31, c ## 1, c ## 1; \
259 vpor RT0, c ## 1, c ## 1; \
260 \
261 decrypt_round16(a, b, c, d, nk)
262
263#define decrypt_round_last16(a, b, c, d, nk) \
264 g1_16(a, RX); \
265 \
266 g2_16(b, RY); \
267 \
268 decrypt_round_end16(a, b, c, d, nk);
269
270#define encrypt_cycle16() \
271 encrypt_round16(RA, RB, RC, RD, 0); \
272 encrypt_round16(RC, RD, RA, RB, 8);
273
274#define encrypt_cycle_first16() \
275 encrypt_round_first16(RA, RB, RC, RD, 0); \
276 encrypt_round16(RC, RD, RA, RB, 8);
277
278#define encrypt_cycle_last16() \
279 encrypt_round16(RA, RB, RC, RD, 0); \
280 encrypt_round_last16(RC, RD, RA, RB, 8);
281
282#define decrypt_cycle16(n) \
283 decrypt_round16(RC, RD, RA, RB, 8); \
284 decrypt_round16(RA, RB, RC, RD, 0);
285
286#define decrypt_cycle_first16(n) \
287 decrypt_round_first16(RC, RD, RA, RB, 8); \
288 decrypt_round16(RA, RB, RC, RD, 0);
289
290#define decrypt_cycle_last16(n) \
291 decrypt_round16(RC, RD, RA, RB, 8); \
292 decrypt_round_last16(RA, RB, RC, RD, 0);
293
294#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
295 vpunpckhdq x1, x0, t2; \
296 vpunpckldq x1, x0, x0; \
297 \
298 vpunpckldq x3, x2, t1; \
299 vpunpckhdq x3, x2, x2; \
300 \
301 vpunpckhqdq t1, x0, x1; \
302 vpunpcklqdq t1, x0, x0; \
303 \
304 vpunpckhqdq x2, t2, x3; \
305 vpunpcklqdq x2, t2, x2;
306
307#define read_blocks8(offs,a,b,c,d) \
308 transpose_4x4(a, b, c, d, RX0, RY0);
309
310#define write_blocks8(offs,a,b,c,d) \
311 transpose_4x4(a, b, c, d, RX0, RY0);
312
313#define inpack_enc8(a,b,c,d) \
314 vpbroadcastd 4*0(RW), RT0; \
315 vpxor RT0, a, a; \
316 \
317 vpbroadcastd 4*1(RW), RT0; \
318 vpxor RT0, b, b; \
319 \
320 vpbroadcastd 4*2(RW), RT0; \
321 vpxor RT0, c, c; \
322 \
323 vpbroadcastd 4*3(RW), RT0; \
324 vpxor RT0, d, d;
325
326#define outunpack_enc8(a,b,c,d) \
327 vpbroadcastd 4*4(RW), RX0; \
328 vpbroadcastd 4*5(RW), RY0; \
329 vpxor RX0, c, RX0; \
330 vpxor RY0, d, RY0; \
331 \
332 vpbroadcastd 4*6(RW), RT0; \
333 vpxor RT0, a, c; \
334 vpbroadcastd 4*7(RW), RT0; \
335 vpxor RT0, b, d; \
336 \
337 vmovdqa RX0, a; \
338 vmovdqa RY0, b;
339
340#define inpack_dec8(a,b,c,d) \
341 vpbroadcastd 4*4(RW), RX0; \
342 vpbroadcastd 4*5(RW), RY0; \
343 vpxor RX0, a, RX0; \
344 vpxor RY0, b, RY0; \
345 \
346 vpbroadcastd 4*6(RW), RT0; \
347 vpxor RT0, c, a; \
348 vpbroadcastd 4*7(RW), RT0; \
349 vpxor RT0, d, b; \
350 \
351 vmovdqa RX0, c; \
352 vmovdqa RY0, d;
353
354#define outunpack_dec8(a,b,c,d) \
355 vpbroadcastd 4*0(RW), RT0; \
356 vpxor RT0, a, a; \
357 \
358 vpbroadcastd 4*1(RW), RT0; \
359 vpxor RT0, b, b; \
360 \
361 vpbroadcastd 4*2(RW), RT0; \
362 vpxor RT0, c, c; \
363 \
364 vpbroadcastd 4*3(RW), RT0; \
365 vpxor RT0, d, d;
366
367#define read_blocks16(a,b,c,d) \
368 read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
369 read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
370
371#define write_blocks16(a,b,c,d) \
372 write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
373 write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
374
375#define xor_blocks16(a,b,c,d) \
376 xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
377 xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
378
379#define inpack_enc16(a,b,c,d) \
380 inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
381 inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
382
383#define outunpack_enc16(a,b,c,d) \
384 outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
385 outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
386
387#define inpack_dec16(a,b,c,d) \
388 inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
389 inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
390
391#define outunpack_dec16(a,b,c,d) \
392 outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
393 outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
394
395.align 8
396__twofish_enc_blk16:
397 /* input:
398 * %rdi: ctx, CTX
399 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
400 * output:
401 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
402 */
403 init_round_constants();
404
405 read_blocks16(RA, RB, RC, RD);
406 inpack_enc16(RA, RB, RC, RD);
407
408 xorl RROUNDd, RROUNDd;
409 encrypt_cycle_first16();
410 movl $2, RROUNDd;
411
412.align 4
413.L__enc_loop:
414 encrypt_cycle16();
415
416 addl $2, RROUNDd;
417 cmpl $14, RROUNDd;
418 jne .L__enc_loop;
419
420 encrypt_cycle_last16();
421
422 outunpack_enc16(RA, RB, RC, RD);
423 write_blocks16(RA, RB, RC, RD);
424
425 ret;
426ENDPROC(__twofish_enc_blk16)
427
428.align 8
429__twofish_dec_blk16:
430 /* input:
431 * %rdi: ctx, CTX
432 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
433 * output:
434 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
435 */
436 init_round_constants();
437
438 read_blocks16(RA, RB, RC, RD);
439 inpack_dec16(RA, RB, RC, RD);
440
441 movl $14, RROUNDd;
442 decrypt_cycle_first16();
443 movl $12, RROUNDd;
444
445.align 4
446.L__dec_loop:
447 decrypt_cycle16();
448
449 addl $-2, RROUNDd;
450 jnz .L__dec_loop;
451
452 decrypt_cycle_last16();
453
454 outunpack_dec16(RA, RB, RC, RD);
455 write_blocks16(RA, RB, RC, RD);
456
457 ret;
458ENDPROC(__twofish_dec_blk16)
459
460ENTRY(twofish_ecb_enc_16way)
461 /* input:
462 * %rdi: ctx, CTX
463 * %rsi: dst
464 * %rdx: src
465 */
466
467 vzeroupper;
468 pushq %r12;
469
470 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
471
472 call __twofish_enc_blk16;
473
474 store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
475
476 popq %r12;
477 vzeroupper;
478
479 ret;
480ENDPROC(twofish_ecb_enc_16way)
481
482ENTRY(twofish_ecb_dec_16way)
483 /* input:
484 * %rdi: ctx, CTX
485 * %rsi: dst
486 * %rdx: src
487 */
488
489 vzeroupper;
490 pushq %r12;
491
492 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
493
494 call __twofish_dec_blk16;
495
496 store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
497
498 popq %r12;
499 vzeroupper;
500
501 ret;
502ENDPROC(twofish_ecb_dec_16way)
503
504ENTRY(twofish_cbc_dec_16way)
505 /* input:
506 * %rdi: ctx, CTX
507 * %rsi: dst
508 * %rdx: src
509 */
510
511 vzeroupper;
512 pushq %r12;
513
514 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
515
516 call __twofish_dec_blk16;
517
518 store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1,
519 RX0);
520
521 popq %r12;
522 vzeroupper;
523
524 ret;
525ENDPROC(twofish_cbc_dec_16way)
526
527ENTRY(twofish_ctr_16way)
528 /* input:
529 * %rdi: ctx, CTX
530 * %rsi: dst (16 blocks)
531 * %rdx: src (16 blocks)
532 * %rcx: iv (little endian, 128bit)
533 */
534
535 vzeroupper;
536 pushq %r12;
537
538 load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
539 RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
540 RBYTE);
541
542 call __twofish_enc_blk16;
543
544 store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
545
546 popq %r12;
547 vzeroupper;
548
549 ret;
550ENDPROC(twofish_ctr_16way)
551
552.align 8
553twofish_xts_crypt_16way:
554 /* input:
555 * %rdi: ctx, CTX
556 * %rsi: dst (16 blocks)
557 * %rdx: src (16 blocks)
558 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
559 * %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16
560 */
561
562 vzeroupper;
563 pushq %r12;
564
565 load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
566 RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
567 .Lxts_gf128mul_and_shl1_mask_0,
568 .Lxts_gf128mul_and_shl1_mask_1);
569
570 call *%r8;
571
572 store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
573
574 popq %r12;
575 vzeroupper;
576
577 ret;
578ENDPROC(twofish_xts_crypt_16way)
579
580ENTRY(twofish_xts_enc_16way)
581 /* input:
582 * %rdi: ctx, CTX
583 * %rsi: dst (16 blocks)
584 * %rdx: src (16 blocks)
585 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
586 */
587 leaq __twofish_enc_blk16, %r8;
588 jmp twofish_xts_crypt_16way;
589ENDPROC(twofish_xts_enc_16way)
590
591ENTRY(twofish_xts_dec_16way)
592 /* input:
593 * %rdi: ctx, CTX
594 * %rsi: dst (16 blocks)
595 * %rdx: src (16 blocks)
596 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
597 */
598 leaq __twofish_dec_blk16, %r8;
599 jmp twofish_xts_crypt_16way;
600ENDPROC(twofish_xts_dec_16way)
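The .Lxts_gf128mul_and_shl1_mask constants and the "iv (t ⊕ αⁿ ∈ GF(2¹²⁸))" convention in the deleted file come from XTS: each block's tweak is the previous tweak multiplied by α, i.e. a 128-bit left shift by one with reduction by x¹²⁸ + x⁷ + x² + x + 1, which is where the 0x87 byte comes from. A plain scalar sketch of that doubling, for reference only:

#include <stdint.h>

/* Multiply a 128-bit XTS tweak (two little-endian 64-bit halves) by α:
 * shift left by one bit and, if a bit fell off the top, xor 0x87 into
 * the low byte (reduction modulo x^128 + x^7 + x^2 + x + 1). */
static void xts_gf128mul_x(uint64_t t[2])
{
	uint64_t carry = t[1] >> 63;

	t[1] = (t[1] << 1) | (t[0] >> 63);
	t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
}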
diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c
deleted file mode 100644
index ce33b5be64ee..000000000000
--- a/arch/x86/crypto/twofish_avx2_glue.c
+++ /dev/null
@@ -1,584 +0,0 @@
1/*
2 * Glue Code for x86_64/AVX2 assembler optimized version of Twofish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/crypto.h>
16#include <linux/err.h>
17#include <crypto/algapi.h>
18#include <crypto/ctr.h>
19#include <crypto/twofish.h>
20#include <crypto/lrw.h>
21#include <crypto/xts.h>
22#include <asm/xcr.h>
23#include <asm/xsave.h>
24#include <asm/crypto/twofish.h>
25#include <asm/crypto/ablk_helper.h>
26#include <asm/crypto/glue_helper.h>
27#include <crypto/scatterwalk.h>
28
29#define TF_AVX2_PARALLEL_BLOCKS 16
30
31/* 16-way AVX2 parallel cipher functions */
32asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst,
33 const u8 *src);
34asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst,
35 const u8 *src);
36asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
37
38asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src,
39 le128 *iv);
40
41asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst,
42 const u8 *src, le128 *iv);
43asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst,
44 const u8 *src, le128 *iv);
45
46static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
47 const u8 *src)
48{
49 __twofish_enc_blk_3way(ctx, dst, src, false);
50}
51
52static const struct common_glue_ctx twofish_enc = {
53 .num_funcs = 4,
54 .fpu_blocks_limit = 8,
55
56 .funcs = { {
57 .num_blocks = 16,
58 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) }
59 }, {
60 .num_blocks = 8,
61 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
62 }, {
63 .num_blocks = 3,
64 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
65 }, {
66 .num_blocks = 1,
67 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
68 } }
69};
70
71static const struct common_glue_ctx twofish_ctr = {
72 .num_funcs = 4,
73 .fpu_blocks_limit = 8,
74
75 .funcs = { {
76 .num_blocks = 16,
77 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) }
78 }, {
79 .num_blocks = 8,
80 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
81 }, {
82 .num_blocks = 3,
83 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
84 }, {
85 .num_blocks = 1,
86 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
87 } }
88};
89
90static const struct common_glue_ctx twofish_enc_xts = {
91 .num_funcs = 3,
92 .fpu_blocks_limit = 8,
93
94 .funcs = { {
95 .num_blocks = 16,
96 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) }
97 }, {
98 .num_blocks = 8,
99 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
100 }, {
101 .num_blocks = 1,
102 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
103 } }
104};
105
106static const struct common_glue_ctx twofish_dec = {
107 .num_funcs = 4,
108 .fpu_blocks_limit = 8,
109
110 .funcs = { {
111 .num_blocks = 16,
112 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) }
113 }, {
114 .num_blocks = 8,
115 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
116 }, {
117 .num_blocks = 3,
118 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
119 }, {
120 .num_blocks = 1,
121 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
122 } }
123};
124
125static const struct common_glue_ctx twofish_dec_cbc = {
126 .num_funcs = 4,
127 .fpu_blocks_limit = 8,
128
129 .funcs = { {
130 .num_blocks = 16,
131 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) }
132 }, {
133 .num_blocks = 8,
134 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
135 }, {
136 .num_blocks = 3,
137 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
138 }, {
139 .num_blocks = 1,
140 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
141 } }
142};
143
144static const struct common_glue_ctx twofish_dec_xts = {
145 .num_funcs = 3,
146 .fpu_blocks_limit = 8,
147
148 .funcs = { {
149 .num_blocks = 16,
150 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) }
151 }, {
152 .num_blocks = 8,
153 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
154 }, {
155 .num_blocks = 1,
156 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
157 } }
158};
159
160static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
161 struct scatterlist *src, unsigned int nbytes)
162{
163 return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
164}
165
166static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
168{
169 return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
170}
171
172static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
173 struct scatterlist *src, unsigned int nbytes)
174{
175 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
176 dst, src, nbytes);
177}
178
179static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
180 struct scatterlist *src, unsigned int nbytes)
181{
182 return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
183 nbytes);
184}
185
186static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
187 struct scatterlist *src, unsigned int nbytes)
188{
189 return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
190}
191
192static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
193{
194	/* since we reuse the AVX functions, start using the FPU at 8 parallel blocks */
195 return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
196}
197
198static inline void twofish_fpu_end(bool fpu_enabled)
199{
200 glue_fpu_end(fpu_enabled);
201}
202
203struct crypt_priv {
204 struct twofish_ctx *ctx;
205 bool fpu_enabled;
206};
207
208static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
209{
210 const unsigned int bsize = TF_BLOCK_SIZE;
211 struct crypt_priv *ctx = priv;
212 int i;
213
214 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
215
216 while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
217 twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
218 srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
219 nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
220 }
221
222 while (nbytes >= 8 * bsize) {
223 twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
224 srcdst += bsize * 8;
225 nbytes -= bsize * 8;
226 }
227
228 while (nbytes >= 3 * bsize) {
229 twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
230 srcdst += bsize * 3;
231 nbytes -= bsize * 3;
232 }
233
234 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
235 twofish_enc_blk(ctx->ctx, srcdst, srcdst);
236}
237
238static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
239{
240 const unsigned int bsize = TF_BLOCK_SIZE;
241 struct crypt_priv *ctx = priv;
242 int i;
243
244 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
245
246 while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
247 twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
248 srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
249 nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
250 }
251
252 while (nbytes >= 8 * bsize) {
253 twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
254 srcdst += bsize * 8;
255 nbytes -= bsize * 8;
256 }
257
258 while (nbytes >= 3 * bsize) {
259 twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
260 srcdst += bsize * 3;
261 nbytes -= bsize * 3;
262 }
263
264 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
265 twofish_dec_blk(ctx->ctx, srcdst, srcdst);
266}
267
268static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
269 struct scatterlist *src, unsigned int nbytes)
270{
271 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
272 be128 buf[TF_AVX2_PARALLEL_BLOCKS];
273 struct crypt_priv crypt_ctx = {
274 .ctx = &ctx->twofish_ctx,
275 .fpu_enabled = false,
276 };
277 struct lrw_crypt_req req = {
278 .tbuf = buf,
279 .tbuflen = sizeof(buf),
280
281 .table_ctx = &ctx->lrw_table,
282 .crypt_ctx = &crypt_ctx,
283 .crypt_fn = encrypt_callback,
284 };
285 int ret;
286
287 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
288 ret = lrw_crypt(desc, dst, src, nbytes, &req);
289 twofish_fpu_end(crypt_ctx.fpu_enabled);
290
291 return ret;
292}
293
294static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
295 struct scatterlist *src, unsigned int nbytes)
296{
297 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
298 be128 buf[TF_AVX2_PARALLEL_BLOCKS];
299 struct crypt_priv crypt_ctx = {
300 .ctx = &ctx->twofish_ctx,
301 .fpu_enabled = false,
302 };
303 struct lrw_crypt_req req = {
304 .tbuf = buf,
305 .tbuflen = sizeof(buf),
306
307 .table_ctx = &ctx->lrw_table,
308 .crypt_ctx = &crypt_ctx,
309 .crypt_fn = decrypt_callback,
310 };
311 int ret;
312
313 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
314 ret = lrw_crypt(desc, dst, src, nbytes, &req);
315 twofish_fpu_end(crypt_ctx.fpu_enabled);
316
317 return ret;
318}
319
320static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
321 struct scatterlist *src, unsigned int nbytes)
322{
323 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
324
325 return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
326 XTS_TWEAK_CAST(twofish_enc_blk),
327 &ctx->tweak_ctx, &ctx->crypt_ctx);
328}
329
330static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
331 struct scatterlist *src, unsigned int nbytes)
332{
333 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
334
335 return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
336 XTS_TWEAK_CAST(twofish_enc_blk),
337 &ctx->tweak_ctx, &ctx->crypt_ctx);
338}
339
340static struct crypto_alg tf_algs[10] = { {
341 .cra_name = "__ecb-twofish-avx2",
342 .cra_driver_name = "__driver-ecb-twofish-avx2",
343 .cra_priority = 0,
344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
345 .cra_blocksize = TF_BLOCK_SIZE,
346 .cra_ctxsize = sizeof(struct twofish_ctx),
347 .cra_alignmask = 0,
348 .cra_type = &crypto_blkcipher_type,
349 .cra_module = THIS_MODULE,
350 .cra_u = {
351 .blkcipher = {
352 .min_keysize = TF_MIN_KEY_SIZE,
353 .max_keysize = TF_MAX_KEY_SIZE,
354 .setkey = twofish_setkey,
355 .encrypt = ecb_encrypt,
356 .decrypt = ecb_decrypt,
357 },
358 },
359}, {
360 .cra_name = "__cbc-twofish-avx2",
361 .cra_driver_name = "__driver-cbc-twofish-avx2",
362 .cra_priority = 0,
363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
364 .cra_blocksize = TF_BLOCK_SIZE,
365 .cra_ctxsize = sizeof(struct twofish_ctx),
366 .cra_alignmask = 0,
367 .cra_type = &crypto_blkcipher_type,
368 .cra_module = THIS_MODULE,
369 .cra_u = {
370 .blkcipher = {
371 .min_keysize = TF_MIN_KEY_SIZE,
372 .max_keysize = TF_MAX_KEY_SIZE,
373 .setkey = twofish_setkey,
374 .encrypt = cbc_encrypt,
375 .decrypt = cbc_decrypt,
376 },
377 },
378}, {
379 .cra_name = "__ctr-twofish-avx2",
380 .cra_driver_name = "__driver-ctr-twofish-avx2",
381 .cra_priority = 0,
382 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
383 .cra_blocksize = 1,
384 .cra_ctxsize = sizeof(struct twofish_ctx),
385 .cra_alignmask = 0,
386 .cra_type = &crypto_blkcipher_type,
387 .cra_module = THIS_MODULE,
388 .cra_u = {
389 .blkcipher = {
390 .min_keysize = TF_MIN_KEY_SIZE,
391 .max_keysize = TF_MAX_KEY_SIZE,
392 .ivsize = TF_BLOCK_SIZE,
393 .setkey = twofish_setkey,
394 .encrypt = ctr_crypt,
395 .decrypt = ctr_crypt,
396 },
397 },
398}, {
399 .cra_name = "__lrw-twofish-avx2",
400 .cra_driver_name = "__driver-lrw-twofish-avx2",
401 .cra_priority = 0,
402 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
403 .cra_blocksize = TF_BLOCK_SIZE,
404 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
405 .cra_alignmask = 0,
406 .cra_type = &crypto_blkcipher_type,
407 .cra_module = THIS_MODULE,
408 .cra_exit = lrw_twofish_exit_tfm,
409 .cra_u = {
410 .blkcipher = {
411 .min_keysize = TF_MIN_KEY_SIZE +
412 TF_BLOCK_SIZE,
413 .max_keysize = TF_MAX_KEY_SIZE +
414 TF_BLOCK_SIZE,
415 .ivsize = TF_BLOCK_SIZE,
416 .setkey = lrw_twofish_setkey,
417 .encrypt = lrw_encrypt,
418 .decrypt = lrw_decrypt,
419 },
420 },
421}, {
422 .cra_name = "__xts-twofish-avx2",
423 .cra_driver_name = "__driver-xts-twofish-avx2",
424 .cra_priority = 0,
425 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
426 .cra_blocksize = TF_BLOCK_SIZE,
427 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
428 .cra_alignmask = 0,
429 .cra_type = &crypto_blkcipher_type,
430 .cra_module = THIS_MODULE,
431 .cra_u = {
432 .blkcipher = {
433 .min_keysize = TF_MIN_KEY_SIZE * 2,
434 .max_keysize = TF_MAX_KEY_SIZE * 2,
435 .ivsize = TF_BLOCK_SIZE,
436 .setkey = xts_twofish_setkey,
437 .encrypt = xts_encrypt,
438 .decrypt = xts_decrypt,
439 },
440 },
441}, {
442 .cra_name = "ecb(twofish)",
443 .cra_driver_name = "ecb-twofish-avx2",
444 .cra_priority = 500,
445 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
446 .cra_blocksize = TF_BLOCK_SIZE,
447 .cra_ctxsize = sizeof(struct async_helper_ctx),
448 .cra_alignmask = 0,
449 .cra_type = &crypto_ablkcipher_type,
450 .cra_module = THIS_MODULE,
451 .cra_init = ablk_init,
452 .cra_exit = ablk_exit,
453 .cra_u = {
454 .ablkcipher = {
455 .min_keysize = TF_MIN_KEY_SIZE,
456 .max_keysize = TF_MAX_KEY_SIZE,
457 .setkey = ablk_set_key,
458 .encrypt = ablk_encrypt,
459 .decrypt = ablk_decrypt,
460 },
461 },
462}, {
463 .cra_name = "cbc(twofish)",
464 .cra_driver_name = "cbc-twofish-avx2",
465 .cra_priority = 500,
466 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
467 .cra_blocksize = TF_BLOCK_SIZE,
468 .cra_ctxsize = sizeof(struct async_helper_ctx),
469 .cra_alignmask = 0,
470 .cra_type = &crypto_ablkcipher_type,
471 .cra_module = THIS_MODULE,
472 .cra_init = ablk_init,
473 .cra_exit = ablk_exit,
474 .cra_u = {
475 .ablkcipher = {
476 .min_keysize = TF_MIN_KEY_SIZE,
477 .max_keysize = TF_MAX_KEY_SIZE,
478 .ivsize = TF_BLOCK_SIZE,
479 .setkey = ablk_set_key,
480 .encrypt = __ablk_encrypt,
481 .decrypt = ablk_decrypt,
482 },
483 },
484}, {
485 .cra_name = "ctr(twofish)",
486 .cra_driver_name = "ctr-twofish-avx2",
487 .cra_priority = 500,
488 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
489 .cra_blocksize = 1,
490 .cra_ctxsize = sizeof(struct async_helper_ctx),
491 .cra_alignmask = 0,
492 .cra_type = &crypto_ablkcipher_type,
493 .cra_module = THIS_MODULE,
494 .cra_init = ablk_init,
495 .cra_exit = ablk_exit,
496 .cra_u = {
497 .ablkcipher = {
498 .min_keysize = TF_MIN_KEY_SIZE,
499 .max_keysize = TF_MAX_KEY_SIZE,
500 .ivsize = TF_BLOCK_SIZE,
501 .setkey = ablk_set_key,
502 .encrypt = ablk_encrypt,
503 .decrypt = ablk_encrypt,
504 .geniv = "chainiv",
505 },
506 },
507}, {
508 .cra_name = "lrw(twofish)",
509 .cra_driver_name = "lrw-twofish-avx2",
510 .cra_priority = 500,
511 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
512 .cra_blocksize = TF_BLOCK_SIZE,
513 .cra_ctxsize = sizeof(struct async_helper_ctx),
514 .cra_alignmask = 0,
515 .cra_type = &crypto_ablkcipher_type,
516 .cra_module = THIS_MODULE,
517 .cra_init = ablk_init,
518 .cra_exit = ablk_exit,
519 .cra_u = {
520 .ablkcipher = {
521 .min_keysize = TF_MIN_KEY_SIZE +
522 TF_BLOCK_SIZE,
523 .max_keysize = TF_MAX_KEY_SIZE +
524 TF_BLOCK_SIZE,
525 .ivsize = TF_BLOCK_SIZE,
526 .setkey = ablk_set_key,
527 .encrypt = ablk_encrypt,
528 .decrypt = ablk_decrypt,
529 },
530 },
531}, {
532 .cra_name = "xts(twofish)",
533 .cra_driver_name = "xts-twofish-avx2",
534 .cra_priority = 500,
535 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
536 .cra_blocksize = TF_BLOCK_SIZE,
537 .cra_ctxsize = sizeof(struct async_helper_ctx),
538 .cra_alignmask = 0,
539 .cra_type = &crypto_ablkcipher_type,
540 .cra_module = THIS_MODULE,
541 .cra_init = ablk_init,
542 .cra_exit = ablk_exit,
543 .cra_u = {
544 .ablkcipher = {
545 .min_keysize = TF_MIN_KEY_SIZE * 2,
546 .max_keysize = TF_MAX_KEY_SIZE * 2,
547 .ivsize = TF_BLOCK_SIZE,
548 .setkey = ablk_set_key,
549 .encrypt = ablk_encrypt,
550 .decrypt = ablk_decrypt,
551 },
552 },
553} };
554
555static int __init init(void)
556{
557 u64 xcr0;
558
559 if (!cpu_has_avx2 || !cpu_has_osxsave) {
560 pr_info("AVX2 instructions are not detected.\n");
561 return -ENODEV;
562 }
563
564 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
565 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
566 pr_info("AVX2 detected but unusable.\n");
567 return -ENODEV;
568 }
569
570 return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
571}
572
573static void __exit fini(void)
574{
575 crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
576}
577
578module_init(init);
579module_exit(fini);
580
581MODULE_LICENSE("GPL");
582MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
583MODULE_ALIAS("twofish");
584MODULE_ALIAS("twofish-asm");
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 2047a562f6b3..a62ba541884e 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -50,26 +50,18 @@
50/* 8-way parallel cipher functions */ 50/* 8-way parallel cipher functions */
51asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, 51asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
52 const u8 *src); 52 const u8 *src);
53EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way);
54
55asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, 53asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
56 const u8 *src); 54 const u8 *src);
57EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way);
58 55
59asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, 56asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
60 const u8 *src); 57 const u8 *src);
61EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way);
62
63asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, 58asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
64 const u8 *src, le128 *iv); 59 const u8 *src, le128 *iv);
65EXPORT_SYMBOL_GPL(twofish_ctr_8way);
66 60
67asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, 61asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
68 const u8 *src, le128 *iv); 62 const u8 *src, le128 *iv);
69EXPORT_SYMBOL_GPL(twofish_xts_enc_8way);
70asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, 63asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
71 const u8 *src, le128 *iv); 64 const u8 *src, le128 *iv);
72EXPORT_SYMBOL_GPL(twofish_xts_dec_8way);
73 65
74static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 66static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
75 const u8 *src) 67 const u8 *src)
@@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
77 __twofish_enc_blk_3way(ctx, dst, src, false); 69 __twofish_enc_blk_3way(ctx, dst, src, false);
78} 70}
79 71
80void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) 72static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
81{ 73{
82 glue_xts_crypt_128bit_one(ctx, dst, src, iv, 74 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
83 GLUE_FUNC_CAST(twofish_enc_blk)); 75 GLUE_FUNC_CAST(twofish_enc_blk));
84} 76}
85EXPORT_SYMBOL_GPL(twofish_xts_enc);
86 77
87void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) 78static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
88{ 79{
89 glue_xts_crypt_128bit_one(ctx, dst, src, iv, 80 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
90 GLUE_FUNC_CAST(twofish_dec_blk)); 81 GLUE_FUNC_CAST(twofish_dec_blk));
91} 82}
92EXPORT_SYMBOL_GPL(twofish_xts_dec);
93 83
94 84
95static const struct common_glue_ctx twofish_enc = { 85static const struct common_glue_ctx twofish_enc = {
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 52ff81cce008..bae3aba95b15 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -308,8 +308,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
308 (current->mm->start_data = N_DATADDR(ex)); 308 (current->mm->start_data = N_DATADDR(ex));
309 current->mm->brk = ex.a_bss + 309 current->mm->brk = ex.a_bss +
310 (current->mm->start_brk = N_BSSADDR(ex)); 310 (current->mm->start_brk = N_BSSADDR(ex));
311 current->mm->free_area_cache = TASK_UNMAPPED_BASE;
312 current->mm->cached_hole_size = 0;
313 311
314 retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); 312 retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
315 if (retval < 0) { 313 if (retval < 0) {
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index cf1a471a18a2..bccfca68430e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,8 +34,6 @@
34#include <asm/sys_ia32.h> 34#include <asm/sys_ia32.h>
35#include <asm/smap.h> 35#include <asm/smap.h>
36 36
37#define FIX_EFLAGS __FIX_EFLAGS
38
39int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) 37int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
40{ 38{
41 int err = 0; 39 int err = 0;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index b31bf97775fc..2dfac58f3b11 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -111,7 +111,7 @@ static inline void acpi_disable_pci(void)
111} 111}
112 112
113/* Low-level suspend routine. */ 113/* Low-level suspend routine. */
114extern int acpi_suspend_lowlevel(void); 114extern int (*acpi_suspend_lowlevel)(void);
115 115
116/* Physical address to resume after wakeup */ 116/* Physical address to resume after wakeup */
117#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start)) 117#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 338803422239..f8119b582c3c 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -12,6 +12,7 @@
12#include <asm/fixmap.h> 12#include <asm/fixmap.h>
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/msr.h> 14#include <asm/msr.h>
15#include <asm/idle.h>
15 16
16#define ARCH_APICTIMER_STOPS_ON_C3 1 17#define ARCH_APICTIMER_STOPS_ON_C3 1
17 18
@@ -687,5 +688,31 @@ extern int default_check_phys_apicid_present(int phys_apicid);
687#endif 688#endif
688 689
689#endif /* CONFIG_X86_LOCAL_APIC */ 690#endif /* CONFIG_X86_LOCAL_APIC */
691extern void irq_enter(void);
692extern void irq_exit(void);
693
694static inline void entering_irq(void)
695{
696 irq_enter();
697 exit_idle();
698}
699
700static inline void entering_ack_irq(void)
701{
702 ack_APIC_irq();
703 entering_irq();
704}
705
706static inline void exiting_irq(void)
707{
708 irq_exit();
709}
710
711static inline void exiting_ack_irq(void)
712{
713 irq_exit();
714 /* Ack only at the end to avoid potential reentry */
715 ack_APIC_irq();
716}
690 717
691#endif /* _ASM_X86_APIC_H */ 718#endif /* _ASM_X86_APIC_H */
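The entering_*()/exiting_*() helpers above give APIC vector handlers one common prologue/epilogue. A minimal sketch of the intended call pattern, assuming only the helpers defined in this hunk (the handler name and body are illustrative, not part of this diff):

    /* Sketch of a local-APIC vector handler built on the helpers above. */
    void smp_example_vector_handler(struct pt_regs *regs)
    {
            entering_ack_irq();     /* ack_APIC_irq(), then irq_enter() + exit_idle() */

            /* ... service the vector ... */

            exiting_irq();          /* irq_exit() */
    }
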
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 5f9a1243190e..d2b12988d2ed 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -28,7 +28,7 @@ struct x86_cpu {
28#ifdef CONFIG_HOTPLUG_CPU 28#ifdef CONFIG_HOTPLUG_CPU
29extern int arch_register_cpu(int num); 29extern int arch_register_cpu(int num);
30extern void arch_unregister_cpu(int); 30extern void arch_unregister_cpu(int);
31extern void __cpuinit start_cpu0(void); 31extern void start_cpu0(void);
32#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 32#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
33extern int _debug_hotplug_cpu(int cpu, int action); 33extern int _debug_hotplug_cpu(int cpu, int action);
34#endif 34#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e99ac27f95b2..47538a61c91b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -92,7 +92,7 @@
92#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ 92#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
93#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ 93#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */
94#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ 94#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
95 /* 21 available, was AMD_C1E */ 95#define X86_FEATURE_ALWAYS (3*32+21) /* "" Always-present feature */
96#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ 96#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
97#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ 97#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
98#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ 98#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
@@ -356,15 +356,36 @@ extern const char * const x86_power_flags[32];
356#endif /* CONFIG_X86_64 */ 356#endif /* CONFIG_X86_64 */
357 357
358#if __GNUC__ >= 4 358#if __GNUC__ >= 4
359extern void warn_pre_alternatives(void);
360extern bool __static_cpu_has_safe(u16 bit);
361
359/* 362/*
360 * Static testing of CPU features. Used the same as boot_cpu_has(). 363 * Static testing of CPU features. Used the same as boot_cpu_has().
361 * These are only valid after alternatives have run, but will statically 364 * These are only valid after alternatives have run, but will statically
362 * patch the target code for additional performance. 365 * patch the target code for additional performance.
363 *
364 */ 366 */
365static __always_inline __pure bool __static_cpu_has(u16 bit) 367static __always_inline __pure bool __static_cpu_has(u16 bit)
366{ 368{
367#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 369#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
370
371#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
372 /*
373 * Catch too early usage of this before alternatives
374 * have run.
375 */
376 asm goto("1: jmp %l[t_warn]\n"
377 "2:\n"
378 ".section .altinstructions,\"a\"\n"
379 " .long 1b - .\n"
380 " .long 0\n" /* no replacement */
381 " .word %P0\n" /* 1: do replace */
382 " .byte 2b - 1b\n" /* source len */
383 " .byte 0\n" /* replacement len */
384 ".previous\n"
385 /* skipping size check since replacement size = 0 */
386 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
387#endif
388
368 asm goto("1: jmp %l[t_no]\n" 389 asm goto("1: jmp %l[t_no]\n"
369 "2:\n" 390 "2:\n"
370 ".section .altinstructions,\"a\"\n" 391 ".section .altinstructions,\"a\"\n"
@@ -379,7 +400,13 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
379 return true; 400 return true;
380 t_no: 401 t_no:
381 return false; 402 return false;
382#else 403
404#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
405 t_warn:
406 warn_pre_alternatives();
407 return false;
408#endif
409#else /* GCC_VERSION >= 40500 */
383 u8 flag; 410 u8 flag;
384 /* Open-coded due to __stringify() in ALTERNATIVE() */ 411 /* Open-coded due to __stringify() in ALTERNATIVE() */
385 asm volatile("1: movb $0,%0\n" 412 asm volatile("1: movb $0,%0\n"
@@ -411,11 +438,94 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
411 __static_cpu_has(bit) : \ 438 __static_cpu_has(bit) : \
412 boot_cpu_has(bit) \ 439 boot_cpu_has(bit) \
413) 440)
441
442static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
443{
444#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
445/*
446 * We need to spell the jumps to the compiler because, depending on the offset,
447 * the replacement jump can be bigger than the original jump, and this we cannot
448 * have. Thus, we force the jump to the widest, 4-byte, signed relative
449 * offset even though the last would often fit in less bytes.
450 */
451 asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
452 "2:\n"
453 ".section .altinstructions,\"a\"\n"
454 " .long 1b - .\n" /* src offset */
455 " .long 3f - .\n" /* repl offset */
456 " .word %P1\n" /* always replace */
457 " .byte 2b - 1b\n" /* src len */
458 " .byte 4f - 3f\n" /* repl len */
459 ".previous\n"
460 ".section .altinstr_replacement,\"ax\"\n"
461 "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
462 "4:\n"
463 ".previous\n"
464 ".section .altinstructions,\"a\"\n"
465 " .long 1b - .\n" /* src offset */
466 " .long 0\n" /* no replacement */
467 " .word %P0\n" /* feature bit */
468 " .byte 2b - 1b\n" /* src len */
469 " .byte 0\n" /* repl len */
470 ".previous\n"
471 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
472 : : t_dynamic, t_no);
473 return true;
474 t_no:
475 return false;
476 t_dynamic:
477 return __static_cpu_has_safe(bit);
478#else /* GCC_VERSION >= 40500 */
479 u8 flag;
480 /* Open-coded due to __stringify() in ALTERNATIVE() */
481 asm volatile("1: movb $2,%0\n"
482 "2:\n"
483 ".section .altinstructions,\"a\"\n"
484 " .long 1b - .\n" /* src offset */
485 " .long 3f - .\n" /* repl offset */
486 " .word %P2\n" /* always replace */
487 " .byte 2b - 1b\n" /* source len */
488 " .byte 4f - 3f\n" /* replacement len */
489 ".previous\n"
490 ".section .discard,\"aw\",@progbits\n"
491 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
492 ".previous\n"
493 ".section .altinstr_replacement,\"ax\"\n"
494 "3: movb $0,%0\n"
495 "4:\n"
496 ".previous\n"
497 ".section .altinstructions,\"a\"\n"
498 " .long 1b - .\n" /* src offset */
499 " .long 5f - .\n" /* repl offset */
500 " .word %P1\n" /* feature bit */
501 " .byte 4b - 3b\n" /* src len */
502 " .byte 6f - 5f\n" /* repl len */
503 ".previous\n"
504 ".section .discard,\"aw\",@progbits\n"
505 " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
506 ".previous\n"
507 ".section .altinstr_replacement,\"ax\"\n"
508 "5: movb $1,%0\n"
509 "6:\n"
510 ".previous\n"
511 : "=qm" (flag)
512 : "i" (bit), "i" (X86_FEATURE_ALWAYS));
513 return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
514#endif
515}
516
517#define static_cpu_has_safe(bit) \
518( \
519 __builtin_constant_p(boot_cpu_has(bit)) ? \
520 boot_cpu_has(bit) : \
521 _static_cpu_has_safe(bit) \
522)
414#else 523#else
415/* 524/*
416 * gcc 3.x is too stupid to do the static test; fall back to dynamic. 525 * gcc 3.x is too stupid to do the static test; fall back to dynamic.
417 */ 526 */
418#define static_cpu_has(bit) boot_cpu_has(bit) 527#define static_cpu_has(bit) boot_cpu_has(bit)
528#define static_cpu_has_safe(bit) boot_cpu_has(bit)
419#endif 529#endif
420 530
421#define cpu_has_bug(c, bit) cpu_has(c, (bit)) 531#define cpu_has_bug(c, bit) cpu_has(c, (bit))
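static_cpu_has_safe() behaves like static_cpu_has() once the alternatives have been applied, but before that point it falls back to a dynamic check via __static_cpu_has_safe() instead of returning a wrong answer. A minimal usage sketch, assuming only the macro defined above (X86_FEATURE_ERMS is just an example bit; the fpu-internal.h hunk further down shows a real caller):

    static bool example_have_erms(void)
    {
            /* Usable in early boot paths, still patched for hot paths later. */
            return static_cpu_has_safe(X86_FEATURE_ERMS);
    }
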
diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h
deleted file mode 100644
index f097b2face10..000000000000
--- a/arch/x86/include/asm/crypto/blowfish.h
+++ /dev/null
@@ -1,43 +0,0 @@
1#ifndef ASM_X86_BLOWFISH_H
2#define ASM_X86_BLOWFISH_H
3
4#include <linux/crypto.h>
5#include <crypto/blowfish.h>
6
7#define BF_PARALLEL_BLOCKS 4
8
9/* regular block cipher functions */
10asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
11 bool xor);
12asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
13
14/* 4-way parallel cipher functions */
15asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
16 const u8 *src, bool xor);
17asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
18 const u8 *src);
19
20static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
21{
22 __blowfish_enc_blk(ctx, dst, src, false);
23}
24
25static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
26 const u8 *src)
27{
28 __blowfish_enc_blk(ctx, dst, src, true);
29}
30
31static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
32 const u8 *src)
33{
34 __blowfish_enc_blk_4way(ctx, dst, src, false);
35}
36
37static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
38 const u8 *src)
39{
40 __blowfish_enc_blk_4way(ctx, dst, src, true);
41}
42
43#endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index e655c6029b45..878c51ceebb5 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
28asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, 28asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
29 const u8 *src); 29 const u8 *src);
30 30
31/* 8-way parallel cipher functions */
32asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
33 const u8 *src);
34asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
35 const u8 *src);
36asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
37 const u8 *src);
38asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
39 const u8 *src, le128 *iv);
40asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
41 const u8 *src, le128 *iv);
42asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
43 const u8 *src, le128 *iv);
44
45/* helpers from twofish_x86_64-3way module */ 31/* helpers from twofish_x86_64-3way module */
46extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); 32extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
47extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, 33extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
@@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
57extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, 43extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
58 unsigned int keylen); 44 unsigned int keylen);
59 45
60/* helpers from twofish-avx module */
61extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
62extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
63
64#endif /* ASM_X86_TWOFISH_H */ 46#endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 8bf1c06070d5..b90e5dfeee46 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -36,8 +36,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
36 36
37extern struct desc_ptr idt_descr; 37extern struct desc_ptr idt_descr;
38extern gate_desc idt_table[]; 38extern gate_desc idt_table[];
39extern struct desc_ptr nmi_idt_descr; 39extern struct desc_ptr debug_idt_descr;
40extern gate_desc nmi_idt_table[]; 40extern gate_desc debug_idt_table[];
41 41
42struct gdt_page { 42struct gdt_page {
43 struct desc_struct gdt[GDT_ENTRIES]; 43 struct desc_struct gdt[GDT_ENTRIES];
@@ -316,7 +316,20 @@ static inline void set_nmi_gate(int gate, void *addr)
316 gate_desc s; 316 gate_desc s;
317 317
318 pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); 318 pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
319 write_idt_entry(nmi_idt_table, gate, &s); 319 write_idt_entry(debug_idt_table, gate, &s);
320}
321#endif
322
323#ifdef CONFIG_TRACING
324extern struct desc_ptr trace_idt_descr;
325extern gate_desc trace_idt_table[];
326static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
327{
328 write_idt_entry(trace_idt_table, entry, gate);
329}
330#else
331static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
332{
320} 333}
321#endif 334#endif
322 335
@@ -331,6 +344,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
331 * setup time 344 * setup time
332 */ 345 */
333 write_idt_entry(idt_table, gate, &s); 346 write_idt_entry(idt_table, gate, &s);
347 write_trace_idt_entry(gate, &s);
334} 348}
335 349
336/* 350/*
@@ -360,12 +374,39 @@ static inline void alloc_system_vector(int vector)
360 } 374 }
361} 375}
362 376
363static inline void alloc_intr_gate(unsigned int n, void *addr) 377#ifdef CONFIG_TRACING
378static inline void trace_set_intr_gate(unsigned int gate, void *addr)
379{
380 gate_desc s;
381
382 pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
383 write_idt_entry(trace_idt_table, gate, &s);
384}
385
386static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
387{
388 trace_set_intr_gate(n, addr);
389}
390#else
391static inline void trace_set_intr_gate(unsigned int gate, void *addr)
392{
393}
394
395#define __trace_alloc_intr_gate(n, addr)
396#endif
397
398static inline void __alloc_intr_gate(unsigned int n, void *addr)
364{ 399{
365 alloc_system_vector(n);
366 set_intr_gate(n, addr); 400 set_intr_gate(n, addr);
367} 401}
368 402
403#define alloc_intr_gate(n, addr) \
404 do { \
405 alloc_system_vector(n); \
406 __alloc_intr_gate(n, addr); \
407 __trace_alloc_intr_gate(n, trace_##addr); \
408 } while (0)
409
369/* 410/*
370 * This routine sets up an interrupt gate at directory privilege level 3. 411 * This routine sets up an interrupt gate at directory privilege level 3.
371 */ 412 */
@@ -405,4 +446,70 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
405 _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); 446 _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
406} 447}
407 448
449#ifdef CONFIG_X86_64
450DECLARE_PER_CPU(u32, debug_idt_ctr);
451static inline bool is_debug_idt_enabled(void)
452{
453 if (this_cpu_read(debug_idt_ctr))
454 return true;
455
456 return false;
457}
458
459static inline void load_debug_idt(void)
460{
461 load_idt((const struct desc_ptr *)&debug_idt_descr);
462}
463#else
464static inline bool is_debug_idt_enabled(void)
465{
466 return false;
467}
468
469static inline void load_debug_idt(void)
470{
471}
472#endif
473
474#ifdef CONFIG_TRACING
475extern atomic_t trace_idt_ctr;
476static inline bool is_trace_idt_enabled(void)
477{
478 if (atomic_read(&trace_idt_ctr))
479 return true;
480
481 return false;
482}
483
484static inline void load_trace_idt(void)
485{
486 load_idt((const struct desc_ptr *)&trace_idt_descr);
487}
488#else
489static inline bool is_trace_idt_enabled(void)
490{
491 return false;
492}
493
494static inline void load_trace_idt(void)
495{
496}
497#endif
498
499/*
500 * The load_current_idt() must be called with interrupts disabled
501 * to avoid races. That way the IDT will always be set back to the expected
502 * descriptor. It's also called when a CPU is being initialized, and
503 * that doesn't need to disable interrupts, as nothing should be
504 * bothering the CPU then.
505 */
506static inline void load_current_idt(void)
507{
508 if (is_debug_idt_enabled())
509 load_debug_idt();
510 else if (is_trace_idt_enabled())
511 load_trace_idt();
512 else
513 load_idt((const struct desc_ptr *)&idt_descr);
514}
408#endif /* _ASM_X86_DESC_H */ 515#endif /* _ASM_X86_DESC_H */
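load_current_idt() centralizes the choice between the regular IDT, the debug IDT and the trace IDT. A minimal sketch of a caller restoring the expected descriptor with interrupts disabled, as the comment above requires (the function name is illustrative):

    static void example_restore_idt(void)
    {
            unsigned long flags;

            local_irq_save(flags);
            load_current_idt();     /* debug IDT, trace IDT, or idt_descr */
            local_irq_restore(flags);
    }
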
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 60c89f30c727..0062a0125041 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -52,40 +52,40 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
52 u64 arg4, u64 arg5, u64 arg6); 52 u64 arg4, u64 arg5, u64 arg6);
53 53
54#define efi_call_phys0(f) \ 54#define efi_call_phys0(f) \
55 efi_call0((void *)(f)) 55 efi_call0((f))
56#define efi_call_phys1(f, a1) \ 56#define efi_call_phys1(f, a1) \
57 efi_call1((void *)(f), (u64)(a1)) 57 efi_call1((f), (u64)(a1))
58#define efi_call_phys2(f, a1, a2) \ 58#define efi_call_phys2(f, a1, a2) \
59 efi_call2((void *)(f), (u64)(a1), (u64)(a2)) 59 efi_call2((f), (u64)(a1), (u64)(a2))
60#define efi_call_phys3(f, a1, a2, a3) \ 60#define efi_call_phys3(f, a1, a2, a3) \
61 efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3)) 61 efi_call3((f), (u64)(a1), (u64)(a2), (u64)(a3))
62#define efi_call_phys4(f, a1, a2, a3, a4) \ 62#define efi_call_phys4(f, a1, a2, a3, a4) \
63 efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ 63 efi_call4((f), (u64)(a1), (u64)(a2), (u64)(a3), \
64 (u64)(a4)) 64 (u64)(a4))
65#define efi_call_phys5(f, a1, a2, a3, a4, a5) \ 65#define efi_call_phys5(f, a1, a2, a3, a4, a5) \
66 efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ 66 efi_call5((f), (u64)(a1), (u64)(a2), (u64)(a3), \
67 (u64)(a4), (u64)(a5)) 67 (u64)(a4), (u64)(a5))
68#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \ 68#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \
69 efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ 69 efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
70 (u64)(a4), (u64)(a5), (u64)(a6)) 70 (u64)(a4), (u64)(a5), (u64)(a6))
71 71
72#define efi_call_virt0(f) \ 72#define efi_call_virt0(f) \
73 efi_call0((void *)(efi.systab->runtime->f)) 73 efi_call0((efi.systab->runtime->f))
74#define efi_call_virt1(f, a1) \ 74#define efi_call_virt1(f, a1) \
75 efi_call1((void *)(efi.systab->runtime->f), (u64)(a1)) 75 efi_call1((efi.systab->runtime->f), (u64)(a1))
76#define efi_call_virt2(f, a1, a2) \ 76#define efi_call_virt2(f, a1, a2) \
77 efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2)) 77 efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
78#define efi_call_virt3(f, a1, a2, a3) \ 78#define efi_call_virt3(f, a1, a2, a3) \
79 efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 79 efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
80 (u64)(a3)) 80 (u64)(a3))
81#define efi_call_virt4(f, a1, a2, a3, a4) \ 81#define efi_call_virt4(f, a1, a2, a3, a4) \
82 efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 82 efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
83 (u64)(a3), (u64)(a4)) 83 (u64)(a3), (u64)(a4))
84#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ 84#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
85 efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 85 efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
86 (u64)(a3), (u64)(a4), (u64)(a5)) 86 (u64)(a3), (u64)(a4), (u64)(a5))
87#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ 87#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
88 efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 88 efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
89 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) 89 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
90 90
91extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, 91extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index 75ce3f47d204..77a99ac06d00 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -1,18 +1,6 @@
1#ifndef _ASM_X86_EMERGENCY_RESTART_H 1#ifndef _ASM_X86_EMERGENCY_RESTART_H
2#define _ASM_X86_EMERGENCY_RESTART_H 2#define _ASM_X86_EMERGENCY_RESTART_H
3 3
4enum reboot_type {
5 BOOT_TRIPLE = 't',
6 BOOT_KBD = 'k',
7 BOOT_BIOS = 'b',
8 BOOT_ACPI = 'a',
9 BOOT_EFI = 'e',
10 BOOT_CF9 = 'p',
11 BOOT_CF9_COND = 'q',
12};
13
14extern enum reboot_type reboot_type;
15
16extern void machine_emergency_restart(void); 4extern void machine_emergency_restart(void);
17 5
18#endif /* _ASM_X86_EMERGENCY_RESTART_H */ 6#endif /* _ASM_X86_EMERGENCY_RESTART_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 9bd4ecac72be..dc5fa661465f 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,14 +13,16 @@
13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) 13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) 14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) 15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 16BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR,
17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) 17 smp_irq_move_cleanup_interrupt)
18BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)
18#endif 19#endif
19 20
20BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) 21BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
21 22
22#ifdef CONFIG_HAVE_KVM 23#ifdef CONFIG_HAVE_KVM
23BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) 24BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
25 smp_kvm_posted_intr_ipi)
24#endif 26#endif
25 27
26/* 28/*
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0dc7d9e21c34..e846225265ed 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -81,11 +81,11 @@ enum fixed_addresses {
81 + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, 81 + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
82 VVAR_PAGE, 82 VVAR_PAGE,
83 VSYSCALL_HPET, 83 VSYSCALL_HPET,
84#endif
85#ifdef CONFIG_PARAVIRT_CLOCK 84#ifdef CONFIG_PARAVIRT_CLOCK
86 PVCLOCK_FIXMAP_BEGIN, 85 PVCLOCK_FIXMAP_BEGIN,
87 PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1, 86 PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
88#endif 87#endif
88#endif
89 FIX_DBGP_BASE, 89 FIX_DBGP_BASE,
90 FIX_EARLYCON_MEM_BASE, 90 FIX_EARLYCON_MEM_BASE,
91#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT 91#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e25cc33ec54d..4d0bda7b11e3 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -62,10 +62,8 @@ extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
62#define xstateregs_active fpregs_active 62#define xstateregs_active fpregs_active
63 63
64#ifdef CONFIG_MATH_EMULATION 64#ifdef CONFIG_MATH_EMULATION
65# define HAVE_HWFP (boot_cpu_data.hard_math)
66extern void finit_soft_fpu(struct i387_soft_struct *soft); 65extern void finit_soft_fpu(struct i387_soft_struct *soft);
67#else 66#else
68# define HAVE_HWFP 1
69static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} 67static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
70#endif 68#endif
71 69
@@ -345,7 +343,7 @@ static inline void __thread_fpu_end(struct task_struct *tsk)
345 343
346static inline void __thread_fpu_begin(struct task_struct *tsk) 344static inline void __thread_fpu_begin(struct task_struct *tsk)
347{ 345{
348 if (!use_eager_fpu()) 346 if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU))
349 clts(); 347 clts();
350 __thread_set_has_fpu(tsk); 348 __thread_set_has_fpu(tsk);
351} 349}
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1da97efad08a..e4ac559c4a24 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -77,6 +77,23 @@ extern void threshold_interrupt(void);
77extern void call_function_interrupt(void); 77extern void call_function_interrupt(void);
78extern void call_function_single_interrupt(void); 78extern void call_function_single_interrupt(void);
79 79
80#ifdef CONFIG_TRACING
81/* Interrupt handlers registered during init_IRQ */
82extern void trace_apic_timer_interrupt(void);
83extern void trace_x86_platform_ipi(void);
84extern void trace_error_interrupt(void);
85extern void trace_irq_work_interrupt(void);
86extern void trace_spurious_interrupt(void);
87extern void trace_thermal_interrupt(void);
88extern void trace_reschedule_interrupt(void);
89extern void trace_threshold_interrupt(void);
90extern void trace_call_function_interrupt(void);
91extern void trace_call_function_single_interrupt(void);
92#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
93#define trace_reboot_interrupt reboot_interrupt
94#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
95#endif /* CONFIG_TRACING */
96
80/* IOAPIC */ 97/* IOAPIC */
81#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) 98#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
82extern unsigned long io_apic_irqs; 99extern unsigned long io_apic_irqs;
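Taken together with the alloc_intr_gate() macro from the desc.h hunk above, these trace_* declarations mean a single registration wires up both the normal entry point and its tracing twin. A sketch of what such a registration looks like (the wrapper function is illustrative; the real calls live in the IRQ init code):

    static void __init example_smp_intr_init(void)
    {
            /*
             * Installs reschedule_interrupt in idt_table and, under
             * CONFIG_TRACING, trace_reschedule_interrupt in trace_idt_table.
             */
            alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
    }
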
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d8e8eefbe24c..34f69cb9350a 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -345,4 +345,11 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
345 345
346#define IO_SPACE_LIMIT 0xffff 346#define IO_SPACE_LIMIT 0xffff
347 347
348#ifdef CONFIG_MTRR
349extern int __must_check arch_phys_wc_add(unsigned long base,
350 unsigned long size);
351extern void arch_phys_wc_del(int handle);
352#define arch_phys_wc_add arch_phys_wc_add
353#endif
354
348#endif /* _ASM_X86_IO_H */ 355#endif /* _ASM_X86_IO_H */
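arch_phys_wc_add()/arch_phys_wc_del() give drivers a handle-based way to ask for write-combining on a physical range without calling the MTRR API directly (see the mtrr.h hunk below). A minimal sketch of a driver mapping a framebuffer write-combined; the function and variable names are illustrative:

    static void __iomem *example_map_fb_wc(unsigned long base, unsigned long size,
                                           int *wc_handle)
    {
            *wc_handle = arch_phys_wc_add(base, size);  /* keep for arch_phys_wc_del() */
            return ioremap_wc(base, size);
    }

    static void example_unmap_fb_wc(void __iomem *fb, int wc_handle)
    {
            iounmap(fb);
            arch_phys_wc_del(wc_handle);
    }
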
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c653767c..f87f7fcefa0a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -59,7 +59,7 @@
59 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 59 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
60 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 60 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
61 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ 61 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
62 | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ 62 | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
63 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 63 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
64 64
65#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 65#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -222,14 +222,22 @@ struct kvm_mmu_page {
222 int root_count; /* Currently serving as active root */ 222 int root_count; /* Currently serving as active root */
223 unsigned int unsync_children; 223 unsigned int unsync_children;
224 unsigned long parent_ptes; /* Reverse mapping for parent_pte */ 224 unsigned long parent_ptes; /* Reverse mapping for parent_pte */
225
226 /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */
227 unsigned long mmu_valid_gen;
228
225 DECLARE_BITMAP(unsync_child_bitmap, 512); 229 DECLARE_BITMAP(unsync_child_bitmap, 512);
226 230
227#ifdef CONFIG_X86_32 231#ifdef CONFIG_X86_32
232 /*
233 * Used out of the mmu-lock to avoid reading spte values while an
234 * update is in progress; see the comments in __get_spte_lockless().
235 */
228 int clear_spte_count; 236 int clear_spte_count;
229#endif 237#endif
230 238
239 /* Number of writes since the last time traversal visited this page. */
231 int write_flooding_count; 240 int write_flooding_count;
232 bool mmio_cached;
233}; 241};
234 242
235struct kvm_pio_request { 243struct kvm_pio_request {
@@ -529,11 +537,14 @@ struct kvm_arch {
529 unsigned int n_requested_mmu_pages; 537 unsigned int n_requested_mmu_pages;
530 unsigned int n_max_mmu_pages; 538 unsigned int n_max_mmu_pages;
531 unsigned int indirect_shadow_pages; 539 unsigned int indirect_shadow_pages;
540 unsigned long mmu_valid_gen;
532 struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; 541 struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
533 /* 542 /*
534 * Hash table of struct kvm_mmu_page. 543 * Hash table of struct kvm_mmu_page.
535 */ 544 */
536 struct list_head active_mmu_pages; 545 struct list_head active_mmu_pages;
546 struct list_head zapped_obsolete_pages;
547
537 struct list_head assigned_dev_head; 548 struct list_head assigned_dev_head;
538 struct iommu_domain *iommu_domain; 549 struct iommu_domain *iommu_domain;
539 int iommu_flags; 550 int iommu_flags;
@@ -769,7 +780,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
769 struct kvm_memory_slot *slot, 780 struct kvm_memory_slot *slot,
770 gfn_t gfn_offset, unsigned long mask); 781 gfn_t gfn_offset, unsigned long mask);
771void kvm_mmu_zap_all(struct kvm *kvm); 782void kvm_mmu_zap_all(struct kvm *kvm);
772void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); 783void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
773unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); 784unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
774void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); 785void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
775 786
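The new mmu_valid_gen fields allow bulk invalidation of shadow pages: bumping kvm->arch.mmu_valid_gen makes every existing kvm_mmu_page obsolete without touching each one. A check along these lines (a sketch mirroring the comment in the struct above) is how the MMU code can tell a stale page from a live one:

    static bool example_is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
    {
            return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
    }
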
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index d354fb781c57..a55c7efcc4ed 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void)
95unsigned char rtc_cmos_read(unsigned char addr); 95unsigned char rtc_cmos_read(unsigned char addr);
96void rtc_cmos_write(unsigned char val, unsigned char addr); 96void rtc_cmos_write(unsigned char val, unsigned char addr);
97 97
98extern int mach_set_rtc_mmss(unsigned long nowtime); 98extern int mach_set_rtc_mmss(const struct timespec *now);
99extern unsigned long mach_get_cmos_time(void); 99extern void mach_get_cmos_time(struct timespec *now);
100 100
101#define RTC_IRQ 8 101#define RTC_IRQ 8
102 102
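The CMOS helpers now take a struct timespec rather than a bare seconds value. A minimal sketch of the new calling convention (the wrapper name is illustrative):

    static void example_sync_rtc(void)
    {
            struct timespec now;

            mach_get_cmos_time(&now);               /* read the CMOS clock */
            /* ... */
            if (mach_set_rtc_mmss(&now))
                    pr_warn("example: RTC update failed\n");
    }
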
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index fa5f71e021d5..29e3093bbd21 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -61,7 +61,7 @@
61#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ 61#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
62#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ 62#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
63#define MCJ_EXCEPTION 0x8 /* raise as exception */ 63#define MCJ_EXCEPTION 0x8 /* raise as exception */
64#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ 64#define MCJ_IRQ_BROADCAST 0x10 /* do IRQ broadcasting */
65 65
66#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ 66#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
67 67
@@ -214,6 +214,13 @@ void mce_log_therm_throt_event(__u64 status);
214/* Interrupt Handler for core thermal thresholds */ 214/* Interrupt Handler for core thermal thresholds */
215extern int (*platform_thermal_notify)(__u64 msr_val); 215extern int (*platform_thermal_notify)(__u64 msr_val);
216 216
217/* Interrupt Handler for package thermal thresholds */
218extern int (*platform_thermal_package_notify)(__u64 msr_val);
219
220/* Callback support of rate control, return true, if
221 * callback has rate control */
222extern bool (*platform_thermal_package_rate_control)(void);
223
217#ifdef CONFIG_X86_THERMAL_VECTOR 224#ifdef CONFIG_X86_THERMAL_VECTOR
218extern void mcheck_intel_therm_init(void); 225extern void mcheck_intel_therm_init(void);
219#else 226#else
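platform_thermal_package_notify and platform_thermal_package_rate_control are plain function pointers for a package-level thermal driver to fill in. A minimal registration sketch; all names prefixed example_ are illustrative:

    static int example_pkg_thermal_notify(__u64 msr_val)
    {
            /* schedule package-level thermal work based on msr_val */
            return 0;
    }

    static bool example_pkg_rate_control(void)
    {
            return true;    /* this callback does its own rate limiting */
    }

    static int example_pkg_thermal_init(void)
    {
            platform_thermal_package_notify = example_pkg_thermal_notify;
            platform_thermal_package_rate_control = example_pkg_rate_control;
            return 0;
    }
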
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 6bc3985ee473..f98bd6625318 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -60,11 +60,11 @@ static inline void __exit exit_amd_microcode(void) {}
60#ifdef CONFIG_MICROCODE_EARLY 60#ifdef CONFIG_MICROCODE_EARLY
61#define MAX_UCODE_COUNT 128 61#define MAX_UCODE_COUNT 128
62extern void __init load_ucode_bsp(void); 62extern void __init load_ucode_bsp(void);
63extern void __cpuinit load_ucode_ap(void); 63extern void load_ucode_ap(void);
64extern int __init save_microcode_in_initrd(void); 64extern int __init save_microcode_in_initrd(void);
65#else 65#else
66static inline void __init load_ucode_bsp(void) {} 66static inline void __init load_ucode_bsp(void) {}
67static inline void __cpuinit load_ucode_ap(void) {} 67static inline void load_ucode_ap(void) {}
68static inline int __init save_microcode_in_initrd(void) 68static inline int __init save_microcode_in_initrd(void)
69{ 69{
70 return 0; 70 return 0;
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
new file mode 100644
index 000000000000..50e5c58ced23
--- /dev/null
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -0,0 +1,78 @@
1#ifndef _ASM_X86_MICROCODE_AMD_H
2#define _ASM_X86_MICROCODE_AMD_H
3
4#include <asm/microcode.h>
5
6#define UCODE_MAGIC 0x00414d44
7#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
8#define UCODE_UCODE_TYPE 0x00000001
9
10#define SECTION_HDR_SIZE 8
11#define CONTAINER_HDR_SZ 12
12
13struct equiv_cpu_entry {
14 u32 installed_cpu;
15 u32 fixed_errata_mask;
16 u32 fixed_errata_compare;
17 u16 equiv_cpu;
18 u16 res;
19} __attribute__((packed));
20
21struct microcode_header_amd {
22 u32 data_code;
23 u32 patch_id;
24 u16 mc_patch_data_id;
25 u8 mc_patch_data_len;
26 u8 init_flag;
27 u32 mc_patch_data_checksum;
28 u32 nb_dev_id;
29 u32 sb_dev_id;
30 u16 processor_rev_id;
31 u8 nb_rev_id;
32 u8 sb_rev_id;
33 u8 bios_api_rev;
34 u8 reserved1[3];
35 u32 match_reg[8];
36} __attribute__((packed));
37
38struct microcode_amd {
39 struct microcode_header_amd hdr;
40 unsigned int mpb[0];
41};
42
43static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
44 unsigned int sig)
45{
46 int i = 0;
47
48 if (!equiv_cpu_table)
49 return 0;
50
51 while (equiv_cpu_table[i].installed_cpu != 0) {
52 if (sig == equiv_cpu_table[i].installed_cpu)
53 return equiv_cpu_table[i].equiv_cpu;
54
55 i++;
56 }
57 return 0;
58}
59
60extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
61extern int apply_microcode_amd(int cpu);
62extern enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size);
63
64#ifdef CONFIG_MICROCODE_AMD_EARLY
65#ifdef CONFIG_X86_32
66#define MPB_MAX_SIZE PAGE_SIZE
67extern u8 amd_bsp_mpb[MPB_MAX_SIZE];
68#endif
69extern void __init load_ucode_amd_bsp(void);
70extern void load_ucode_amd_ap(void);
71extern int __init save_microcode_in_initrd_amd(void);
72#else
73static inline void __init load_ucode_amd_bsp(void) {}
74static inline void load_ucode_amd_ap(void) {}
75static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; }
76#endif
77
78#endif /* _ASM_X86_MICROCODE_AMD_H */
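find_equiv_id() is the first step in applying an AMD patch: the running CPU's CPUID signature is translated into the equivalent-CPU id that the patches in the container are keyed on. A minimal usage sketch; cpuid_eax() is the existing helper from <asm/processor.h>, and the table pointer is assumed to have been parsed from a container earlier:

    static u16 example_equiv_id(struct equiv_cpu_entry *table)
    {
            u32 sig = cpuid_eax(0x00000001);        /* family/model/stepping */

            return find_equiv_id(table, sig);       /* 0 means no match */
    }
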
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 5356f927d411..9067166409bf 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -65,12 +65,14 @@ update_match_revision(struct microcode_header_intel *mc_header, int rev);
65 65
66#ifdef CONFIG_MICROCODE_INTEL_EARLY 66#ifdef CONFIG_MICROCODE_INTEL_EARLY
67extern void __init load_ucode_intel_bsp(void); 67extern void __init load_ucode_intel_bsp(void);
68extern void __cpuinit load_ucode_intel_ap(void); 68extern void load_ucode_intel_ap(void);
69extern void show_ucode_info_early(void); 69extern void show_ucode_info_early(void);
70extern int __init save_microcode_in_initrd_intel(void);
70#else 71#else
71static inline __init void load_ucode_intel_bsp(void) {} 72static inline __init void load_ucode_intel_bsp(void) {}
72static inline __cpuinit void load_ucode_intel_ap(void) {} 73static inline void load_ucode_intel_ap(void) {}
73static inline void show_ucode_info_early(void) {} 74static inline void show_ucode_info_early(void) {}
75static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; }
74#endif 76#endif
75 77
76#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) 78#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
diff --git a/arch/x86/include/asm/mmconfig.h b/arch/x86/include/asm/mmconfig.h
index 9b119da1d105..04a3fed22cfe 100644
--- a/arch/x86/include/asm/mmconfig.h
+++ b/arch/x86/include/asm/mmconfig.h
@@ -2,8 +2,8 @@
2#define _ASM_X86_MMCONFIG_H 2#define _ASM_X86_MMCONFIG_H
3 3
4#ifdef CONFIG_PCI_MMCONFIG 4#ifdef CONFIG_PCI_MMCONFIG
5extern void __cpuinit fam10h_check_enable_mmcfg(void); 5extern void fam10h_check_enable_mmcfg(void);
6extern void __cpuinit check_enable_amd_mmconf_dmi(void); 6extern void check_enable_amd_mmconf_dmi(void);
7#else 7#else
8static inline void fam10h_check_enable_mmcfg(void) { } 8static inline void fam10h_check_enable_mmcfg(void) { }
9static inline void check_enable_amd_mmconf_dmi(void) { } 9static inline void check_enable_amd_mmconf_dmi(void) { }
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 3e2f42a4b872..626cf70082d7 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -94,7 +94,7 @@ static inline void early_reserve_e820_mpc_new(void) { }
94#define default_get_smp_config x86_init_uint_noop 94#define default_get_smp_config x86_init_uint_noop
95#endif 95#endif
96 96
97void __cpuinit generic_processor_info(int apicid, int version); 97void generic_processor_info(int apicid, int version);
98#ifdef CONFIG_ACPI 98#ifdef CONFIG_ACPI
99extern void mp_register_ioapic(int id, u32 address, u32 gsi_base); 99extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
100extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, 100extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h
index 73668abdbedf..1e69a75412a4 100644
--- a/arch/x86/include/asm/mrst-vrtc.h
+++ b/arch/x86/include/asm/mrst-vrtc.h
@@ -3,7 +3,7 @@
3 3
4extern unsigned char vrtc_cmos_read(unsigned char reg); 4extern unsigned char vrtc_cmos_read(unsigned char reg);
5extern void vrtc_cmos_write(unsigned char val, unsigned char reg); 5extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
6extern unsigned long vrtc_get_time(void); 6extern void vrtc_get_time(struct timespec *now);
7extern int vrtc_set_mmss(unsigned long nowtime); 7extern int vrtc_set_mmss(const struct timespec *now);
8 8
9#endif 9#endif
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c2934be2446a..cd9c41938b8a 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -12,6 +12,9 @@ struct ms_hyperv_info {
12extern struct ms_hyperv_info ms_hyperv; 12extern struct ms_hyperv_info ms_hyperv;
13 13
14void hyperv_callback_vector(void); 14void hyperv_callback_vector(void);
15#ifdef CONFIG_TRACING
16#define trace_hyperv_callback_vector hyperv_callback_vector
17#endif
15void hyperv_vector_handler(struct pt_regs *regs); 18void hyperv_vector_handler(struct pt_regs *regs);
16void hv_register_vmbus_handler(int irq, irq_handler_t handler); 19void hv_register_vmbus_handler(int irq, irq_handler_t handler);
17 20
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index e235582f9930..f768f6298419 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -26,7 +26,10 @@
26#include <uapi/asm/mtrr.h> 26#include <uapi/asm/mtrr.h>
27 27
28 28
29/* The following functions are for use by other drivers */ 29/*
30 * The following functions are for use by other drivers that cannot use
31 * arch_phys_wc_add and arch_phys_wc_del.
32 */
30# ifdef CONFIG_MTRR 33# ifdef CONFIG_MTRR
31extern u8 mtrr_type_lookup(u64 addr, u64 end); 34extern u8 mtrr_type_lookup(u64 addr, u64 end);
32extern void mtrr_save_fixed_ranges(void *); 35extern void mtrr_save_fixed_ranges(void *);
@@ -45,6 +48,7 @@ extern void mtrr_aps_init(void);
45extern void mtrr_bp_restore(void); 48extern void mtrr_bp_restore(void);
46extern int mtrr_trim_uncached_memory(unsigned long end_pfn); 49extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
47extern int amd_special_default_mtrr(void); 50extern int amd_special_default_mtrr(void);
51extern int phys_wc_to_mtrr_index(int handle);
48# else 52# else
49static inline u8 mtrr_type_lookup(u64 addr, u64 end) 53static inline u8 mtrr_type_lookup(u64 addr, u64 end)
50{ 54{
@@ -80,6 +84,10 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
80static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) 84static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
81{ 85{
82} 86}
87static inline int phys_wc_to_mtrr_index(int handle)
88{
89 return -1;
90}
83 91
84#define mtrr_ap_init() do {} while (0) 92#define mtrr_ap_init() do {} while (0)
85#define mtrr_bp_init() do {} while (0) 93#define mtrr_bp_init() do {} while (0)
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 03f90c8a5a7c..0208c3c2cbc6 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -42,17 +42,14 @@ do { \
42 * __mutex_fastpath_lock_retval - try to take the lock by moving the count 42 * __mutex_fastpath_lock_retval - try to take the lock by moving the count
43 * from 1 to a 0 value 43 * from 1 to a 0 value
44 * @count: pointer of type atomic_t 44 * @count: pointer of type atomic_t
45 * @fail_fn: function to call if the original value was not 1
46 * 45 *
47 * Change the count from 1 to a value lower than 1, and call <fail_fn> if it 46 * Change the count from 1 to a value lower than 1. This function returns 0
48 * wasn't 1 originally. This function returns 0 if the fastpath succeeds, 47 * if the fastpath succeeds, or -1 otherwise.
49 * or anything the slow path function returns
50 */ 48 */
51static inline int __mutex_fastpath_lock_retval(atomic_t *count, 49static inline int __mutex_fastpath_lock_retval(atomic_t *count)
52 int (*fail_fn)(atomic_t *))
53{ 50{
54 if (unlikely(atomic_dec_return(count) < 0)) 51 if (unlikely(atomic_dec_return(count) < 0))
55 return fail_fn(count); 52 return -1;
56 else 53 else
57 return 0; 54 return 0;
58} 55}
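__mutex_fastpath_lock_retval() no longer takes a fail function: it only reports whether the fastpath won, and the generic mutex code picks the slowpath itself. Roughly, on the caller side (a sketch; example_lock_slowpath() stands in for whichever slowpath variant applies):

    static int example_lock_slowpath(struct mutex *lock);   /* hypothetical */

    static int example_mutex_lock_interruptible(struct mutex *lock)
    {
            int ret = __mutex_fastpath_lock_retval(&lock->count);

            if (unlikely(ret < 0))
                    ret = example_lock_slowpath(lock);
            return ret;
    }
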
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 68a87b0f8e29..2c543fff241b 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -37,17 +37,14 @@ do { \
37 * __mutex_fastpath_lock_retval - try to take the lock by moving the count 37 * __mutex_fastpath_lock_retval - try to take the lock by moving the count
38 * from 1 to a 0 value 38 * from 1 to a 0 value
39 * @count: pointer of type atomic_t 39 * @count: pointer of type atomic_t
40 * @fail_fn: function to call if the original value was not 1
41 * 40 *
42 * Change the count from 1 to a value lower than 1, and call <fail_fn> if 41 * Change the count from 1 to a value lower than 1. This function returns 0
43 * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, 42 * if the fastpath succeeds, or -1 otherwise.
44 * or anything the slow path function returns
45 */ 43 */
46static inline int __mutex_fastpath_lock_retval(atomic_t *count, 44static inline int __mutex_fastpath_lock_retval(atomic_t *count)
47 int (*fail_fn)(atomic_t *))
48{ 45{
49 if (unlikely(atomic_dec_return(count) < 0)) 46 if (unlikely(atomic_dec_return(count) < 0))
50 return fail_fn(count); 47 return -1;
51 else 48 else
52 return 0; 49 return 0;
53} 50}
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 1b99ee5c9f00..4064acae625d 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -39,7 +39,7 @@ static inline void set_apicid_to_node(int apicid, s16 node)
39 __apicid_to_node[apicid] = node; 39 __apicid_to_node[apicid] = node;
40} 40}
41 41
42extern int __cpuinit numa_cpu_node(int cpu); 42extern int numa_cpu_node(int cpu);
43 43
44#else /* CONFIG_NUMA */ 44#else /* CONFIG_NUMA */
45static inline void set_apicid_to_node(int apicid, s16 node) 45static inline void set_apicid_to_node(int apicid, s16 node)
@@ -60,8 +60,8 @@ static inline int numa_cpu_node(int cpu)
60extern void numa_set_node(int cpu, int node); 60extern void numa_set_node(int cpu, int node);
61extern void numa_clear_node(int cpu); 61extern void numa_clear_node(int cpu);
62extern void __init init_cpu_to_node(void); 62extern void __init init_cpu_to_node(void);
63extern void __cpuinit numa_add_cpu(int cpu); 63extern void numa_add_cpu(int cpu);
64extern void __cpuinit numa_remove_cpu(int cpu); 64extern void numa_remove_cpu(int cpu);
65#else /* CONFIG_NUMA */ 65#else /* CONFIG_NUMA */
66static inline void numa_set_node(int cpu, int node) { } 66static inline void numa_set_node(int cpu, int node) { }
67static inline void numa_clear_node(int cpu) { } 67static inline void numa_clear_node(int cpu) { }
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 57cb63402213..8249df45d2f2 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,6 +29,9 @@
29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) 29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL 30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
31 31
32#define HSW_IN_TX (1ULL << 32)
33#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
34
32#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) 35#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
33#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) 36#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
34#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) 37#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index f2b489cf1602..3bf2dd0cf61f 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -55,9 +55,53 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
55#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) 55#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
56#endif 56#endif
57 57
58#ifdef CONFIG_MEM_SOFT_DIRTY
59
60/*
61 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and
62 * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset
63 * into this range.
64 */
65#define PTE_FILE_MAX_BITS 28
66#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
67#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
68#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
69#define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1)
70#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
71#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
72#define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
73
74#define pte_to_pgoff(pte) \
75 ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \
76 & ((1U << PTE_FILE_BITS1) - 1))) \
77 + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \
78 & ((1U << PTE_FILE_BITS2) - 1)) \
79 << (PTE_FILE_BITS1)) \
80 + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \
81 & ((1U << PTE_FILE_BITS3) - 1)) \
82 << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
83 + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \
84 << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
85
86#define pgoff_to_pte(off) \
87 ((pte_t) { .pte_low = \
88 ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
89 + ((((off) >> PTE_FILE_BITS1) \
90 & ((1U << PTE_FILE_BITS2) - 1)) \
91 << PTE_FILE_SHIFT2) \
92 + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
93 & ((1U << PTE_FILE_BITS3) - 1)) \
94 << PTE_FILE_SHIFT3) \
95 + ((((off) >> \
96 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \
97 << PTE_FILE_SHIFT4) \
98 + _PAGE_FILE })
99
100#else /* CONFIG_MEM_SOFT_DIRTY */
101
58/* 102/*
59 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, 103 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
60 * split up the 29 bits of offset into this range: 104 * split up the 29 bits of offset into this range.
61 */ 105 */
62#define PTE_FILE_MAX_BITS 29 106#define PTE_FILE_MAX_BITS 29
63#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) 107#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
@@ -88,6 +132,8 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
88 << PTE_FILE_SHIFT3) \ 132 << PTE_FILE_SHIFT3) \
89 + _PAGE_FILE }) 133 + _PAGE_FILE })
90 134
135#endif /* CONFIG_MEM_SOFT_DIRTY */
136
91/* Encode and de-code a swap entry */ 137/* Encode and de-code a swap entry */
92#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE 138#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
93#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) 139#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
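The soft-dirty variant has to dodge one more pte bit, which is why PTE_FILE_MAX_BITS drops from 29 to 28. Assuming the usual 2-level bit numbers (PRESENT is bit 0, FILE/DIRTY bit 6, PROTNONE/GLOBAL bit 8, and SOFT_DIRTY aliases HIDDEN, conventionally bit 11; these numeric values are assumptions, not shown in this hunk), the bit budget works out as in this stand-alone check:

    /* Prints 28, matching PTE_FILE_MAX_BITS in the soft-dirty case above. */
    #include <stdio.h>

    int main(void)
    {
            int shift1 = 0 + 1;                /* above _PAGE_BIT_PRESENT    */
            int shift2 = 6 + 1;                /* above _PAGE_BIT_FILE       */
            int shift3 = 8 + 1;                /* above _PAGE_BIT_PROTNONE   */
            int shift4 = 11 + 1;               /* above _PAGE_BIT_SOFT_DIRTY */
            int bits1 = shift2 - shift1 - 1;   /* 5 bits at pte[1..5]    */
            int bits2 = shift3 - shift2 - 1;   /* 1 bit  at pte[7]       */
            int bits3 = shift4 - shift3 - 1;   /* 2 bits at pte[9..10]   */
            int bits4 = 32 - shift4;           /* 20 bits at pte[12..31] */

            printf("%d\n", bits1 + bits2 + bits3 + bits4);
            return 0;
    }
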
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 4cc9f2b7cdc3..81bb91b49a88 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -179,6 +179,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
179/* 179/*
180 * Bits 0, 6 and 7 are taken in the low part of the pte, 180 * Bits 0, 6 and 7 are taken in the low part of the pte,
181 * put the 32 bits of offset into the high part. 181 * put the 32 bits of offset into the high part.
182 *
183 * For soft-dirty tracking 11 bit is taken from
184 * the low part of pte as well.
182 */ 185 */
183#define pte_to_pgoff(pte) ((pte).pte_high) 186#define pte_to_pgoff(pte) ((pte).pte_high)
184#define pgoff_to_pte(off) \ 187#define pgoff_to_pte(off) \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1e672234c4ff..1c00631164c2 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -207,7 +207,7 @@ static inline pte_t pte_mkexec(pte_t pte)
207 207
208static inline pte_t pte_mkdirty(pte_t pte) 208static inline pte_t pte_mkdirty(pte_t pte)
209{ 209{
210 return pte_set_flags(pte, _PAGE_DIRTY); 210 return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
211} 211}
212 212
213static inline pte_t pte_mkyoung(pte_t pte) 213static inline pte_t pte_mkyoung(pte_t pte)
@@ -271,7 +271,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
271 271
272static inline pmd_t pmd_mkdirty(pmd_t pmd) 272static inline pmd_t pmd_mkdirty(pmd_t pmd)
273{ 273{
274 return pmd_set_flags(pmd, _PAGE_DIRTY); 274 return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
275} 275}
276 276
277static inline pmd_t pmd_mkhuge(pmd_t pmd) 277static inline pmd_t pmd_mkhuge(pmd_t pmd)
@@ -294,6 +294,56 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
294 return pmd_clear_flags(pmd, _PAGE_PRESENT); 294 return pmd_clear_flags(pmd, _PAGE_PRESENT);
295} 295}
296 296
297static inline int pte_soft_dirty(pte_t pte)
298{
299 return pte_flags(pte) & _PAGE_SOFT_DIRTY;
300}
301
302static inline int pmd_soft_dirty(pmd_t pmd)
303{
304 return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
305}
306
307static inline pte_t pte_mksoft_dirty(pte_t pte)
308{
309 return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
310}
311
312static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
313{
314 return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
315}
316
317static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
318{
319 return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
320}
321
322static inline int pte_swp_soft_dirty(pte_t pte)
323{
324 return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
325}
326
327static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
328{
329 return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
330}
331
332static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
333{
334 return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
335}
336
337static inline pte_t pte_file_mksoft_dirty(pte_t pte)
338{
339 return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
340}
341
342static inline int pte_file_soft_dirty(pte_t pte)
343{
344 return pte_flags(pte) & _PAGE_SOFT_DIRTY;
345}
346
297/* 347/*
298 * Mask out unsupported bits in a present pgprot. Non-present pgprots 348 * Mask out unsupported bits in a present pgprot. Non-present pgprots
299 * can use those bits for other purposes, so leave them be. 349 * can use those bits for other purposes, so leave them be.
@@ -506,9 +556,6 @@ static inline unsigned long pages_to_mb(unsigned long npg)
506 return npg >> (20 - PAGE_SHIFT); 556 return npg >> (20 - PAGE_SHIFT);
507} 557}
508 558
509#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
510 remap_pfn_range(vma, vaddr, pfn, size, prot)
511
512#if PAGETABLE_LEVELS > 2 559#if PAGETABLE_LEVELS > 2
513static inline int pud_none(pud_t pud) 560static inline int pud_none(pud_t pud)
514{ 561{
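The soft-dirty accessors back the per-task memory tracker (/proc/<pid>/clear_refs plus the pagemap interface): pte_mkdirty() above now sets _PAGE_SOFT_DIRTY alongside _PAGE_DIRTY, and pte_soft_dirty() reads it back. A minimal sketch of the read side, assuming only the helpers in this hunk plus the existing pte_present():

    /* Was this present pte written since the soft-dirty bits were cleared? */
    static bool example_pte_was_written(pte_t pte)
    {
            return pte_present(pte) && pte_soft_dirty(pte);
    }
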
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index e6423002c10b..f4843e031131 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -55,6 +55,33 @@
55#define _PAGE_HIDDEN (_AT(pteval_t, 0)) 55#define _PAGE_HIDDEN (_AT(pteval_t, 0))
56#endif 56#endif
57 57
58/*
59 * The same hidden bit is used by kmemcheck, but since kmemcheck
60 * works on kernel pages while soft-dirty engine on user space,
61 * they do not conflict with each other.
62 */
63
64#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN
65
66#ifdef CONFIG_MEM_SOFT_DIRTY
67#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
68#else
69#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0))
70#endif
71
72/*
73 * Tracking soft dirty bit when a page goes to a swap is tricky.
74 * We need a bit which can be stored in pte _and_ not conflict
75 * with swap entry format. On x86 bits 6 and 7 are *not* involved
76 * into swap entry computation, but bit 6 is used for nonlinear
77 * file mapping, so we borrow bit 7 for soft dirty tracking.
78 */
79#ifdef CONFIG_MEM_SOFT_DIRTY
80#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE
81#else
82#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
83#endif
84
58#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 85#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
59#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) 86#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
60#else 87#else
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22224b3b43bb..24cf5aefb704 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -89,9 +89,9 @@ struct cpuinfo_x86 {
89 char wp_works_ok; /* It doesn't on 386's */ 89 char wp_works_ok; /* It doesn't on 386's */
90 90
91 /* Problems on some 486Dx4's and old 386's: */ 91 /* Problems on some 486Dx4's and old 386's: */
92 char hard_math;
93 char rfu; 92 char rfu;
94 char pad0; 93 char pad0;
94 char pad1;
95#else 95#else
96 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ 96 /* Number of 4K pages in DTLB/ITLB combined(in pages): */
97 int x86_tlbsize; 97 int x86_tlbsize;
@@ -164,6 +164,7 @@ extern const struct seq_operations cpuinfo_op;
164#define cache_line_size() (boot_cpu_data.x86_cache_alignment) 164#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
165 165
166extern void cpu_detect(struct cpuinfo_x86 *c); 166extern void cpu_detect(struct cpuinfo_x86 *c);
167extern void fpu_detect(struct cpuinfo_x86 *c);
167 168
168extern void early_cpu_init(void); 169extern void early_cpu_init(void);
169extern void identify_boot_cpu(void); 170extern void identify_boot_cpu(void);
@@ -981,5 +982,5 @@ bool xen_set_default_idle(void);
981#endif 982#endif
982 983
983void stop_this_cpu(void *dummy); 984void stop_this_cpu(void *dummy);
984 985void df_debug(struct pt_regs *regs, long error_code);
985#endif /* _ASM_X86_PROCESSOR_H */ 986#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 60bef663609a..bade6ac3b14f 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -27,7 +27,7 @@ extern int of_ioapic;
27extern u64 initial_dtb; 27extern u64 initial_dtb;
28extern void add_dtb(u64 data); 28extern void add_dtb(u64 data);
29extern void x86_add_irq_domains(void); 29extern void x86_add_irq_domains(void);
30void __cpuinit x86_of_pci_init(void); 30void x86_of_pci_init(void);
31void x86_dtb_init(void); 31void x86_dtb_init(void);
32#else 32#else
33static inline void add_dtb(u64 data) { } 33static inline void add_dtb(u64 data) { }
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index beff97f7df37..7a958164088c 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,10 +7,10 @@
7 7
8#include <asm/processor-flags.h> 8#include <asm/processor-flags.h>
9 9
10#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ 10#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
11 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ 11 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
12 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ 12 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
13 X86_EFLAGS_CF) 13 X86_EFLAGS_CF | X86_EFLAGS_RF)
14 14
15void signal_fault(struct pt_regs *regs, void __user *frame, char *where); 15void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
16 16
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index b073aaea747c..4137890e88e3 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -179,7 +179,7 @@ static inline int wbinvd_on_all_cpus(void)
179} 179}
180#endif /* CONFIG_SMP */ 180#endif /* CONFIG_SMP */
181 181
182extern unsigned disabled_cpus __cpuinitdata; 182extern unsigned disabled_cpus;
183 183
184#ifdef CONFIG_X86_32_SMP 184#ifdef CONFIG_X86_32_SMP
185/* 185/*
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 41fc93a2e225..2f4d924fe6c9 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -16,7 +16,7 @@ static inline void native_clts(void)
16 * all loads stores around it, which can hurt performance. Solution is to 16 * all loads stores around it, which can hurt performance. Solution is to
17 * use a variable and mimic reads and writes to it to enforce serialization 17 * use a variable and mimic reads and writes to it to enforce serialization
18 */ 18 */
19static unsigned long __force_order; 19extern unsigned long __force_order;
20 20
21static inline unsigned long native_read_cr0(void) 21static inline unsigned long native_read_cr0(void)
22{ 22{
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 33692eaabab5..e3ddd7db723f 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -233,8 +233,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
233#define arch_read_relax(lock) cpu_relax() 233#define arch_read_relax(lock) cpu_relax()
234#define arch_write_relax(lock) cpu_relax() 234#define arch_write_relax(lock) cpu_relax()
235 235
236/* The {read|write|spin}_lock() on x86 are full memory barriers. */
237static inline void smp_mb__after_lock(void) { }
238#define ARCH_HAS_SMP_MB_AFTER_LOCK
239
240#endif /* _ASM_X86_SPINLOCK_H */ 236#endif /* _ASM_X86_SPINLOCK_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1df6e84691f..27811190cbd7 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,7 +89,6 @@ struct thread_info {
89#define TIF_FORK 18 /* ret_from_fork */ 89#define TIF_FORK 18 /* ret_from_fork */
90#define TIF_NOHZ 19 /* in adaptive nohz mode */ 90#define TIF_NOHZ 19 /* in adaptive nohz mode */
91#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ 91#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
92#define TIF_DEBUG 21 /* uses debug registers */
93#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ 92#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ 93#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
95#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ 94#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
@@ -113,7 +112,6 @@ struct thread_info {
113#define _TIF_IA32 (1 << TIF_IA32) 112#define _TIF_IA32 (1 << TIF_IA32)
114#define _TIF_FORK (1 << TIF_FORK) 113#define _TIF_FORK (1 << TIF_FORK)
115#define _TIF_NOHZ (1 << TIF_NOHZ) 114#define _TIF_NOHZ (1 << TIF_NOHZ)
116#define _TIF_DEBUG (1 << TIF_DEBUG)
117#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) 115#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
118#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 116#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
119#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) 117#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
@@ -154,7 +152,7 @@ struct thread_info {
154 (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) 152 (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
155 153
156#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) 154#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
157#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) 155#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
158 156
159#define PREEMPT_ACTIVE 0x10000000 157#define PREEMPT_ACTIVE 0x10000000
160 158
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50a7fc0f824a..cf512003e663 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void)
62 62
63static inline void __flush_tlb_one(unsigned long addr) 63static inline void __flush_tlb_one(unsigned long addr)
64{ 64{
65 __flush_tlb_single(addr); 65 __flush_tlb_single(addr);
66} 66}
67 67
68#define TLB_FLUSH_ALL -1UL 68#define TLB_FLUSH_ALL -1UL
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
new file mode 100644
index 000000000000..2874df24e7a4
--- /dev/null
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -0,0 +1,104 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM irq_vectors
3
4#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_IRQ_VECTORS_H
6
7#include <linux/tracepoint.h>
8
9extern void trace_irq_vector_regfunc(void);
10extern void trace_irq_vector_unregfunc(void);
11
12DECLARE_EVENT_CLASS(x86_irq_vector,
13
14 TP_PROTO(int vector),
15
16 TP_ARGS(vector),
17
18 TP_STRUCT__entry(
19 __field( int, vector )
20 ),
21
22 TP_fast_assign(
23 __entry->vector = vector;
24 ),
25
26 TP_printk("vector=%d", __entry->vector) );
27
28#define DEFINE_IRQ_VECTOR_EVENT(name) \
29DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
30 TP_PROTO(int vector), \
31 TP_ARGS(vector), \
32 trace_irq_vector_regfunc, \
33 trace_irq_vector_unregfunc); \
34DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
35 TP_PROTO(int vector), \
36 TP_ARGS(vector), \
37 trace_irq_vector_regfunc, \
38 trace_irq_vector_unregfunc);
39
40
41/*
42 * local_timer - called when entering/exiting a local timer interrupt
43 * vector handler
44 */
45DEFINE_IRQ_VECTOR_EVENT(local_timer);
46
47/*
48 * reschedule - called when entering/exiting a reschedule vector handler
49 */
50DEFINE_IRQ_VECTOR_EVENT(reschedule);
51
52/*
53 * spurious_apic - called when entering/exiting a spurious apic vector handler
54 */
55DEFINE_IRQ_VECTOR_EVENT(spurious_apic);
56
57/*
58 * error_apic - called when entering/exiting an error apic vector handler
59 */
60DEFINE_IRQ_VECTOR_EVENT(error_apic);
61
62/*
63 * x86_platform_ipi - called when entering/exiting a x86 platform ipi interrupt
64 * vector handler
65 */
66DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
67
68/*
69 * irq_work - called when entering/exiting a irq work interrupt
70 * vector handler
71 */
72DEFINE_IRQ_VECTOR_EVENT(irq_work);
73
74/*
75 * call_function - called when entering/exiting a call function interrupt
76 * vector handler
77 */
78DEFINE_IRQ_VECTOR_EVENT(call_function);
79
80/*
81 * call_function_single - called when entering/exiting a call function
82 * single interrupt vector handler
83 */
84DEFINE_IRQ_VECTOR_EVENT(call_function_single);
85
86/*
87 * threshold_apic - called when entering/exiting a threshold apic interrupt
88 * vector handler
89 */
90DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
91
92/*
93 * thermal_apic - called when entering/exiting a thermal apic interrupt
94 * vector handler
95 */
96DEFINE_IRQ_VECTOR_EVENT(thermal_apic);
97
98#undef TRACE_INCLUDE_PATH
99#define TRACE_INCLUDE_PATH .
100#define TRACE_INCLUDE_FILE irq_vectors
101#endif /* _TRACE_IRQ_VECTORS_H */
102
103/* This part must be outside protection */
104#include <trace/define_trace.h>
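Each DEFINE_IRQ_VECTOR_EVENT(name) above expands into a trace_<name>_entry()/trace_<name>_exit() pair that a handler brackets around the vector body, which is exactly what the apic.c hunks further down in this diff do for the local timer. The resulting shape, as a sketch:

        /* Sketch of a traced handler; entering_ack_irq()/exiting_irq() are the
         * irq_enter/ACK/irq_exit helpers used elsewhere in this merge. */
        void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
        {
                struct pt_regs *old_regs = set_irq_regs(regs);

                entering_ack_irq();
                trace_local_timer_entry(LOCAL_TIMER_VECTOR);
                local_apic_timer_interrupt();
                trace_local_timer_exit(LOCAL_TIMER_VECTOR);
                exiting_irq();

                set_irq_regs(old_regs);
        }
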
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 142810c457dc..4f7923dd0007 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -235,7 +235,7 @@ extern long __copy_user_nocache(void *dst, const void __user *src,
235static inline int 235static inline int
236__copy_from_user_nocache(void *dst, const void __user *src, unsigned size) 236__copy_from_user_nocache(void *dst, const void __user *src, unsigned size)
237{ 237{
238 might_sleep(); 238 might_fault();
239 return __copy_user_nocache(dst, src, size, 1); 239 return __copy_user_nocache(dst, src, size, 1);
240} 240}
241 241
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index a06983cdc125..0b46ef261c77 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -731,6 +731,9 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
731} 731}
732 732
733extern void uv_bau_message_intr1(void); 733extern void uv_bau_message_intr1(void);
734#ifdef CONFIG_TRACING
735#define trace_uv_bau_message_intr1 uv_bau_message_intr1
736#endif
734extern void uv_bau_timeout_intr1(void); 737extern void uv_bau_timeout_intr1(void);
735 738
736struct atomic_short { 739struct atomic_short {
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index d8d99222b36a..828a1565ba57 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,8 @@ struct x86_cpuinit_ops {
142 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); 142 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
143}; 143};
144 144
145struct timespec;
146
145/** 147/**
146 * struct x86_platform_ops - platform specific runtime functions 148 * struct x86_platform_ops - platform specific runtime functions
147 * @calibrate_tsc: calibrate TSC 149 * @calibrate_tsc: calibrate TSC
@@ -156,8 +158,8 @@ struct x86_cpuinit_ops {
156 */ 158 */
157struct x86_platform_ops { 159struct x86_platform_ops {
158 unsigned long (*calibrate_tsc)(void); 160 unsigned long (*calibrate_tsc)(void);
159 unsigned long (*get_wallclock)(void); 161 void (*get_wallclock)(struct timespec *ts);
160 int (*set_wallclock)(unsigned long nowtime); 162 int (*set_wallclock)(const struct timespec *ts);
161 void (*iommu_shutdown)(void); 163 void (*iommu_shutdown)(void);
162 bool (*is_untracked_pat_range)(u64 start, u64 end); 164 bool (*is_untracked_pat_range)(u64 start, u64 end);
163 void (*nmi_init)(void); 165 void (*nmi_init)(void);
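With get_wallclock()/set_wallclock() now passing a struct timespec instead of a bare seconds count, a platform hook can report sub-second resolution directly. A hypothetical implementation of the new prototype (read_rtc_seconds() is a made-up helper, not a real API):

        /* Hypothetical: fill the caller-provided timespec from a platform RTC. */
        static void example_get_wallclock(struct timespec *ts)
        {
                ts->tv_sec  = read_rtc_seconds();   /* assumed platform helper  */
                ts->tv_nsec = 0;                    /* RTC has 1 s granularity */
        }
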
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 2af848dfa754..bb0465090ae5 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -170,6 +170,9 @@
170#define MSR_KNC_EVNTSEL0 0x00000028 170#define MSR_KNC_EVNTSEL0 0x00000028
171#define MSR_KNC_EVNTSEL1 0x00000029 171#define MSR_KNC_EVNTSEL1 0x00000029
172 172
173/* Alternative perfctr range with full access. */
174#define MSR_IA32_PMC0 0x000004c1
175
173/* AMD64 MSRs. Not complete. See the architecture manual for a more 176/* AMD64 MSRs. Not complete. See the architecture manual for a more
174 complete list. */ 177 complete list. */
175 178
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 54991a746043..180a0c3c224d 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -2,75 +2,129 @@
2#define _UAPI_ASM_X86_PROCESSOR_FLAGS_H 2#define _UAPI_ASM_X86_PROCESSOR_FLAGS_H
3/* Various flags defined: can be included from assembler. */ 3/* Various flags defined: can be included from assembler. */
4 4
5#include <linux/const.h>
6
5/* 7/*
6 * EFLAGS bits 8 * EFLAGS bits
7 */ 9 */
8#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ 10#define X86_EFLAGS_CF_BIT 0 /* Carry Flag */
9#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */ 11#define X86_EFLAGS_CF _BITUL(X86_EFLAGS_CF_BIT)
10#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ 12#define X86_EFLAGS_FIXED_BIT 1 /* Bit 1 - always on */
11#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ 13#define X86_EFLAGS_FIXED _BITUL(X86_EFLAGS_FIXED_BIT)
12#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ 14#define X86_EFLAGS_PF_BIT 2 /* Parity Flag */
13#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ 15#define X86_EFLAGS_PF _BITUL(X86_EFLAGS_PF_BIT)
14#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ 16#define X86_EFLAGS_AF_BIT 4 /* Auxiliary carry Flag */
15#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ 17#define X86_EFLAGS_AF _BITUL(X86_EFLAGS_AF_BIT)
16#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ 18#define X86_EFLAGS_ZF_BIT 6 /* Zero Flag */
17#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ 19#define X86_EFLAGS_ZF _BITUL(X86_EFLAGS_ZF_BIT)
18#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ 20#define X86_EFLAGS_SF_BIT 7 /* Sign Flag */
19#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ 21#define X86_EFLAGS_SF _BITUL(X86_EFLAGS_SF_BIT)
20#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ 22#define X86_EFLAGS_TF_BIT 8 /* Trap Flag */
21#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ 23#define X86_EFLAGS_TF _BITUL(X86_EFLAGS_TF_BIT)
22#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ 24#define X86_EFLAGS_IF_BIT 9 /* Interrupt Flag */
23#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ 25#define X86_EFLAGS_IF _BITUL(X86_EFLAGS_IF_BIT)
24#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ 26#define X86_EFLAGS_DF_BIT 10 /* Direction Flag */
25#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ 27#define X86_EFLAGS_DF _BITUL(X86_EFLAGS_DF_BIT)
28#define X86_EFLAGS_OF_BIT 11 /* Overflow Flag */
29#define X86_EFLAGS_OF _BITUL(X86_EFLAGS_OF_BIT)
30#define X86_EFLAGS_IOPL_BIT 12 /* I/O Privilege Level (2 bits) */
31#define X86_EFLAGS_IOPL (_AC(3,UL) << X86_EFLAGS_IOPL_BIT)
32#define X86_EFLAGS_NT_BIT 14 /* Nested Task */
33#define X86_EFLAGS_NT _BITUL(X86_EFLAGS_NT_BIT)
34#define X86_EFLAGS_RF_BIT 16 /* Resume Flag */
35#define X86_EFLAGS_RF _BITUL(X86_EFLAGS_RF_BIT)
36#define X86_EFLAGS_VM_BIT 17 /* Virtual Mode */
37#define X86_EFLAGS_VM _BITUL(X86_EFLAGS_VM_BIT)
38#define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */
39#define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT)
40#define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */
41#define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT)
42#define X86_EFLAGS_VIF_BIT 19 /* Virtual Interrupt Flag */
43#define X86_EFLAGS_VIF _BITUL(X86_EFLAGS_VIF_BIT)
44#define X86_EFLAGS_VIP_BIT 20 /* Virtual Interrupt Pending */
45#define X86_EFLAGS_VIP _BITUL(X86_EFLAGS_VIP_BIT)
46#define X86_EFLAGS_ID_BIT 21 /* CPUID detection */
47#define X86_EFLAGS_ID _BITUL(X86_EFLAGS_ID_BIT)
26 48
27/* 49/*
28 * Basic CPU control in CR0 50 * Basic CPU control in CR0
29 */ 51 */
30#define X86_CR0_PE 0x00000001 /* Protection Enable */ 52#define X86_CR0_PE_BIT 0 /* Protection Enable */
31#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */ 53#define X86_CR0_PE _BITUL(X86_CR0_PE_BIT)
32#define X86_CR0_EM 0x00000004 /* Emulation */ 54#define X86_CR0_MP_BIT 1 /* Monitor Coprocessor */
33#define X86_CR0_TS 0x00000008 /* Task Switched */ 55#define X86_CR0_MP _BITUL(X86_CR0_MP_BIT)
34#define X86_CR0_ET 0x00000010 /* Extension Type */ 56#define X86_CR0_EM_BIT 2 /* Emulation */
35#define X86_CR0_NE 0x00000020 /* Numeric Error */ 57#define X86_CR0_EM _BITUL(X86_CR0_EM_BIT)
36#define X86_CR0_WP 0x00010000 /* Write Protect */ 58#define X86_CR0_TS_BIT 3 /* Task Switched */
37#define X86_CR0_AM 0x00040000 /* Alignment Mask */ 59#define X86_CR0_TS _BITUL(X86_CR0_TS_BIT)
38#define X86_CR0_NW 0x20000000 /* Not Write-through */ 60#define X86_CR0_ET_BIT 4 /* Extension Type */
39#define X86_CR0_CD 0x40000000 /* Cache Disable */ 61#define X86_CR0_ET _BITUL(X86_CR0_ET_BIT)
40#define X86_CR0_PG 0x80000000 /* Paging */ 62#define X86_CR0_NE_BIT 5 /* Numeric Error */
63#define X86_CR0_NE _BITUL(X86_CR0_NE_BIT)
64#define X86_CR0_WP_BIT 16 /* Write Protect */
65#define X86_CR0_WP _BITUL(X86_CR0_WP_BIT)
66#define X86_CR0_AM_BIT 18 /* Alignment Mask */
67#define X86_CR0_AM _BITUL(X86_CR0_AM_BIT)
68#define X86_CR0_NW_BIT 29 /* Not Write-through */
69#define X86_CR0_NW _BITUL(X86_CR0_NW_BIT)
70#define X86_CR0_CD_BIT 30 /* Cache Disable */
71#define X86_CR0_CD _BITUL(X86_CR0_CD_BIT)
72#define X86_CR0_PG_BIT 31 /* Paging */
73#define X86_CR0_PG _BITUL(X86_CR0_PG_BIT)
41 74
42/* 75/*
43 * Paging options in CR3 76 * Paging options in CR3
44 */ 77 */
45#define X86_CR3_PWT 0x00000008 /* Page Write Through */ 78#define X86_CR3_PWT_BIT 3 /* Page Write Through */
46#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ 79#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
47#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */ 80#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
81#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
82#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
48 83
49/* 84/*
50 * Intel CPU features in CR4 85 * Intel CPU features in CR4
51 */ 86 */
52#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */ 87#define X86_CR4_VME_BIT 0 /* enable vm86 extensions */
53#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */ 88#define X86_CR4_VME _BITUL(X86_CR4_VME_BIT)
54#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */ 89#define X86_CR4_PVI_BIT 1 /* virtual interrupts flag enable */
55#define X86_CR4_DE 0x00000008 /* enable debugging extensions */ 90#define X86_CR4_PVI _BITUL(X86_CR4_PVI_BIT)
56#define X86_CR4_PSE 0x00000010 /* enable page size extensions */ 91#define X86_CR4_TSD_BIT 2 /* disable time stamp at ipl 3 */
57#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */ 92#define X86_CR4_TSD _BITUL(X86_CR4_TSD_BIT)
58#define X86_CR4_MCE 0x00000040 /* Machine check enable */ 93#define X86_CR4_DE_BIT 3 /* enable debugging extensions */
59#define X86_CR4_PGE 0x00000080 /* enable global pages */ 94#define X86_CR4_DE _BITUL(X86_CR4_DE_BIT)
60#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */ 95#define X86_CR4_PSE_BIT 4 /* enable page size extensions */
61#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ 96#define X86_CR4_PSE _BITUL(X86_CR4_PSE_BIT)
62#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ 97#define X86_CR4_PAE_BIT 5 /* enable physical address extensions */
63#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ 98#define X86_CR4_PAE _BITUL(X86_CR4_PAE_BIT)
64#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ 99#define X86_CR4_MCE_BIT 6 /* Machine check enable */
65#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ 100#define X86_CR4_MCE _BITUL(X86_CR4_MCE_BIT)
66#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ 101#define X86_CR4_PGE_BIT 7 /* enable global pages */
67#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ 102#define X86_CR4_PGE _BITUL(X86_CR4_PGE_BIT)
68#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */ 103#define X86_CR4_PCE_BIT 8 /* enable performance counters at ipl 3 */
104#define X86_CR4_PCE _BITUL(X86_CR4_PCE_BIT)
105#define X86_CR4_OSFXSR_BIT 9 /* enable fast FPU save and restore */
106#define X86_CR4_OSFXSR _BITUL(X86_CR4_OSFXSR_BIT)
107#define X86_CR4_OSXMMEXCPT_BIT 10 /* enable unmasked SSE exceptions */
108#define X86_CR4_OSXMMEXCPT _BITUL(X86_CR4_OSXMMEXCPT_BIT)
109#define X86_CR4_VMXE_BIT 13 /* enable VMX virtualization */
110#define X86_CR4_VMXE _BITUL(X86_CR4_VMXE_BIT)
111#define X86_CR4_SMXE_BIT 14 /* enable safer mode (TXT) */
112#define X86_CR4_SMXE _BITUL(X86_CR4_SMXE_BIT)
113#define X86_CR4_FSGSBASE_BIT 16 /* enable RDWRFSGS support */
114#define X86_CR4_FSGSBASE _BITUL(X86_CR4_FSGSBASE_BIT)
115#define X86_CR4_PCIDE_BIT 17 /* enable PCID support */
116#define X86_CR4_PCIDE _BITUL(X86_CR4_PCIDE_BIT)
117#define X86_CR4_OSXSAVE_BIT 18 /* enable xsave and xrestore */
118#define X86_CR4_OSXSAVE _BITUL(X86_CR4_OSXSAVE_BIT)
119#define X86_CR4_SMEP_BIT 20 /* enable SMEP support */
120#define X86_CR4_SMEP _BITUL(X86_CR4_SMEP_BIT)
121#define X86_CR4_SMAP_BIT 21 /* enable SMAP support */
122#define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT)
69 123
70/* 124/*
71 * x86-64 Task Priority Register, CR8 125 * x86-64 Task Priority Register, CR8
72 */ 126 */
73#define X86_CR8_TPR 0x0000000F /* task priority register */ 127#define X86_CR8_TPR _AC(0x0000000f,UL) /* task priority register */
74 128
75/* 129/*
76 * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h> 130 * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
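_BITUL() comes from <linux/const.h> and expands to (_AC(1,UL) << bit), so the reworked constants keep their old numeric values while the new *_BIT names become usable from assembly and from the generic bit helpers. A small sketch of both uses:

        /* Sketch: X86_CR4_PAE is still 0x20 (_BITUL(5)); the *_BIT names can
         * feed test_bit() and friends directly. */
        static void example_dump_cr4(void)
        {
                unsigned long cr4 = native_read_cr4();

                if (cr4 & X86_CR4_PAE)
                        pr_info("PAE paging enabled\n");
                if (test_bit(X86_CR4_PCIDE_BIT, &cr4))
                        pr_info("PCID enabled\n");
        }
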
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7bd3bd310106..88d99ea77723 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,6 +16,8 @@ CFLAGS_REMOVE_ftrace.o = -pg
16CFLAGS_REMOVE_early_printk.o = -pg 16CFLAGS_REMOVE_early_printk.o = -pg
17endif 17endif
18 18
19CFLAGS_irq.o := -I$(src)/../include/asm/trace
20
19obj-y := process_$(BITS).o signal.o entry_$(BITS).o 21obj-y := process_$(BITS).o signal.o entry_$(BITS).o
20obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 22obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
21obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o 23obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
@@ -67,7 +69,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 69obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
68obj-y += kprobes/ 70obj-y += kprobes/
69obj-$(CONFIG_MODULES) += module.o 71obj-$(CONFIG_MODULES) += module.o
70obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o 72obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
71obj-$(CONFIG_KGDB) += kgdb.o 73obj-$(CONFIG_KGDB) += kgdb.o
72obj-$(CONFIG_VM86) += vm86_32.o 74obj-$(CONFIG_VM86) += vm86_32.o
73obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 75obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
@@ -93,6 +95,7 @@ obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o
93microcode-y := microcode_core.o 95microcode-y := microcode_core.o
94microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o 96microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
95microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o 97microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
98obj-$(CONFIG_MICROCODE_AMD_EARLY) += microcode_amd_early.o
96obj-$(CONFIG_MICROCODE) += microcode.o 99obj-$(CONFIG_MICROCODE) += microcode.o
97 100
98obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o 101obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
@@ -102,6 +105,7 @@ obj-$(CONFIG_OF) += devicetree.o
102obj-$(CONFIG_UPROBES) += uprobes.o 105obj-$(CONFIG_UPROBES) += uprobes.o
103 106
104obj-$(CONFIG_PERF_EVENTS) += perf_regs.o 107obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
108obj-$(CONFIG_TRACING) += tracepoint.o
105 109
106### 110###
107# 64 bit specific files 111# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 230c8ea878e5..2627a81253ee 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -44,6 +44,7 @@
44#include <asm/mpspec.h> 44#include <asm/mpspec.h>
45#include <asm/smp.h> 45#include <asm/smp.h>
46 46
47#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
47static int __initdata acpi_force = 0; 48static int __initdata acpi_force = 0;
48u32 acpi_rsdt_forced; 49u32 acpi_rsdt_forced;
49int acpi_disabled; 50int acpi_disabled;
@@ -194,7 +195,7 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
194 return 0; 195 return 0;
195} 196}
196 197
197static void __cpuinit acpi_register_lapic(int id, u8 enabled) 198static void acpi_register_lapic(int id, u8 enabled)
198{ 199{
199 unsigned int ver = 0; 200 unsigned int ver = 0;
200 201
@@ -559,6 +560,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
559int (*__acpi_register_gsi)(struct device *dev, u32 gsi, 560int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
560 int trigger, int polarity) = acpi_register_gsi_pic; 561 int trigger, int polarity) = acpi_register_gsi_pic;
561 562
563#ifdef CONFIG_ACPI_SLEEP
564int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
565#else
566int (*acpi_suspend_lowlevel)(void);
567#endif
568
562/* 569/*
563 * success: return IRQ number (>=0) 570 * success: return IRQ number (>=0)
564 * failure: return < 0 571 * failure: return < 0
@@ -600,7 +607,7 @@ void __init acpi_set_irq_model_ioapic(void)
600#ifdef CONFIG_ACPI_HOTPLUG_CPU 607#ifdef CONFIG_ACPI_HOTPLUG_CPU
601#include <acpi/processor.h> 608#include <acpi/processor.h>
602 609
603static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) 610static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
604{ 611{
605#ifdef CONFIG_ACPI_NUMA 612#ifdef CONFIG_ACPI_NUMA
606 int nid; 613 int nid;
@@ -613,7 +620,7 @@ static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
613#endif 620#endif
614} 621}
615 622
616static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) 623static int _acpi_map_lsapic(acpi_handle handle, int *pcpu)
617{ 624{
618 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 625 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
619 union acpi_object *obj; 626 union acpi_object *obj;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index b44577bc9744..33120100ff5e 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -26,12 +26,12 @@ static char temp_stack[4096];
26#endif 26#endif
27 27
28/** 28/**
29 * acpi_suspend_lowlevel - save kernel state 29 * x86_acpi_suspend_lowlevel - save kernel state
30 * 30 *
31 * Create an identity mapped page table and copy the wakeup routine to 31 * Create an identity mapped page table and copy the wakeup routine to
32 * low memory. 32 * low memory.
33 */ 33 */
34int acpi_suspend_lowlevel(void) 34int x86_acpi_suspend_lowlevel(void)
35{ 35{
36 struct wakeup_header *header = 36 struct wakeup_header *header =
37 (struct wakeup_header *) __va(real_mode_header->wakeup_header); 37 (struct wakeup_header *) __va(real_mode_header->wakeup_header);
@@ -48,9 +48,20 @@ int acpi_suspend_lowlevel(void)
48#ifndef CONFIG_64BIT 48#ifndef CONFIG_64BIT
49 native_store_gdt((struct desc_ptr *)&header->pmode_gdt); 49 native_store_gdt((struct desc_ptr *)&header->pmode_gdt);
50 50
51 /*
52 * We have to check that we can write back the value, and not
53 * just read it. At least on 90 nm Pentium M (Family 6, Model
54 * 13), reading an invalid MSR is not guaranteed to trap, see
55 * Erratum X4 in "Intel Pentium M Processor on 90 nm Process
56 * with 2-MB L2 Cache and Intel® Processor A100 and A110 on 90
57 * nm process with 512-KB L2 Cache Specification Update".
58 */
51 if (!rdmsr_safe(MSR_EFER, 59 if (!rdmsr_safe(MSR_EFER,
52 &header->pmode_efer_low, 60 &header->pmode_efer_low,
53 &header->pmode_efer_high)) 61 &header->pmode_efer_high) &&
62 !wrmsr_safe(MSR_EFER,
63 header->pmode_efer_low,
64 header->pmode_efer_high))
54 header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER); 65 header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
55#endif /* !CONFIG_64BIT */ 66#endif /* !CONFIG_64BIT */
56 67
@@ -61,7 +72,10 @@ int acpi_suspend_lowlevel(void)
61 } 72 }
62 if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, 73 if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
63 &header->pmode_misc_en_low, 74 &header->pmode_misc_en_low,
64 &header->pmode_misc_en_high)) 75 &header->pmode_misc_en_high) &&
76 !wrmsr_safe(MSR_IA32_MISC_ENABLE,
77 header->pmode_misc_en_low,
78 header->pmode_misc_en_high))
65 header->pmode_behavior |= 79 header->pmode_behavior |=
66 (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE); 80 (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE);
67 header->realmode_flags = acpi_realmode_flags; 81 header->realmode_flags = acpi_realmode_flags;
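The new comment spells out the shape of the check: on the affected Pentium M parts a bogus rdmsr may silently succeed, so the wakeup path only flags an MSR for restore after a full read-then-write-back probe. Reduced to its essentials, the pattern is:

        /* Sketch: trust an MSR for restore only if both directions work. */
        u32 lo, hi;

        if (!rdmsr_safe(MSR_EFER, &lo, &hi) &&
            !wrmsr_safe(MSR_EFER, lo, hi))
                header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
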
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 67f59f8c6956..c9c2c982d5e4 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -15,3 +15,5 @@ extern unsigned long acpi_copy_wakeup_routine(unsigned long);
15extern void wakeup_long64(void); 15extern void wakeup_long64(void);
16 16
17extern void do_suspend_lowlevel(void); 17extern void do_suspend_lowlevel(void);
18
19extern int x86_acpi_suspend_lowlevel(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 904611bf0e5a..eca89c53a7f5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,7 @@
35#include <linux/smp.h> 35#include <linux/smp.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37 37
38#include <asm/trace/irq_vectors.h>
38#include <asm/irq_remapping.h> 39#include <asm/irq_remapping.h>
39#include <asm/perf_event.h> 40#include <asm/perf_event.h>
40#include <asm/x86_init.h> 41#include <asm/x86_init.h>
@@ -57,7 +58,7 @@
57 58
58unsigned int num_processors; 59unsigned int num_processors;
59 60
60unsigned disabled_cpus __cpuinitdata; 61unsigned disabled_cpus;
61 62
62/* Processor that is doing the boot up */ 63/* Processor that is doing the boot up */
63unsigned int boot_cpu_physical_apicid = -1U; 64unsigned int boot_cpu_physical_apicid = -1U;
@@ -543,7 +544,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
543 * Setup the local APIC timer for this CPU. Copy the initialized values 544 * Setup the local APIC timer for this CPU. Copy the initialized values
544 * of the boot CPU and register the clock event in the framework. 545 * of the boot CPU and register the clock event in the framework.
545 */ 546 */
546static void __cpuinit setup_APIC_timer(void) 547static void setup_APIC_timer(void)
547{ 548{
548 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 549 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
549 550
@@ -865,7 +866,7 @@ void __init setup_boot_APIC_clock(void)
865 setup_APIC_timer(); 866 setup_APIC_timer();
866} 867}
867 868
868void __cpuinit setup_secondary_APIC_clock(void) 869void setup_secondary_APIC_clock(void)
869{ 870{
870 setup_APIC_timer(); 871 setup_APIC_timer();
871} 872}
@@ -919,17 +920,35 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
919 /* 920 /*
920 * NOTE! We'd better ACK the irq immediately, 921 * NOTE! We'd better ACK the irq immediately,
921 * because timer handling can be slow. 922 * because timer handling can be slow.
923 *
924 * update_process_times() expects us to have done irq_enter().
925 * Besides, if we don't timer interrupts ignore the global
926 * interrupt lock, which is the WrongThing (tm) to do.
922 */ 927 */
923 ack_APIC_irq(); 928 entering_ack_irq();
929 local_apic_timer_interrupt();
930 exiting_irq();
931
932 set_irq_regs(old_regs);
933}
934
935void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
936{
937 struct pt_regs *old_regs = set_irq_regs(regs);
938
924 /* 939 /*
940 * NOTE! We'd better ACK the irq immediately,
941 * because timer handling can be slow.
942 *
925 * update_process_times() expects us to have done irq_enter(). 943 * update_process_times() expects us to have done irq_enter().
926 * Besides, if we don't timer interrupts ignore the global 944 * Besides, if we don't timer interrupts ignore the global
927 * interrupt lock, which is the WrongThing (tm) to do. 945 * interrupt lock, which is the WrongThing (tm) to do.
928 */ 946 */
929 irq_enter(); 947 entering_ack_irq();
930 exit_idle(); 948 trace_local_timer_entry(LOCAL_TIMER_VECTOR);
931 local_apic_timer_interrupt(); 949 local_apic_timer_interrupt();
932 irq_exit(); 950 trace_local_timer_exit(LOCAL_TIMER_VECTOR);
951 exiting_irq();
933 952
934 set_irq_regs(old_regs); 953 set_irq_regs(old_regs);
935} 954}
@@ -1210,7 +1229,7 @@ void __init init_bsp_APIC(void)
1210 apic_write(APIC_LVT1, value); 1229 apic_write(APIC_LVT1, value);
1211} 1230}
1212 1231
1213static void __cpuinit lapic_setup_esr(void) 1232static void lapic_setup_esr(void)
1214{ 1233{
1215 unsigned int oldvalue, value, maxlvt; 1234 unsigned int oldvalue, value, maxlvt;
1216 1235
@@ -1257,7 +1276,7 @@ static void __cpuinit lapic_setup_esr(void)
1257 * Used to setup local APIC while initializing BSP or bringin up APs. 1276 * Used to setup local APIC while initializing BSP or bringin up APs.
1258 * Always called with preemption disabled. 1277 * Always called with preemption disabled.
1259 */ 1278 */
1260void __cpuinit setup_local_APIC(void) 1279void setup_local_APIC(void)
1261{ 1280{
1262 int cpu = smp_processor_id(); 1281 int cpu = smp_processor_id();
1263 unsigned int value, queued; 1282 unsigned int value, queued;
@@ -1452,7 +1471,7 @@ void __cpuinit setup_local_APIC(void)
1452#endif 1471#endif
1453} 1472}
1454 1473
1455void __cpuinit end_local_APIC_setup(void) 1474void end_local_APIC_setup(void)
1456{ 1475{
1457 lapic_setup_esr(); 1476 lapic_setup_esr();
1458 1477
@@ -1907,12 +1926,10 @@ int __init APIC_init_uniprocessor(void)
1907/* 1926/*
1908 * This interrupt should _never_ happen with our APIC/SMP architecture 1927 * This interrupt should _never_ happen with our APIC/SMP architecture
1909 */ 1928 */
1910void smp_spurious_interrupt(struct pt_regs *regs) 1929static inline void __smp_spurious_interrupt(void)
1911{ 1930{
1912 u32 v; 1931 u32 v;
1913 1932
1914 irq_enter();
1915 exit_idle();
1916 /* 1933 /*
1917 * Check if this really is a spurious interrupt and ACK it 1934 * Check if this really is a spurious interrupt and ACK it
1918 * if it is a vectored one. Just in case... 1935 * if it is a vectored one. Just in case...
@@ -1927,13 +1944,28 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1927 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 1944 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1928 pr_info("spurious APIC interrupt on CPU#%d, " 1945 pr_info("spurious APIC interrupt on CPU#%d, "
1929 "should never happen.\n", smp_processor_id()); 1946 "should never happen.\n", smp_processor_id());
1930 irq_exit(); 1947}
1948
1949void smp_spurious_interrupt(struct pt_regs *regs)
1950{
1951 entering_irq();
1952 __smp_spurious_interrupt();
1953 exiting_irq();
1954}
1955
1956void smp_trace_spurious_interrupt(struct pt_regs *regs)
1957{
1958 entering_irq();
1959 trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
1960 __smp_spurious_interrupt();
1961 trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
1962 exiting_irq();
1931} 1963}
1932 1964
1933/* 1965/*
1934 * This interrupt should never happen with our APIC/SMP architecture 1966 * This interrupt should never happen with our APIC/SMP architecture
1935 */ 1967 */
1936void smp_error_interrupt(struct pt_regs *regs) 1968static inline void __smp_error_interrupt(struct pt_regs *regs)
1937{ 1969{
1938 u32 v0, v1; 1970 u32 v0, v1;
1939 u32 i = 0; 1971 u32 i = 0;
@@ -1948,8 +1980,6 @@ void smp_error_interrupt(struct pt_regs *regs)
1948 "Illegal register address", /* APIC Error Bit 7 */ 1980 "Illegal register address", /* APIC Error Bit 7 */
1949 }; 1981 };
1950 1982
1951 irq_enter();
1952 exit_idle();
1953 /* First tickle the hardware, only then report what went on. -- REW */ 1983 /* First tickle the hardware, only then report what went on. -- REW */
1954 v0 = apic_read(APIC_ESR); 1984 v0 = apic_read(APIC_ESR);
1955 apic_write(APIC_ESR, 0); 1985 apic_write(APIC_ESR, 0);
@@ -1970,7 +2000,22 @@ void smp_error_interrupt(struct pt_regs *regs)
1970 2000
1971 apic_printk(APIC_DEBUG, KERN_CONT "\n"); 2001 apic_printk(APIC_DEBUG, KERN_CONT "\n");
1972 2002
1973 irq_exit(); 2003}
2004
2005void smp_error_interrupt(struct pt_regs *regs)
2006{
2007 entering_irq();
2008 __smp_error_interrupt(regs);
2009 exiting_irq();
2010}
2011
2012void smp_trace_error_interrupt(struct pt_regs *regs)
2013{
2014 entering_irq();
2015 trace_error_apic_entry(ERROR_APIC_VECTOR);
2016 __smp_error_interrupt(regs);
2017 trace_error_apic_exit(ERROR_APIC_VECTOR);
2018 exiting_irq();
1974} 2019}
1975 2020
1976/** 2021/**
@@ -2062,7 +2107,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
2062 apic_write(APIC_LVT1, value); 2107 apic_write(APIC_LVT1, value);
2063} 2108}
2064 2109
2065void __cpuinit generic_processor_info(int apicid, int version) 2110void generic_processor_info(int apicid, int version)
2066{ 2111{
2067 int cpu, max = nr_cpu_ids; 2112 int cpu, max = nr_cpu_ids;
2068 bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, 2113 bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2302,7 +2347,7 @@ static void lapic_resume(void)
2302 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 2347 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
2303 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 2348 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
2304 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 2349 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
2305#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) 2350#if defined(CONFIG_X86_MCE_INTEL)
2306 if (maxlvt >= 5) 2351 if (maxlvt >= 5)
2307 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 2352 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
2308#endif 2353#endif
@@ -2332,7 +2377,7 @@ static struct syscore_ops lapic_syscore_ops = {
2332 .suspend = lapic_suspend, 2377 .suspend = lapic_suspend,
2333}; 2378};
2334 2379
2335static void __cpuinit apic_pm_activate(void) 2380static void apic_pm_activate(void)
2336{ 2381{
2337 apic_pm_state.active = 1; 2382 apic_pm_state.active = 1;
2338} 2383}
@@ -2357,7 +2402,7 @@ static void apic_pm_activate(void) { }
2357 2402
2358#ifdef CONFIG_X86_64 2403#ifdef CONFIG_X86_64
2359 2404
2360static int __cpuinit apic_cluster_num(void) 2405static int apic_cluster_num(void)
2361{ 2406{
2362 int i, clusters, zeros; 2407 int i, clusters, zeros;
2363 unsigned id; 2408 unsigned id;
@@ -2402,10 +2447,10 @@ static int __cpuinit apic_cluster_num(void)
2402 return clusters; 2447 return clusters;
2403} 2448}
2404 2449
2405static int __cpuinitdata multi_checked; 2450static int multi_checked;
2406static int __cpuinitdata multi; 2451static int multi;
2407 2452
2408static int __cpuinit set_multi(const struct dmi_system_id *d) 2453static int set_multi(const struct dmi_system_id *d)
2409{ 2454{
2410 if (multi) 2455 if (multi)
2411 return 0; 2456 return 0;
@@ -2414,7 +2459,7 @@ static int __cpuinit set_multi(const struct dmi_system_id *d)
2414 return 0; 2459 return 0;
2415} 2460}
2416 2461
2417static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = { 2462static const struct dmi_system_id multi_dmi_table[] = {
2418 { 2463 {
2419 .callback = set_multi, 2464 .callback = set_multi,
2420 .ident = "IBM System Summit2", 2465 .ident = "IBM System Summit2",
@@ -2426,7 +2471,7 @@ static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
2426 {} 2471 {}
2427}; 2472};
2428 2473
2429static void __cpuinit dmi_check_multi(void) 2474static void dmi_check_multi(void)
2430{ 2475{
2431 if (multi_checked) 2476 if (multi_checked)
2432 return; 2477 return;
@@ -2443,7 +2488,7 @@ static void __cpuinit dmi_check_multi(void)
2443 * multi-chassis. 2488 * multi-chassis.
2444 * Use DMI to check them 2489 * Use DMI to check them
2445 */ 2490 */
2446__cpuinit int apic_is_clustered_box(void) 2491int apic_is_clustered_box(void)
2447{ 2492{
2448 dmi_check_multi(); 2493 dmi_check_multi();
2449 if (multi) 2494 if (multi)
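The spurious and error handlers above follow one template: the body moves into a static __smp_*() helper, and two thin entry points wrap it, one plain and one emitting the new entry/exit tracepoints. Reduced to a skeleton (the example_* names are placeholders, not real symbols):

        static inline void __smp_example_interrupt(void)
        {
                /* ... original handler body ... */
        }

        void smp_example_interrupt(struct pt_regs *regs)
        {
                entering_irq();
                __smp_example_interrupt();
                exiting_irq();
        }

        void smp_trace_example_interrupt(struct pt_regs *regs)
        {
                entering_irq();
                trace_example_entry(EXAMPLE_VECTOR);   /* placeholder event  */
                __smp_example_interrupt();
                trace_example_exit(EXAMPLE_VECTOR);    /* placeholder event  */
                exiting_irq();
        }
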
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 9a9110918ca7..3e67f9e3d7ef 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -74,7 +74,7 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
74 return initial_apic_id >> index_msb; 74 return initial_apic_id >> index_msb;
75} 75}
76 76
77static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) 77static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
78{ 78{
79 union numachip_csr_g3_ext_irq_gen int_gen; 79 union numachip_csr_g3_ext_irq_gen int_gen;
80 80
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 0874799a98c6..c55224731b2d 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -130,7 +130,7 @@ int es7000_plat;
130 */ 130 */
131 131
132 132
133static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) 133static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
134{ 134{
135 unsigned long vect = 0, psaival = 0; 135 unsigned long vect = 0, psaival = 0;
136 136
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index d661ee95cabf..1e42e8f305ee 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -105,7 +105,7 @@ static void __init smp_dump_qct(void)
105 } 105 }
106} 106}
107 107
108void __cpuinit numaq_tsc_disable(void) 108void numaq_tsc_disable(void)
109{ 109{
110 if (!found_numaq) 110 if (!found_numaq)
111 return; 111 return;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index c88baa4ff0e5..140e29db478d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -148,7 +148,7 @@ static void init_x2apic_ldr(void)
148 /* 148 /*
149 * At CPU state changes, update the x2apic cluster sibling info. 149 * At CPU state changes, update the x2apic cluster sibling info.
150 */ 150 */
151static int __cpuinit 151static int
152update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) 152update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
153{ 153{
154 unsigned int this_cpu = (unsigned long)hcpu; 154 unsigned int this_cpu = (unsigned long)hcpu;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 794f6eb54cd3..1191ac1c9d25 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -25,6 +25,7 @@
25#include <linux/kdebug.h> 25#include <linux/kdebug.h>
26#include <linux/delay.h> 26#include <linux/delay.h>
27#include <linux/crash_dump.h> 27#include <linux/crash_dump.h>
28#include <linux/reboot.h>
28 29
29#include <asm/uv/uv_mmrs.h> 30#include <asm/uv/uv_mmrs.h>
30#include <asm/uv/uv_hub.h> 31#include <asm/uv/uv_hub.h>
@@ -36,7 +37,6 @@
36#include <asm/ipi.h> 37#include <asm/ipi.h>
37#include <asm/smp.h> 38#include <asm/smp.h>
38#include <asm/x86_init.h> 39#include <asm/x86_init.h>
39#include <asm/emergency-restart.h>
40#include <asm/nmi.h> 40#include <asm/nmi.h>
41 41
42/* BMC sets a bit this MMR non-zero before sending an NMI */ 42/* BMC sets a bit this MMR non-zero before sending an NMI */
@@ -51,6 +51,8 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
51 51
52static enum uv_system_type uv_system_type; 52static enum uv_system_type uv_system_type;
53static u64 gru_start_paddr, gru_end_paddr; 53static u64 gru_start_paddr, gru_end_paddr;
54static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
55static u64 gru_dist_lmask, gru_dist_umask;
54static union uvh_apicid uvh_apicid; 56static union uvh_apicid uvh_apicid;
55int uv_min_hub_revision_id; 57int uv_min_hub_revision_id;
56EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 58EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
@@ -72,7 +74,20 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
72 74
73static inline bool is_GRU_range(u64 start, u64 end) 75static inline bool is_GRU_range(u64 start, u64 end)
74{ 76{
75 return start >= gru_start_paddr && end <= gru_end_paddr; 77 if (gru_dist_base) {
78 u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */
79 u64 sl = start & gru_dist_lmask; /* base offset bits */
80 u64 eu = end & gru_dist_umask;
81 u64 el = end & gru_dist_lmask;
82
83 /* Must reside completely within a single GRU range */
84 return (sl == gru_dist_base && el == gru_dist_base &&
85 su >= gru_first_node_paddr &&
86 su <= gru_last_node_paddr &&
87 eu == su);
88 } else {
89 return start >= gru_start_paddr && end <= gru_end_paddr;
90 }
76} 91}
77 92
78static bool uv_is_untracked_pat_range(u64 start, u64 end) 93static bool uv_is_untracked_pat_range(u64 start, u64 end)
@@ -194,7 +209,7 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
194unsigned long sn_rtc_cycles_per_second; 209unsigned long sn_rtc_cycles_per_second;
195EXPORT_SYMBOL(sn_rtc_cycles_per_second); 210EXPORT_SYMBOL(sn_rtc_cycles_per_second);
196 211
197static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) 212static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
198{ 213{
199#ifdef CONFIG_SMP 214#ifdef CONFIG_SMP
200 unsigned long val; 215 unsigned long val;
@@ -401,7 +416,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
401 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, 416 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
402}; 417};
403 418
404static __cpuinit void set_x2apic_extra_bits(int pnode) 419static void set_x2apic_extra_bits(int pnode)
405{ 420{
406 __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); 421 __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
407} 422}
@@ -463,26 +478,63 @@ static __init void map_high(char *id, unsigned long base, int pshift,
463 pr_info("UV: Map %s_HI base address NULL\n", id); 478 pr_info("UV: Map %s_HI base address NULL\n", id);
464 return; 479 return;
465 } 480 }
466 pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes); 481 pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
467 if (map_type == map_uc) 482 if (map_type == map_uc)
468 init_extra_mapping_uc(paddr, bytes); 483 init_extra_mapping_uc(paddr, bytes);
469 else 484 else
470 init_extra_mapping_wb(paddr, bytes); 485 init_extra_mapping_wb(paddr, bytes);
471} 486}
472 487
488static __init void map_gru_distributed(unsigned long c)
489{
490 union uvh_rh_gam_gru_overlay_config_mmr_u gru;
491 u64 paddr;
492 unsigned long bytes;
493 int nid;
494
495 gru.v = c;
496 /* only base bits 42:28 relevant in dist mode */
497 gru_dist_base = gru.v & 0x000007fff0000000UL;
498 if (!gru_dist_base) {
499 pr_info("UV: Map GRU_DIST base address NULL\n");
500 return;
501 }
502 bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
503 gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
504 gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
505 gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
506 for_each_online_node(nid) {
507 paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
508 gru_dist_base;
509 init_extra_mapping_wb(paddr, bytes);
510 gru_first_node_paddr = min(paddr, gru_first_node_paddr);
511 gru_last_node_paddr = max(paddr, gru_last_node_paddr);
512 }
513 /* Save upper (63:M) bits of address only for is_GRU_range */
514 gru_first_node_paddr &= gru_dist_umask;
515 gru_last_node_paddr &= gru_dist_umask;
516 pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n",
517 gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
518}
519
473static __init void map_gru_high(int max_pnode) 520static __init void map_gru_high(int max_pnode)
474{ 521{
475 union uvh_rh_gam_gru_overlay_config_mmr_u gru; 522 union uvh_rh_gam_gru_overlay_config_mmr_u gru;
476 int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; 523 int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
477 524
478 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); 525 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
479 if (gru.s.enable) { 526 if (!gru.s.enable) {
480 map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
481 gru_start_paddr = ((u64)gru.s.base << shift);
482 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
483 } else {
484 pr_info("UV: GRU disabled\n"); 527 pr_info("UV: GRU disabled\n");
528 return;
529 }
530
531 if (is_uv3_hub() && gru.s3.mode) {
532 map_gru_distributed(gru.v);
533 return;
485 } 534 }
535 map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
536 gru_start_paddr = ((u64)gru.s.base << shift);
537 gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
486} 538}
487 539
488static __init void map_mmr_high(int max_pnode) 540static __init void map_mmr_high(int max_pnode)
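In distributed mode each node gets its own GRU window, so is_GRU_range() above splits a physical address with the two masks: the low M bits (node offset) must land exactly on gru_dist_base, and the remaining high bits must name a node between the first and last mapped pnode. A worked sketch of that decomposition (example_in_gru_window() and paddr are illustrative):

        /* Sketch: decompose a physical address the way is_GRU_range() does. */
        static bool example_in_gru_window(u64 paddr)
        {
                u64 upper = paddr & gru_dist_umask;  /* bits 63:M  - node id  */
                u64 lower = paddr & gru_dist_lmask;  /* bits M-1:0 - offset   */

                return lower == gru_dist_base &&
                       upper >= gru_first_node_paddr &&
                       upper <= gru_last_node_paddr;
        }
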
@@ -683,7 +735,7 @@ static void uv_heartbeat(unsigned long ignored)
683 mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); 735 mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
684} 736}
685 737
686static void __cpuinit uv_heartbeat_enable(int cpu) 738static void uv_heartbeat_enable(int cpu)
687{ 739{
688 while (!uv_cpu_hub_info(cpu)->scir.enabled) { 740 while (!uv_cpu_hub_info(cpu)->scir.enabled) {
689 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; 741 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
@@ -700,7 +752,7 @@ static void __cpuinit uv_heartbeat_enable(int cpu)
700} 752}
701 753
702#ifdef CONFIG_HOTPLUG_CPU 754#ifdef CONFIG_HOTPLUG_CPU
703static void __cpuinit uv_heartbeat_disable(int cpu) 755static void uv_heartbeat_disable(int cpu)
704{ 756{
705 if (uv_cpu_hub_info(cpu)->scir.enabled) { 757 if (uv_cpu_hub_info(cpu)->scir.enabled) {
706 uv_cpu_hub_info(cpu)->scir.enabled = 0; 758 uv_cpu_hub_info(cpu)->scir.enabled = 0;
@@ -712,8 +764,8 @@ static void __cpuinit uv_heartbeat_disable(int cpu)
712/* 764/*
713 * cpu hotplug notifier 765 * cpu hotplug notifier
714 */ 766 */
715static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, 767static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
716 unsigned long action, void *hcpu) 768 void *hcpu)
717{ 769{
718 long cpu = (long)hcpu; 770 long cpu = (long)hcpu;
719 771
@@ -783,7 +835,7 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode,
783 * Called on each cpu to initialize the per_cpu UV data area. 835 * Called on each cpu to initialize the per_cpu UV data area.
784 * FIXME: hotplug not supported yet 836 * FIXME: hotplug not supported yet
785 */ 837 */
786void __cpuinit uv_cpu_init(void) 838void uv_cpu_init(void)
787{ 839{
788 /* CPU 0 initilization will be done via uv_system_init. */ 840 /* CPU 0 initilization will be done via uv_system_init. */
789 if (!uv_blade_info) 841 if (!uv_blade_info)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 0ef4bba2acb7..d67c4be3e8b1 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -28,7 +28,6 @@ void foo(void)
28 OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); 28 OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
29 OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); 29 OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
30 OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); 30 OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
31 OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
32 OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); 31 OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
33 OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); 32 OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
34 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); 33 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4a73aa..47b56a7e99cb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
31 31
32ifdef CONFIG_PERF_EVENTS 32ifdef CONFIG_PERF_EVENTS
33obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o 33obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
34ifdef CONFIG_AMD_IOMMU
35obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
36endif
34obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o 37obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
35obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o 38obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
36obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o 39obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
37endif 40endif
38 41
42
39obj-$(CONFIG_X86_MCE) += mcheck/ 43obj-$(CONFIG_X86_MCE) += mcheck/
40obj-$(CONFIG_MTRR) += mtrr/ 44obj-$(CONFIG_MTRR) += mtrr/
41 45
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5013a48d1aff..f654ecefea5b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -69,7 +69,7 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
69extern void vide(void); 69extern void vide(void);
70__asm__(".align 4\nvide: ret"); 70__asm__(".align 4\nvide: ret");
71 71
72static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) 72static void init_amd_k5(struct cpuinfo_x86 *c)
73{ 73{
74/* 74/*
75 * General Systems BIOSen alias the cpu frequency registers 75 * General Systems BIOSen alias the cpu frequency registers
@@ -87,10 +87,10 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
87} 87}
88 88
89 89
90static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) 90static void init_amd_k6(struct cpuinfo_x86 *c)
91{ 91{
92 u32 l, h; 92 u32 l, h;
93 int mbytes = num_physpages >> (20-PAGE_SHIFT); 93 int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);
94 94
95 if (c->x86_model < 6) { 95 if (c->x86_model < 6) {
96 /* Based on AMD doc 20734R - June 2000 */ 96 /* Based on AMD doc 20734R - June 2000 */
@@ -179,7 +179,7 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
179 } 179 }
180} 180}
181 181
182static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) 182static void amd_k7_smp_check(struct cpuinfo_x86 *c)
183{ 183{
184 /* calling is from identify_secondary_cpu() ? */ 184 /* calling is from identify_secondary_cpu() ? */
185 if (!c->cpu_index) 185 if (!c->cpu_index)
@@ -222,7 +222,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
222 add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE); 222 add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
223} 223}
224 224
225static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) 225static void init_amd_k7(struct cpuinfo_x86 *c)
226{ 226{
227 u32 l, h; 227 u32 l, h;
228 228
@@ -267,7 +267,7 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
267 * To workaround broken NUMA config. Read the comment in 267 * To workaround broken NUMA config. Read the comment in
268 * srat_detect_node(). 268 * srat_detect_node().
269 */ 269 */
270static int __cpuinit nearby_node(int apicid) 270static int nearby_node(int apicid)
271{ 271{
272 int i, node; 272 int i, node;
273 273
@@ -292,7 +292,7 @@ static int __cpuinit nearby_node(int apicid)
292 * (2) AMD processors supporting compute units 292 * (2) AMD processors supporting compute units
293 */ 293 */
294#ifdef CONFIG_X86_HT 294#ifdef CONFIG_X86_HT
295static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) 295static void amd_get_topology(struct cpuinfo_x86 *c)
296{ 296{
297 u32 nodes, cores_per_cu = 1; 297 u32 nodes, cores_per_cu = 1;
298 u8 node_id; 298 u8 node_id;
@@ -342,7 +342,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
342 * On a AMD dual core setup the lower bits of the APIC id distingush the cores. 342 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
343 * Assumes number of cores is a power of two. 343 * Assumes number of cores is a power of two.
344 */ 344 */
345static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) 345static void amd_detect_cmp(struct cpuinfo_x86 *c)
346{ 346{
347#ifdef CONFIG_X86_HT 347#ifdef CONFIG_X86_HT
348 unsigned bits; 348 unsigned bits;
@@ -369,7 +369,7 @@ u16 amd_get_nb_id(int cpu)
369} 369}
370EXPORT_SYMBOL_GPL(amd_get_nb_id); 370EXPORT_SYMBOL_GPL(amd_get_nb_id);
371 371
372static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 372static void srat_detect_node(struct cpuinfo_x86 *c)
373{ 373{
374#ifdef CONFIG_NUMA 374#ifdef CONFIG_NUMA
375 int cpu = smp_processor_id(); 375 int cpu = smp_processor_id();
@@ -421,7 +421,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
421#endif 421#endif
422} 422}
423 423
424static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) 424static void early_init_amd_mc(struct cpuinfo_x86 *c)
425{ 425{
426#ifdef CONFIG_X86_HT 426#ifdef CONFIG_X86_HT
427 unsigned bits, ecx; 427 unsigned bits, ecx;
@@ -447,7 +447,7 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
447#endif 447#endif
448} 448}
449 449
450static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c) 450static void bsp_init_amd(struct cpuinfo_x86 *c)
451{ 451{
452 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { 452 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
453 453
@@ -475,7 +475,7 @@ static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
475 } 475 }
476} 476}
477 477
478static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 478static void early_init_amd(struct cpuinfo_x86 *c)
479{ 479{
480 early_init_amd_mc(c); 480 early_init_amd_mc(c);
481 481
@@ -514,7 +514,7 @@ static const int amd_erratum_383[];
514static const int amd_erratum_400[]; 514static const int amd_erratum_400[];
515static bool cpu_has_amd_erratum(const int *erratum); 515static bool cpu_has_amd_erratum(const int *erratum);
516 516
517static void __cpuinit init_amd(struct cpuinfo_x86 *c) 517static void init_amd(struct cpuinfo_x86 *c)
518{ 518{
519 u32 dummy; 519 u32 dummy;
520 unsigned long long value; 520 unsigned long long value;
@@ -740,8 +740,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
740} 740}
741 741
742#ifdef CONFIG_X86_32 742#ifdef CONFIG_X86_32
743static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, 743static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
744 unsigned int size)
745{ 744{
746 /* AMD errata T13 (order #21922) */ 745 /* AMD errata T13 (order #21922) */
747 if ((c->x86 == 6)) { 746 if ((c->x86 == 6)) {
@@ -757,7 +756,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
757} 756}
758#endif 757#endif
759 758
760static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) 759static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
761{ 760{
762 tlb_flushall_shift = 5; 761 tlb_flushall_shift = 5;
763 762
@@ -765,7 +764,7 @@ static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
765 tlb_flushall_shift = 4; 764 tlb_flushall_shift = 4;
766} 765}
767 766
768static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) 767static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
769{ 768{
770 u32 ebx, eax, ecx, edx; 769 u32 ebx, eax, ecx, edx;
771 u16 mask = 0xfff; 770 u16 mask = 0xfff;
@@ -820,7 +819,7 @@ static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
820 cpu_set_tlb_flushall_shift(c); 819 cpu_set_tlb_flushall_shift(c);
821} 820}
822 821
823static const struct cpu_dev __cpuinitconst amd_cpu_dev = { 822static const struct cpu_dev amd_cpu_dev = {
824 .c_vendor = "AMD", 823 .c_vendor = "AMD",
825 .c_ident = { "AuthenticAMD" }, 824 .c_ident = { "AuthenticAMD" },
826#ifdef CONFIG_X86_32 825#ifdef CONFIG_X86_32
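
The arch/x86/kernel/cpu/amd.c hunks above are part of the tree-wide removal of the __cpuinit annotation: the marker used to place CPU-bring-up code in a section that could be discarded when CPU hotplug was compiled out, and since that discard no longer pays for its section-mismatch complexity the annotations are simply dropped. As a rough sketch of what such a marker amounts to (illustrative names only, not the kernel's real <linux/init.h> definitions):

/*
 * Illustrative sketch of a discardable-section marker; the kernel's
 * actual macros live in <linux/init.h> and differ in detail.
 */
#define __example_cpuinit __attribute__((__section__(".cpuinit.text")))

static int __example_cpuinit example_cpu_starting(unsigned int cpu)
{
        /* bring-up-only code; the section could be freed afterwards */
        return (int)cpu;
}
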
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4112be9a4659..03445346ee0a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,15 +17,6 @@
17#include <asm/paravirt.h> 17#include <asm/paravirt.h>
18#include <asm/alternative.h> 18#include <asm/alternative.h>
19 19
20static int __init no_387(char *s)
21{
22 boot_cpu_data.hard_math = 0;
23 write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0());
24 return 1;
25}
26
27__setup("no387", no_387);
28
29static double __initdata x = 4195835.0; 20static double __initdata x = 4195835.0;
30static double __initdata y = 3145727.0; 21static double __initdata y = 3145727.0;
31 22
@@ -44,15 +35,6 @@ static void __init check_fpu(void)
44{ 35{
45 s32 fdiv_bug; 36 s32 fdiv_bug;
46 37
47 if (!boot_cpu_data.hard_math) {
48#ifndef CONFIG_MATH_EMULATION
49 pr_emerg("No coprocessor found and no math emulation present\n");
50 pr_emerg("Giving up\n");
51 for (;;) ;
52#endif
53 return;
54 }
55
56 kernel_fpu_begin(); 38 kernel_fpu_begin();
57 39
58 /* 40 /*
@@ -107,5 +89,6 @@ void __init check_bugs(void)
107 * kernel_fpu_begin/end() in check_fpu() relies on the patched 89 * kernel_fpu_begin/end() in check_fpu() relies on the patched
108 * alternative instructions. 90 * alternative instructions.
109 */ 91 */
110 check_fpu(); 92 if (cpu_has_fpu)
93 check_fpu();
111} 94}
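
The bugs.c hunk above drops the no387= handling and the hard_math fallback and only calls check_fpu() when cpu_has_fpu is set, relying on the fpu_detect() call added to early CPU identification later in this diff. The check itself is the classic Pentium FDIV probe built from the two constants still visible in the context (4195835.0 and 3145727.0); a stand-alone user-space sketch of the same computation, which prints 0 on a correct FPU and 256 on the original flawed Pentium:

#include <stdio.h>

int main(void)
{
        /*
         * Classic FDIV probe: the delta is 0 on a correct FPU and 256 on
         * the affected Pentiums. volatile keeps the compiler from folding
         * the arithmetic at build time.
         */
        volatile double x = 4195835.0;
        volatile double y = 3145727.0;
        double fdiv_delta = x - (x / y) * y;

        printf("fdiv delta: %g\n", fdiv_delta);
        return 0;
}
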
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 159103c0b1f4..fbf6c3bc2400 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -11,7 +11,7 @@
11 11
12#ifdef CONFIG_X86_OOSTORE 12#ifdef CONFIG_X86_OOSTORE
13 13
14static u32 __cpuinit power2(u32 x) 14static u32 power2(u32 x)
15{ 15{
16 u32 s = 1; 16 u32 s = 1;
17 17
@@ -25,7 +25,7 @@ static u32 __cpuinit power2(u32 x)
25/* 25/*
26 * Set up an actual MCR 26 * Set up an actual MCR
27 */ 27 */
28static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) 28static void centaur_mcr_insert(int reg, u32 base, u32 size, int key)
29{ 29{
30 u32 lo, hi; 30 u32 lo, hi;
31 31
@@ -42,7 +42,7 @@ static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
42 * 42 *
43 * Shortcut: We know you can't put 4Gig of RAM on a winchip 43 * Shortcut: We know you can't put 4Gig of RAM on a winchip
44 */ 44 */
45static u32 __cpuinit ramtop(void) 45static u32 ramtop(void)
46{ 46{
47 u32 clip = 0xFFFFFFFFUL; 47 u32 clip = 0xFFFFFFFFUL;
48 u32 top = 0; 48 u32 top = 0;
@@ -91,7 +91,7 @@ static u32 __cpuinit ramtop(void)
91/* 91/*
92 * Compute a set of MCR's to give maximum coverage 92 * Compute a set of MCR's to give maximum coverage
93 */ 93 */
94static int __cpuinit centaur_mcr_compute(int nr, int key) 94static int centaur_mcr_compute(int nr, int key)
95{ 95{
96 u32 mem = ramtop(); 96 u32 mem = ramtop();
97 u32 root = power2(mem); 97 u32 root = power2(mem);
@@ -157,7 +157,7 @@ static int __cpuinit centaur_mcr_compute(int nr, int key)
157 return ct; 157 return ct;
158} 158}
159 159
160static void __cpuinit centaur_create_optimal_mcr(void) 160static void centaur_create_optimal_mcr(void)
161{ 161{
162 int used; 162 int used;
163 int i; 163 int i;
@@ -181,7 +181,7 @@ static void __cpuinit centaur_create_optimal_mcr(void)
181 wrmsr(MSR_IDT_MCR0+i, 0, 0); 181 wrmsr(MSR_IDT_MCR0+i, 0, 0);
182} 182}
183 183
184static void __cpuinit winchip2_create_optimal_mcr(void) 184static void winchip2_create_optimal_mcr(void)
185{ 185{
186 u32 lo, hi; 186 u32 lo, hi;
187 int used; 187 int used;
@@ -217,7 +217,7 @@ static void __cpuinit winchip2_create_optimal_mcr(void)
217/* 217/*
218 * Handle the MCR key on the Winchip 2. 218 * Handle the MCR key on the Winchip 2.
219 */ 219 */
220static void __cpuinit winchip2_unprotect_mcr(void) 220static void winchip2_unprotect_mcr(void)
221{ 221{
222 u32 lo, hi; 222 u32 lo, hi;
223 u32 key; 223 u32 key;
@@ -229,7 +229,7 @@ static void __cpuinit winchip2_unprotect_mcr(void)
229 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 229 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
230} 230}
231 231
232static void __cpuinit winchip2_protect_mcr(void) 232static void winchip2_protect_mcr(void)
233{ 233{
234 u32 lo, hi; 234 u32 lo, hi;
235 235
@@ -247,7 +247,7 @@ static void __cpuinit winchip2_protect_mcr(void)
247#define RNG_ENABLED (1 << 3) 247#define RNG_ENABLED (1 << 3)
248#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ 248#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
249 249
250static void __cpuinit init_c3(struct cpuinfo_x86 *c) 250static void init_c3(struct cpuinfo_x86 *c)
251{ 251{
252 u32 lo, hi; 252 u32 lo, hi;
253 253
@@ -318,7 +318,7 @@ enum {
318 EAMD3D = 1<<20, 318 EAMD3D = 1<<20,
319}; 319};
320 320
321static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) 321static void early_init_centaur(struct cpuinfo_x86 *c)
322{ 322{
323 switch (c->x86) { 323 switch (c->x86) {
324#ifdef CONFIG_X86_32 324#ifdef CONFIG_X86_32
@@ -337,7 +337,7 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
337#endif 337#endif
338} 338}
339 339
340static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 340static void init_centaur(struct cpuinfo_x86 *c)
341{ 341{
342#ifdef CONFIG_X86_32 342#ifdef CONFIG_X86_32
343 char *name; 343 char *name;
@@ -468,7 +468,7 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
468#endif 468#endif
469} 469}
470 470
471static unsigned int __cpuinit 471static unsigned int
472centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) 472centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
473{ 473{
474#ifdef CONFIG_X86_32 474#ifdef CONFIG_X86_32
@@ -488,7 +488,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
488 return size; 488 return size;
489} 489}
490 490
491static const struct cpu_dev __cpuinitconst centaur_cpu_dev = { 491static const struct cpu_dev centaur_cpu_dev = {
492 .c_vendor = "Centaur", 492 .c_vendor = "Centaur",
493 .c_ident = { "CentaurHauls" }, 493 .c_ident = { "CentaurHauls" },
494 .c_early_init = early_init_centaur, 494 .c_early_init = early_init_centaur,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22018f70a671..25eb2747b063 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -63,7 +63,7 @@ void __init setup_cpu_local_masks(void)
63 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 63 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
64} 64}
65 65
66static void __cpuinit default_init(struct cpuinfo_x86 *c) 66static void default_init(struct cpuinfo_x86 *c)
67{ 67{
68#ifdef CONFIG_X86_64 68#ifdef CONFIG_X86_64
69 cpu_detect_cache_sizes(c); 69 cpu_detect_cache_sizes(c);
@@ -80,13 +80,13 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
80#endif 80#endif
81} 81}
82 82
83static const struct cpu_dev __cpuinitconst default_cpu = { 83static const struct cpu_dev default_cpu = {
84 .c_init = default_init, 84 .c_init = default_init,
85 .c_vendor = "Unknown", 85 .c_vendor = "Unknown",
86 .c_x86_vendor = X86_VENDOR_UNKNOWN, 86 .c_x86_vendor = X86_VENDOR_UNKNOWN,
87}; 87};
88 88
89static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; 89static const struct cpu_dev *this_cpu = &default_cpu;
90 90
91DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 91DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
92#ifdef CONFIG_X86_64 92#ifdef CONFIG_X86_64
@@ -160,8 +160,8 @@ static int __init x86_xsaveopt_setup(char *s)
160__setup("noxsaveopt", x86_xsaveopt_setup); 160__setup("noxsaveopt", x86_xsaveopt_setup);
161 161
162#ifdef CONFIG_X86_32 162#ifdef CONFIG_X86_32
163static int cachesize_override __cpuinitdata = -1; 163static int cachesize_override = -1;
164static int disable_x86_serial_nr __cpuinitdata = 1; 164static int disable_x86_serial_nr = 1;
165 165
166static int __init cachesize_setup(char *str) 166static int __init cachesize_setup(char *str)
167{ 167{
@@ -215,12 +215,12 @@ static inline int flag_is_changeable_p(u32 flag)
215} 215}
216 216
217/* Probe for the CPUID instruction */ 217/* Probe for the CPUID instruction */
218int __cpuinit have_cpuid_p(void) 218int have_cpuid_p(void)
219{ 219{
220 return flag_is_changeable_p(X86_EFLAGS_ID); 220 return flag_is_changeable_p(X86_EFLAGS_ID);
221} 221}
222 222
223static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 223static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
224{ 224{
225 unsigned long lo, hi; 225 unsigned long lo, hi;
226 226
@@ -298,7 +298,7 @@ struct cpuid_dependent_feature {
298 u32 level; 298 u32 level;
299}; 299};
300 300
301static const struct cpuid_dependent_feature __cpuinitconst 301static const struct cpuid_dependent_feature
302cpuid_dependent_features[] = { 302cpuid_dependent_features[] = {
303 { X86_FEATURE_MWAIT, 0x00000005 }, 303 { X86_FEATURE_MWAIT, 0x00000005 },
304 { X86_FEATURE_DCA, 0x00000009 }, 304 { X86_FEATURE_DCA, 0x00000009 },
@@ -306,7 +306,7 @@ cpuid_dependent_features[] = {
306 { 0, 0 } 306 { 0, 0 }
307}; 307};
308 308
309static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) 309static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
310{ 310{
311 const struct cpuid_dependent_feature *df; 311 const struct cpuid_dependent_feature *df;
312 312
@@ -344,7 +344,7 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
344 */ 344 */
345 345
346/* Look up CPU names by table lookup. */ 346/* Look up CPU names by table lookup. */
347static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) 347static const char *table_lookup_model(struct cpuinfo_x86 *c)
348{ 348{
349 const struct cpu_model_info *info; 349 const struct cpu_model_info *info;
350 350
@@ -364,8 +364,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
364 return NULL; /* Not found */ 364 return NULL; /* Not found */
365} 365}
366 366
367__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; 367__u32 cpu_caps_cleared[NCAPINTS];
368__u32 cpu_caps_set[NCAPINTS] __cpuinitdata; 368__u32 cpu_caps_set[NCAPINTS];
369 369
370void load_percpu_segment(int cpu) 370void load_percpu_segment(int cpu)
371{ 371{
@@ -394,9 +394,9 @@ void switch_to_new_gdt(int cpu)
394 load_percpu_segment(cpu); 394 load_percpu_segment(cpu);
395} 395}
396 396
397static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 397static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
398 398
399static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 399static void get_model_name(struct cpuinfo_x86 *c)
400{ 400{
401 unsigned int *v; 401 unsigned int *v;
402 char *p, *q; 402 char *p, *q;
@@ -425,7 +425,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
425 } 425 }
426} 426}
427 427
428void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) 428void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
429{ 429{
430 unsigned int n, dummy, ebx, ecx, edx, l2size; 430 unsigned int n, dummy, ebx, ecx, edx, l2size;
431 431
@@ -479,7 +479,7 @@ u16 __read_mostly tlb_lld_4m[NR_INFO];
479 */ 479 */
480s8 __read_mostly tlb_flushall_shift = -1; 480s8 __read_mostly tlb_flushall_shift = -1;
481 481
482void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) 482void cpu_detect_tlb(struct cpuinfo_x86 *c)
483{ 483{
484 if (this_cpu->c_detect_tlb) 484 if (this_cpu->c_detect_tlb)
485 this_cpu->c_detect_tlb(c); 485 this_cpu->c_detect_tlb(c);
@@ -493,7 +493,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
493 tlb_flushall_shift); 493 tlb_flushall_shift);
494} 494}
495 495
496void __cpuinit detect_ht(struct cpuinfo_x86 *c) 496void detect_ht(struct cpuinfo_x86 *c)
497{ 497{
498#ifdef CONFIG_X86_HT 498#ifdef CONFIG_X86_HT
499 u32 eax, ebx, ecx, edx; 499 u32 eax, ebx, ecx, edx;
@@ -544,7 +544,7 @@ out:
544#endif 544#endif
545} 545}
546 546
547static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) 547static void get_cpu_vendor(struct cpuinfo_x86 *c)
548{ 548{
549 char *v = c->x86_vendor_id; 549 char *v = c->x86_vendor_id;
550 int i; 550 int i;
@@ -571,7 +571,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
571 this_cpu = &default_cpu; 571 this_cpu = &default_cpu;
572} 572}
573 573
574void __cpuinit cpu_detect(struct cpuinfo_x86 *c) 574void cpu_detect(struct cpuinfo_x86 *c)
575{ 575{
576 /* Get vendor name */ 576 /* Get vendor name */
577 cpuid(0x00000000, (unsigned int *)&c->cpuid_level, 577 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
@@ -601,7 +601,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
601 } 601 }
602} 602}
603 603
604void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) 604void get_cpu_cap(struct cpuinfo_x86 *c)
605{ 605{
606 u32 tfms, xlvl; 606 u32 tfms, xlvl;
607 u32 ebx; 607 u32 ebx;
@@ -652,7 +652,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
652 init_scattered_cpuid_features(c); 652 init_scattered_cpuid_features(c);
653} 653}
654 654
655static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) 655static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
656{ 656{
657#ifdef CONFIG_X86_32 657#ifdef CONFIG_X86_32
658 int i; 658 int i;
@@ -711,10 +711,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
711 return; 711 return;
712 712
713 cpu_detect(c); 713 cpu_detect(c);
714
715 get_cpu_vendor(c); 714 get_cpu_vendor(c);
716
717 get_cpu_cap(c); 715 get_cpu_cap(c);
716 fpu_detect(c);
718 717
719 if (this_cpu->c_early_init) 718 if (this_cpu->c_early_init)
720 this_cpu->c_early_init(c); 719 this_cpu->c_early_init(c);
@@ -724,6 +723,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
724 723
725 if (this_cpu->c_bsp_init) 724 if (this_cpu->c_bsp_init)
726 this_cpu->c_bsp_init(c); 725 this_cpu->c_bsp_init(c);
726
727 setup_force_cpu_cap(X86_FEATURE_ALWAYS);
727} 728}
728 729
729void __init early_cpu_init(void) 730void __init early_cpu_init(void)
@@ -768,7 +769,7 @@ void __init early_cpu_init(void)
768 * unless we can find a reliable way to detect all the broken cases. 769 * unless we can find a reliable way to detect all the broken cases.
769 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). 770 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
770 */ 771 */
771static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) 772static void detect_nopl(struct cpuinfo_x86 *c)
772{ 773{
773#ifdef CONFIG_X86_32 774#ifdef CONFIG_X86_32
774 clear_cpu_cap(c, X86_FEATURE_NOPL); 775 clear_cpu_cap(c, X86_FEATURE_NOPL);
@@ -777,7 +778,7 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
777#endif 778#endif
778} 779}
779 780
780static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 781static void generic_identify(struct cpuinfo_x86 *c)
781{ 782{
782 c->extended_cpuid_level = 0; 783 c->extended_cpuid_level = 0;
783 784
@@ -814,7 +815,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
814/* 815/*
815 * This does the hard work of actually picking apart the CPU stuff... 816 * This does the hard work of actually picking apart the CPU stuff...
816 */ 817 */
817static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) 818static void identify_cpu(struct cpuinfo_x86 *c)
818{ 819{
819 int i; 820 int i;
820 821
@@ -959,7 +960,7 @@ void __init identify_boot_cpu(void)
959 cpu_detect_tlb(&boot_cpu_data); 960 cpu_detect_tlb(&boot_cpu_data);
960} 961}
961 962
962void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 963void identify_secondary_cpu(struct cpuinfo_x86 *c)
963{ 964{
964 BUG_ON(c == &boot_cpu_data); 965 BUG_ON(c == &boot_cpu_data);
965 identify_cpu(c); 966 identify_cpu(c);
@@ -974,14 +975,14 @@ struct msr_range {
974 unsigned max; 975 unsigned max;
975}; 976};
976 977
977static const struct msr_range msr_range_array[] __cpuinitconst = { 978static const struct msr_range msr_range_array[] = {
978 { 0x00000000, 0x00000418}, 979 { 0x00000000, 0x00000418},
979 { 0xc0000000, 0xc000040b}, 980 { 0xc0000000, 0xc000040b},
980 { 0xc0010000, 0xc0010142}, 981 { 0xc0010000, 0xc0010142},
981 { 0xc0011000, 0xc001103b}, 982 { 0xc0011000, 0xc001103b},
982}; 983};
983 984
984static void __cpuinit __print_cpu_msr(void) 985static void __print_cpu_msr(void)
985{ 986{
986 unsigned index_min, index_max; 987 unsigned index_min, index_max;
987 unsigned index; 988 unsigned index;
@@ -1000,7 +1001,7 @@ static void __cpuinit __print_cpu_msr(void)
1000 } 1001 }
1001} 1002}
1002 1003
1003static int show_msr __cpuinitdata; 1004static int show_msr;
1004 1005
1005static __init int setup_show_msr(char *arg) 1006static __init int setup_show_msr(char *arg)
1006{ 1007{
@@ -1021,7 +1022,7 @@ static __init int setup_noclflush(char *arg)
1021} 1022}
1022__setup("noclflush", setup_noclflush); 1023__setup("noclflush", setup_noclflush);
1023 1024
1024void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) 1025void print_cpu_info(struct cpuinfo_x86 *c)
1025{ 1026{
1026 const char *vendor = NULL; 1027 const char *vendor = NULL;
1027 1028
@@ -1050,7 +1051,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1050 print_cpu_msr(c); 1051 print_cpu_msr(c);
1051} 1052}
1052 1053
1053void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) 1054void print_cpu_msr(struct cpuinfo_x86 *c)
1054{ 1055{
1055 if (c->cpu_index < show_msr) 1056 if (c->cpu_index < show_msr)
1056 __print_cpu_msr(); 1057 __print_cpu_msr();
@@ -1071,8 +1072,8 @@ __setup("clearcpuid=", setup_disablecpuid);
1071 1072
1072#ifdef CONFIG_X86_64 1073#ifdef CONFIG_X86_64
1073struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; 1074struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
1074struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, 1075struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
1075 (unsigned long) nmi_idt_table }; 1076 (unsigned long) debug_idt_table };
1076 1077
1077DEFINE_PER_CPU_FIRST(union irq_stack_union, 1078DEFINE_PER_CPU_FIRST(union irq_stack_union,
1078 irq_stack_union) __aligned(PAGE_SIZE); 1079 irq_stack_union) __aligned(PAGE_SIZE);
@@ -1148,20 +1149,20 @@ int is_debug_stack(unsigned long addr)
1148 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); 1149 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
1149} 1150}
1150 1151
1151static DEFINE_PER_CPU(u32, debug_stack_use_ctr); 1152DEFINE_PER_CPU(u32, debug_idt_ctr);
1152 1153
1153void debug_stack_set_zero(void) 1154void debug_stack_set_zero(void)
1154{ 1155{
1155 this_cpu_inc(debug_stack_use_ctr); 1156 this_cpu_inc(debug_idt_ctr);
1156 load_idt((const struct desc_ptr *)&nmi_idt_descr); 1157 load_current_idt();
1157} 1158}
1158 1159
1159void debug_stack_reset(void) 1160void debug_stack_reset(void)
1160{ 1161{
1161 if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) 1162 if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
1162 return; 1163 return;
1163 if (this_cpu_dec_return(debug_stack_use_ctr) == 0) 1164 if (this_cpu_dec_return(debug_idt_ctr) == 0)
1164 load_idt((const struct desc_ptr *)&idt_descr); 1165 load_current_idt();
1165} 1166}
1166 1167
1167#else /* CONFIG_X86_64 */ 1168#else /* CONFIG_X86_64 */
@@ -1215,7 +1216,7 @@ static void dbg_restore_debug_regs(void)
1215 */ 1216 */
1216#ifdef CONFIG_X86_64 1217#ifdef CONFIG_X86_64
1217 1218
1218void __cpuinit cpu_init(void) 1219void cpu_init(void)
1219{ 1220{
1220 struct orig_ist *oist; 1221 struct orig_ist *oist;
1221 struct task_struct *me; 1222 struct task_struct *me;
@@ -1257,7 +1258,7 @@ void __cpuinit cpu_init(void)
1257 switch_to_new_gdt(cpu); 1258 switch_to_new_gdt(cpu);
1258 loadsegment(fs, 0); 1259 loadsegment(fs, 0);
1259 1260
1260 load_idt((const struct desc_ptr *)&idt_descr); 1261 load_current_idt();
1261 1262
1262 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1263 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
1263 syscall_init(); 1264 syscall_init();
@@ -1314,7 +1315,7 @@ void __cpuinit cpu_init(void)
1314 1315
1315#else 1316#else
1316 1317
1317void __cpuinit cpu_init(void) 1318void cpu_init(void)
1318{ 1319{
1319 int cpu = smp_processor_id(); 1320 int cpu = smp_processor_id();
1320 struct task_struct *curr = current; 1321 struct task_struct *curr = current;
@@ -1334,7 +1335,7 @@ void __cpuinit cpu_init(void)
1334 if (cpu_has_vme || cpu_has_tsc || cpu_has_de) 1335 if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
1335 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1336 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1336 1337
1337 load_idt(&idt_descr); 1338 load_current_idt();
1338 switch_to_new_gdt(cpu); 1339 switch_to_new_gdt(cpu);
1339 1340
1340 /* 1341 /*
@@ -1363,3 +1364,17 @@ void __cpuinit cpu_init(void)
1363 fpu_init(); 1364 fpu_init();
1364} 1365}
1365#endif 1366#endif
1367
1368#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
1369void warn_pre_alternatives(void)
1370{
1371 WARN(1, "You're using static_cpu_has before alternatives have run!\n");
1372}
1373EXPORT_SYMBOL_GPL(warn_pre_alternatives);
1374#endif
1375
1376inline bool __static_cpu_has_safe(u16 bit)
1377{
1378 return boot_cpu_has(bit);
1379}
1380EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
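
The tail of the common.c diff adds warn_pre_alternatives(), which fires under CONFIG_X86_DEBUG_STATIC_CPU_HAS when static_cpu_has() is used before the alternative instructions have been patched, plus a __static_cpu_has_safe() fallback that simply defers to boot_cpu_has(). A hypothetical caller-side sketch of the safe variant; the copy routines named here are placeholders, not functions from this diff:

static void pick_copy_routine(void)
{
        /*
         * Hypothetical caller: safe even before alternatives have run,
         * because the slow path falls back to boot_cpu_has().
         */
        if (static_cpu_has_safe(X86_FEATURE_XMM2))
                copy_page_sse2();       /* placeholder fast path */
        else
                copy_page_generic();    /* placeholder fallback  */
}
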
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index d048d5ca43c1..d0969c75ab54 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -15,7 +15,7 @@
15/* 15/*
16 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU 16 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
17 */ 17 */
18static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) 18static void __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
19{ 19{
20 unsigned char ccr2, ccr3; 20 unsigned char ccr2, ccr3;
21 21
@@ -44,7 +44,7 @@ static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
44 } 44 }
45} 45}
46 46
47static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) 47static void do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
48{ 48{
49 unsigned long flags; 49 unsigned long flags;
50 50
@@ -59,25 +59,25 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
59 * Actually since bugs.h doesn't even reference this perhaps someone should 59 * Actually since bugs.h doesn't even reference this perhaps someone should
60 * fix the documentation ??? 60 * fix the documentation ???
61 */ 61 */
62static unsigned char Cx86_dir0_msb __cpuinitdata = 0; 62static unsigned char Cx86_dir0_msb = 0;
63 63
64static const char __cpuinitconst Cx86_model[][9] = { 64static const char Cx86_model[][9] = {
65 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", 65 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
66 "M II ", "Unknown" 66 "M II ", "Unknown"
67}; 67};
68static const char __cpuinitconst Cx486_name[][5] = { 68static const char Cx486_name[][5] = {
69 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", 69 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
70 "SRx2", "DRx2" 70 "SRx2", "DRx2"
71}; 71};
72static const char __cpuinitconst Cx486S_name[][4] = { 72static const char Cx486S_name[][4] = {
73 "S", "S2", "Se", "S2e" 73 "S", "S2", "Se", "S2e"
74}; 74};
75static const char __cpuinitconst Cx486D_name[][4] = { 75static const char Cx486D_name[][4] = {
76 "DX", "DX2", "?", "?", "?", "DX4" 76 "DX", "DX2", "?", "?", "?", "DX4"
77}; 77};
78static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; 78static char Cx86_cb[] = "?.5x Core/Bus Clock";
79static const char __cpuinitconst cyrix_model_mult1[] = "12??43"; 79static const char cyrix_model_mult1[] = "12??43";
80static const char __cpuinitconst cyrix_model_mult2[] = "12233445"; 80static const char cyrix_model_mult2[] = "12233445";
81 81
82/* 82/*
83 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old 83 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -87,7 +87,7 @@ static const char __cpuinitconst cyrix_model_mult2[] = "12233445";
87 * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP 87 * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
88 */ 88 */
89 89
90static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) 90static void check_cx686_slop(struct cpuinfo_x86 *c)
91{ 91{
92 unsigned long flags; 92 unsigned long flags;
93 93
@@ -112,7 +112,7 @@ static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
112} 112}
113 113
114 114
115static void __cpuinit set_cx86_reorder(void) 115static void set_cx86_reorder(void)
116{ 116{
117 u8 ccr3; 117 u8 ccr3;
118 118
@@ -127,7 +127,7 @@ static void __cpuinit set_cx86_reorder(void)
127 setCx86(CX86_CCR3, ccr3); 127 setCx86(CX86_CCR3, ccr3);
128} 128}
129 129
130static void __cpuinit set_cx86_memwb(void) 130static void set_cx86_memwb(void)
131{ 131{
132 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); 132 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
133 133
@@ -143,7 +143,7 @@ static void __cpuinit set_cx86_memwb(void)
143 * Configure later MediaGX and/or Geode processor. 143 * Configure later MediaGX and/or Geode processor.
144 */ 144 */
145 145
146static void __cpuinit geode_configure(void) 146static void geode_configure(void)
147{ 147{
148 unsigned long flags; 148 unsigned long flags;
149 u8 ccr3; 149 u8 ccr3;
@@ -166,7 +166,7 @@ static void __cpuinit geode_configure(void)
166 local_irq_restore(flags); 166 local_irq_restore(flags);
167} 167}
168 168
169static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) 169static void early_init_cyrix(struct cpuinfo_x86 *c)
170{ 170{
171 unsigned char dir0, dir0_msn, dir1 = 0; 171 unsigned char dir0, dir0_msn, dir1 = 0;
172 172
@@ -185,7 +185,7 @@ static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c)
185 } 185 }
186} 186}
187 187
188static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) 188static void init_cyrix(struct cpuinfo_x86 *c)
189{ 189{
190 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; 190 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
191 char *buf = c->x86_model_id; 191 char *buf = c->x86_model_id;
@@ -333,7 +333,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
333 switch (dir0_lsn) { 333 switch (dir0_lsn) {
334 case 0xd: /* either a 486SLC or DLC w/o DEVID */ 334 case 0xd: /* either a 486SLC or DLC w/o DEVID */
335 dir0_msn = 0; 335 dir0_msn = 0;
336 p = Cx486_name[(c->hard_math) ? 1 : 0]; 336 p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
337 break; 337 break;
338 338
339 case 0xe: /* a 486S A step */ 339 case 0xe: /* a 486S A step */
@@ -356,7 +356,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
356/* 356/*
357 * Handle National Semiconductor branded processors 357 * Handle National Semiconductor branded processors
358 */ 358 */
359static void __cpuinit init_nsc(struct cpuinfo_x86 *c) 359static void init_nsc(struct cpuinfo_x86 *c)
360{ 360{
361 /* 361 /*
362 * There may be GX1 processors in the wild that are branded 362 * There may be GX1 processors in the wild that are branded
@@ -405,7 +405,7 @@ static inline int test_cyrix_52div(void)
405 return (unsigned char) (test >> 8) == 0x02; 405 return (unsigned char) (test >> 8) == 0x02;
406} 406}
407 407
408static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) 408static void cyrix_identify(struct cpuinfo_x86 *c)
409{ 409{
410 /* Detect Cyrix with disabled CPUID */ 410 /* Detect Cyrix with disabled CPUID */
411 if (c->x86 == 4 && test_cyrix_52div()) { 411 if (c->x86 == 4 && test_cyrix_52div()) {
@@ -441,7 +441,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
441 } 441 }
442} 442}
443 443
444static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = { 444static const struct cpu_dev cyrix_cpu_dev = {
445 .c_vendor = "Cyrix", 445 .c_vendor = "Cyrix",
446 .c_ident = { "CyrixInstead" }, 446 .c_ident = { "CyrixInstead" },
447 .c_early_init = early_init_cyrix, 447 .c_early_init = early_init_cyrix,
@@ -452,7 +452,7 @@ static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = {
452 452
453cpu_dev_register(cyrix_cpu_dev); 453cpu_dev_register(cyrix_cpu_dev);
454 454
455static const struct cpu_dev __cpuinitconst nsc_cpu_dev = { 455static const struct cpu_dev nsc_cpu_dev = {
456 .c_vendor = "NSC", 456 .c_vendor = "NSC",
457 .c_ident = { "Geode by NSC" }, 457 .c_ident = { "Geode by NSC" },
458 .c_init = init_nsc, 458 .c_init = init_nsc,
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 1e7e84a02eba..87279212d318 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -60,7 +60,7 @@ detect_hypervisor_vendor(void)
60 } 60 }
61} 61}
62 62
63void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) 63void init_hypervisor(struct cpuinfo_x86 *c)
64{ 64{
65 if (x86_hyper && x86_hyper->set_cpu_features) 65 if (x86_hyper && x86_hyper->set_cpu_features)
66 x86_hyper->set_cpu_features(c); 66 x86_hyper->set_cpu_features(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9b0c441c03f5..ec7299566f79 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -26,7 +26,7 @@
26#include <asm/apic.h> 26#include <asm/apic.h>
27#endif 27#endif
28 28
29static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 29static void early_init_intel(struct cpuinfo_x86 *c)
30{ 30{
31 u64 misc_enable; 31 u64 misc_enable;
32 32
@@ -163,7 +163,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
163 * This is called before we do cpu ident work 163 * This is called before we do cpu ident work
164 */ 164 */
165 165
166int __cpuinit ppro_with_ram_bug(void) 166int ppro_with_ram_bug(void)
167{ 167{
168 /* Uses data from early_cpu_detect now */ 168 /* Uses data from early_cpu_detect now */
169 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 169 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -176,7 +176,7 @@ int __cpuinit ppro_with_ram_bug(void)
176 return 0; 176 return 0;
177} 177}
178 178
179static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) 179static void intel_smp_check(struct cpuinfo_x86 *c)
180{ 180{
181 /* calling is from identify_secondary_cpu() ? */ 181 /* calling is from identify_secondary_cpu() ? */
182 if (!c->cpu_index) 182 if (!c->cpu_index)
@@ -196,7 +196,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
196 } 196 }
197} 197}
198 198
199static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 199static void intel_workarounds(struct cpuinfo_x86 *c)
200{ 200{
201 unsigned long lo, hi; 201 unsigned long lo, hi;
202 202
@@ -275,12 +275,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
275 intel_smp_check(c); 275 intel_smp_check(c);
276} 276}
277#else 277#else
278static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 278static void intel_workarounds(struct cpuinfo_x86 *c)
279{ 279{
280} 280}
281#endif 281#endif
282 282
283static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 283static void srat_detect_node(struct cpuinfo_x86 *c)
284{ 284{
285#ifdef CONFIG_NUMA 285#ifdef CONFIG_NUMA
286 unsigned node; 286 unsigned node;
@@ -300,7 +300,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
300/* 300/*
301 * find out the number of processor cores on the die 301 * find out the number of processor cores on the die
302 */ 302 */
303static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) 303static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
304{ 304{
305 unsigned int eax, ebx, ecx, edx; 305 unsigned int eax, ebx, ecx, edx;
306 306
@@ -315,7 +315,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
315 return 1; 315 return 1;
316} 316}
317 317
318static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) 318static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
319{ 319{
320 /* Intel VMX MSR indicated features */ 320 /* Intel VMX MSR indicated features */
321#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 321#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
@@ -353,7 +353,7 @@ static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c)
353 } 353 }
354} 354}
355 355
356static void __cpuinit init_intel(struct cpuinfo_x86 *c) 356static void init_intel(struct cpuinfo_x86 *c)
357{ 357{
358 unsigned int l2 = 0; 358 unsigned int l2 = 0;
359 359
@@ -472,7 +472,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
472} 472}
473 473
474#ifdef CONFIG_X86_32 474#ifdef CONFIG_X86_32
475static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 475static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
476{ 476{
477 /* 477 /*
478 * Intel PIII Tualatin. This comes in two flavours. 478 * Intel PIII Tualatin. This comes in two flavours.
@@ -506,7 +506,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
506 506
507#define STLB_4K 0x41 507#define STLB_4K 0x41
508 508
509static const struct _tlb_table intel_tlb_table[] __cpuinitconst = { 509static const struct _tlb_table intel_tlb_table[] = {
510 { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, 510 { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
511 { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" }, 511 { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
512 { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, 512 { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
@@ -536,7 +536,7 @@ static const struct _tlb_table intel_tlb_table[] __cpuinitconst = {
536 { 0x00, 0, 0 } 536 { 0x00, 0, 0 }
537}; 537};
538 538
539static void __cpuinit intel_tlb_lookup(const unsigned char desc) 539static void intel_tlb_lookup(const unsigned char desc)
540{ 540{
541 unsigned char k; 541 unsigned char k;
542 if (desc == 0) 542 if (desc == 0)
@@ -605,7 +605,7 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
605 } 605 }
606} 606}
607 607
608static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) 608static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
609{ 609{
610 switch ((c->x86 << 8) + c->x86_model) { 610 switch ((c->x86 << 8) + c->x86_model) {
611 case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 611 case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
@@ -634,7 +634,7 @@ static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
634 } 634 }
635} 635}
636 636
637static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) 637static void intel_detect_tlb(struct cpuinfo_x86 *c)
638{ 638{
639 int i, j, n; 639 int i, j, n;
640 unsigned int regs[4]; 640 unsigned int regs[4];
@@ -661,7 +661,7 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
661 intel_tlb_flushall_shift_set(c); 661 intel_tlb_flushall_shift_set(c);
662} 662}
663 663
664static const struct cpu_dev __cpuinitconst intel_cpu_dev = { 664static const struct cpu_dev intel_cpu_dev = {
665 .c_vendor = "Intel", 665 .c_vendor = "Intel",
666 .c_ident = { "GenuineIntel" }, 666 .c_ident = { "GenuineIntel" },
667#ifdef CONFIG_X86_32 667#ifdef CONFIG_X86_32
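
detect_vmx_virtcap(), visible in the intel.c context above, turns bits of the VMX capability MSRs into synthetic CPU feature flags; the 0x00200000 mask is the "use TPR shadow" control, bit 21 of the allowed-1 half of IA32_VMX_PROCBASED_CTLS. A user-space sketch of the same probe through the msr driver, assuming root, a loaded msr module, and MSR index 0x482 for IA32_VMX_PROCBASED_CTLS:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        uint64_t ctls;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0 || pread(fd, &ctls, sizeof(ctls), 0x482) != (ssize_t)sizeof(ctls)) {
                perror("IA32_VMX_PROCBASED_CTLS");
                return 1;
        }
        close(fd);

        /* allowed-1 settings live in the high dword; bit 21 = TPR shadow */
        printf("TPR shadow: %s\n",
               (ctls >> 32) & (1ULL << 21) ? "available" : "not available");
        return 0;
}
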
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7c6f7d548c0f..1414c90feaba 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -37,7 +37,7 @@ struct _cache_table {
37/* All the cache descriptor types we care about (no TLB or 37/* All the cache descriptor types we care about (no TLB or
38 trace cache entries) */ 38 trace cache entries) */
39 39
40static const struct _cache_table __cpuinitconst cache_table[] = 40static const struct _cache_table cache_table[] =
41{ 41{
42 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ 42 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
43 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ 43 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -203,7 +203,7 @@ union l3_cache {
203 unsigned val; 203 unsigned val;
204}; 204};
205 205
206static const unsigned short __cpuinitconst assocs[] = { 206static const unsigned short assocs[] = {
207 [1] = 1, 207 [1] = 1,
208 [2] = 2, 208 [2] = 2,
209 [4] = 4, 209 [4] = 4,
@@ -217,10 +217,10 @@ static const unsigned short __cpuinitconst assocs[] = {
217 [0xf] = 0xffff /* fully associative - no way to show this currently */ 217 [0xf] = 0xffff /* fully associative - no way to show this currently */
218}; 218};
219 219
220static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; 220static const unsigned char levels[] = { 1, 1, 2, 3 };
221static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 }; 221static const unsigned char types[] = { 1, 2, 3, 3 };
222 222
223static void __cpuinit 223static void
224amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 224amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
225 union _cpuid4_leaf_ebx *ebx, 225 union _cpuid4_leaf_ebx *ebx,
226 union _cpuid4_leaf_ecx *ecx) 226 union _cpuid4_leaf_ecx *ecx)
@@ -302,7 +302,7 @@ struct _cache_attr {
302/* 302/*
303 * L3 cache descriptors 303 * L3 cache descriptors
304 */ 304 */
305static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb) 305static void amd_calc_l3_indices(struct amd_northbridge *nb)
306{ 306{
307 struct amd_l3_cache *l3 = &nb->l3_cache; 307 struct amd_l3_cache *l3 = &nb->l3_cache;
308 unsigned int sc0, sc1, sc2, sc3; 308 unsigned int sc0, sc1, sc2, sc3;
@@ -325,7 +325,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
325 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; 325 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
326} 326}
327 327
328static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) 328static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
329{ 329{
330 int node; 330 int node;
331 331
@@ -528,8 +528,7 @@ static struct _cache_attr subcaches =
528#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ 528#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
529 529
530static int 530static int
531__cpuinit cpuid4_cache_lookup_regs(int index, 531cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
532 struct _cpuid4_info_regs *this_leaf)
533{ 532{
534 union _cpuid4_leaf_eax eax; 533 union _cpuid4_leaf_eax eax;
535 union _cpuid4_leaf_ebx ebx; 534 union _cpuid4_leaf_ebx ebx;
@@ -560,7 +559,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
560 return 0; 559 return 0;
561} 560}
562 561
563static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c) 562static int find_num_cache_leaves(struct cpuinfo_x86 *c)
564{ 563{
565 unsigned int eax, ebx, ecx, edx, op; 564 unsigned int eax, ebx, ecx, edx, op;
566 union _cpuid4_leaf_eax cache_eax; 565 union _cpuid4_leaf_eax cache_eax;
@@ -580,7 +579,7 @@ static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
580 return i; 579 return i;
581} 580}
582 581
583void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c) 582void init_amd_cacheinfo(struct cpuinfo_x86 *c)
584{ 583{
585 584
586 if (cpu_has_topoext) { 585 if (cpu_has_topoext) {
@@ -593,7 +592,7 @@ void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
593 } 592 }
594} 593}
595 594
596unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) 595unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
597{ 596{
598 /* Cache sizes */ 597 /* Cache sizes */
599 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; 598 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
@@ -618,36 +617,34 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
618 * parameters cpuid leaf to find the cache details 617 * parameters cpuid leaf to find the cache details
619 */ 618 */
620 for (i = 0; i < num_cache_leaves; i++) { 619 for (i = 0; i < num_cache_leaves; i++) {
621 struct _cpuid4_info_regs this_leaf; 620 struct _cpuid4_info_regs this_leaf = {};
622 int retval; 621 int retval;
623 622
624 retval = cpuid4_cache_lookup_regs(i, &this_leaf); 623 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
625 if (retval >= 0) { 624 if (retval < 0)
626 switch (this_leaf.eax.split.level) { 625 continue;
627 case 1: 626
628 if (this_leaf.eax.split.type == 627 switch (this_leaf.eax.split.level) {
629 CACHE_TYPE_DATA) 628 case 1:
630 new_l1d = this_leaf.size/1024; 629 if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
631 else if (this_leaf.eax.split.type == 630 new_l1d = this_leaf.size/1024;
632 CACHE_TYPE_INST) 631 else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
633 new_l1i = this_leaf.size/1024; 632 new_l1i = this_leaf.size/1024;
634 break; 633 break;
635 case 2: 634 case 2:
636 new_l2 = this_leaf.size/1024; 635 new_l2 = this_leaf.size/1024;
637 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 636 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
638 index_msb = get_count_order(num_threads_sharing); 637 index_msb = get_count_order(num_threads_sharing);
639 l2_id = c->apicid & ~((1 << index_msb) - 1); 638 l2_id = c->apicid & ~((1 << index_msb) - 1);
640 break; 639 break;
641 case 3: 640 case 3:
642 new_l3 = this_leaf.size/1024; 641 new_l3 = this_leaf.size/1024;
643 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 642 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
644 index_msb = get_count_order( 643 index_msb = get_count_order(num_threads_sharing);
645 num_threads_sharing); 644 l3_id = c->apicid & ~((1 << index_msb) - 1);
646 l3_id = c->apicid & ~((1 << index_msb) - 1); 645 break;
647 break; 646 default:
648 default: 647 break;
649 break;
650 }
651 } 648 }
652 } 649 }
653 } 650 }
@@ -746,7 +743,7 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
746 743
747#ifdef CONFIG_SMP 744#ifdef CONFIG_SMP
748 745
749static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) 746static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
750{ 747{
751 struct _cpuid4_info *this_leaf; 748 struct _cpuid4_info *this_leaf;
752 int i, sibling; 749 int i, sibling;
@@ -795,7 +792,7 @@ static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
795 return 1; 792 return 1;
796} 793}
797 794
798static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 795static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
799{ 796{
800 struct _cpuid4_info *this_leaf, *sibling_leaf; 797 struct _cpuid4_info *this_leaf, *sibling_leaf;
801 unsigned long num_threads_sharing; 798 unsigned long num_threads_sharing;
@@ -830,7 +827,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
830 } 827 }
831 } 828 }
832} 829}
833static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) 830static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
834{ 831{
835 struct _cpuid4_info *this_leaf, *sibling_leaf; 832 struct _cpuid4_info *this_leaf, *sibling_leaf;
836 int sibling; 833 int sibling;
@@ -843,16 +840,16 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
843 } 840 }
844} 841}
845#else 842#else
846static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 843static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
847{ 844{
848} 845}
849 846
850static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) 847static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
851{ 848{
852} 849}
853#endif 850#endif
854 851
855static void __cpuinit free_cache_attributes(unsigned int cpu) 852static void free_cache_attributes(unsigned int cpu)
856{ 853{
857 int i; 854 int i;
858 855
@@ -863,7 +860,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
863 per_cpu(ici_cpuid4_info, cpu) = NULL; 860 per_cpu(ici_cpuid4_info, cpu) = NULL;
864} 861}
865 862
866static void __cpuinit get_cpu_leaves(void *_retval) 863static void get_cpu_leaves(void *_retval)
867{ 864{
868 int j, *retval = _retval, cpu = smp_processor_id(); 865 int j, *retval = _retval, cpu = smp_processor_id();
869 866
@@ -883,7 +880,7 @@ static void __cpuinit get_cpu_leaves(void *_retval)
883 } 880 }
884} 881}
885 882
886static int __cpuinit detect_cache_attributes(unsigned int cpu) 883static int detect_cache_attributes(unsigned int cpu)
887{ 884{
888 int retval; 885 int retval;
889 886
@@ -1017,7 +1014,7 @@ static struct attribute *default_attrs[] = {
1017}; 1014};
1018 1015
1019#ifdef CONFIG_AMD_NB 1016#ifdef CONFIG_AMD_NB
1020static struct attribute ** __cpuinit amd_l3_attrs(void) 1017static struct attribute **amd_l3_attrs(void)
1021{ 1018{
1022 static struct attribute **attrs; 1019 static struct attribute **attrs;
1023 int n; 1020 int n;
@@ -1093,7 +1090,7 @@ static struct kobj_type ktype_percpu_entry = {
1093 .sysfs_ops = &sysfs_ops, 1090 .sysfs_ops = &sysfs_ops,
1094}; 1091};
1095 1092
1096static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) 1093static void cpuid4_cache_sysfs_exit(unsigned int cpu)
1097{ 1094{
1098 kfree(per_cpu(ici_cache_kobject, cpu)); 1095 kfree(per_cpu(ici_cache_kobject, cpu));
1099 kfree(per_cpu(ici_index_kobject, cpu)); 1096 kfree(per_cpu(ici_index_kobject, cpu));
@@ -1102,7 +1099,7 @@ static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
1102 free_cache_attributes(cpu); 1099 free_cache_attributes(cpu);
1103} 1100}
1104 1101
1105static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) 1102static int cpuid4_cache_sysfs_init(unsigned int cpu)
1106{ 1103{
1107 int err; 1104 int err;
1108 1105
@@ -1134,7 +1131,7 @@ err_out:
1134static DECLARE_BITMAP(cache_dev_map, NR_CPUS); 1131static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1135 1132
1136/* Add/Remove cache interface for CPU device */ 1133/* Add/Remove cache interface for CPU device */
1137static int __cpuinit cache_add_dev(struct device *dev) 1134static int cache_add_dev(struct device *dev)
1138{ 1135{
1139 unsigned int cpu = dev->id; 1136 unsigned int cpu = dev->id;
1140 unsigned long i, j; 1137 unsigned long i, j;
@@ -1185,7 +1182,7 @@ static int __cpuinit cache_add_dev(struct device *dev)
1185 return 0; 1182 return 0;
1186} 1183}
1187 1184
1188static void __cpuinit cache_remove_dev(struct device *dev) 1185static void cache_remove_dev(struct device *dev)
1189{ 1186{
1190 unsigned int cpu = dev->id; 1187 unsigned int cpu = dev->id;
1191 unsigned long i; 1188 unsigned long i;
@@ -1202,8 +1199,8 @@ static void __cpuinit cache_remove_dev(struct device *dev)
1202 cpuid4_cache_sysfs_exit(cpu); 1199 cpuid4_cache_sysfs_exit(cpu);
1203} 1200}
1204 1201
1205static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, 1202static int cacheinfo_cpu_callback(struct notifier_block *nfb,
1206 unsigned long action, void *hcpu) 1203 unsigned long action, void *hcpu)
1207{ 1204{
1208 unsigned int cpu = (unsigned long)hcpu; 1205 unsigned int cpu = (unsigned long)hcpu;
1209 struct device *dev; 1206 struct device *dev;
@@ -1222,7 +1219,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1222 return NOTIFY_OK; 1219 return NOTIFY_OK;
1223} 1220}
1224 1221
1225static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { 1222static struct notifier_block cacheinfo_cpu_notifier = {
1226 .notifier_call = cacheinfo_cpu_callback, 1223 .notifier_call = cacheinfo_cpu_callback,
1227}; 1224};
1228 1225
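
The restructured loop in init_intel_cacheinfo() above (early continue instead of the nested if) keeps the same ID derivation: the number of threads sharing a cache, reported by CPUID leaf 4 as EAX[25:14] + 1, is rounded up to a power of two and used to mask the APIC ID. A stand-alone sketch of that masking step, with get_count_order() approximated by a simple loop and the input values made up for illustration:

#include <stdio.h>

/* Approximation of get_count_order(): smallest order with (1 << order) >= n. */
static int count_order(unsigned int n)
{
        int order = 0;

        while ((1u << order) < n)
                order++;
        return order;
}

int main(void)
{
        unsigned int apicid = 0x17;            /* made-up APIC ID          */
        unsigned int threads_sharing = 1 + 3;  /* CPUID.4 EAX[25:14] + 1   */
        int index_msb = count_order(threads_sharing);
        unsigned int l2_id = apicid & ~((1u << index_msb) - 1);

        printf("l2_id for apicid %#x: %#x\n", apicid, l2_id);
        return 0;
}
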
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index ddc72f839332..5ac2d1fb28bc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -153,7 +153,7 @@ static void raise_mce(struct mce *m)
153 return; 153 return;
154 154
155#ifdef CONFIG_X86_LOCAL_APIC 155#ifdef CONFIG_X86_LOCAL_APIC
156 if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { 156 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
157 unsigned long start; 157 unsigned long start;
158 int cpu; 158 int cpu;
159 159
@@ -167,7 +167,7 @@ static void raise_mce(struct mce *m)
167 cpumask_clear_cpu(cpu, mce_inject_cpumask); 167 cpumask_clear_cpu(cpu, mce_inject_cpumask);
168 } 168 }
169 if (!cpumask_empty(mce_inject_cpumask)) { 169 if (!cpumask_empty(mce_inject_cpumask)) {
170 if (m->inject_flags & MCJ_IRQ_BRAODCAST) { 170 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
171 /* 171 /*
172 * don't wait because mce_irq_ipi is necessary 172 * don't wait because mce_irq_ipi is necessary
173 * to be sync with following raise_local 173 * to be sync with following raise_local
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index beb1f1689e52..c370e1c4468b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -110,22 +110,17 @@ static struct severity {
110 /* known AR MCACODs: */ 110 /* known AR MCACODs: */
111#ifdef CONFIG_MEMORY_FAILURE 111#ifdef CONFIG_MEMORY_FAILURE
112 MCESEV( 112 MCESEV(
113 KEEP, "HT thread notices Action required: data load error", 113 KEEP, "Action required but unaffected thread is continuable",
114 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), 114 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
115 MCGMASK(MCG_STATUS_EIPV, 0) 115 MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
116 ), 116 ),
117 MCESEV( 117 MCESEV(
118 AR, "Action required: data load error", 118 AR, "Action required: data load error in a user process",
119 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), 119 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
120 USER 120 USER
121 ), 121 ),
122 MCESEV( 122 MCESEV(
123 KEEP, "HT thread notices Action required: instruction fetch error", 123 AR, "Action required: instruction fetch error in a user process",
124 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
125 MCGMASK(MCG_STATUS_EIPV, 0)
126 ),
127 MCESEV(
128 AR, "Action required: instruction fetch error",
129 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), 124 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
130 USER 125 USER
131 ), 126 ),
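
For readers following the mce-severity.c table: each MCESEV() entry matches when the masked MCi_STATUS equals the MASK() result and the masked MCG_STATUS equals the MCGMASK() result, so the rewritten entry above keeps (KEEP) any SRAR-signalled error observed by a thread that has RIPV set and EIPV clear, regardless of the MCACOD. A simplified sketch of that matching step (not the full mce_severity() walk, which carries more fields such as the SER requirement and the execution context):

/* Simplified severity-rule match; field names mirror struct severity. */
struct sev_rule {
        unsigned long long mask, result;      /* applied to MCi_STATUS */
        unsigned long long mcgmask, mcgres;   /* applied to MCG_STATUS */
};

static int rule_matches(const struct sev_rule *r,
                        unsigned long long status,
                        unsigned long long mcgstatus)
{
        return (status & r->mask) == r->result &&
               (mcgstatus & r->mcgmask) == r->mcgres;
}
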
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9239504b41cb..87a65c939bcd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -89,7 +89,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
89static DEFINE_PER_CPU(struct mce, mces_seen); 89static DEFINE_PER_CPU(struct mce, mces_seen);
90static int cpu_missing; 90static int cpu_missing;
91 91
92/* MCA banks polled by the period polling timer for corrected events */ 92/*
93 * MCA banks polled by the period polling timer for corrected events.
94 * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
95 */
93DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 96DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
94 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 97 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
95}; 98};
@@ -1360,7 +1363,7 @@ int mce_notify_irq(void)
1360} 1363}
1361EXPORT_SYMBOL_GPL(mce_notify_irq); 1364EXPORT_SYMBOL_GPL(mce_notify_irq);
1362 1365
1363static int __cpuinit __mcheck_cpu_mce_banks_init(void) 1366static int __mcheck_cpu_mce_banks_init(void)
1364{ 1367{
1365 int i; 1368 int i;
1366 u8 num_banks = mca_cfg.banks; 1369 u8 num_banks = mca_cfg.banks;
@@ -1381,7 +1384,7 @@ static int __cpuinit __mcheck_cpu_mce_banks_init(void)
1381/* 1384/*
1382 * Initialize Machine Checks for a CPU. 1385 * Initialize Machine Checks for a CPU.
1383 */ 1386 */
1384static int __cpuinit __mcheck_cpu_cap_init(void) 1387static int __mcheck_cpu_cap_init(void)
1385{ 1388{
1386 unsigned b; 1389 unsigned b;
1387 u64 cap; 1390 u64 cap;
@@ -1480,7 +1483,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
1480} 1483}
1481 1484
1482/* Add per CPU specific workarounds here */ 1485/* Add per CPU specific workarounds here */
1483static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) 1486static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1484{ 1487{
1485 struct mca_config *cfg = &mca_cfg; 1488 struct mca_config *cfg = &mca_cfg;
1486 1489
@@ -1590,7 +1593,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1590 return 0; 1593 return 0;
1591} 1594}
1592 1595
1593static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) 1596static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
1594{ 1597{
1595 if (c->x86 != 5) 1598 if (c->x86 != 5)
1596 return 0; 1599 return 0;
@@ -1661,7 +1664,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
1661 * Called for each booted CPU to set up machine checks. 1664 * Called for each booted CPU to set up machine checks.
1662 * Must be called with preempt off: 1665 * Must be called with preempt off:
1663 */ 1666 */
1664void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) 1667void mcheck_cpu_init(struct cpuinfo_x86 *c)
1665{ 1668{
1666 if (mca_cfg.disabled) 1669 if (mca_cfg.disabled)
1667 return; 1670 return;
@@ -2079,7 +2082,6 @@ static struct bus_type mce_subsys = {
2079 2082
2080DEFINE_PER_CPU(struct device *, mce_device); 2083DEFINE_PER_CPU(struct device *, mce_device);
2081 2084
2082__cpuinitdata
2083void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 2085void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
2084 2086
2085static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) 2087static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
@@ -2225,7 +2227,7 @@ static void mce_device_release(struct device *dev)
2225} 2227}
2226 2228
2227/* Per cpu device init. All of the cpus still share the same ctrl bank: */ 2229/* Per cpu device init. All of the cpus still share the same ctrl bank: */
2228static __cpuinit int mce_device_create(unsigned int cpu) 2230static int mce_device_create(unsigned int cpu)
2229{ 2231{
2230 struct device *dev; 2232 struct device *dev;
2231 int err; 2233 int err;
@@ -2271,7 +2273,7 @@ error:
2271 return err; 2273 return err;
2272} 2274}
2273 2275
2274static __cpuinit void mce_device_remove(unsigned int cpu) 2276static void mce_device_remove(unsigned int cpu)
2275{ 2277{
2276 struct device *dev = per_cpu(mce_device, cpu); 2278 struct device *dev = per_cpu(mce_device, cpu);
2277 int i; 2279 int i;
@@ -2291,7 +2293,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
2291} 2293}
2292 2294
2293/* Make sure there are no machine checks on offlined CPUs. */ 2295/* Make sure there are no machine checks on offlined CPUs. */
2294static void __cpuinit mce_disable_cpu(void *h) 2296static void mce_disable_cpu(void *h)
2295{ 2297{
2296 unsigned long action = *(unsigned long *)h; 2298 unsigned long action = *(unsigned long *)h;
2297 int i; 2299 int i;
@@ -2309,7 +2311,7 @@ static void __cpuinit mce_disable_cpu(void *h)
2309 } 2311 }
2310} 2312}
2311 2313
2312static void __cpuinit mce_reenable_cpu(void *h) 2314static void mce_reenable_cpu(void *h)
2313{ 2315{
2314 unsigned long action = *(unsigned long *)h; 2316 unsigned long action = *(unsigned long *)h;
2315 int i; 2317 int i;
@@ -2328,7 +2330,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
2328} 2330}
2329 2331
2330/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 2332/* Get notified when a cpu comes on/off. Be hotplug friendly. */
2331static int __cpuinit 2333static int
2332mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 2334mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2333{ 2335{
2334 unsigned int cpu = (unsigned long)hcpu; 2336 unsigned int cpu = (unsigned long)hcpu;
@@ -2364,7 +2366,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2364 return NOTIFY_OK; 2366 return NOTIFY_OK;
2365} 2367}
2366 2368
2367static struct notifier_block mce_cpu_notifier __cpuinitdata = { 2369static struct notifier_block mce_cpu_notifier = {
2368 .notifier_call = mce_cpu_callback, 2370 .notifier_call = mce_cpu_callback,
2369}; 2371};
2370 2372
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9cb52767999a..603df4f74640 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -458,10 +458,8 @@ static struct kobj_type threshold_ktype = {
458 .default_attrs = default_attrs, 458 .default_attrs = default_attrs,
459}; 459};
460 460
461static __cpuinit int allocate_threshold_blocks(unsigned int cpu, 461static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
462 unsigned int bank, 462 unsigned int block, u32 address)
463 unsigned int block,
464 u32 address)
465{ 463{
466 struct threshold_block *b = NULL; 464 struct threshold_block *b = NULL;
467 u32 low, high; 465 u32 low, high;
@@ -543,7 +541,7 @@ out_free:
543 return err; 541 return err;
544} 542}
545 543
546static __cpuinit int __threshold_add_blocks(struct threshold_bank *b) 544static int __threshold_add_blocks(struct threshold_bank *b)
547{ 545{
548 struct list_head *head = &b->blocks->miscj; 546 struct list_head *head = &b->blocks->miscj;
549 struct threshold_block *pos = NULL; 547 struct threshold_block *pos = NULL;
@@ -567,7 +565,7 @@ static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)
567 return err; 565 return err;
568} 566}
569 567
570static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) 568static int threshold_create_bank(unsigned int cpu, unsigned int bank)
571{ 569{
572 struct device *dev = per_cpu(mce_device, cpu); 570 struct device *dev = per_cpu(mce_device, cpu);
573 struct amd_northbridge *nb = NULL; 571 struct amd_northbridge *nb = NULL;
@@ -632,7 +630,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
632} 630}
633 631
634/* create dir/files for all valid threshold banks */ 632/* create dir/files for all valid threshold banks */
635static __cpuinit int threshold_create_device(unsigned int cpu) 633static int threshold_create_device(unsigned int cpu)
636{ 634{
637 unsigned int bank; 635 unsigned int bank;
638 struct threshold_bank **bp; 636 struct threshold_bank **bp;
@@ -736,7 +734,7 @@ static void threshold_remove_device(unsigned int cpu)
736} 734}
737 735
738/* get notified when a cpu comes on/off */ 736/* get notified when a cpu comes on/off */
739static void __cpuinit 737static void
740amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu) 738amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
741{ 739{
742 switch (action) { 740 switch (action) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index ae1697c2afe3..d56405309dc1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -24,6 +24,18 @@
24 * Also supports reliable discovery of shared banks. 24 * Also supports reliable discovery of shared banks.
25 */ 25 */
26 26
27/*
28 * CMCI can be delivered to multiple cpus that share a machine check bank
29 * so we need to designate a single cpu to process errors logged in each bank
30 * in the interrupt handler (otherwise we would have many races and potential
31 * double reporting of the same error).
32 * Note that this can change when a cpu is offlined or brought online since
33 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
34 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
35 * this point, cmci_rediscover() kicks in and a different cpu may end up
36 * taking ownership of some of the shared MCA banks that were previously
37 * owned by the offlined cpu.
38 */
27static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); 39static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
28 40
29/* 41/*
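
The comment added above describes how CMCI ownership of shared MCA banks is handed from an offlined CPU to a surviving one. A standalone sketch of that hand-off (illustration only, not taken from the commit; the CPU/bank counts and all names below are made up):

/* cmci_ownership_demo.c - toy model of per-CPU MCA bank ownership */
#include <stdio.h>

#define NR_CPUS  4
#define NR_BANKS 6

/* Bit n set in owned[cpu] means "cpu processes errors logged in bank n". */
static unsigned int owned[NR_CPUS];
static int cpu_online[NR_CPUS] = { 1, 1, 1, 1 };

/* Claim every bank that no online CPU currently owns. */
static void cmci_discover(int cpu)
{
        unsigned int taken = 0;
        int i;

        for (i = 0; i < NR_CPUS; i++)
                if (cpu_online[i])
                        taken |= owned[i];

        owned[cpu] |= ~taken & ((1u << NR_BANKS) - 1);
}

/* Offline a CPU: drop its banks so another CPU can rediscover them. */
static void cmci_clear(int cpu)
{
        cpu_online[cpu] = 0;
        owned[cpu] = 0;
}

int main(void)
{
        int i;

        for (i = 0; i < NR_CPUS; i++)
                cmci_discover(i);           /* cpu0 grabs everything first */

        printf("cpu0 owns %#x, cpu1 owns %#x\n", owned[0], owned[1]);

        cmci_clear(0);                      /* cpu0 goes offline           */
        cmci_discover(1);                   /* cpu1 takes over its banks   */

        printf("after offlining cpu0: cpu1 owns %#x\n", owned[1]);
        return 0;
}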
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 47a1870279aa..3eec7de76efb 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -29,6 +29,7 @@
29#include <asm/idle.h> 29#include <asm/idle.h>
30#include <asm/mce.h> 30#include <asm/mce.h>
31#include <asm/msr.h> 31#include <asm/msr.h>
32#include <asm/trace/irq_vectors.h>
32 33
33/* How long to wait between reporting thermal events */ 34/* How long to wait between reporting thermal events */
34#define CHECK_INTERVAL (300 * HZ) 35#define CHECK_INTERVAL (300 * HZ)
@@ -54,12 +55,24 @@ struct thermal_state {
54 struct _thermal_state package_power_limit; 55 struct _thermal_state package_power_limit;
55 struct _thermal_state core_thresh0; 56 struct _thermal_state core_thresh0;
56 struct _thermal_state core_thresh1; 57 struct _thermal_state core_thresh1;
58 struct _thermal_state pkg_thresh0;
59 struct _thermal_state pkg_thresh1;
57}; 60};
58 61
59/* Callback to handle core threshold interrupts */ 62/* Callback to handle core threshold interrupts */
60int (*platform_thermal_notify)(__u64 msr_val); 63int (*platform_thermal_notify)(__u64 msr_val);
61EXPORT_SYMBOL(platform_thermal_notify); 64EXPORT_SYMBOL(platform_thermal_notify);
62 65
 66/* Callback to handle core package threshold interrupts */
67int (*platform_thermal_package_notify)(__u64 msr_val);
68EXPORT_SYMBOL_GPL(platform_thermal_package_notify);
69
 70/* Callback support of rate control, return true if
71 * callback has rate control */
72bool (*platform_thermal_package_rate_control)(void);
73EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
74
75
63static DEFINE_PER_CPU(struct thermal_state, thermal_state); 76static DEFINE_PER_CPU(struct thermal_state, thermal_state);
64 77
65static atomic_t therm_throt_en = ATOMIC_INIT(0); 78static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -181,11 +194,6 @@ static int therm_throt_process(bool new_event, int event, int level)
181 this_cpu, 194 this_cpu,
182 level == CORE_LEVEL ? "Core" : "Package", 195 level == CORE_LEVEL ? "Core" : "Package",
183 state->count); 196 state->count);
184 else
185 printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
186 this_cpu,
187 level == CORE_LEVEL ? "Core" : "Package",
188 state->count);
189 return 1; 197 return 1;
190 } 198 }
191 if (old_event) { 199 if (old_event) {
@@ -193,36 +201,46 @@ static int therm_throt_process(bool new_event, int event, int level)
193 printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", 201 printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
194 this_cpu, 202 this_cpu,
195 level == CORE_LEVEL ? "Core" : "Package"); 203 level == CORE_LEVEL ? "Core" : "Package");
196 else
197 printk(KERN_INFO "CPU%d: %s power limit normal\n",
198 this_cpu,
199 level == CORE_LEVEL ? "Core" : "Package");
200 return 1; 204 return 1;
201 } 205 }
202 206
203 return 0; 207 return 0;
204} 208}
205 209
206static int thresh_event_valid(int event) 210static int thresh_event_valid(int level, int event)
207{ 211{
208 struct _thermal_state *state; 212 struct _thermal_state *state;
209 unsigned int this_cpu = smp_processor_id(); 213 unsigned int this_cpu = smp_processor_id();
210 struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); 214 struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
211 u64 now = get_jiffies_64(); 215 u64 now = get_jiffies_64();
212 216
213 state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1; 217 if (level == PACKAGE_LEVEL)
218 state = (event == 0) ? &pstate->pkg_thresh0 :
219 &pstate->pkg_thresh1;
220 else
221 state = (event == 0) ? &pstate->core_thresh0 :
222 &pstate->core_thresh1;
214 223
215 if (time_before64(now, state->next_check)) 224 if (time_before64(now, state->next_check))
216 return 0; 225 return 0;
217 226
218 state->next_check = now + CHECK_INTERVAL; 227 state->next_check = now + CHECK_INTERVAL;
228
229 return 1;
230}
231
232static bool int_pln_enable;
233static int __init int_pln_enable_setup(char *s)
234{
235 int_pln_enable = true;
236
219 return 1; 237 return 1;
220} 238}
239__setup("int_pln_enable", int_pln_enable_setup);
221 240
222#ifdef CONFIG_SYSFS 241#ifdef CONFIG_SYSFS
223/* Add/Remove thermal_throttle interface for CPU device: */ 242/* Add/Remove thermal_throttle interface for CPU device: */
224static __cpuinit int thermal_throttle_add_dev(struct device *dev, 243static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
225 unsigned int cpu)
226{ 244{
227 int err; 245 int err;
228 struct cpuinfo_x86 *c = &cpu_data(cpu); 246 struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -231,7 +249,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
231 if (err) 249 if (err)
232 return err; 250 return err;
233 251
234 if (cpu_has(c, X86_FEATURE_PLN)) 252 if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
235 err = sysfs_add_file_to_group(&dev->kobj, 253 err = sysfs_add_file_to_group(&dev->kobj,
236 &dev_attr_core_power_limit_count.attr, 254 &dev_attr_core_power_limit_count.attr,
237 thermal_attr_group.name); 255 thermal_attr_group.name);
@@ -239,7 +257,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
239 err = sysfs_add_file_to_group(&dev->kobj, 257 err = sysfs_add_file_to_group(&dev->kobj,
240 &dev_attr_package_throttle_count.attr, 258 &dev_attr_package_throttle_count.attr,
241 thermal_attr_group.name); 259 thermal_attr_group.name);
242 if (cpu_has(c, X86_FEATURE_PLN)) 260 if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
243 err = sysfs_add_file_to_group(&dev->kobj, 261 err = sysfs_add_file_to_group(&dev->kobj,
244 &dev_attr_package_power_limit_count.attr, 262 &dev_attr_package_power_limit_count.attr,
245 thermal_attr_group.name); 263 thermal_attr_group.name);
@@ -248,7 +266,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
248 return err; 266 return err;
249} 267}
250 268
251static __cpuinit void thermal_throttle_remove_dev(struct device *dev) 269static void thermal_throttle_remove_dev(struct device *dev)
252{ 270{
253 sysfs_remove_group(&dev->kobj, &thermal_attr_group); 271 sysfs_remove_group(&dev->kobj, &thermal_attr_group);
254} 272}
@@ -257,7 +275,7 @@ static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
257static DEFINE_MUTEX(therm_cpu_lock); 275static DEFINE_MUTEX(therm_cpu_lock);
258 276
259/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 277/* Get notified when a cpu comes on/off. Be hotplug friendly. */
260static __cpuinit int 278static int
261thermal_throttle_cpu_callback(struct notifier_block *nfb, 279thermal_throttle_cpu_callback(struct notifier_block *nfb,
262 unsigned long action, 280 unsigned long action,
263 void *hcpu) 281 void *hcpu)
@@ -288,7 +306,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
288 return notifier_from_errno(err); 306 return notifier_from_errno(err);
289} 307}
290 308
291static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = 309static struct notifier_block thermal_throttle_cpu_notifier =
292{ 310{
293 .notifier_call = thermal_throttle_cpu_callback, 311 .notifier_call = thermal_throttle_cpu_callback,
294}; 312};
@@ -321,6 +339,39 @@ device_initcall(thermal_throttle_init_device);
321 339
322#endif /* CONFIG_SYSFS */ 340#endif /* CONFIG_SYSFS */
323 341
342static void notify_package_thresholds(__u64 msr_val)
343{
344 bool notify_thres_0 = false;
345 bool notify_thres_1 = false;
346
347 if (!platform_thermal_package_notify)
348 return;
349
350 /* lower threshold check */
351 if (msr_val & THERM_LOG_THRESHOLD0)
352 notify_thres_0 = true;
353 /* higher threshold check */
354 if (msr_val & THERM_LOG_THRESHOLD1)
355 notify_thres_1 = true;
356
357 if (!notify_thres_0 && !notify_thres_1)
358 return;
359
360 if (platform_thermal_package_rate_control &&
361 platform_thermal_package_rate_control()) {
362 /* Rate control is implemented in callback */
363 platform_thermal_package_notify(msr_val);
364 return;
365 }
366
367 /* lower threshold reached */
368 if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
369 platform_thermal_package_notify(msr_val);
370 /* higher threshold reached */
371 if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
372 platform_thermal_package_notify(msr_val);
373}
374
324static void notify_thresholds(__u64 msr_val) 375static void notify_thresholds(__u64 msr_val)
325{ 376{
326 /* check whether the interrupt handler is defined; 377 /* check whether the interrupt handler is defined;
@@ -330,10 +381,12 @@ static void notify_thresholds(__u64 msr_val)
330 return; 381 return;
331 382
332 /* lower threshold reached */ 383 /* lower threshold reached */
333 if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0)) 384 if ((msr_val & THERM_LOG_THRESHOLD0) &&
385 thresh_event_valid(CORE_LEVEL, 0))
334 platform_thermal_notify(msr_val); 386 platform_thermal_notify(msr_val);
335 /* higher threshold reached */ 387 /* higher threshold reached */
336 if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1)) 388 if ((msr_val & THERM_LOG_THRESHOLD1) &&
389 thresh_event_valid(CORE_LEVEL, 1))
337 platform_thermal_notify(msr_val); 390 platform_thermal_notify(msr_val);
338} 391}
339 392
@@ -352,17 +405,19 @@ static void intel_thermal_interrupt(void)
352 CORE_LEVEL) != 0) 405 CORE_LEVEL) != 0)
353 mce_log_therm_throt_event(msr_val); 406 mce_log_therm_throt_event(msr_val);
354 407
355 if (this_cpu_has(X86_FEATURE_PLN)) 408 if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
356 therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, 409 therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
357 POWER_LIMIT_EVENT, 410 POWER_LIMIT_EVENT,
358 CORE_LEVEL); 411 CORE_LEVEL);
359 412
360 if (this_cpu_has(X86_FEATURE_PTS)) { 413 if (this_cpu_has(X86_FEATURE_PTS)) {
361 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); 414 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
415 /* check violations of package thermal thresholds */
416 notify_package_thresholds(msr_val);
362 therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, 417 therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
363 THERMAL_THROTTLING_EVENT, 418 THERMAL_THROTTLING_EVENT,
364 PACKAGE_LEVEL); 419 PACKAGE_LEVEL);
365 if (this_cpu_has(X86_FEATURE_PLN)) 420 if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
366 therm_throt_process(msr_val & 421 therm_throt_process(msr_val &
367 PACKAGE_THERM_STATUS_POWER_LIMIT, 422 PACKAGE_THERM_STATUS_POWER_LIMIT,
368 POWER_LIMIT_EVENT, 423 POWER_LIMIT_EVENT,
@@ -378,15 +433,26 @@ static void unexpected_thermal_interrupt(void)
378 433
379static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; 434static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
380 435
381asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) 436static inline void __smp_thermal_interrupt(void)
382{ 437{
383 irq_enter();
384 exit_idle();
385 inc_irq_stat(irq_thermal_count); 438 inc_irq_stat(irq_thermal_count);
386 smp_thermal_vector(); 439 smp_thermal_vector();
387 irq_exit(); 440}
388 /* Ack only at the end to avoid potential reentry */ 441
389 ack_APIC_irq(); 442asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
443{
444 entering_irq();
445 __smp_thermal_interrupt();
446 exiting_ack_irq();
447}
448
449asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
450{
451 entering_irq();
452 trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
453 __smp_thermal_interrupt();
454 trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
455 exiting_ack_irq();
390} 456}
391 457
392/* Thermal monitoring depends on APIC, ACPI and clock modulation */ 458/* Thermal monitoring depends on APIC, ACPI and clock modulation */
@@ -470,9 +536,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
470 apic_write(APIC_LVTTHMR, h); 536 apic_write(APIC_LVTTHMR, h);
471 537
472 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); 538 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
473 if (cpu_has(c, X86_FEATURE_PLN)) 539 if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
540 wrmsr(MSR_IA32_THERM_INTERRUPT,
541 (l | (THERM_INT_LOW_ENABLE
542 | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
543 else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
474 wrmsr(MSR_IA32_THERM_INTERRUPT, 544 wrmsr(MSR_IA32_THERM_INTERRUPT,
475 l | (THERM_INT_LOW_ENABLE 545 l | (THERM_INT_LOW_ENABLE
476 | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); 546 | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
477 else 547 else
478 wrmsr(MSR_IA32_THERM_INTERRUPT, 548 wrmsr(MSR_IA32_THERM_INTERRUPT,
@@ -480,9 +550,14 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
480 550
481 if (cpu_has(c, X86_FEATURE_PTS)) { 551 if (cpu_has(c, X86_FEATURE_PTS)) {
482 rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 552 rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
483 if (cpu_has(c, X86_FEATURE_PLN)) 553 if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
484 wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, 554 wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
485 l | (PACKAGE_THERM_INT_LOW_ENABLE 555 (l | (PACKAGE_THERM_INT_LOW_ENABLE
556 | PACKAGE_THERM_INT_HIGH_ENABLE))
557 & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
558 else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
559 wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
560 l | (PACKAGE_THERM_INT_LOW_ENABLE
486 | PACKAGE_THERM_INT_HIGH_ENABLE 561 | PACKAGE_THERM_INT_HIGH_ENABLE
487 | PACKAGE_THERM_INT_PLN_ENABLE), h); 562 | PACKAGE_THERM_INT_PLN_ENABLE), h);
488 else 563 else
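
thresh_event_valid() above now keeps separate core and package state, but the rate limiting itself is unchanged: at most one notification per CHECK_INTERVAL per threshold. A minimal userspace sketch of that check (illustration only; the struct and the seconds-based clock are simplifications, the kernel tracks jiffies):

/* thresh_rate_limit_demo.c - sketch of the CHECK_INTERVAL rate limiting */
#include <stdio.h>
#include <stdint.h>

#define CHECK_INTERVAL 300        /* seconds here; the kernel uses 300*HZ jiffies */

struct thresh_state {
        uint64_t next_check;      /* earliest time the next event is accepted */
};

/* Mirror of thresh_event_valid(): accept at most one event per interval. */
static int thresh_event_valid(struct thresh_state *state, uint64_t now)
{
        if (now < state->next_check)
                return 0;

        state->next_check = now + CHECK_INTERVAL;
        return 1;
}

int main(void)
{
        struct thresh_state pkg_thresh0 = { 0 };
        uint64_t t;

        for (t = 0; t <= 600; t += 100)
                printf("t=%3llu: event %s\n", (unsigned long long)t,
                       thresh_event_valid(&pkg_thresh0, t) ? "delivered" : "dropped");

        return 0;
}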
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index aa578cadb940..fe6b1c86645b 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -8,6 +8,7 @@
8#include <asm/apic.h> 8#include <asm/apic.h>
9#include <asm/idle.h> 9#include <asm/idle.h>
10#include <asm/mce.h> 10#include <asm/mce.h>
11#include <asm/trace/irq_vectors.h>
11 12
12static void default_threshold_interrupt(void) 13static void default_threshold_interrupt(void)
13{ 14{
@@ -17,13 +18,24 @@ static void default_threshold_interrupt(void)
17 18
18void (*mce_threshold_vector)(void) = default_threshold_interrupt; 19void (*mce_threshold_vector)(void) = default_threshold_interrupt;
19 20
20asmlinkage void smp_threshold_interrupt(void) 21static inline void __smp_threshold_interrupt(void)
21{ 22{
22 irq_enter();
23 exit_idle();
24 inc_irq_stat(irq_threshold_count); 23 inc_irq_stat(irq_threshold_count);
25 mce_threshold_vector(); 24 mce_threshold_vector();
26 irq_exit(); 25}
27 /* Ack only at the end to avoid potential reentry */ 26
28 ack_APIC_irq(); 27asmlinkage void smp_threshold_interrupt(void)
28{
29 entering_irq();
30 __smp_threshold_interrupt();
31 exiting_ack_irq();
32}
33
34asmlinkage void smp_trace_threshold_interrupt(void)
35{
36 entering_irq();
37 trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
38 __smp_threshold_interrupt();
39 trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
40 exiting_ack_irq();
29} 41}
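
Both the thermal and the threshold handlers are reshaped the same way: the body moves into an inline __smp_*_interrupt() helper, and two thin entry points differ only in the tracepoints they fire. A standalone sketch of that pattern (illustration only; the kernel helpers are replaced by printf stubs and the vector value is the one from irq_vectors.h):

/* irq_wrapper_demo.c - shared body with traced and untraced entry points */
#include <stdio.h>

static void entering_irq(void)      { printf("irq_enter + exit_idle\n"); }
static void exiting_ack_irq(void)   { printf("ack APIC, irq_exit\n"); }
static void trace_entry(int vector) { printf("trace entry, vector %#x\n", vector); }
static void trace_exit(int vector)  { printf("trace exit, vector %#x\n", vector); }

#define THRESHOLD_APIC_VECTOR 0xf9  /* value borrowed from irq_vectors.h */

/* The real work lives in one place ... */
static inline void __smp_threshold_interrupt(void)
{
        printf("count the interrupt, run mce_threshold_vector()\n");
}

/* ... and the two entry points only differ in the tracepoints around it. */
static void smp_threshold_interrupt(void)
{
        entering_irq();
        __smp_threshold_interrupt();
        exiting_ack_irq();
}

static void smp_trace_threshold_interrupt(void)
{
        entering_irq();
        trace_entry(THRESHOLD_APIC_VECTOR);
        __smp_threshold_interrupt();
        trace_exit(THRESHOLD_APIC_VECTOR);
        exiting_ack_irq();
}

int main(void)
{
        smp_threshold_interrupt();
        smp_trace_threshold_interrupt();
        return 0;
}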
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 68a3343e5798..9e451b0876b5 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -167,7 +167,7 @@ static void post_set(void)
167 setCx86(CX86_CCR3, ccr3); 167 setCx86(CX86_CCR3, ccr3);
168 168
169 /* Enable caches */ 169 /* Enable caches */
170 write_cr0(read_cr0() & 0xbfffffff); 170 write_cr0(read_cr0() & ~X86_CR0_CD);
171 171
172 /* Restore value of CR4 */ 172 /* Restore value of CR4 */
173 if (cpu_has_pge) 173 if (cpu_has_pge)
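
The hard-coded 0xbfffffff is simply the 32-bit image of CR0 with only the cache-disable bit cleared, so switching to ~X86_CR0_CD changes nothing at run time. A quick check of the equivalence (illustration only; the CR0 value is arbitrary):

/* cr0_cd_demo.c - show that ~X86_CR0_CD equals 0xbfffffff for a 32-bit CR0 */
#include <stdio.h>
#include <stdint.h>

#define X86_CR0_CD (1u << 30)   /* cache disable, bit 30 of CR0 */

int main(void)
{
        uint32_t cr0 = 0xdeadbeef;                 /* any CR0 image */

        printf("~X86_CR0_CD        = %#x\n", ~X86_CR0_CD);
        printf("old mask           = %#x\n", 0xbfffffffu);
        printf("cr0 & ~X86_CR0_CD  = %#x\n", cr0 & ~X86_CR0_CD);
        printf("cr0 & 0xbfffffff   = %#x\n", cr0 & 0xbfffffffu);
        return 0;
}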
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fa72a39e5d46..d4cdfa67509e 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -510,8 +510,9 @@ generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
510static void generic_get_mtrr(unsigned int reg, unsigned long *base, 510static void generic_get_mtrr(unsigned int reg, unsigned long *base,
511 unsigned long *size, mtrr_type *type) 511 unsigned long *size, mtrr_type *type)
512{ 512{
513 unsigned int mask_lo, mask_hi, base_lo, base_hi; 513 u32 mask_lo, mask_hi, base_lo, base_hi;
514 unsigned int tmp, hi; 514 unsigned int hi;
515 u64 tmp, mask;
515 516
516 /* 517 /*
517 * get_mtrr doesn't need to update mtrr_state, also it could be called 518 * get_mtrr doesn't need to update mtrr_state, also it could be called
@@ -532,18 +533,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
532 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); 533 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
533 534
534 /* Work out the shifted address mask: */ 535 /* Work out the shifted address mask: */
535 tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; 536 tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
536 mask_lo = size_or_mask | tmp; 537 mask = size_or_mask | tmp;
537 538
538 /* Expand tmp with high bits to all 1s: */ 539 /* Expand tmp with high bits to all 1s: */
539 hi = fls(tmp); 540 hi = fls64(tmp);
540 if (hi > 0) { 541 if (hi > 0) {
541 tmp |= ~((1<<(hi - 1)) - 1); 542 tmp |= ~((1ULL<<(hi - 1)) - 1);
542 543
543 if (tmp != mask_lo) { 544 if (tmp != mask) {
544 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); 545 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
545 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 546 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
546 mask_lo = tmp; 547 mask = tmp;
547 } 548 }
548 } 549 }
549 550
@@ -551,8 +552,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
551 * This works correctly if size is a power of two, i.e. a 552 * This works correctly if size is a power of two, i.e. a
552 * contiguous range: 553 * contiguous range:
553 */ 554 */
554 *size = -mask_lo; 555 *size = -mask;
555 *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; 556 *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
556 *type = base_lo & 0xff; 557 *type = base_lo & 0xff;
557 558
558out_put_cpu: 559out_put_cpu:
@@ -701,7 +702,7 @@ static void post_set(void) __releases(set_atomicity_lock)
701 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); 702 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
702 703
703 /* Enable caches */ 704 /* Enable caches */
704 write_cr0(read_cr0() & 0xbfffffff); 705 write_cr0(read_cr0() & ~X86_CR0_CD);
705 706
706 /* Restore value of CR4 */ 707 /* Restore value of CR4 */
707 if (cpu_has_pge) 708 if (cpu_has_pge)
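
generic_get_mtrr() now does the mask arithmetic in 64 bits, because shifting mask_hi inside a 32-bit temporary drops any mask bits at or above physical address bit 44. A before/after comparison of the two computations (illustration only; the 46-bit address width and the 2 MB region are made-up inputs, PAGE_SHIFT is assumed to be 12):

/* mtrr_mask_demo.c - why the MTRR mask math had to move to u64 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
        int phys_bits = 46;                       /* hypothetical CPU */
        uint64_t size = 2ULL << 20;               /* 2 MB MTRR region */
        uint64_t mask_msr = ~(size - 1) & ((1ULL << phys_bits) - 1);

        uint32_t mask_lo = (uint32_t)mask_msr;
        uint32_t mask_hi = (uint32_t)(mask_msr >> 32);

        /* old 32-bit computation: high bits of mask_hi are shifted out */
        uint32_t tmp32 = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;

        /* new 64-bit computation keeps them */
        uint64_t tmp64 = (uint64_t)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;

        printf("mask MSR        = %#llx\n", (unsigned long long)mask_msr);
        printf("32-bit shifted  = %#x   (truncated)\n", tmp32);
        printf("64-bit shifted  = %#llx\n", (unsigned long long)tmp64);
        return 0;
}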
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 726bf963c227..f961de9964c7 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -51,9 +51,13 @@
51#include <asm/e820.h> 51#include <asm/e820.h>
52#include <asm/mtrr.h> 52#include <asm/mtrr.h>
53#include <asm/msr.h> 53#include <asm/msr.h>
54#include <asm/pat.h>
54 55
55#include "mtrr.h" 56#include "mtrr.h"
56 57
58/* arch_phys_wc_add returns an MTRR register index plus this offset. */
59#define MTRR_TO_PHYS_WC_OFFSET 1000
60
57u32 num_var_ranges; 61u32 num_var_ranges;
58 62
59unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; 63unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
@@ -305,7 +309,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
305 return -EINVAL; 309 return -EINVAL;
306 } 310 }
307 311
308 if (base & size_or_mask || size & size_or_mask) { 312 if ((base | (base + size - 1)) >>
313 (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
309 pr_warning("mtrr: base or size exceeds the MTRR width\n"); 314 pr_warning("mtrr: base or size exceeds the MTRR width\n");
310 return -EINVAL; 315 return -EINVAL;
311 } 316 }
@@ -524,6 +529,73 @@ int mtrr_del(int reg, unsigned long base, unsigned long size)
524} 529}
525EXPORT_SYMBOL(mtrr_del); 530EXPORT_SYMBOL(mtrr_del);
526 531
532/**
533 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
534 * @base: Physical base address
535 * @size: Size of region
536 *
537 * If PAT is available, this does nothing. If PAT is unavailable, it
538 * attempts to add a WC MTRR covering size bytes starting at base and
539 * logs an error if this fails.
540 *
541 * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
542 * but drivers should not try to interpret that return value.
543 */
544int arch_phys_wc_add(unsigned long base, unsigned long size)
545{
546 int ret;
547
548 if (pat_enabled)
549 return 0; /* Success! (We don't need to do anything.) */
550
551 ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
552 if (ret < 0) {
553 pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
554 (void *)base, (void *)(base + size - 1));
555 return ret;
556 }
557 return ret + MTRR_TO_PHYS_WC_OFFSET;
558}
559EXPORT_SYMBOL(arch_phys_wc_add);
560
561/*
562 * arch_phys_wc_del - undoes arch_phys_wc_add
563 * @handle: Return value from arch_phys_wc_add
564 *
565 * This cleans up after mtrr_add_wc_if_needed.
566 *
567 * The API guarantees that mtrr_del_wc_if_needed(error code) and
568 * mtrr_del_wc_if_needed(0) do nothing.
569 */
570void arch_phys_wc_del(int handle)
571{
572 if (handle >= 1) {
573 WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
574 mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
575 }
576}
577EXPORT_SYMBOL(arch_phys_wc_del);
578
579/*
580 * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
581 * @handle: Return value from arch_phys_wc_add
582 *
583 * This will turn the return value from arch_phys_wc_add into an mtrr
584 * index suitable for debugging.
585 *
586 * Note: There is no legitimate use for this function, except possibly
587 * in printk line. Alas there is an illegitimate use in some ancient
588 * drm ioctls.
589 */
590int phys_wc_to_mtrr_index(int handle)
591{
592 if (handle < MTRR_TO_PHYS_WC_OFFSET)
593 return -1;
594 else
595 return handle - MTRR_TO_PHYS_WC_OFFSET;
596}
597EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
598
527/* 599/*
528 * HACK ALERT! 600 * HACK ALERT!
529 * These should be called implicitly, but we can't yet until all the initcall 601 * These should be called implicitly, but we can't yet until all the initcall
@@ -583,6 +655,7 @@ static struct syscore_ops mtrr_syscore_ops = {
583 655
584int __initdata changed_by_mtrr_cleanup; 656int __initdata changed_by_mtrr_cleanup;
585 657
658#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
586/** 659/**
587 * mtrr_bp_init - initialize mtrrs on the boot CPU 660 * mtrr_bp_init - initialize mtrrs on the boot CPU
588 * 661 *
@@ -600,7 +673,7 @@ void __init mtrr_bp_init(void)
600 673
601 if (cpu_has_mtrr) { 674 if (cpu_has_mtrr) {
602 mtrr_if = &generic_mtrr_ops; 675 mtrr_if = &generic_mtrr_ops;
603 size_or_mask = 0xff000000; /* 36 bits */ 676 size_or_mask = SIZE_OR_MASK_BITS(36);
604 size_and_mask = 0x00f00000; 677 size_and_mask = 0x00f00000;
605 phys_addr = 36; 678 phys_addr = 36;
606 679
@@ -619,7 +692,7 @@ void __init mtrr_bp_init(void)
619 boot_cpu_data.x86_mask == 0x4)) 692 boot_cpu_data.x86_mask == 0x4))
620 phys_addr = 36; 693 phys_addr = 36;
621 694
622 size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); 695 size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
623 size_and_mask = ~size_or_mask & 0xfffff00000ULL; 696 size_and_mask = ~size_or_mask & 0xfffff00000ULL;
624 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && 697 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
625 boot_cpu_data.x86 == 6) { 698 boot_cpu_data.x86 == 6) {
@@ -627,7 +700,7 @@ void __init mtrr_bp_init(void)
627 * VIA C* family have Intel style MTRRs, 700 * VIA C* family have Intel style MTRRs,
628 * but don't support PAE 701 * but don't support PAE
629 */ 702 */
630 size_or_mask = 0xfff00000; /* 32 bits */ 703 size_or_mask = SIZE_OR_MASK_BITS(32);
631 size_and_mask = 0; 704 size_and_mask = 0;
632 phys_addr = 32; 705 phys_addr = 32;
633 } 706 }
@@ -637,21 +710,21 @@ void __init mtrr_bp_init(void)
637 if (cpu_has_k6_mtrr) { 710 if (cpu_has_k6_mtrr) {
638 /* Pre-Athlon (K6) AMD CPU MTRRs */ 711 /* Pre-Athlon (K6) AMD CPU MTRRs */
639 mtrr_if = mtrr_ops[X86_VENDOR_AMD]; 712 mtrr_if = mtrr_ops[X86_VENDOR_AMD];
640 size_or_mask = 0xfff00000; /* 32 bits */ 713 size_or_mask = SIZE_OR_MASK_BITS(32);
641 size_and_mask = 0; 714 size_and_mask = 0;
642 } 715 }
643 break; 716 break;
644 case X86_VENDOR_CENTAUR: 717 case X86_VENDOR_CENTAUR:
645 if (cpu_has_centaur_mcr) { 718 if (cpu_has_centaur_mcr) {
646 mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; 719 mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
647 size_or_mask = 0xfff00000; /* 32 bits */ 720 size_or_mask = SIZE_OR_MASK_BITS(32);
648 size_and_mask = 0; 721 size_and_mask = 0;
649 } 722 }
650 break; 723 break;
651 case X86_VENDOR_CYRIX: 724 case X86_VENDOR_CYRIX:
652 if (cpu_has_cyrix_arr) { 725 if (cpu_has_cyrix_arr) {
653 mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; 726 mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
654 size_or_mask = 0xfff00000; /* 32 bits */ 727 size_or_mask = SIZE_OR_MASK_BITS(32);
655 size_and_mask = 0; 728 size_and_mask = 0;
656 } 729 }
657 break; 730 break;
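
The SIZE_OR_MASK_BITS() macro replaces the hard-coded 32-bit constants in mtrr_bp_init(). Its low 32 bits match the old values; the difference is that every bit above the address width is now set in the 64-bit size_or_mask. A quick look at what the macro expands to (illustration only; PAGE_SHIFT assumed to be 12):

/* size_or_mask_demo.c - values produced by SIZE_OR_MASK_BITS() */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))

int main(void)
{
        printf("old 36-bit constant  : %#llx\n", 0xff000000ULL);
        printf("SIZE_OR_MASK_BITS(36): %#llx\n",
               (unsigned long long)SIZE_OR_MASK_BITS(36));
        printf("old 32-bit constant  : %#llx\n", 0xfff00000ULL);
        printf("SIZE_OR_MASK_BITS(32): %#llx\n",
               (unsigned long long)SIZE_OR_MASK_BITS(32));
        return 0;
}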
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d20..a7c7305030cc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
403 * check that PEBS LBR correction does not conflict with 403 * check that PEBS LBR correction does not conflict with
404 * whatever the user is asking with attr->branch_sample_type 404 * whatever the user is asking with attr->branch_sample_type
405 */ 405 */
406 if (event->attr.precise_ip > 1) { 406 if (event->attr.precise_ip > 1 &&
407 x86_pmu.intel_cap.pebs_format < 2) {
407 u64 *br_type = &event->attr.branch_sample_type; 408 u64 *br_type = &event->attr.branch_sample_type;
408 409
409 if (has_branch_stack(event)) { 410 if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
568struct perf_sched { 569struct perf_sched {
569 int max_weight; 570 int max_weight;
570 int max_events; 571 int max_events;
571 struct event_constraint **constraints; 572 struct perf_event **events;
572 struct sched_state state; 573 struct sched_state state;
573 int saved_states; 574 int saved_states;
574 struct sched_state saved[SCHED_STATES_MAX]; 575 struct sched_state saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
577/* 578/*
 578 * Initialize iterator that runs through all events and counters. 579 * Initialize iterator that runs through all events and counters.
579 */ 580 */
580static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, 581static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
581 int num, int wmin, int wmax) 582 int num, int wmin, int wmax)
582{ 583{
583 int idx; 584 int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
585 memset(sched, 0, sizeof(*sched)); 586 memset(sched, 0, sizeof(*sched));
586 sched->max_events = num; 587 sched->max_events = num;
587 sched->max_weight = wmax; 588 sched->max_weight = wmax;
588 sched->constraints = c; 589 sched->events = events;
589 590
590 for (idx = 0; idx < num; idx++) { 591 for (idx = 0; idx < num; idx++) {
591 if (c[idx]->weight == wmin) 592 if (events[idx]->hw.constraint->weight == wmin)
592 break; 593 break;
593 } 594 }
594 595
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
635 if (sched->state.event >= sched->max_events) 636 if (sched->state.event >= sched->max_events)
636 return false; 637 return false;
637 638
638 c = sched->constraints[sched->state.event]; 639 c = sched->events[sched->state.event]->hw.constraint;
639
640 /* Prefer fixed purpose counters */ 640 /* Prefer fixed purpose counters */
641 if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { 641 if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
642 idx = INTEL_PMC_IDX_FIXED; 642 idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
694 if (sched->state.weight > sched->max_weight) 694 if (sched->state.weight > sched->max_weight)
695 return false; 695 return false;
696 } 696 }
697 c = sched->constraints[sched->state.event]; 697 c = sched->events[sched->state.event]->hw.constraint;
698 } while (c->weight != sched->state.weight); 698 } while (c->weight != sched->state.weight);
699 699
700 sched->state.counter = 0; /* start with first counter */ 700 sched->state.counter = 0; /* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
705/* 705/*
706 * Assign a counter for each event. 706 * Assign a counter for each event.
707 */ 707 */
708int perf_assign_events(struct event_constraint **constraints, int n, 708int perf_assign_events(struct perf_event **events, int n,
709 int wmin, int wmax, int *assign) 709 int wmin, int wmax, int *assign)
710{ 710{
711 struct perf_sched sched; 711 struct perf_sched sched;
712 712
713 perf_sched_init(&sched, constraints, n, wmin, wmax); 713 perf_sched_init(&sched, events, n, wmin, wmax);
714 714
715 do { 715 do {
716 if (!perf_sched_find_counter(&sched)) 716 if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
724 724
725int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) 725int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
726{ 726{
727 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; 727 struct event_constraint *c;
728 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 728 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
729 struct perf_event *e;
729 int i, wmin, wmax, num = 0; 730 int i, wmin, wmax, num = 0;
730 struct hw_perf_event *hwc; 731 struct hw_perf_event *hwc;
731 732
732 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 733 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
733 734
734 for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { 735 for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
736 hwc = &cpuc->event_list[i]->hw;
735 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); 737 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
736 constraints[i] = c; 738 hwc->constraint = c;
739
737 wmin = min(wmin, c->weight); 740 wmin = min(wmin, c->weight);
738 wmax = max(wmax, c->weight); 741 wmax = max(wmax, c->weight);
739 } 742 }
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
743 */ 746 */
744 for (i = 0; i < n; i++) { 747 for (i = 0; i < n; i++) {
745 hwc = &cpuc->event_list[i]->hw; 748 hwc = &cpuc->event_list[i]->hw;
746 c = constraints[i]; 749 c = hwc->constraint;
747 750
748 /* never assigned */ 751 /* never assigned */
749 if (hwc->idx == -1) 752 if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
764 767
765 /* slow path */ 768 /* slow path */
766 if (i != n) 769 if (i != n)
767 num = perf_assign_events(constraints, n, wmin, wmax, assign); 770 num = perf_assign_events(cpuc->event_list, n, wmin,
771 wmax, assign);
768 772
769 /* 773 /*
774 * Mark the event as committed, so we do not put_constraint()
775 * in case new events are added and fail scheduling.
776 */
777 if (!num && assign) {
778 for (i = 0; i < n; i++) {
779 e = cpuc->event_list[i];
780 e->hw.flags |= PERF_X86_EVENT_COMMITTED;
781 }
782 }
783 /*
770 * scheduling failed or is just a simulation, 784 * scheduling failed or is just a simulation,
771 * free resources if necessary 785 * free resources if necessary
772 */ 786 */
773 if (!assign || num) { 787 if (!assign || num) {
774 for (i = 0; i < n; i++) { 788 for (i = 0; i < n; i++) {
789 e = cpuc->event_list[i];
790 /*
791 * do not put_constraint() on comitted events,
 791 * do not put_constraint() on committed events,
792 * because they are good to go
793 */
794 if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
795 continue;
796
775 if (x86_pmu.put_event_constraints) 797 if (x86_pmu.put_event_constraints)
776 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); 798 x86_pmu.put_event_constraints(cpuc, e);
777 } 799 }
778 } 800 }
779 return num ? -EINVAL : 0; 801 return num ? -EINVAL : 0;
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
1153 int i; 1175 int i;
1154 1176
1155 /* 1177 /*
1178 * event is descheduled
1179 */
1180 event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
1181
1182 /*
1156 * If we're called during a txn, we don't need to do anything. 1183 * If we're called during a txn, we don't need to do anything.
1157 * The events never got scheduled and ->cancel_txn will truncate 1184 * The events never got scheduled and ->cancel_txn will truncate
1158 * the event_list. 1185 * the event_list.
@@ -1249,16 +1276,26 @@ void perf_events_lapic_init(void)
1249static int __kprobes 1276static int __kprobes
1250perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) 1277perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1251{ 1278{
1279 int ret;
1280 u64 start_clock;
1281 u64 finish_clock;
1282
1252 if (!atomic_read(&active_events)) 1283 if (!atomic_read(&active_events))
1253 return NMI_DONE; 1284 return NMI_DONE;
1254 1285
1255 return x86_pmu.handle_irq(regs); 1286 start_clock = local_clock();
1287 ret = x86_pmu.handle_irq(regs);
1288 finish_clock = local_clock();
1289
1290 perf_sample_event_took(finish_clock - start_clock);
1291
1292 return ret;
1256} 1293}
1257 1294
1258struct event_constraint emptyconstraint; 1295struct event_constraint emptyconstraint;
1259struct event_constraint unconstrained; 1296struct event_constraint unconstrained;
1260 1297
1261static int __cpuinit 1298static int
1262x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) 1299x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1263{ 1300{
1264 unsigned int cpu = (long)hcpu; 1301 unsigned int cpu = (long)hcpu;
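
perf_event_nmi_handler() now brackets x86_pmu.handle_irq() with local_clock() and reports the elapsed time through perf_sample_event_took(), which the core uses to throttle the sampling rate when NMIs get too expensive. A userspace sketch of that timing pattern (illustration only; the clock, the fake handler and the stub reporting function are all substitutes for the kernel interfaces):

/* nmi_timing_demo.c - measure how long a handler ran, as the NMI path now does */
#define _POSIX_C_SOURCE 199309L
#include <stdio.h>
#include <stdint.h>
#include <time.h>

static uint64_t local_clock_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Stand-in for x86_pmu.handle_irq(): burn a little CPU time. */
static int handle_irq(void)
{
        volatile unsigned long i, sum = 0;

        for (i = 0; i < 1000000; i++)
                sum += i;
        return (int)(sum & 1);
}

/* Stand-in for perf_sample_event_took(): the kernel feeds this number into
 * its sample-rate throttling logic. */
static void perf_sample_event_took(uint64_t ns)
{
        printf("handler took %llu ns\n", (unsigned long long)ns);
}

int main(void)
{
        uint64_t start_clock = local_clock_ns();
        int ret = handle_irq();
        uint64_t finish_clock = local_clock_ns();

        perf_sample_event_took(finish_clock - start_clock);
        return ret;
}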
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683b..97e557bc4c91 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
63 int flags; 63 int flags;
64}; 64};
65/* 65/*
66 * struct event_constraint flags 66 * struct hw_perf_event.flags flags
67 */ 67 */
68#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ 68#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
69#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ 69#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
70#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */
71#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
70 72
71struct amd_nb { 73struct amd_nb {
72 int nb_id; /* NorthBridge id */ 74 int nb_id; /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
227 * - inv 229 * - inv
228 * - edge 230 * - edge
229 * - cnt-mask 231 * - cnt-mask
232 * - in_tx
233 * - in_tx_checkpointed
230 * The other filters are supported by fixed counters. 234 * The other filters are supported by fixed counters.
231 * The any-thread option is supported starting with v3. 235 * The any-thread option is supported starting with v3.
232 */ 236 */
237#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
233#define FIXED_EVENT_CONSTRAINT(c, n) \ 238#define FIXED_EVENT_CONSTRAINT(c, n) \
234 EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) 239 EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
235 240
236/* 241/*
237 * Constraint on the Event code + UMask 242 * Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
247 __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ 252 __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
248 HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) 253 HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
249 254
255/* DataLA version of store sampling without extra enable bit. */
256#define INTEL_PST_HSW_CONSTRAINT(c, n) \
257 __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
258 HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
259
250#define EVENT_CONSTRAINT_END \ 260#define EVENT_CONSTRAINT_END \
251 EVENT_CONSTRAINT(0, 0, 0) 261 EVENT_CONSTRAINT(0, 0, 0)
252 262
@@ -301,6 +311,11 @@ union perf_capabilities {
301 u64 pebs_arch_reg:1; 311 u64 pebs_arch_reg:1;
302 u64 pebs_format:4; 312 u64 pebs_format:4;
303 u64 smm_freeze:1; 313 u64 smm_freeze:1;
314 /*
315 * PMU supports separate counter range for writing
316 * values > 32bit.
317 */
318 u64 full_width_write:1;
304 }; 319 };
305 u64 capabilities; 320 u64 capabilities;
306}; 321};
@@ -375,6 +390,7 @@ struct x86_pmu {
375 struct event_constraint *event_constraints; 390 struct event_constraint *event_constraints;
376 struct x86_pmu_quirk *quirks; 391 struct x86_pmu_quirk *quirks;
377 int perfctr_second_write; 392 int perfctr_second_write;
393 bool late_ack;
378 394
379 /* 395 /*
380 * sysfs attrs 396 * sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
528 544
529void x86_pmu_enable_all(int added); 545void x86_pmu_enable_all(int added);
530 546
531int perf_assign_events(struct event_constraint **constraints, int n, 547int perf_assign_events(struct perf_event **events, int n,
532 int wmin, int wmax, int *assign); 548 int wmin, int wmax, int *assign);
533int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); 549int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
534 550
@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
633 649
634extern struct event_constraint intel_ivb_pebs_event_constraints[]; 650extern struct event_constraint intel_ivb_pebs_event_constraints[];
635 651
652extern struct event_constraint intel_hsw_pebs_event_constraints[];
653
636struct event_constraint *intel_pebs_constraints(struct perf_event *event); 654struct event_constraint *intel_pebs_constraints(struct perf_event *event);
637 655
638void intel_pmu_pebs_enable(struct perf_event *event); 656void intel_pmu_pebs_enable(struct perf_event *event);
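
PERF_X86_EVENT_COMMITTED is a plain bit in hw_perf_event.flags: x86_schedule_events() sets it once an event has a counter, x86_pmu_del() clears it, and the failure path skips put_constraint() for events that already carry the bit. A trivial standalone check of that flag handling (illustration only; the struct is a stand-in for hw_perf_event):

/* event_flags_demo.c - the per-event hw flag bits defined above */
#include <stdio.h>

#define PERF_X86_EVENT_PEBS_LDLAT       0x1
#define PERF_X86_EVENT_PEBS_ST          0x2
#define PERF_X86_EVENT_PEBS_ST_HSW      0x4
#define PERF_X86_EVENT_COMMITTED        0x8

struct hw_event { int flags; };

int main(void)
{
        struct hw_event hw = { .flags = PERF_X86_EVENT_PEBS_ST_HSW };

        /* x86_schedule_events() marks an event once it has a counter ... */
        hw.flags |= PERF_X86_EVENT_COMMITTED;
        printf("committed? %s\n",
               hw.flags & PERF_X86_EVENT_COMMITTED ? "yes" : "no");

        /* ... and x86_pmu_del() clears the mark when it is descheduled. */
        hw.flags &= ~PERF_X86_EVENT_COMMITTED;
        printf("committed? %s\n",
               hw.flags & PERF_X86_EVENT_COMMITTED ? "yes" : "no");
        return 0;
}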
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d9467bb4..4cbe03287b08 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
648 .cpu_dead = amd_pmu_cpu_dead, 648 .cpu_dead = amd_pmu_cpu_dead,
649}; 649};
650 650
651static int setup_event_constraints(void) 651static int __init amd_core_pmu_init(void)
652{ 652{
653 if (boot_cpu_data.x86 == 0x15) 653 if (!cpu_has_perfctr_core)
654 return 0;
655
656 switch (boot_cpu_data.x86) {
657 case 0x15:
658 pr_cont("Fam15h ");
654 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; 659 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
655 return 0; 660 break;
656}
657 661
658static int setup_perfctr_core(void) 662 default:
659{ 663 pr_err("core perfctr but no constraints; unknown hardware!\n");
660 if (!cpu_has_perfctr_core) {
661 WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
662 KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
663 return -ENODEV; 664 return -ENODEV;
664 } 665 }
665 666
666 WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
667 KERN_ERR "hw perf events core counters need constraints handler!");
668
669 /* 667 /*
 670 * If core performance counter extensions exist, we must use 668 * If core performance counter extensions exist, we must use
671 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also 669 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
672 * x86_pmu_addr_offset(). 670 * amd_pmu_addr_offset().
673 */ 671 */
674 x86_pmu.eventsel = MSR_F15H_PERF_CTL; 672 x86_pmu.eventsel = MSR_F15H_PERF_CTL;
675 x86_pmu.perfctr = MSR_F15H_PERF_CTR; 673 x86_pmu.perfctr = MSR_F15H_PERF_CTR;
676 x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; 674 x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
677 675
678 printk(KERN_INFO "perf: AMD core performance counters detected\n"); 676 pr_cont("core perfctr, ");
679
680 return 0; 677 return 0;
681} 678}
682 679
683__init int amd_pmu_init(void) 680__init int amd_pmu_init(void)
684{ 681{
682 int ret;
683
685 /* Performance-monitoring supported from K7 and later: */ 684 /* Performance-monitoring supported from K7 and later: */
686 if (boot_cpu_data.x86 < 6) 685 if (boot_cpu_data.x86 < 6)
687 return -ENODEV; 686 return -ENODEV;
688 687
689 x86_pmu = amd_pmu; 688 x86_pmu = amd_pmu;
690 689
691 setup_event_constraints(); 690 ret = amd_core_pmu_init();
692 setup_perfctr_core(); 691 if (ret)
692 return ret;
693 693
694 /* Events are common for all AMDs */ 694 /* Events are common for all AMDs */
695 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 695 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
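
amd_core_pmu_init() folds the old setup_event_constraints()/setup_perfctr_core() pair into one capability-gated path: return early without core counters, pick family-specific constraints, then switch to the F15H counter MSRs. A standalone sketch of that flow (illustration only; the MSR constants are copied from msr-index.h and the struct is a stand-in for struct x86_pmu):

/* amd_core_init_demo.c - sketch of the consolidated core-PMU init flow */
#include <stdio.h>

struct pmu_config {
        unsigned int eventsel, perfctr, num_counters;
};

#define MSR_K7_EVNTSEL0      0xc0010000u
#define MSR_K7_PERFCTR0      0xc0010004u
#define MSR_F15H_PERF_CTL    0xc0010200u
#define MSR_F15H_PERF_CTR    0xc0010201u

static int amd_core_pmu_init(struct pmu_config *pmu, int family, int has_perfctr_core)
{
        if (!has_perfctr_core)
                return 0;                     /* keep the legacy K7 layout */

        switch (family) {
        case 0x15:
                printf("Fam15h ");            /* family-specific constraints */
                break;
        default:
                printf("core perfctr but no constraints; unknown hardware!\n");
                return -1;
        }

        /* core extensions present: switch to the F15H counter MSRs */
        pmu->eventsel = MSR_F15H_PERF_CTL;
        pmu->perfctr  = MSR_F15H_PERF_CTR;
        pmu->num_counters = 6;                /* AMD64_NUM_COUNTERS_CORE */
        printf("core perfctr, ");
        return 0;
}

int main(void)
{
        struct pmu_config pmu = { MSR_K7_EVNTSEL0, MSR_K7_PERFCTR0, 4 };

        if (amd_core_pmu_init(&pmu, 0x15, 1) == 0)
                printf("\neventsel=%#x perfctr=%#x counters=%u\n",
                       pmu.eventsel, pmu.perfctr, pmu.num_counters);
        return 0;
}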
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 5f0581e713c2..e09f0bfb7b8f 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -851,7 +851,7 @@ static void clear_APIC_ibs(void *dummy)
851 setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); 851 setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
852} 852}
853 853
854static int __cpuinit 854static int
855perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) 855perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
856{ 856{
857 switch (action & ~CPU_TASKS_FROZEN) { 857 switch (action & ~CPU_TASKS_FROZEN) {
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 000000000000..639d1289b1ba
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,502 @@
1/*
2 * Copyright (C) 2013 Advanced Micro Devices, Inc.
3 *
4 * Author: Steven Kinney <Steven.Kinney@amd.com>
5 * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
6 *
7 * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/perf_event.h>
15#include <linux/module.h>
16#include <linux/cpumask.h>
17#include <linux/slab.h>
18
19#include "perf_event.h"
20#include "perf_event_amd_iommu.h"
21
22#define COUNTER_SHIFT 16
23
24#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
25#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
26
27/* iommu pmu config masks */
28#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
29#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
30#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
31#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
32#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
33#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
34#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
35
36static struct perf_amd_iommu __perf_iommu;
37
38struct perf_amd_iommu {
39 struct pmu pmu;
40 u8 max_banks;
41 u8 max_counters;
42 u64 cntr_assign_mask;
43 raw_spinlock_t lock;
44 const struct attribute_group *attr_groups[4];
45};
46
47#define format_group attr_groups[0]
48#define cpumask_group attr_groups[1]
49#define events_group attr_groups[2]
50#define null_group attr_groups[3]
51
52/*---------------------------------------------
53 * sysfs format attributes
54 *---------------------------------------------*/
55PMU_FORMAT_ATTR(csource, "config:0-7");
56PMU_FORMAT_ATTR(devid, "config:8-23");
57PMU_FORMAT_ATTR(pasid, "config:24-39");
58PMU_FORMAT_ATTR(domid, "config:40-55");
59PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
60PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
61PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
62
63static struct attribute *iommu_format_attrs[] = {
64 &format_attr_csource.attr,
65 &format_attr_devid.attr,
66 &format_attr_pasid.attr,
67 &format_attr_domid.attr,
68 &format_attr_devid_mask.attr,
69 &format_attr_pasid_mask.attr,
70 &format_attr_domid_mask.attr,
71 NULL,
72};
73
74static struct attribute_group amd_iommu_format_group = {
75 .name = "format",
76 .attrs = iommu_format_attrs,
77};
78
79/*---------------------------------------------
80 * sysfs events attributes
81 *---------------------------------------------*/
82struct amd_iommu_event_desc {
83 struct kobj_attribute attr;
84 const char *event;
85};
86
87static ssize_t _iommu_event_show(struct kobject *kobj,
88 struct kobj_attribute *attr, char *buf)
89{
90 struct amd_iommu_event_desc *event =
91 container_of(attr, struct amd_iommu_event_desc, attr);
92 return sprintf(buf, "%s\n", event->event);
93}
94
95#define AMD_IOMMU_EVENT_DESC(_name, _event) \
96{ \
97 .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \
98 .event = _event, \
99}
100
101static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
102 AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"),
103 AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"),
104 AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"),
105 AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"),
106 AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"),
107 AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
108 AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
109 AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
110 AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
111 AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"),
112 AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"),
113 AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"),
114 AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"),
115 AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"),
116 AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"),
117 AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"),
118 AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
119 AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
120 AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
121 { /* end: all zeroes */ },
122};
123
124/*---------------------------------------------
125 * sysfs cpumask attributes
126 *---------------------------------------------*/
127static cpumask_t iommu_cpumask;
128
129static ssize_t _iommu_cpumask_show(struct device *dev,
130 struct device_attribute *attr,
131 char *buf)
132{
133 int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
134 buf[n++] = '\n';
135 buf[n] = '\0';
136 return n;
137}
138static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
139
140static struct attribute *iommu_cpumask_attrs[] = {
141 &dev_attr_cpumask.attr,
142 NULL,
143};
144
145static struct attribute_group amd_iommu_cpumask_group = {
146 .attrs = iommu_cpumask_attrs,
147};
148
149/*---------------------------------------------*/
150
151static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
152{
153 unsigned long flags;
154 int shift, bank, cntr, retval;
155 int max_banks = perf_iommu->max_banks;
156 int max_cntrs = perf_iommu->max_counters;
157
158 raw_spin_lock_irqsave(&perf_iommu->lock, flags);
159
160 for (bank = 0, shift = 0; bank < max_banks; bank++) {
161 for (cntr = 0; cntr < max_cntrs; cntr++) {
162 shift = bank + (bank*3) + cntr;
163 if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
164 continue;
165 } else {
166 perf_iommu->cntr_assign_mask |= (1ULL<<shift);
167 retval = ((u16)((u16)bank<<8) | (u8)(cntr));
168 goto out;
169 }
170 }
171 }
172 retval = -ENOSPC;
173out:
174 raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
175 return retval;
176}
177
178static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
179 u8 bank, u8 cntr)
180{
181 unsigned long flags;
182 int max_banks, max_cntrs;
183 int shift = 0;
184
185 max_banks = perf_iommu->max_banks;
186 max_cntrs = perf_iommu->max_counters;
187
188 if ((bank > max_banks) || (cntr > max_cntrs))
189 return -EINVAL;
190
191 shift = bank + cntr + (bank*3);
192
193 raw_spin_lock_irqsave(&perf_iommu->lock, flags);
194 perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
195 raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
196
197 return 0;
198}
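 
/*
 * Illustration only, not part of the commit: the two helpers above pack the
 * allocation into plain integers.  Bit (bank*4 + cntr) of cntr_assign_mask
 * marks a slot busy, and the handle returned to the caller keeps the bank in
 * bits 8-15 and the counter in bits 0-7, matching _GET_BANK()/_GET_CNTR().
 * A standalone check of that encoding:
 */
#include <stdio.h>

int main(void)
{
        int bank = 2, cntr = 3;

        int shift  = bank + (bank * 3) + cntr;   /* == bank*4 + cntr == 11 */
        int handle = (bank << 8) | cntr;         /* 0x203                  */

        printf("shift=%d handle=%#x bank=%d cntr=%d\n",
               shift, handle, (handle >> 8) & 0xff, handle & 0xff);
        return 0;
}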
199
200static int perf_iommu_event_init(struct perf_event *event)
201{
202 struct hw_perf_event *hwc = &event->hw;
203 struct perf_amd_iommu *perf_iommu;
204 u64 config, config1;
205
206 /* test the event attr type check for PMU enumeration */
207 if (event->attr.type != event->pmu->type)
208 return -ENOENT;
209
210 /*
211 * IOMMU counters are shared across all cores.
212 * Therefore, it does not support per-process mode.
213 * Also, it does not support event sampling mode.
214 */
215 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
216 return -EINVAL;
217
218 /* IOMMU counters do not have usr/os/guest/host bits */
219 if (event->attr.exclude_user || event->attr.exclude_kernel ||
220 event->attr.exclude_host || event->attr.exclude_guest)
221 return -EINVAL;
222
223 if (event->cpu < 0)
224 return -EINVAL;
225
226 perf_iommu = &__perf_iommu;
227
228 if (event->pmu != &perf_iommu->pmu)
229 return -ENOENT;
230
231 if (perf_iommu) {
232 config = event->attr.config;
233 config1 = event->attr.config1;
234 } else {
235 return -EINVAL;
236 }
237
238 /* integrate with iommu base devid (0000), assume one iommu */
239 perf_iommu->max_banks =
240 amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
241 perf_iommu->max_counters =
242 amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
243 if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
244 return -EINVAL;
245
246 /* update the hw_perf_event struct with the iommu config data */
247 hwc->config = config;
248 hwc->extra_reg.config = config1;
249
250 return 0;
251}
252
253static void perf_iommu_enable_event(struct perf_event *ev)
254{
255 u8 csource = _GET_CSOURCE(ev);
256 u16 devid = _GET_DEVID(ev);
257 u64 reg = 0ULL;
258
259 reg = csource;
260 amd_iommu_pc_get_set_reg_val(devid,
261 _GET_BANK(ev), _GET_CNTR(ev) ,
262 IOMMU_PC_COUNTER_SRC_REG, &reg, true);
263
264 reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
265 if (reg)
266 reg |= (1UL << 31);
267 amd_iommu_pc_get_set_reg_val(devid,
268 _GET_BANK(ev), _GET_CNTR(ev) ,
269 IOMMU_PC_DEVID_MATCH_REG, &reg, true);
270
271 reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
272 if (reg)
273 reg |= (1UL << 31);
274 amd_iommu_pc_get_set_reg_val(devid,
275 _GET_BANK(ev), _GET_CNTR(ev) ,
276 IOMMU_PC_PASID_MATCH_REG, &reg, true);
277
278 reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
279 if (reg)
280 reg |= (1UL << 31);
281 amd_iommu_pc_get_set_reg_val(devid,
282 _GET_BANK(ev), _GET_CNTR(ev) ,
283 IOMMU_PC_DOMID_MATCH_REG, &reg, true);
284}
285
286static void perf_iommu_disable_event(struct perf_event *event)
287{
288 u64 reg = 0ULL;
289
290 amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
291 _GET_BANK(event), _GET_CNTR(event),
292 IOMMU_PC_COUNTER_SRC_REG, &reg, true);
293}
294
295static void perf_iommu_start(struct perf_event *event, int flags)
296{
297 struct hw_perf_event *hwc = &event->hw;
298
299 pr_debug("perf: amd_iommu:perf_iommu_start\n");
300 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
301 return;
302
303 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
304 hwc->state = 0;
305
306 if (flags & PERF_EF_RELOAD) {
307 u64 prev_raw_count = local64_read(&hwc->prev_count);
308 amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
309 _GET_BANK(event), _GET_CNTR(event),
310 IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
311 }
312
313 perf_iommu_enable_event(event);
314 perf_event_update_userpage(event);
315
316}
317
318static void perf_iommu_read(struct perf_event *event)
319{
320 u64 count = 0ULL;
321 u64 prev_raw_count = 0ULL;
322 u64 delta = 0ULL;
323 struct hw_perf_event *hwc = &event->hw;
324 pr_debug("perf: amd_iommu:perf_iommu_read\n");
325
326 amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
327 _GET_BANK(event), _GET_CNTR(event),
328 IOMMU_PC_COUNTER_REG, &count, false);
329
330 /* IOMMU pc counter register is only 48 bits */
331 count &= 0xFFFFFFFFFFFFULL;
332
333 prev_raw_count = local64_read(&hwc->prev_count);
334 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
335 count) != prev_raw_count)
336 return;
337
338 /* Handling 48-bit counter overflowing */
339 delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
340 delta >>= COUNTER_SHIFT;
341 local64_add(delta, &event->count);
342
343}
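
The delta computation above does the subtraction in the top 48 bits of a u64, so a rollover of the 48-bit hardware counter still yields the correct (small) difference. A standalone sketch of the arithmetic; COUNTER_SHIFT is assumed here to be 16 (64 - 48), its real definition is earlier in this file and not shown in this hunk:

#include <stdio.h>
#include <stdint.h>

#define COUNTER_SHIFT 16	/* assumed: 64 - 48 */

static uint64_t delta48(uint64_t prev, uint64_t now)
{
	/* once both values sit in the upper bits the subtraction wraps
	 * modulo 2^48, so a counter rollover falls out naturally */
	return ((now << COUNTER_SHIFT) - (prev << COUNTER_SHIFT)) >> COUNTER_SHIFT;
}

int main(void)
{
	uint64_t prev = 0xFFFFFFFFFFF0ULL;	/* near the 48-bit limit */
	uint64_t now  = 0x000000000010ULL;	/* counter has wrapped */

	printf("delta = %llu\n", (unsigned long long)delta48(prev, now));	/* 32 */
	return 0;
}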
344
345static void perf_iommu_stop(struct perf_event *event, int flags)
346{
347 struct hw_perf_event *hwc = &event->hw;
348 u64 config;
349
350 pr_debug("perf: amd_iommu:perf_iommu_stop\n");
351
352 if (hwc->state & PERF_HES_UPTODATE)
353 return;
354
355 perf_iommu_disable_event(event);
356 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
357 hwc->state |= PERF_HES_STOPPED;
358
359 if (hwc->state & PERF_HES_UPTODATE)
360 return;
361
362 config = hwc->config;
363 perf_iommu_read(event);
364 hwc->state |= PERF_HES_UPTODATE;
365}
366
367static int perf_iommu_add(struct perf_event *event, int flags)
368{
369 int retval;
370 struct perf_amd_iommu *perf_iommu =
371 container_of(event->pmu, struct perf_amd_iommu, pmu);
372
373 pr_debug("perf: amd_iommu:perf_iommu_add\n");
374 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
375
376 /* request an iommu bank/counter */
377 retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
378 if (retval != -ENOSPC)
379 event->hw.extra_reg.reg = (u16)retval;
380 else
381 return retval;
382
383 if (flags & PERF_EF_START)
384 perf_iommu_start(event, PERF_EF_RELOAD);
385
386 return 0;
387}
388
389static void perf_iommu_del(struct perf_event *event, int flags)
390{
391 struct perf_amd_iommu *perf_iommu =
392 container_of(event->pmu, struct perf_amd_iommu, pmu);
393
394 pr_debug("perf: amd_iommu:perf_iommu_del\n");
395 perf_iommu_stop(event, PERF_EF_UPDATE);
396
397 /* clear the assigned iommu bank/counter */
398 clear_avail_iommu_bnk_cntr(perf_iommu,
399 _GET_BANK(event),
400 _GET_CNTR(event));
401
402 perf_event_update_userpage(event);
403}
404
405static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
406{
407 struct attribute **attrs;
408 struct attribute_group *attr_group;
409 int i = 0, j;
410
411 while (amd_iommu_v2_event_descs[i].attr.attr.name)
412 i++;
413
414 attr_group = kzalloc(sizeof(struct attribute *)
415 * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
416 if (!attr_group)
417 return -ENOMEM;
418
419 attrs = (struct attribute **)(attr_group + 1);
420 for (j = 0; j < i; j++)
421 attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
422
423 attr_group->name = "events";
424 attr_group->attrs = attrs;
425 perf_iommu->events_group = attr_group;
426
427 return 0;
428}
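
_init_events_attrs() uses a single allocation to hold the attribute_group and, directly behind it, the NULL-terminated pointer array handed to sysfs; the zeroing allocation provides the terminating NULL for free, and one kfree() in amd_iommu_pc_exit() releases both. A user-space sketch of the same layout, with calloc/free standing in for kzalloc/kfree, simplified stand-in structures, and made-up attribute names:

#include <stdio.h>
#include <stdlib.h>

/* simplified stand-ins for the kernel structures */
struct attribute { const char *name; };
struct attribute_group {
	const char *name;
	struct attribute **attrs;
};

static struct attribute_group *make_group(struct attribute *descs, int n)
{
	struct attribute_group *grp;
	struct attribute **attrs;
	int i;

	grp = calloc(1, sizeof(*grp) + (n + 1) * sizeof(struct attribute *));
	if (!grp)
		return NULL;

	attrs = (struct attribute **)(grp + 1);	/* array sits right after the group */
	for (i = 0; i < n; i++)
		attrs[i] = &descs[i];
	/* attrs[n] is already NULL thanks to the zeroed allocation */

	grp->name = "events";
	grp->attrs = attrs;
	return grp;
}

int main(void)
{
	struct attribute descs[] = { { "example_evt0" }, { "example_evt1" } };
	struct attribute_group *grp = make_group(descs, 2);

	if (grp) {
		printf("%s: %s, %s\n", grp->name,
		       grp->attrs[0]->name, grp->attrs[1]->name);
		free(grp);	/* one allocation, one free */
	}
	return 0;
}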
429
430static __init void amd_iommu_pc_exit(void)
431{
432 if (__perf_iommu.events_group != NULL) {
433 kfree(__perf_iommu.events_group);
434 __perf_iommu.events_group = NULL;
435 }
436}
437
438static __init int _init_perf_amd_iommu(
439 struct perf_amd_iommu *perf_iommu, char *name)
440{
441 int ret;
442
443 raw_spin_lock_init(&perf_iommu->lock);
444
445 /* Init format attributes */
446 perf_iommu->format_group = &amd_iommu_format_group;
447
448 /* Init cpumask attributes to only core 0 */
449 cpumask_set_cpu(0, &iommu_cpumask);
450 perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
451
452 /* Init events attributes */
453 if (_init_events_attrs(perf_iommu) != 0)
454 pr_err("perf: amd_iommu: Only raw events are supported.\n");
455
456 /* Init null attributes */
457 perf_iommu->null_group = NULL;
458 perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
459
460 ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
461 if (ret) {
462 pr_err("perf: amd_iommu: Failed to initialize.\n");
463 amd_iommu_pc_exit();
464 } else {
465 pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
466 amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
467 amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
468 }
469
470 return ret;
471}
472
473static struct perf_amd_iommu __perf_iommu = {
474 .pmu = {
475 .event_init = perf_iommu_event_init,
476 .add = perf_iommu_add,
477 .del = perf_iommu_del,
478 .start = perf_iommu_start,
479 .stop = perf_iommu_stop,
480 .read = perf_iommu_read,
481 },
482 .max_banks = 0x00,
483 .max_counters = 0x00,
484 .cntr_assign_mask = 0ULL,
485 .format_group = NULL,
486 .cpumask_group = NULL,
487 .events_group = NULL,
488 .null_group = NULL,
489};
490
491static __init int amd_iommu_pc_init(void)
492{
493 /* Make sure the IOMMU PC resource is available */
494 if (!amd_iommu_pc_supported())
495 return -ENODEV;
496
497 _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
498
499 return 0;
500}
501
502device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
new file mode 100644
index 000000000000..845d173278e3
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
@@ -0,0 +1,40 @@
1/*
2 * Copyright (C) 2013 Advanced Micro Devices, Inc.
3 *
4 * Author: Steven Kinney <Steven.Kinney@amd.com>
5 * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#ifndef _PERF_EVENT_AMD_IOMMU_H_
13#define _PERF_EVENT_AMD_IOMMU_H_
14
15/* iommu pc mmio region register indexes */
16#define IOMMU_PC_COUNTER_REG 0x00
17#define IOMMU_PC_COUNTER_SRC_REG 0x08
18#define IOMMU_PC_PASID_MATCH_REG 0x10
19#define IOMMU_PC_DOMID_MATCH_REG 0x18
20#define IOMMU_PC_DEVID_MATCH_REG 0x20
21#define IOMMU_PC_COUNTER_REPORT_REG 0x28
22
23/* maximum specified banks/counters */
24#define PC_MAX_SPEC_BNKS 64
25#define PC_MAX_SPEC_CNTRS 16
26
27/* iommu pc reg masks */
28#define IOMMU_BASE_DEVID 0x0000
29
30/* amd_iommu_init.c external support functions */
31extern bool amd_iommu_pc_supported(void);
32
33extern u8 amd_iommu_pc_get_max_banks(u16 devid);
34
35extern u8 amd_iommu_pc_get_max_counters(u16 devid);
36
37extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
38 u8 fxn, u64 *value, bool is_write);
39
40#endif /* _PERF_EVENT_AMD_IOMMU_H_ */
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index c0c661adf03e..754291adec33 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -288,13 +288,13 @@ static struct pmu amd_l2_pmu = {
288 .read = amd_uncore_read, 288 .read = amd_uncore_read,
289}; 289};
290 290
291static struct amd_uncore * __cpuinit amd_uncore_alloc(unsigned int cpu) 291static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
292{ 292{
293 return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL, 293 return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
294 cpu_to_node(cpu)); 294 cpu_to_node(cpu));
295} 295}
296 296
297static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu) 297static void amd_uncore_cpu_up_prepare(unsigned int cpu)
298{ 298{
299 struct amd_uncore *uncore; 299 struct amd_uncore *uncore;
300 300
@@ -322,8 +322,8 @@ static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu)
322} 322}
323 323
324static struct amd_uncore * 324static struct amd_uncore *
325__cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this, 325amd_uncore_find_online_sibling(struct amd_uncore *this,
326 struct amd_uncore * __percpu *uncores) 326 struct amd_uncore * __percpu *uncores)
327{ 327{
328 unsigned int cpu; 328 unsigned int cpu;
329 struct amd_uncore *that; 329 struct amd_uncore *that;
@@ -348,7 +348,7 @@ __cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this,
348 return this; 348 return this;
349} 349}
350 350
351static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu) 351static void amd_uncore_cpu_starting(unsigned int cpu)
352{ 352{
353 unsigned int eax, ebx, ecx, edx; 353 unsigned int eax, ebx, ecx, edx;
354 struct amd_uncore *uncore; 354 struct amd_uncore *uncore;
@@ -376,8 +376,8 @@ static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu)
376 } 376 }
377} 377}
378 378
379static void __cpuinit uncore_online(unsigned int cpu, 379static void uncore_online(unsigned int cpu,
380 struct amd_uncore * __percpu *uncores) 380 struct amd_uncore * __percpu *uncores)
381{ 381{
382 struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); 382 struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
383 383
@@ -388,7 +388,7 @@ static void __cpuinit uncore_online(unsigned int cpu,
388 cpumask_set_cpu(cpu, uncore->active_mask); 388 cpumask_set_cpu(cpu, uncore->active_mask);
389} 389}
390 390
391static void __cpuinit amd_uncore_cpu_online(unsigned int cpu) 391static void amd_uncore_cpu_online(unsigned int cpu)
392{ 392{
393 if (amd_uncore_nb) 393 if (amd_uncore_nb)
394 uncore_online(cpu, amd_uncore_nb); 394 uncore_online(cpu, amd_uncore_nb);
@@ -397,8 +397,8 @@ static void __cpuinit amd_uncore_cpu_online(unsigned int cpu)
397 uncore_online(cpu, amd_uncore_l2); 397 uncore_online(cpu, amd_uncore_l2);
398} 398}
399 399
400static void __cpuinit uncore_down_prepare(unsigned int cpu, 400static void uncore_down_prepare(unsigned int cpu,
401 struct amd_uncore * __percpu *uncores) 401 struct amd_uncore * __percpu *uncores)
402{ 402{
403 unsigned int i; 403 unsigned int i;
404 struct amd_uncore *this = *per_cpu_ptr(uncores, cpu); 404 struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
@@ -423,7 +423,7 @@ static void __cpuinit uncore_down_prepare(unsigned int cpu,
423 } 423 }
424} 424}
425 425
426static void __cpuinit amd_uncore_cpu_down_prepare(unsigned int cpu) 426static void amd_uncore_cpu_down_prepare(unsigned int cpu)
427{ 427{
428 if (amd_uncore_nb) 428 if (amd_uncore_nb)
429 uncore_down_prepare(cpu, amd_uncore_nb); 429 uncore_down_prepare(cpu, amd_uncore_nb);
@@ -432,8 +432,7 @@ static void __cpuinit amd_uncore_cpu_down_prepare(unsigned int cpu)
432 uncore_down_prepare(cpu, amd_uncore_l2); 432 uncore_down_prepare(cpu, amd_uncore_l2);
433} 433}
434 434
435static void __cpuinit uncore_dead(unsigned int cpu, 435static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
436 struct amd_uncore * __percpu *uncores)
437{ 436{
438 struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); 437 struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
439 438
@@ -445,7 +444,7 @@ static void __cpuinit uncore_dead(unsigned int cpu,
445 *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; 444 *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
446} 445}
447 446
448static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu) 447static void amd_uncore_cpu_dead(unsigned int cpu)
449{ 448{
450 if (amd_uncore_nb) 449 if (amd_uncore_nb)
451 uncore_dead(cpu, amd_uncore_nb); 450 uncore_dead(cpu, amd_uncore_nb);
@@ -454,7 +453,7 @@ static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu)
454 uncore_dead(cpu, amd_uncore_l2); 453 uncore_dead(cpu, amd_uncore_l2);
455} 454}
456 455
457static int __cpuinit 456static int
458amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, 457amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
459 void *hcpu) 458 void *hcpu)
460{ 459{
@@ -489,7 +488,7 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
489 return NOTIFY_OK; 488 return NOTIFY_OK;
490} 489}
491 490
492static struct notifier_block amd_uncore_cpu_notifier_block __cpuinitdata = { 491static struct notifier_block amd_uncore_cpu_notifier_block = {
493 .notifier_call = amd_uncore_cpu_notifier, 492 .notifier_call = amd_uncore_cpu_notifier,
494 .priority = CPU_PRI_PERF + 1, 493 .priority = CPU_PRI_PERF + 1,
495}; 494};
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a9e22073bd56..a45d8d4ace10 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/export.h> 14#include <linux/export.h>
15 15
16#include <asm/cpufeature.h>
16#include <asm/hardirq.h> 17#include <asm/hardirq.h>
17#include <asm/apic.h> 18#include <asm/apic.h>
18 19
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
190 NULL, 191 NULL,
191}; 192};
192 193
194static struct event_constraint intel_hsw_event_constraints[] = {
195 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
196 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
197 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
198 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
199 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
200 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
201 /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
202 INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
203 /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
204 INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
205 /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
206 INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
207 EVENT_CONSTRAINT_END
208};
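
In these constraint entries the second argument is a bitmask of the general-purpose counters the event may be scheduled on: 0xf means counters 0-3, 0x8 pins MEM_TRANS_RETIRED.LOAD_LATENCY to counter 3, and 0x4 pins L1D_PEND_MISS to counter 2. A quick illustrative decode; the helper below is not kernel code:

#include <stdio.h>

static void show_allowed(const char *name, unsigned long idxmsk)
{
	int i;

	printf("%-40s:", name);
	for (i = 0; i < 4; i++)
		if (idxmsk & (1UL << i))
			printf(" cnt%d", i);
	printf("\n");
}

int main(void)
{
	show_allowed("L1D_PEND_MISS.* (0x48)", 0x4);
	show_allowed("MEM_TRANS_RETIRED.LOAD_LATENCY (0xcd)", 0x8);
	show_allowed("CYCLE_ACTIVITY.CYCLES_NO_EXECUTE (0x04a3)", 0xf);
	return 0;
}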
209
193static u64 intel_pmu_event_map(int hw_event) 210static u64 intel_pmu_event_map(int hw_event)
194{ 211{
195 return intel_perfmon_event_map[hw_event]; 212 return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
872 return true; 889 return true;
873 890
874 /* implicit branch sampling to correct PEBS skid */ 891 /* implicit branch sampling to correct PEBS skid */
875 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) 892 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
893 x86_pmu.intel_cap.pebs_format < 2)
876 return true; 894 return true;
877 895
878 return false; 896 return false;
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1167 cpuc = &__get_cpu_var(cpu_hw_events); 1185 cpuc = &__get_cpu_var(cpu_hw_events);
1168 1186
1169 /* 1187 /*
1170 * Some chipsets need to unmask the LVTPC in a particular spot 1188 * There is no known reason not to always do the late ACK,
1171 * inside the nmi handler. As a result, the unmasking was pushed 1189 * but keep it opt-in just in case.
1172 * into all the nmi handlers.
1173 *
1174 * This handler doesn't seem to have any issues with the unmasking
1175 * so it was left at the top.
1176 */ 1190 */
1177 apic_write(APIC_LVTPC, APIC_DM_NMI); 1191 if (!x86_pmu.late_ack)
1178 1192 apic_write(APIC_LVTPC, APIC_DM_NMI);
1179 intel_pmu_disable_all(); 1193 intel_pmu_disable_all();
1180 handled = intel_pmu_drain_bts_buffer(); 1194 handled = intel_pmu_drain_bts_buffer();
1181 status = intel_pmu_get_status(); 1195 status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1188again: 1202again:
1189 intel_pmu_ack_status(status); 1203 intel_pmu_ack_status(status);
1190 if (++loops > 100) { 1204 if (++loops > 100) {
1191 WARN_ONCE(1, "perfevents: irq loop stuck!\n"); 1205 static bool warned = false;
1192 perf_event_print_debug(); 1206 if (!warned) {
1207 WARN(1, "perfevents: irq loop stuck!\n");
1208 perf_event_print_debug();
1209 warned = true;
1210 }
1193 intel_pmu_reset(); 1211 intel_pmu_reset();
1194 goto done; 1212 goto done;
1195 } 1213 }
@@ -1235,6 +1253,13 @@ again:
1235 1253
1236done: 1254done:
1237 intel_pmu_enable_all(0); 1255 intel_pmu_enable_all(0);
1256 /*
1257 * Only unmask the NMI after the overflow counters
1258 * have been reset. This avoids spurious NMIs on
1259 * Haswell CPUs.
1260 */
1261 if (x86_pmu.late_ack)
1262 apic_write(APIC_LVTPC, APIC_DM_NMI);
1238 return handled; 1263 return handled;
1239} 1264}
1240 1265
@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1425 if (x86_pmu.event_constraints) { 1450 if (x86_pmu.event_constraints) {
1426 for_each_event_constraint(c, x86_pmu.event_constraints) { 1451 for_each_event_constraint(c, x86_pmu.event_constraints) {
1427 if ((event->hw.config & c->cmask) == c->code) { 1452 if ((event->hw.config & c->cmask) == c->code) {
1428 /* hw.flags zeroed at initialization */
1429 event->hw.flags |= c->flags; 1453 event->hw.flags |= c->flags;
1430 return c; 1454 return c;
1431 } 1455 }
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1473static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 1497static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1474 struct perf_event *event) 1498 struct perf_event *event)
1475{ 1499{
1476 event->hw.flags = 0;
1477 intel_put_shared_regs_event_constraints(cpuc, event); 1500 intel_put_shared_regs_event_constraints(cpuc, event);
1478} 1501}
1479 1502
@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
1646 } 1669 }
1647} 1670}
1648 1671
1672static int hsw_hw_config(struct perf_event *event)
1673{
1674 int ret = intel_pmu_hw_config(event);
1675
1676 if (ret)
1677 return ret;
1678 if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
1679 return 0;
1680 event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
1681
1682 /*
1683 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
1684 * PEBS or in ANY thread mode. Since the results are nonsensical,
1685 * forbid this combination.
1686 */
1687 if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
1688 ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
1689 event->attr.precise_ip > 0))
1690 return -EOPNOTSUPP;
1691
1692 return 0;
1693}
1694
1695static struct event_constraint counter2_constraint =
1696 EVENT_CONSTRAINT(0, 0x4, 0);
1697
1698static struct event_constraint *
1699hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1700{
1701 struct event_constraint *c = intel_get_event_constraints(cpuc, event);
1702
1703 /* Handle special quirk on in_tx_checkpointed only in counter 2 */
1704 if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
1705 if (c->idxmsk64 & (1U << 2))
1706 return &counter2_constraint;
1707 return &emptyconstraint;
1708 }
1709
1710 return c;
1711}
1712
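
From user space, the TSX qualifiers handled by hsw_hw_config() are requested through raw config bits 32 and 33, matching the in_tx/in_tx_cp format attributes added below. A minimal sketch; the event code 0x3c (unhalted core cycles) is just an example, and on CPUs without RTM/HLE the bits are ignored per the feature check above:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x3c			/* event 0x3c, umask 0 */
		    | (1ULL << 32);		/* in_tx: count only inside transactions */
	/* (1ULL << 33) would request in_tx_cp, which the quirk above
	 * restricts to counter 2 and refuses together with .any or precise_ip */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");
	else
		close(fd);
	return 0;
}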
1649PMU_FORMAT_ATTR(event, "config:0-7" ); 1713PMU_FORMAT_ATTR(event, "config:0-7" );
1650PMU_FORMAT_ATTR(umask, "config:8-15" ); 1714PMU_FORMAT_ATTR(umask, "config:8-15" );
1651PMU_FORMAT_ATTR(edge, "config:18" ); 1715PMU_FORMAT_ATTR(edge, "config:18" );
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19" );
1653PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ 1717PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
1654PMU_FORMAT_ATTR(inv, "config:23" ); 1718PMU_FORMAT_ATTR(inv, "config:23" );
1655PMU_FORMAT_ATTR(cmask, "config:24-31" ); 1719PMU_FORMAT_ATTR(cmask, "config:24-31" );
1720PMU_FORMAT_ATTR(in_tx, "config:32");
1721PMU_FORMAT_ATTR(in_tx_cp, "config:33");
1656 1722
1657static struct attribute *intel_arch_formats_attr[] = { 1723static struct attribute *intel_arch_formats_attr[] = {
1658 &format_attr_event.attr, 1724 &format_attr_event.attr,
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
1807 &format_attr_any.attr, 1873 &format_attr_any.attr,
1808 &format_attr_inv.attr, 1874 &format_attr_inv.attr,
1809 &format_attr_cmask.attr, 1875 &format_attr_cmask.attr,
1876 &format_attr_in_tx.attr,
1877 &format_attr_in_tx_cp.attr,
1810 1878
1811 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ 1879 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
1812 &format_attr_ldlat.attr, /* PEBS load latency */ 1880 &format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
1966 } 2034 }
1967} 2035}
1968 2036
2037EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
2038EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
2039
2040static struct attribute *hsw_events_attrs[] = {
2041 EVENT_PTR(mem_ld_hsw),
2042 EVENT_PTR(mem_st_hsw),
2043 NULL
2044};
2045
1969__init int intel_pmu_init(void) 2046__init int intel_pmu_init(void)
1970{ 2047{
1971 union cpuid10_edx edx; 2048 union cpuid10_edx edx;
@@ -2189,6 +2266,31 @@ __init int intel_pmu_init(void)
2189 break; 2266 break;
2190 2267
2191 2268
2269 case 60: /* Haswell Client */
2270 case 70:
2271 case 71:
2272 case 63:
2273 case 69:
2274 x86_pmu.late_ack = true;
2275 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
2276 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
2277
2278 intel_pmu_lbr_init_snb();
2279
2280 x86_pmu.event_constraints = intel_hsw_event_constraints;
2281 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
2282 x86_pmu.extra_regs = intel_snb_extra_regs;
2283 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2284 /* all extra regs are per-cpu when HT is on */
2285 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2286 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2287
2288 x86_pmu.hw_config = hsw_hw_config;
2289 x86_pmu.get_event_constraints = hsw_get_event_constraints;
2290 x86_pmu.cpu_events = hsw_events_attrs;
2291 pr_cont("Haswell events, ");
2292 break;
2293
2192 default: 2294 default:
2193 switch (x86_pmu.version) { 2295 switch (x86_pmu.version) {
2194 case 1: 2296 case 1:
@@ -2227,7 +2329,7 @@ __init int intel_pmu_init(void)
2227 * counter, so do not extend mask to generic counters 2329 * counter, so do not extend mask to generic counters
2228 */ 2330 */
2229 for_each_event_constraint(c, x86_pmu.event_constraints) { 2331 for_each_event_constraint(c, x86_pmu.event_constraints) {
2230 if (c->cmask != X86_RAW_EVENT_MASK 2332 if (c->cmask != FIXED_EVENT_FLAGS
2231 || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { 2333 || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
2232 continue; 2334 continue;
2233 } 2335 }
@@ -2237,5 +2339,12 @@ __init int intel_pmu_init(void)
2237 } 2339 }
2238 } 2340 }
2239 2341
2342 /* Support full width counters using alternative MSR range */
2343 if (x86_pmu.intel_cap.full_width_write) {
2344 x86_pmu.max_period = x86_pmu.cntval_mask;
2345 x86_pmu.perfctr = MSR_IA32_PMC0;
2346 pr_cont("full-width counters, ");
2347 }
2348
2240 return 0; 2349 return 0;
2241} 2350}
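
A rough user-space probe for the capability the full-width block above keys on: IA32_PERF_CAPABILITIES (MSR 0x345) carries the full-width-write flag, assumed here to be bit 13 per the SDM rather than taken from this patch. The sketch needs the msr driver loaded and root privileges:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t caps;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &caps, sizeof(caps), 0x345) != sizeof(caps)) {
		perror("rdmsr 0x345");
		return 1;
	}
	printf("full_width_write: %s\n", (caps >> 13) & 1 ? "yes" : "no");
	close(fd);
	return 0;
}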
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 60250f687052..3065c57a63c1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
107 return val; 107 return val;
108} 108}
109 109
110static u64 precise_store_data_hsw(u64 status)
111{
112 union perf_mem_data_src dse;
113
114 dse.val = 0;
115 dse.mem_op = PERF_MEM_OP_STORE;
116 dse.mem_lvl = PERF_MEM_LVL_NA;
117 if (status & 1)
118 dse.mem_lvl = PERF_MEM_LVL_L1;
119 /* Nothing else supported. Sorry. */
120 return dse.val;
121}
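
On this PMU a PEBS store sample therefore carries only two possible data sources: a store that hit L1, or a store whose level is unknown. A user-space sketch of decoding the resulting data_src value with the uapi union:

#include <stdio.h>
#include <linux/perf_event.h>

static void decode(unsigned long long val)
{
	union perf_mem_data_src dse = { .val = val };

	printf("%s, %s\n",
	       (dse.mem_op & PERF_MEM_OP_STORE) ? "store" : "other op",
	       (dse.mem_lvl & PERF_MEM_LVL_L1)  ? "L1" : "level n/a");
}

int main(void)
{
	union perf_mem_data_src hit = { 0 }, unknown = { 0 };

	hit.mem_op      = PERF_MEM_OP_STORE;
	hit.mem_lvl     = PERF_MEM_LVL_L1;
	unknown.mem_op  = PERF_MEM_OP_STORE;
	unknown.mem_lvl = PERF_MEM_LVL_NA;

	decode(hit.val);
	decode(unknown.val);
	return 0;
}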
122
110static u64 load_latency_data(u64 status) 123static u64 load_latency_data(u64 status)
111{ 124{
112 union intel_x86_pebs_dse dse; 125 union intel_x86_pebs_dse dse;
@@ -165,6 +178,22 @@ struct pebs_record_nhm {
165 u64 status, dla, dse, lat; 178 u64 status, dla, dse, lat;
166}; 179};
167 180
181/*
182 * Same as pebs_record_nhm, with two additional fields.
183 */
184struct pebs_record_hsw {
185 struct pebs_record_nhm nhm;
186 /*
187 * Real IP of the event. In the Intel documentation this
188 * is called eventingrip.
189 */
190 u64 real_ip;
191 /*
192 * TSX tuning information field: abort cycles and abort flags.
193 */
194 u64 tsx_tuning;
195};
196
168void init_debug_store_on_cpu(int cpu) 197void init_debug_store_on_cpu(int cpu)
169{ 198{
170 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 199 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
548 EVENT_CONSTRAINT_END 577 EVENT_CONSTRAINT_END
549}; 578};
550 579
580struct event_constraint intel_hsw_pebs_event_constraints[] = {
581 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
582 INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
583 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
584 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
585 INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
586 INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
587 INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
588 INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
589 /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
590 INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
591 /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
592 INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
593 INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
594 INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
595 /* MEM_UOPS_RETIRED.SPLIT_STORES */
596 INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
597 INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
598 INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
599 INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
600 INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
601 INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
602 /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
603 INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
604 /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
605 INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
606 /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
607 INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
608 /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
609 INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
610 INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
611 INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
612
613 EVENT_CONSTRAINT_END
614};
615
551struct event_constraint *intel_pebs_constraints(struct perf_event *event) 616struct event_constraint *intel_pebs_constraints(struct perf_event *event)
552{ 617{
553 struct event_constraint *c; 618 struct event_constraint *c;
@@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
588 struct hw_perf_event *hwc = &event->hw; 653 struct hw_perf_event *hwc = &event->hw;
589 654
590 cpuc->pebs_enabled &= ~(1ULL << hwc->idx); 655 cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
656
657 if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
658 cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
659 else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
660 cpuc->pebs_enabled &= ~(1ULL << 63);
661
591 if (cpuc->enabled) 662 if (cpuc->enabled)
592 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 663 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
593 664
@@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
697 */ 768 */
698 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 769 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
699 struct pebs_record_nhm *pebs = __pebs; 770 struct pebs_record_nhm *pebs = __pebs;
771 struct pebs_record_hsw *pebs_hsw = __pebs;
700 struct perf_sample_data data; 772 struct perf_sample_data data;
701 struct pt_regs regs; 773 struct pt_regs regs;
702 u64 sample_type; 774 u64 sample_type;
@@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
706 return; 778 return;
707 779
708 fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; 780 fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
709 fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST; 781 fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
782 PERF_X86_EVENT_PEBS_ST_HSW);
710 783
711 perf_sample_data_init(&data, 0, event->hw.last_period); 784 perf_sample_data_init(&data, 0, event->hw.last_period);
712 785
@@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
717 * if PEBS-LL or PreciseStore 790 * if PEBS-LL or PreciseStore
718 */ 791 */
719 if (fll || fst) { 792 if (fll || fst) {
720 if (sample_type & PERF_SAMPLE_ADDR)
721 data.addr = pebs->dla;
722
723 /* 793 /*
724 * Use latency for weight (only avail with PEBS-LL) 794 * Use latency for weight (only avail with PEBS-LL)
725 */ 795 */
@@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
732 if (sample_type & PERF_SAMPLE_DATA_SRC) { 802 if (sample_type & PERF_SAMPLE_DATA_SRC) {
733 if (fll) 803 if (fll)
734 data.data_src.val = load_latency_data(pebs->dse); 804 data.data_src.val = load_latency_data(pebs->dse);
805 else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
806 data.data_src.val =
807 precise_store_data_hsw(pebs->dse);
735 else 808 else
736 data.data_src.val = precise_store_data(pebs->dse); 809 data.data_src.val = precise_store_data(pebs->dse);
737 } 810 }
@@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
753 regs.bp = pebs->bp; 826 regs.bp = pebs->bp;
754 regs.sp = pebs->sp; 827 regs.sp = pebs->sp;
755 828
756 if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs)) 829 if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
830 regs.ip = pebs_hsw->real_ip;
831 regs.flags |= PERF_EFLAGS_EXACT;
832 } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
757 regs.flags |= PERF_EFLAGS_EXACT; 833 regs.flags |= PERF_EFLAGS_EXACT;
758 else 834 else
759 regs.flags &= ~PERF_EFLAGS_EXACT; 835 regs.flags &= ~PERF_EFLAGS_EXACT;
760 836
837 if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
838 x86_pmu.intel_cap.pebs_format >= 1)
839 data.addr = pebs->dla;
840
761 if (has_branch_stack(event)) 841 if (has_branch_stack(event))
762 data.br_stack = &cpuc->lbr_stack; 842 data.br_stack = &cpuc->lbr_stack;
763 843
@@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
806 __intel_pmu_pebs_event(event, iregs, at); 886 __intel_pmu_pebs_event(event, iregs, at);
807} 887}
808 888
809static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) 889static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
890 void *top)
810{ 891{
811 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 892 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
812 struct debug_store *ds = cpuc->ds; 893 struct debug_store *ds = cpuc->ds;
813 struct pebs_record_nhm *at, *top;
814 struct perf_event *event = NULL; 894 struct perf_event *event = NULL;
815 u64 status = 0; 895 u64 status = 0;
816 int bit, n; 896 int bit;
817
818 if (!x86_pmu.pebs_active)
819 return;
820
821 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
822 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
823 897
824 ds->pebs_index = ds->pebs_buffer_base; 898 ds->pebs_index = ds->pebs_buffer_base;
825 899
826 n = top - at; 900 for (; at < top; at += x86_pmu.pebs_record_size) {
827 if (n <= 0) 901 struct pebs_record_nhm *p = at;
828 return;
829
830 /*
831 * Should not happen, we program the threshold at 1 and do not
832 * set a reset value.
833 */
834 WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
835 902
836 for ( ; at < top; at++) { 903 for_each_set_bit(bit, (unsigned long *)&p->status,
837 for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) { 904 x86_pmu.max_pebs_events) {
838 event = cpuc->events[bit]; 905 event = cpuc->events[bit];
839 if (!test_bit(bit, cpuc->active_mask)) 906 if (!test_bit(bit, cpuc->active_mask))
840 continue; 907 continue;
@@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
857 } 924 }
858} 925}
859 926
927static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
928{
929 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
930 struct debug_store *ds = cpuc->ds;
931 struct pebs_record_nhm *at, *top;
932 int n;
933
934 if (!x86_pmu.pebs_active)
935 return;
936
937 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
938 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
939
940 ds->pebs_index = ds->pebs_buffer_base;
941
942 n = top - at;
943 if (n <= 0)
944 return;
945
946 /*
947 * Should not happen, we program the threshold at 1 and do not
948 * set a reset value.
949 */
950 WARN_ONCE(n > x86_pmu.max_pebs_events,
951 "Unexpected number of pebs records %d\n", n);
952
953 return __intel_pmu_drain_pebs_nhm(iregs, at, top);
954}
955
956static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
957{
958 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
959 struct debug_store *ds = cpuc->ds;
960 struct pebs_record_hsw *at, *top;
961 int n;
962
963 if (!x86_pmu.pebs_active)
964 return;
965
966 at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
967 top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
968
969 n = top - at;
970 if (n <= 0)
971 return;
972 /*
973 * Should not happen, we program the threshold at 1 and do not
974 * set a reset value.
975 */
976 WARN_ONCE(n > x86_pmu.max_pebs_events,
977 "Unexpected number of pebs records %d\n", n);
978
979 return __intel_pmu_drain_pebs_nhm(iregs, at, top);
980}
981
860/* 982/*
861 * BTS, PEBS probe and setup 983 * BTS, PEBS probe and setup
862 */ 984 */
@@ -888,6 +1010,12 @@ void intel_ds_init(void)
888 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; 1010 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
889 break; 1011 break;
890 1012
1013 case 2:
1014 pr_cont("PEBS fmt2%c, ", pebs_type);
1015 x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
1016 x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
1017 break;
1018
891 default: 1019 default:
892 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); 1020 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
893 x86_pmu.pebs = 0; 1021 x86_pmu.pebs = 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d978353c939b..d5be06a5005e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,16 @@ enum {
12 LBR_FORMAT_LIP = 0x01, 12 LBR_FORMAT_LIP = 0x01,
13 LBR_FORMAT_EIP = 0x02, 13 LBR_FORMAT_EIP = 0x02,
14 LBR_FORMAT_EIP_FLAGS = 0x03, 14 LBR_FORMAT_EIP_FLAGS = 0x03,
15 LBR_FORMAT_EIP_FLAGS2 = 0x04,
16 LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
17};
18
19static enum {
20 LBR_EIP_FLAGS = 1,
21 LBR_TSX = 2,
22} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
23 [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
24 [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
15}; 25};
16 26
17/* 27/*
@@ -56,6 +66,8 @@ enum {
56 LBR_FAR) 66 LBR_FAR)
57 67
58#define LBR_FROM_FLAG_MISPRED (1ULL << 63) 68#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
69#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
70#define LBR_FROM_FLAG_ABORT (1ULL << 61)
59 71
60#define for_each_branch_sample_type(x) \ 72#define for_each_branch_sample_type(x) \
61 for ((x) = PERF_SAMPLE_BRANCH_USER; \ 73 for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
81 X86_BR_JMP = 1 << 9, /* jump */ 93 X86_BR_JMP = 1 << 9, /* jump */
82 X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ 94 X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
83 X86_BR_IND_CALL = 1 << 11,/* indirect calls */ 95 X86_BR_IND_CALL = 1 << 11,/* indirect calls */
96 X86_BR_ABORT = 1 << 12,/* transaction abort */
97 X86_BR_IN_TX = 1 << 13,/* in transaction */
98 X86_BR_NO_TX = 1 << 14,/* not in transaction */
84}; 99};
85 100
86#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) 101#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
102#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
87 103
88#define X86_BR_ANY \ 104#define X86_BR_ANY \
89 (X86_BR_CALL |\ 105 (X86_BR_CALL |\
@@ -95,6 +111,7 @@ enum {
95 X86_BR_JCC |\ 111 X86_BR_JCC |\
96 X86_BR_JMP |\ 112 X86_BR_JMP |\
97 X86_BR_IRQ |\ 113 X86_BR_IRQ |\
114 X86_BR_ABORT |\
98 X86_BR_IND_CALL) 115 X86_BR_IND_CALL)
99 116
100#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) 117#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
270 287
271 for (i = 0; i < x86_pmu.lbr_nr; i++) { 288 for (i = 0; i < x86_pmu.lbr_nr; i++) {
272 unsigned long lbr_idx = (tos - i) & mask; 289 unsigned long lbr_idx = (tos - i) & mask;
273 u64 from, to, mis = 0, pred = 0; 290 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
291 int skip = 0;
292 int lbr_flags = lbr_desc[lbr_format];
274 293
275 rdmsrl(x86_pmu.lbr_from + lbr_idx, from); 294 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
276 rdmsrl(x86_pmu.lbr_to + lbr_idx, to); 295 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
277 296
278 if (lbr_format == LBR_FORMAT_EIP_FLAGS) { 297 if (lbr_flags & LBR_EIP_FLAGS) {
279 mis = !!(from & LBR_FROM_FLAG_MISPRED); 298 mis = !!(from & LBR_FROM_FLAG_MISPRED);
280 pred = !mis; 299 pred = !mis;
281 from = (u64)((((s64)from) << 1) >> 1); 300 skip = 1;
301 }
302 if (lbr_flags & LBR_TSX) {
303 in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
304 abort = !!(from & LBR_FROM_FLAG_ABORT);
305 skip = 3;
282 } 306 }
307 from = (u64)((((s64)from) << skip) >> skip);
283 308
284 cpuc->lbr_entries[i].from = from; 309 cpuc->lbr_entries[i].from = from;
285 cpuc->lbr_entries[i].to = to; 310 cpuc->lbr_entries[i].to = to;
286 cpuc->lbr_entries[i].mispred = mis; 311 cpuc->lbr_entries[i].mispred = mis;
287 cpuc->lbr_entries[i].predicted = pred; 312 cpuc->lbr_entries[i].predicted = pred;
313 cpuc->lbr_entries[i].in_tx = in_tx;
314 cpuc->lbr_entries[i].abort = abort;
288 cpuc->lbr_entries[i].reserved = 0; 315 cpuc->lbr_entries[i].reserved = 0;
289 } 316 }
290 cpuc->lbr_stack.nr = i; 317 cpuc->lbr_stack.nr = i;
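
The loop above peels one flag bit (MISPRED) off LBR_FROM for the older EIP_FLAGS format and three bits (MISPRED, IN_TX, ABORT) for EIP_FLAGS2, then sign-extends what remains back into a canonical address. A standalone sketch of that decoding; the sample address is made up:

#include <stdio.h>
#include <stdint.h>

#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
#define LBR_FROM_FLAG_IN_TX   (1ULL << 62)
#define LBR_FROM_FLAG_ABORT   (1ULL << 61)

static uint64_t lbr_from_addr(uint64_t from, int tsx_format)
{
	int skip = tsx_format ? 3 : 1;

	/* drop the flag bits, then arithmetic-shift back to sign-extend the
	 * address, mirroring the s64 shift trick in the kernel loop */
	return (uint64_t)((int64_t)(from << skip) >> skip);
}

int main(void)
{
	uint64_t addr = 0xffffffff81000000ULL;		/* hypothetical kernel address */
	uint64_t raw  = LBR_FROM_FLAG_MISPRED | LBR_FROM_FLAG_IN_TX |
			(addr & ((1ULL << 61) - 1));	/* flags overlay the top bits */

	printf("mispred=%d in_tx=%d abort=%d from=%#llx\n",
	       !!(raw & LBR_FROM_FLAG_MISPRED),
	       !!(raw & LBR_FROM_FLAG_IN_TX),
	       !!(raw & LBR_FROM_FLAG_ABORT),
	       (unsigned long long)lbr_from_addr(raw, 1));
	return 0;
}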
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void)
310 * - in case there is no HW filter 337 * - in case there is no HW filter
311 * - in case the HW filter has errata or limitations 338 * - in case the HW filter has errata or limitations
312 */ 339 */
313static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) 340static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
314{ 341{
315 u64 br_type = event->attr.branch_sample_type; 342 u64 br_type = event->attr.branch_sample_type;
316 int mask = 0; 343 int mask = 0;
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
318 if (br_type & PERF_SAMPLE_BRANCH_USER) 345 if (br_type & PERF_SAMPLE_BRANCH_USER)
319 mask |= X86_BR_USER; 346 mask |= X86_BR_USER;
320 347
321 if (br_type & PERF_SAMPLE_BRANCH_KERNEL) { 348 if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
322 if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
323 return -EACCES;
324 mask |= X86_BR_KERNEL; 349 mask |= X86_BR_KERNEL;
325 }
326 350
327 /* we ignore BRANCH_HV here */ 351 /* we ignore BRANCH_HV here */
328 352
@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
337 361
338 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) 362 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
339 mask |= X86_BR_IND_CALL; 363 mask |= X86_BR_IND_CALL;
364
365 if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
366 mask |= X86_BR_ABORT;
367
368 if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
369 mask |= X86_BR_IN_TX;
370
371 if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
372 mask |= X86_BR_NO_TX;
373
340 /* 374 /*
341 * stash actual user request into reg, it may 375 * stash actual user request into reg, it may
342 * be used by fixup code for some CPU 376 * be used by fixup code for some CPU
343 */ 377 */
344 event->hw.branch_reg.reg = mask; 378 event->hw.branch_reg.reg = mask;
345
346 return 0;
347} 379}
348 380
349/* 381/*
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
391 /* 423 /*
392 * setup SW LBR filter 424 * setup SW LBR filter
393 */ 425 */
394 ret = intel_pmu_setup_sw_lbr_filter(event); 426 intel_pmu_setup_sw_lbr_filter(event);
395 if (ret)
396 return ret;
397 427
398 /* 428 /*
399 * setup HW LBR filter, if any 429 * setup HW LBR filter, if any
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
415 * decoded (e.g., text page not present), then X86_BR_NONE is 445 * decoded (e.g., text page not present), then X86_BR_NONE is
416 * returned. 446 * returned.
417 */ 447 */
418static int branch_type(unsigned long from, unsigned long to) 448static int branch_type(unsigned long from, unsigned long to, int abort)
419{ 449{
420 struct insn insn; 450 struct insn insn;
421 void *addr; 451 void *addr;
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
435 if (from == 0 || to == 0) 465 if (from == 0 || to == 0)
436 return X86_BR_NONE; 466 return X86_BR_NONE;
437 467
468 if (abort)
469 return X86_BR_ABORT | to_plm;
470
438 if (from_plm == X86_BR_USER) { 471 if (from_plm == X86_BR_USER) {
439 /* 472 /*
440 * can happen if measuring at the user level only 473 * can happen if measuring at the user level only
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
581 from = cpuc->lbr_entries[i].from; 614 from = cpuc->lbr_entries[i].from;
582 to = cpuc->lbr_entries[i].to; 615 to = cpuc->lbr_entries[i].to;
583 616
584 type = branch_type(from, to); 617 type = branch_type(from, to, cpuc->lbr_entries[i].abort);
618 if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
619 if (cpuc->lbr_entries[i].in_tx)
620 type |= X86_BR_IN_TX;
621 else
622 type |= X86_BR_NO_TX;
623 }
585 624
586 /* if type does not correspond, then discard */ 625 /* if type does not correspond, then discard */
587 if (type == X86_BR_NONE || (br_sel & type) != type) { 626 if (type == X86_BR_NONE || (br_sel & type) != type) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 52441a2af538..1fb6c72717bd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -314,8 +314,8 @@ static struct uncore_event_desc snbep_uncore_imc_events[] = {
314static struct uncore_event_desc snbep_uncore_qpi_events[] = { 314static struct uncore_event_desc snbep_uncore_qpi_events[] = {
315 INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), 315 INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
316 INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), 316 INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
317 INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"), 317 INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"),
318 INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"), 318 INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"),
319 { /* end: all zeroes */ }, 319 { /* end: all zeroes */ },
320}; 320};
321 321
@@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve
536 if (!uncore_box_is_fake(box)) 536 if (!uncore_box_is_fake(box))
537 reg1->alloc |= alloc; 537 reg1->alloc |= alloc;
538 538
539 return 0; 539 return NULL;
540fail: 540fail:
541 for (; i >= 0; i--) { 541 for (; i >= 0; i--) {
542 if (alloc & (0x1 << i)) 542 if (alloc & (0x1 << i))
@@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
644 (!uncore_box_is_fake(box) && reg1->alloc)) 644 (!uncore_box_is_fake(box) && reg1->alloc))
645 return NULL; 645 return NULL;
646again: 646again:
647 mask = 0xff << (idx * 8); 647 mask = 0xffULL << (idx * 8);
648 raw_spin_lock_irqsave(&er->lock, flags); 648 raw_spin_lock_irqsave(&er->lock, flags);
649 if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || 649 if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
650 !((config1 ^ er->config) & mask)) { 650 !((config1 ^ er->config) & mask)) {
@@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif
1923{ 1923{
1924 struct hw_perf_event *hwc = &event->hw; 1924 struct hw_perf_event *hwc = &event->hw;
1925 struct hw_perf_event_extra *reg1 = &hwc->extra_reg; 1925 struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
1926 int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); 1926 u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
1927 u64 config = reg1->config; 1927 u64 config = reg1->config;
1928 1928
1929 /* get the non-shared control bits and shift them */ 1929 /* get the non-shared control bits and shift them */
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
2723static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) 2723static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
2724{ 2724{
2725 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; 2725 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
2726 struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; 2726 struct event_constraint *c;
2727 int i, wmin, wmax, ret = 0; 2727 int i, wmin, wmax, ret = 0;
2728 struct hw_perf_event *hwc; 2728 struct hw_perf_event *hwc;
2729 2729
2730 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); 2730 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
2731 2731
2732 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { 2732 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
2733 hwc = &box->event_list[i]->hw;
2733 c = uncore_get_event_constraint(box, box->event_list[i]); 2734 c = uncore_get_event_constraint(box, box->event_list[i]);
2734 constraints[i] = c; 2735 hwc->constraint = c;
2735 wmin = min(wmin, c->weight); 2736 wmin = min(wmin, c->weight);
2736 wmax = max(wmax, c->weight); 2737 wmax = max(wmax, c->weight);
2737 } 2738 }
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
2739 /* fastpath, try to reuse previous register */ 2740 /* fastpath, try to reuse previous register */
2740 for (i = 0; i < n; i++) { 2741 for (i = 0; i < n; i++) {
2741 hwc = &box->event_list[i]->hw; 2742 hwc = &box->event_list[i]->hw;
2742 c = constraints[i]; 2743 c = hwc->constraint;
2743 2744
2744 /* never assigned */ 2745 /* never assigned */
2745 if (hwc->idx == -1) 2746 if (hwc->idx == -1)
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
2759 } 2760 }
2760 /* slow path */ 2761 /* slow path */
2761 if (i != n) 2762 if (i != n)
2762 ret = perf_assign_events(constraints, n, wmin, wmax, assign); 2763 ret = perf_assign_events(box->event_list, n,
2764 wmin, wmax, assign);
2763 2765
2764 if (!assign || ret) { 2766 if (!assign || ret) {
2765 for (i = 0; i < n; i++) 2767 for (i = 0; i < n; i++)
@@ -3295,7 +3297,7 @@ static void __init uncore_pci_exit(void)
3295/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */ 3297/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
3296static LIST_HEAD(boxes_to_free); 3298static LIST_HEAD(boxes_to_free);
3297 3299
3298static void __cpuinit uncore_kfree_boxes(void) 3300static void uncore_kfree_boxes(void)
3299{ 3301{
3300 struct intel_uncore_box *box; 3302 struct intel_uncore_box *box;
3301 3303
@@ -3307,7 +3309,7 @@ static void __cpuinit uncore_kfree_boxes(void)
3307 } 3309 }
3308} 3310}
3309 3311
3310static void __cpuinit uncore_cpu_dying(int cpu) 3312static void uncore_cpu_dying(int cpu)
3311{ 3313{
3312 struct intel_uncore_type *type; 3314 struct intel_uncore_type *type;
3313 struct intel_uncore_pmu *pmu; 3315 struct intel_uncore_pmu *pmu;
@@ -3326,7 +3328,7 @@ static void __cpuinit uncore_cpu_dying(int cpu)
3326 } 3328 }
3327} 3329}
3328 3330
3329static int __cpuinit uncore_cpu_starting(int cpu) 3331static int uncore_cpu_starting(int cpu)
3330{ 3332{
3331 struct intel_uncore_type *type; 3333 struct intel_uncore_type *type;
3332 struct intel_uncore_pmu *pmu; 3334 struct intel_uncore_pmu *pmu;
@@ -3369,7 +3371,7 @@ static int __cpuinit uncore_cpu_starting(int cpu)
3369 return 0; 3371 return 0;
3370} 3372}
3371 3373
3372static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) 3374static int uncore_cpu_prepare(int cpu, int phys_id)
3373{ 3375{
3374 struct intel_uncore_type *type; 3376 struct intel_uncore_type *type;
3375 struct intel_uncore_pmu *pmu; 3377 struct intel_uncore_pmu *pmu;
@@ -3395,7 +3397,7 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
3395 return 0; 3397 return 0;
3396} 3398}
3397 3399
3398static void __cpuinit 3400static void
3399uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu) 3401uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
3400{ 3402{
3401 struct intel_uncore_type *type; 3403 struct intel_uncore_type *type;
@@ -3433,7 +3435,7 @@ uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_c
3433 } 3435 }
3434} 3436}
3435 3437
3436static void __cpuinit uncore_event_exit_cpu(int cpu) 3438static void uncore_event_exit_cpu(int cpu)
3437{ 3439{
3438 int i, phys_id, target; 3440 int i, phys_id, target;
3439 3441
@@ -3461,7 +3463,7 @@ static void __cpuinit uncore_event_exit_cpu(int cpu)
3461 uncore_change_context(pci_uncores, cpu, target); 3463 uncore_change_context(pci_uncores, cpu, target);
3462} 3464}
3463 3465
3464static void __cpuinit uncore_event_init_cpu(int cpu) 3466static void uncore_event_init_cpu(int cpu)
3465{ 3467{
3466 int i, phys_id; 3468 int i, phys_id;
3467 3469
@@ -3477,8 +3479,8 @@ static void __cpuinit uncore_event_init_cpu(int cpu)
3477 uncore_change_context(pci_uncores, -1, cpu); 3479 uncore_change_context(pci_uncores, -1, cpu);
3478} 3480}
3479 3481
3480static int 3482static int uncore_cpu_notifier(struct notifier_block *self,
3481 __cpuinit uncore_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) 3483 unsigned long action, void *hcpu)
3482{ 3484{
3483 unsigned int cpu = (long)hcpu; 3485 unsigned int cpu = (long)hcpu;
3484 3486
@@ -3518,7 +3520,7 @@ static int
3518 return NOTIFY_OK; 3520 return NOTIFY_OK;
3519} 3521}
3520 3522
3521static struct notifier_block uncore_cpu_nb __cpuinitdata = { 3523static struct notifier_block uncore_cpu_nb = {
3522 .notifier_call = uncore_cpu_notifier, 3524 .notifier_call = uncore_cpu_notifier,
3523 /* 3525 /*
3524 * to migrate uncore events, our notifier should be executed 3526 * to migrate uncore events, our notifier should be executed
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f9528917f6e8..47b3d00c9d89 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -337,10 +337,10 @@
337 NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) 337 NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
338 338
339#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) 339#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
340#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n))) 340#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
341 341
342#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) 342#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
343#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n))) 343#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
344 344
345/* 345/*
346 * use the 9~13 bits to select the event. If the 7th bit is not set, 346
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 7b3fe56b1c21..31f0f335ed22 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -11,10 +11,10 @@ const char *const x86_power_flags[32] = {
11 "fid", /* frequency id control */ 11 "fid", /* frequency id control */
12 "vid", /* voltage id control */ 12 "vid", /* voltage id control */
13 "ttp", /* thermal trip */ 13 "ttp", /* thermal trip */
14 "tm", 14 "tm", /* hardware thermal control */
15 "stc", 15 "stc", /* software thermal control */
16 "100mhzsteps", 16 "100mhzsteps", /* 100 MHz multiplier control */
17 "hwpstate", 17 "hwpstate", /* hardware P-state control */
18 "", /* tsc invariant mapped to constant_tsc */ 18 "", /* tsc invariant mapped to constant_tsc */
19 "cpb", /* core performance boost */ 19 "cpb", /* core performance boost */
20 "eff_freq_ro", /* Readonly aperf/mperf */ 20 "eff_freq_ro", /* Readonly aperf/mperf */
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 37a198bd48c8..aee6317b902f 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -37,8 +37,8 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
37 static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no", 37 static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
38 static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no", 38 static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
39 static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no", 39 static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
40 c->hard_math ? "yes" : "no", 40 static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
41 c->hard_math ? "yes" : "no", 41 static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
42 c->cpuid_level, 42 c->cpuid_level,
43 c->wp_works_ok ? "yes" : "no"); 43 c->wp_works_ok ? "yes" : "no");
44} 44}
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index feca286c2bb4..88db010845cb 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -52,7 +52,7 @@ static inline int rdrand_long(unsigned long *v)
52 */ 52 */
53#define RESEED_LOOP ((512*128)/sizeof(unsigned long)) 53#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
54 54
55void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c) 55void x86_init_rdrand(struct cpuinfo_x86 *c)
56{ 56{
57#ifdef CONFIG_ARCH_RANDOM 57#ifdef CONFIG_ARCH_RANDOM
58 unsigned long tmp; 58 unsigned long tmp;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d92b5dad15dd..f2cc63e9cf08 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,13 +24,13 @@ enum cpuid_regs {
24 CR_EBX 24 CR_EBX
25}; 25};
26 26
27void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) 27void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
28{ 28{
29 u32 max_level; 29 u32 max_level;
30 u32 regs[4]; 30 u32 regs[4];
31 const struct cpuid_bit *cb; 31 const struct cpuid_bit *cb;
32 32
33 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { 33 static const struct cpuid_bit cpuid_bits[] = {
34 { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, 34 { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 },
35 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, 35 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
36 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, 36 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 4397e987a1cf..4c60eaf0571c 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -26,7 +26,7 @@
26 * exists, use it for populating initial_apicid and cpu topology 26 * exists, use it for populating initial_apicid and cpu topology
27 * detection. 27 * detection.
28 */ 28 */
29void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) 29void detect_extended_topology(struct cpuinfo_x86 *c)
30{ 30{
31#ifdef CONFIG_SMP 31#ifdef CONFIG_SMP
32 unsigned int eax, ebx, ecx, edx, sub_index; 32 unsigned int eax, ebx, ecx, edx, sub_index;
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 28000743bbb0..aa0430d69b90 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -5,7 +5,7 @@
5#include <asm/msr.h> 5#include <asm/msr.h>
6#include "cpu.h" 6#include "cpu.h"
7 7
8static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) 8static void early_init_transmeta(struct cpuinfo_x86 *c)
9{ 9{
10 u32 xlvl; 10 u32 xlvl;
11 11
@@ -17,7 +17,7 @@ static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c)
17 } 17 }
18} 18}
19 19
20static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) 20static void init_transmeta(struct cpuinfo_x86 *c)
21{ 21{
22 unsigned int cap_mask, uk, max, dummy; 22 unsigned int cap_mask, uk, max, dummy;
23 unsigned int cms_rev1, cms_rev2; 23 unsigned int cms_rev1, cms_rev2;
@@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
98#endif 98#endif
99} 99}
100 100
101static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = { 101static const struct cpu_dev transmeta_cpu_dev = {
102 .c_vendor = "Transmeta", 102 .c_vendor = "Transmeta",
103 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 103 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_early_init = early_init_transmeta, 104 .c_early_init = early_init_transmeta,
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index fd2c37bf7acb..202759a14121 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -8,7 +8,7 @@
8 * so no special init takes place. 8 * so no special init takes place.
9 */ 9 */
10 10
11static const struct cpu_dev __cpuinitconst umc_cpu_dev = { 11static const struct cpu_dev umc_cpu_dev = {
12 .c_vendor = "UMC", 12 .c_vendor = "UMC",
13 .c_ident = { "UMC UMC UMC" }, 13 .c_ident = { "UMC UMC UMC" },
14 .c_models = { 14 .c_models = {
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 03a36321ec54..7076878404ec 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -122,7 +122,7 @@ static bool __init vmware_platform(void)
122 * so that the kernel could just trust the hypervisor with providing a 122 * so that the kernel could just trust the hypervisor with providing a
123 * reliable virtual TSC that is suitable for timekeeping. 123 * reliable virtual TSC that is suitable for timekeeping.
124 */ 124 */
125static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) 125static void vmware_set_cpu_features(struct cpuinfo_x86 *c)
126{ 126{
127 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 127 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
128 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); 128 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 1e4dbcfe6d31..7d9481c743f8 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -137,7 +137,7 @@ static const struct file_operations cpuid_fops = {
137 .open = cpuid_open, 137 .open = cpuid_open,
138}; 138};
139 139
140static __cpuinit int cpuid_device_create(int cpu) 140static int cpuid_device_create(int cpu)
141{ 141{
142 struct device *dev; 142 struct device *dev;
143 143
@@ -151,9 +151,8 @@ static void cpuid_device_destroy(int cpu)
151 device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); 151 device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
152} 152}
153 153
154static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, 154static int cpuid_class_cpu_callback(struct notifier_block *nfb,
155 unsigned long action, 155 unsigned long action, void *hcpu)
156 void *hcpu)
157{ 156{
158 unsigned int cpu = (unsigned long)hcpu; 157 unsigned int cpu = (unsigned long)hcpu;
159 int err = 0; 158 int err = 0;
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index b1581527a236..69eb2fa25494 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -133,7 +133,7 @@ static void x86_of_pci_irq_disable(struct pci_dev *dev)
133{ 133{
134} 134}
135 135
136void __cpuinit x86_of_pci_init(void) 136void x86_of_pci_init(void)
137{ 137{
138 pcibios_enable_irq = x86_of_pci_irq_enable; 138 pcibios_enable_irq = x86_of_pci_irq_enable;
139 pcibios_disable_irq = x86_of_pci_irq_disable; 139 pcibios_disable_irq = x86_of_pci_irq_disable;
@@ -364,9 +364,7 @@ static void dt_add_ioapic_domain(unsigned int ioapic_num,
364 * and assigned so we can keep the 1:1 mapping which the ioapic 364 * and assigned so we can keep the 1:1 mapping which the ioapic
365 * is having. 365 * is having.
366 */ 366 */
367 ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); 367 irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
368 if (ret)
369 pr_err("Error mapping legacy IRQs: %d\n", ret);
370 368
371 if (num > NR_IRQS_LEGACY) { 369 if (num > NR_IRQS_LEGACY) {
372 ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, 370 ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault.c
index 155a13f33ed8..5d3fe8d36e4a 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault.c
@@ -9,6 +9,8 @@
9#include <asm/processor.h> 9#include <asm/processor.h>
10#include <asm/desc.h> 10#include <asm/desc.h>
11 11
12#ifdef CONFIG_X86_32
13
12#define DOUBLEFAULT_STACKSIZE (1024) 14#define DOUBLEFAULT_STACKSIZE (1024)
13static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; 15static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
14#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) 16#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
@@ -67,3 +69,16 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
67 .__cr3 = __pa_nodebug(swapper_pg_dir), 69 .__cr3 = __pa_nodebug(swapper_pg_dir),
68 } 70 }
69}; 71};
72
73/* dummy for do_double_fault() call */
74void df_debug(struct pt_regs *regs, long error_code) {}
75
76#else /* !CONFIG_X86_32 */
77
78void df_debug(struct pt_regs *regs, long error_code)
79{
80 pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
81 show_regs(regs);
82 panic("Machine halted.");
83}
84#endif
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 94ab6b90dd3f..63bdb29b2549 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -196,15 +196,23 @@ static void __init ati_bugs_contd(int num, int slot, int func)
196static void __init intel_remapping_check(int num, int slot, int func) 196static void __init intel_remapping_check(int num, int slot, int func)
197{ 197{
198 u8 revision; 198 u8 revision;
199 u16 device;
199 200
201 device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
200 revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); 202 revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID);
201 203
202 /* 204 /*
203 * Revision 0x13 of this chipset supports irq remapping 205 * Revision 0x13 of all triggering device ids in this quirk has
204 * but has an erratum that breaks its behavior, flag it as such 206 * a problem draining interrupts when irq remapping is enabled,
 207 * and should be flagged as broken. Additionally, revisions 0x12
 208 * and 0x22 of device id 0x3405 have this problem.
205 */ 209 */
206 if (revision == 0x13) 210 if (revision == 0x13)
207 set_irq_remapping_broken(); 211 set_irq_remapping_broken();
212 else if ((device == 0x3405) &&
213 ((revision == 0x12) ||
214 (revision == 0x22)))
215 set_irq_remapping_broken();
208 216
209} 217}
210 218
@@ -239,6 +247,8 @@ static struct chipset early_qrk[] __initdata = {
239 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, 247 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
240 { PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST, 248 { PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST,
241 PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, 249 PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
250 { PCI_VENDOR_ID_INTEL, 0x3405, PCI_CLASS_BRIDGE_HOST,
251 PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
242 { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, 252 { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
243 PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, 253 PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
244 {} 254 {}
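The updated quirk's decision can be restated as a small predicate. This is only an illustrative sketch of the logic in the hunk above (the device and revision values come straight from the code; the helper name is made up):

	/* Sketch: when the quirk marks interrupt remapping as broken. */
	static bool remapping_broken(u16 device, u8 revision)
	{
		if (revision == 0x13)			/* any bridge matched by the quirk */
			return true;
		return device == 0x3405 &&		/* plus two extra revisions of 0x3405 */
		       (revision == 0x12 || revision == 0x22);
	}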
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 8f3e2dec1df3..2cfbc3a3a2dd 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -801,7 +801,17 @@ ENTRY(name) \
801 CFI_ENDPROC; \ 801 CFI_ENDPROC; \
802ENDPROC(name) 802ENDPROC(name)
803 803
804#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) 804
805#ifdef CONFIG_TRACING
806#define TRACE_BUILD_INTERRUPT(name, nr) \
807 BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
808#else
809#define TRACE_BUILD_INTERRUPT(name, nr)
810#endif
811
812#define BUILD_INTERRUPT(name, nr) \
813 BUILD_INTERRUPT3(name, nr, smp_##name); \
814 TRACE_BUILD_INTERRUPT(name, nr)
805 815
806/* The include is where all of the SMP etc. interrupts come from */ 816/* The include is where all of the SMP etc. interrupts come from */
807#include <asm/entry_arch.h> 817#include <asm/entry_arch.h>
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 727208941030..1b69951a81e2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -365,7 +365,7 @@ ENDPROC(native_usergs_sysret64)
365 /*CFI_REL_OFFSET ss,0*/ 365 /*CFI_REL_OFFSET ss,0*/
366 pushq_cfi %rax /* rsp */ 366 pushq_cfi %rax /* rsp */
367 CFI_REL_OFFSET rsp,0 367 CFI_REL_OFFSET rsp,0
368 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ 368 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
369 /*CFI_REL_OFFSET rflags,0*/ 369 /*CFI_REL_OFFSET rflags,0*/
370 pushq_cfi $__KERNEL_CS /* cs */ 370 pushq_cfi $__KERNEL_CS /* cs */
371 /*CFI_REL_OFFSET cs,0*/ 371 /*CFI_REL_OFFSET cs,0*/
@@ -1138,7 +1138,7 @@ END(common_interrupt)
1138/* 1138/*
1139 * APIC interrupts. 1139 * APIC interrupts.
1140 */ 1140 */
1141.macro apicinterrupt num sym do_sym 1141.macro apicinterrupt3 num sym do_sym
1142ENTRY(\sym) 1142ENTRY(\sym)
1143 INTR_FRAME 1143 INTR_FRAME
1144 ASM_CLAC 1144 ASM_CLAC
@@ -1150,15 +1150,32 @@ ENTRY(\sym)
1150END(\sym) 1150END(\sym)
1151.endm 1151.endm
1152 1152
1153#ifdef CONFIG_TRACING
1154#define trace(sym) trace_##sym
1155#define smp_trace(sym) smp_trace_##sym
1156
1157.macro trace_apicinterrupt num sym
1158apicinterrupt3 \num trace(\sym) smp_trace(\sym)
1159.endm
1160#else
1161.macro trace_apicinterrupt num sym do_sym
1162.endm
1163#endif
1164
1165.macro apicinterrupt num sym do_sym
1166apicinterrupt3 \num \sym \do_sym
1167trace_apicinterrupt \num \sym
1168.endm
1169
1153#ifdef CONFIG_SMP 1170#ifdef CONFIG_SMP
1154apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ 1171apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
1155 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 1172 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
1156apicinterrupt REBOOT_VECTOR \ 1173apicinterrupt3 REBOOT_VECTOR \
1157 reboot_interrupt smp_reboot_interrupt 1174 reboot_interrupt smp_reboot_interrupt
1158#endif 1175#endif
1159 1176
1160#ifdef CONFIG_X86_UV 1177#ifdef CONFIG_X86_UV
1161apicinterrupt UV_BAU_MESSAGE \ 1178apicinterrupt3 UV_BAU_MESSAGE \
1162 uv_bau_message_intr1 uv_bau_message_interrupt 1179 uv_bau_message_intr1 uv_bau_message_interrupt
1163#endif 1180#endif
1164apicinterrupt LOCAL_TIMER_VECTOR \ 1181apicinterrupt LOCAL_TIMER_VECTOR \
@@ -1167,14 +1184,19 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
1167 x86_platform_ipi smp_x86_platform_ipi 1184 x86_platform_ipi smp_x86_platform_ipi
1168 1185
1169#ifdef CONFIG_HAVE_KVM 1186#ifdef CONFIG_HAVE_KVM
1170apicinterrupt POSTED_INTR_VECTOR \ 1187apicinterrupt3 POSTED_INTR_VECTOR \
1171 kvm_posted_intr_ipi smp_kvm_posted_intr_ipi 1188 kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
1172#endif 1189#endif
1173 1190
1191#ifdef CONFIG_X86_MCE_THRESHOLD
1174apicinterrupt THRESHOLD_APIC_VECTOR \ 1192apicinterrupt THRESHOLD_APIC_VECTOR \
1175 threshold_interrupt smp_threshold_interrupt 1193 threshold_interrupt smp_threshold_interrupt
1194#endif
1195
1196#ifdef CONFIG_X86_THERMAL_VECTOR
1176apicinterrupt THERMAL_APIC_VECTOR \ 1197apicinterrupt THERMAL_APIC_VECTOR \
1177 thermal_interrupt smp_thermal_interrupt 1198 thermal_interrupt smp_thermal_interrupt
1199#endif
1178 1200
1179#ifdef CONFIG_SMP 1201#ifdef CONFIG_SMP
1180apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ 1202apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
@@ -1451,13 +1473,13 @@ ENTRY(xen_failsafe_callback)
1451 CFI_ENDPROC 1473 CFI_ENDPROC
1452END(xen_failsafe_callback) 1474END(xen_failsafe_callback)
1453 1475
1454apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ 1476apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
1455 xen_hvm_callback_vector xen_evtchn_do_upcall 1477 xen_hvm_callback_vector xen_evtchn_do_upcall
1456 1478
1457#endif /* CONFIG_XEN */ 1479#endif /* CONFIG_XEN */
1458 1480
1459#if IS_ENABLED(CONFIG_HYPERV) 1481#if IS_ENABLED(CONFIG_HYPERV)
1460apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ 1482apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
1461 hyperv_callback_vector hyperv_vector_handler 1483 hyperv_callback_vector hyperv_vector_handler
1462#endif /* CONFIG_HYPERV */ 1484#endif /* CONFIG_HYPERV */
1463 1485
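With CONFIG_TRACING enabled, every vector declared through the reworked apicinterrupt macro also gets a second, trace_-prefixed entry stub, so the C side has to provide a matching pair of handlers. Taking the LOCAL_TIMER_VECTOR line above as an example (the trace_* naming is inferred from the trace()/smp_trace() helpers in the hunk):

	/* Sketch: the handler pair one "apicinterrupt LOCAL_TIMER_VECTOR ..." line now implies. */
	struct pt_regs;
	void smp_apic_timer_interrupt(struct pt_regs *regs);       /* existing handler */
	void smp_trace_apic_timer_interrupt(struct pt_regs *regs); /* new: same work, wrapped in entry/exit tracepoints */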
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 73afd11799ca..5dd87a89f011 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -292,7 +292,6 @@ ENDPROC(start_cpu0)
292 * If cpu hotplug is not supported then this code can go in init section 292 * If cpu hotplug is not supported then this code can go in init section
293 * which will be freed later 293 * which will be freed later
294 */ 294 */
295__CPUINIT
296ENTRY(startup_32_smp) 295ENTRY(startup_32_smp)
297 cld 296 cld
298 movl $(__BOOT_DS),%eax 297 movl $(__BOOT_DS),%eax
@@ -444,7 +443,6 @@ is486:
444 orl %ecx,%eax 443 orl %ecx,%eax
445 movl %eax,%cr0 444 movl %eax,%cr0
446 445
447 call check_x87
448 lgdt early_gdt_descr 446 lgdt early_gdt_descr
449 lidt idt_descr 447 lidt idt_descr
450 ljmp $(__KERNEL_CS),$1f 448 ljmp $(__KERNEL_CS),$1f
@@ -467,26 +465,6 @@ is486:
467 pushl $0 # fake return address for unwinder 465 pushl $0 # fake return address for unwinder
468 jmp *(initial_code) 466 jmp *(initial_code)
469 467
470/*
471 * We depend on ET to be correct. This checks for 287/387.
472 */
473check_x87:
474 movb $0,X86_HARD_MATH
475 clts
476 fninit
477 fstsw %ax
478 cmpb $0,%al
479 je 1f
480 movl %cr0,%eax /* no coprocessor: have to set bits */
481 xorl $4,%eax /* set EM */
482 movl %eax,%cr0
483 ret
484 ALIGN
4851: movb $1,X86_HARD_MATH
486 .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
487 ret
488
489
490#include "verify_cpu.S" 468#include "verify_cpu.S"
491 469
492/* 470/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 321d65ebaffe..e1aabdb314c8 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -512,15 +512,6 @@ ENTRY(phys_base)
512 512
513#include "../../x86/xen/xen-head.S" 513#include "../../x86/xen/xen-head.S"
514 514
515 .section .bss, "aw", @nobits
516 .align L1_CACHE_BYTES
517ENTRY(idt_table)
518 .skip IDT_ENTRIES * 16
519
520 .align L1_CACHE_BYTES
521ENTRY(nmi_idt_table)
522 .skip IDT_ENTRIES * 16
523
524 __PAGE_ALIGNED_BSS 515 __PAGE_ALIGNED_BSS
525NEXT_PAGE(empty_zero_page) 516NEXT_PAGE(empty_zero_page)
526 .skip PAGE_SIZE 517 .skip PAGE_SIZE
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 02f07634d265..f66ff162dce8 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -393,6 +393,9 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
393 unregister_hw_breakpoint(t->ptrace_bps[i]); 393 unregister_hw_breakpoint(t->ptrace_bps[i]);
394 t->ptrace_bps[i] = NULL; 394 t->ptrace_bps[i] = NULL;
395 } 395 }
396
397 t->debugreg6 = 0;
398 t->ptrace_dr7 = 0;
396} 399}
397 400
398void hw_breakpoint_restore(void) 401void hw_breakpoint_restore(void)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index cb339097b9ea..5d576ab34403 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -108,15 +108,15 @@ EXPORT_SYMBOL(unlazy_fpu);
108unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; 108unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
109unsigned int xstate_size; 109unsigned int xstate_size;
110EXPORT_SYMBOL_GPL(xstate_size); 110EXPORT_SYMBOL_GPL(xstate_size);
111static struct i387_fxsave_struct fx_scratch __cpuinitdata; 111static struct i387_fxsave_struct fx_scratch;
112 112
113static void __cpuinit mxcsr_feature_mask_init(void) 113static void mxcsr_feature_mask_init(void)
114{ 114{
115 unsigned long mask = 0; 115 unsigned long mask = 0;
116 116
117 if (cpu_has_fxsr) { 117 if (cpu_has_fxsr) {
118 memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); 118 memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
119 asm volatile("fxsave %0" : : "m" (fx_scratch)); 119 asm volatile("fxsave %0" : "+m" (fx_scratch));
120 mask = fx_scratch.mxcsr_mask; 120 mask = fx_scratch.mxcsr_mask;
121 if (mask == 0) 121 if (mask == 0)
122 mask = 0x0000ffbf; 122 mask = 0x0000ffbf;
@@ -124,14 +124,14 @@ static void __cpuinit mxcsr_feature_mask_init(void)
124 mxcsr_feature_mask &= mask; 124 mxcsr_feature_mask &= mask;
125} 125}
126 126
127static void __cpuinit init_thread_xstate(void) 127static void init_thread_xstate(void)
128{ 128{
129 /* 129 /*
130 * Note that xstate_size might be overwritten later during 130 * Note that xstate_size might be overwritten later during
131 * xsave_init(). 131 * xsave_init().
132 */ 132 */
133 133
134 if (!HAVE_HWFP) { 134 if (!cpu_has_fpu) {
135 /* 135 /*
136 * Disable xsave as we do not support it if i387 136 * Disable xsave as we do not support it if i387
137 * emulation is enabled. 137 * emulation is enabled.
@@ -153,11 +153,19 @@ static void __cpuinit init_thread_xstate(void)
153 * into all processes. 153 * into all processes.
154 */ 154 */
155 155
156void __cpuinit fpu_init(void) 156void fpu_init(void)
157{ 157{
158 unsigned long cr0; 158 unsigned long cr0;
159 unsigned long cr4_mask = 0; 159 unsigned long cr4_mask = 0;
160 160
161#ifndef CONFIG_MATH_EMULATION
162 if (!cpu_has_fpu) {
163 pr_emerg("No FPU found and no math emulation present\n");
164 pr_emerg("Giving up\n");
165 for (;;)
166 asm volatile("hlt");
167 }
168#endif
161 if (cpu_has_fxsr) 169 if (cpu_has_fxsr)
162 cr4_mask |= X86_CR4_OSFXSR; 170 cr4_mask |= X86_CR4_OSFXSR;
163 if (cpu_has_xmm) 171 if (cpu_has_xmm)
@@ -167,7 +175,7 @@ void __cpuinit fpu_init(void)
167 175
168 cr0 = read_cr0(); 176 cr0 = read_cr0();
169 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ 177 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
170 if (!HAVE_HWFP) 178 if (!cpu_has_fpu)
171 cr0 |= X86_CR0_EM; 179 cr0 |= X86_CR0_EM;
172 write_cr0(cr0); 180 write_cr0(cr0);
173 181
@@ -185,7 +193,7 @@ void __cpuinit fpu_init(void)
185 193
186void fpu_finit(struct fpu *fpu) 194void fpu_finit(struct fpu *fpu)
187{ 195{
188 if (!HAVE_HWFP) { 196 if (!cpu_has_fpu) {
189 finit_soft_fpu(&fpu->state->soft); 197 finit_soft_fpu(&fpu->state->soft);
190 return; 198 return;
191 } 199 }
@@ -214,7 +222,7 @@ int init_fpu(struct task_struct *tsk)
214 int ret; 222 int ret;
215 223
216 if (tsk_used_math(tsk)) { 224 if (tsk_used_math(tsk)) {
217 if (HAVE_HWFP && tsk == current) 225 if (cpu_has_fpu && tsk == current)
218 unlazy_fpu(tsk); 226 unlazy_fpu(tsk);
219 tsk->thread.fpu.last_cpu = ~0; 227 tsk->thread.fpu.last_cpu = ~0;
220 return 0; 228 return 0;
@@ -511,14 +519,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
511 if (ret) 519 if (ret)
512 return ret; 520 return ret;
513 521
514 if (!HAVE_HWFP) 522 if (!static_cpu_has(X86_FEATURE_FPU))
515 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); 523 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
516 524
517 if (!cpu_has_fxsr) { 525 if (!cpu_has_fxsr)
518 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 526 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
519 &target->thread.fpu.state->fsave, 0, 527 &target->thread.fpu.state->fsave, 0,
520 -1); 528 -1);
521 }
522 529
523 sanitize_i387_state(target); 530 sanitize_i387_state(target);
524 531
@@ -545,13 +552,13 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
545 552
546 sanitize_i387_state(target); 553 sanitize_i387_state(target);
547 554
548 if (!HAVE_HWFP) 555 if (!static_cpu_has(X86_FEATURE_FPU))
549 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); 556 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
550 557
551 if (!cpu_has_fxsr) { 558 if (!cpu_has_fxsr)
552 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 559 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
553 &target->thread.fpu.state->fsave, 0, -1); 560 &target->thread.fpu.state->fsave, 0,
554 } 561 -1);
555 562
556 if (pos > 0 || count < sizeof(env)) 563 if (pos > 0 || count < sizeof(env))
557 convert_from_fxsr(&env, target); 564 convert_from_fxsr(&env, target);
@@ -592,3 +599,33 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
592EXPORT_SYMBOL(dump_fpu); 599EXPORT_SYMBOL(dump_fpu);
593 600
594#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ 601#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
602
603static int __init no_387(char *s)
604{
605 setup_clear_cpu_cap(X86_FEATURE_FPU);
606 return 1;
607}
608
609__setup("no387", no_387);
610
611void fpu_detect(struct cpuinfo_x86 *c)
612{
613 unsigned long cr0;
614 u16 fsw, fcw;
615
616 fsw = fcw = 0xffff;
617
618 cr0 = read_cr0();
619 cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
620 write_cr0(cr0);
621
622 asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
623 : "+m" (fsw), "+m" (fcw));
624
625 if (fsw == 0 && (fcw & 0x103f) == 0x003f)
626 set_cpu_cap(c, X86_FEATURE_FPU);
627 else
628 clear_cpu_cap(c, X86_FEATURE_FPU);
629
630 /* The final cr0 value is set in fpu_init() */
631}
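fpu_detect() relies on the values an FNINIT leaves behind on real x87 hardware: a status word of 0 and a control word of 0x037f, so masking the control word with 0x103f must give 0x003f. The check in isolation, as a sketch with the constants taken from the hunk above:

	/* Sketch: interpret the FNSTSW/FNSTCW read-back done in fpu_detect(). */
	static bool x87_present(u16 fsw, u16 fcw)
	{
		return fsw == 0 && (fcw & 0x103f) == 0x003f;	/* FNINIT leaves FCW == 0x037f */
	}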
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index ac0631d8996f..3a8185c042a2 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -18,6 +18,9 @@
18#include <asm/mce.h> 18#include <asm/mce.h>
19#include <asm/hw_irq.h> 19#include <asm/hw_irq.h>
20 20
21#define CREATE_TRACE_POINTS
22#include <asm/trace/irq_vectors.h>
23
21atomic_t irq_err_count; 24atomic_t irq_err_count;
22 25
23/* Function pointer for generic interrupt vector handling */ 26/* Function pointer for generic interrupt vector handling */
@@ -204,23 +207,21 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
204/* 207/*
205 * Handler for X86_PLATFORM_IPI_VECTOR. 208 * Handler for X86_PLATFORM_IPI_VECTOR.
206 */ 209 */
207void smp_x86_platform_ipi(struct pt_regs *regs) 210void __smp_x86_platform_ipi(void)
208{ 211{
209 struct pt_regs *old_regs = set_irq_regs(regs);
210
211 ack_APIC_irq();
212
213 irq_enter();
214
215 exit_idle();
216
217 inc_irq_stat(x86_platform_ipis); 212 inc_irq_stat(x86_platform_ipis);
218 213
219 if (x86_platform_ipi_callback) 214 if (x86_platform_ipi_callback)
220 x86_platform_ipi_callback(); 215 x86_platform_ipi_callback();
216}
221 217
222 irq_exit(); 218void smp_x86_platform_ipi(struct pt_regs *regs)
219{
220 struct pt_regs *old_regs = set_irq_regs(regs);
223 221
222 entering_ack_irq();
223 __smp_x86_platform_ipi();
224 exiting_irq();
224 set_irq_regs(old_regs); 225 set_irq_regs(old_regs);
225} 226}
226 227
@@ -246,6 +247,18 @@ void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
246} 247}
247#endif 248#endif
248 249
250void smp_trace_x86_platform_ipi(struct pt_regs *regs)
251{
252 struct pt_regs *old_regs = set_irq_regs(regs);
253
254 entering_ack_irq();
255 trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
256 __smp_x86_platform_ipi();
257 trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
258 exiting_irq();
259 set_irq_regs(old_regs);
260}
261
249EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); 262EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
250 263
251#ifdef CONFIG_HOTPLUG_CPU 264#ifdef CONFIG_HOTPLUG_CPU
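entering_ack_irq() and exiting_irq() are not part of this hunk; they are presumably small wrappers in <asm/apic.h> that fold the ack/irq_enter/exit_idle/irq_exit boilerplate the old handler open-coded. A sketch of the assumed definitions, not the exact upstream code:

	/* Sketch of the helpers the reworked smp_x86_platform_ipi() relies on. */
	static inline void entering_irq(void)
	{
		irq_enter();
		exit_idle();
	}

	static inline void entering_ack_irq(void)
	{
		ack_APIC_irq();
		entering_irq();
	}

	static inline void exiting_irq(void)
	{
		irq_exit();
	}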
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 344faf8d0d62..4186755f1d7c 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -119,7 +119,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
119/* 119/*
120 * allocate per-cpu stacks for hardirq and for softirq processing 120 * allocate per-cpu stacks for hardirq and for softirq processing
121 */ 121 */
122void __cpuinit irq_ctx_init(int cpu) 122void irq_ctx_init(int cpu)
123{ 123{
124 union irq_ctx *irqctx; 124 union irq_ctx *irqctx;
125 125
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index ca8f703a1e70..636a55e4a13c 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -8,14 +8,34 @@
8#include <linux/irq_work.h> 8#include <linux/irq_work.h>
9#include <linux/hardirq.h> 9#include <linux/hardirq.h>
10#include <asm/apic.h> 10#include <asm/apic.h>
11#include <asm/trace/irq_vectors.h>
11 12
12void smp_irq_work_interrupt(struct pt_regs *regs) 13static inline void irq_work_entering_irq(void)
13{ 14{
14 irq_enter(); 15 irq_enter();
15 ack_APIC_irq(); 16 ack_APIC_irq();
17}
18
19static inline void __smp_irq_work_interrupt(void)
20{
16 inc_irq_stat(apic_irq_work_irqs); 21 inc_irq_stat(apic_irq_work_irqs);
17 irq_work_run(); 22 irq_work_run();
18 irq_exit(); 23}
24
25void smp_irq_work_interrupt(struct pt_regs *regs)
26{
27 irq_work_entering_irq();
28 __smp_irq_work_interrupt();
29 exiting_irq();
30}
31
32void smp_trace_irq_work_interrupt(struct pt_regs *regs)
33{
34 irq_work_entering_irq();
35 trace_irq_work_entry(IRQ_WORK_VECTOR);
36 __smp_irq_work_interrupt();
37 trace_irq_work_exit(IRQ_WORK_VECTOR);
38 exiting_irq();
19} 39}
20 40
21void arch_irq_work_raise(void) 41void arch_irq_work_raise(void)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index cd6d9a5a42f6..a96d32cc55b8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -320,7 +320,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
320 apic_write(APIC_EOI, APIC_EOI_ACK); 320 apic_write(APIC_EOI, APIC_EOI_ACK);
321} 321}
322 322
323void __cpuinit kvm_guest_cpu_init(void) 323void kvm_guest_cpu_init(void)
324{ 324{
325 if (!kvm_para_available()) 325 if (!kvm_para_available())
326 return; 326 return;
@@ -421,7 +421,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
421 native_smp_prepare_boot_cpu(); 421 native_smp_prepare_boot_cpu();
422} 422}
423 423
424static void __cpuinit kvm_guest_cpu_online(void *dummy) 424static void kvm_guest_cpu_online(void *dummy)
425{ 425{
426 kvm_guest_cpu_init(); 426 kvm_guest_cpu_init();
427} 427}
@@ -435,8 +435,8 @@ static void kvm_guest_cpu_offline(void *dummy)
435 apf_task_wake_all(); 435 apf_task_wake_all();
436} 436}
437 437
438static int __cpuinit kvm_cpu_notify(struct notifier_block *self, 438static int kvm_cpu_notify(struct notifier_block *self, unsigned long action,
439 unsigned long action, void *hcpu) 439 void *hcpu)
440{ 440{
441 int cpu = (unsigned long)hcpu; 441 int cpu = (unsigned long)hcpu;
442 switch (action) { 442 switch (action) {
@@ -455,7 +455,7 @@ static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
455 return NOTIFY_OK; 455 return NOTIFY_OK;
456} 456}
457 457
458static struct notifier_block __cpuinitdata kvm_cpu_notifier = { 458static struct notifier_block kvm_cpu_notifier = {
459 .notifier_call = kvm_cpu_notify, 459 .notifier_call = kvm_cpu_notify,
460}; 460};
461#endif 461#endif
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 3dd37ebd591b..1570e0741344 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -48,10 +48,9 @@ static struct pvclock_wall_clock wall_clock;
48 * have elapsed since the hypervisor wrote the data. So we try to account for 48 * have elapsed since the hypervisor wrote the data. So we try to account for
49 * that with system time 49 * that with system time
50 */ 50 */
51static unsigned long kvm_get_wallclock(void) 51static void kvm_get_wallclock(struct timespec *now)
52{ 52{
53 struct pvclock_vcpu_time_info *vcpu_time; 53 struct pvclock_vcpu_time_info *vcpu_time;
54 struct timespec ts;
55 int low, high; 54 int low, high;
56 int cpu; 55 int cpu;
57 56
@@ -64,14 +63,12 @@ static unsigned long kvm_get_wallclock(void)
64 cpu = smp_processor_id(); 63 cpu = smp_processor_id();
65 64
66 vcpu_time = &hv_clock[cpu].pvti; 65 vcpu_time = &hv_clock[cpu].pvti;
67 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); 66 pvclock_read_wallclock(&wall_clock, vcpu_time, now);
68 67
69 preempt_enable(); 68 preempt_enable();
70
71 return ts.tv_sec;
72} 69}
73 70
74static int kvm_set_wallclock(unsigned long now) 71static int kvm_set_wallclock(const struct timespec *now)
75{ 72{
76 return -1; 73 return -1;
77} 74}
@@ -185,7 +182,7 @@ static void kvm_restore_sched_clock_state(void)
185} 182}
186 183
187#ifdef CONFIG_X86_LOCAL_APIC 184#ifdef CONFIG_X86_LOCAL_APIC
188static void __cpuinit kvm_setup_secondary_clock(void) 185static void kvm_setup_secondary_clock(void)
189{ 186{
190 /* 187 /*
191 * Now that the first cpu already had this clocksource initialized, 188 * Now that the first cpu already had this clocksource initialized,
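The wallclock hooks now fill a struct timespec instead of returning a seconds value, which lets pvclock_read_wallclock() deliver its result directly. A caller would now look roughly like this (a sketch; the surrounding function name is hypothetical, but x86_platform.get_wallclock is the hook kvmclock installs):

	/* Sketch: consuming the new get_wallclock() prototype. */
	static void read_boot_wallclock(struct timespec *ts)
	{
		x86_platform.get_wallclock(ts);	/* fills *ts; no return value anymore */
	}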
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index efdec7cd8e01..7a0adb7ee433 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -31,48 +31,12 @@
31#include <asm/microcode.h> 31#include <asm/microcode.h>
32#include <asm/processor.h> 32#include <asm/processor.h>
33#include <asm/msr.h> 33#include <asm/msr.h>
34#include <asm/microcode_amd.h>
34 35
35MODULE_DESCRIPTION("AMD Microcode Update Driver"); 36MODULE_DESCRIPTION("AMD Microcode Update Driver");
36MODULE_AUTHOR("Peter Oruba"); 37MODULE_AUTHOR("Peter Oruba");
37MODULE_LICENSE("GPL v2"); 38MODULE_LICENSE("GPL v2");
38 39
39#define UCODE_MAGIC 0x00414d44
40#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
41#define UCODE_UCODE_TYPE 0x00000001
42
43struct equiv_cpu_entry {
44 u32 installed_cpu;
45 u32 fixed_errata_mask;
46 u32 fixed_errata_compare;
47 u16 equiv_cpu;
48 u16 res;
49} __attribute__((packed));
50
51struct microcode_header_amd {
52 u32 data_code;
53 u32 patch_id;
54 u16 mc_patch_data_id;
55 u8 mc_patch_data_len;
56 u8 init_flag;
57 u32 mc_patch_data_checksum;
58 u32 nb_dev_id;
59 u32 sb_dev_id;
60 u16 processor_rev_id;
61 u8 nb_rev_id;
62 u8 sb_rev_id;
63 u8 bios_api_rev;
64 u8 reserved1[3];
65 u32 match_reg[8];
66} __attribute__((packed));
67
68struct microcode_amd {
69 struct microcode_header_amd hdr;
70 unsigned int mpb[0];
71};
72
73#define SECTION_HDR_SIZE 8
74#define CONTAINER_HDR_SZ 12
75
76static struct equiv_cpu_entry *equiv_cpu_table; 40static struct equiv_cpu_entry *equiv_cpu_table;
77 41
78struct ucode_patch { 42struct ucode_patch {
@@ -84,21 +48,10 @@ struct ucode_patch {
84 48
85static LIST_HEAD(pcache); 49static LIST_HEAD(pcache);
86 50
87static u16 find_equiv_id(unsigned int cpu) 51static u16 __find_equiv_id(unsigned int cpu)
88{ 52{
89 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 53 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
90 int i = 0; 54 return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig);
91
92 if (!equiv_cpu_table)
93 return 0;
94
95 while (equiv_cpu_table[i].installed_cpu != 0) {
96 if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
97 return equiv_cpu_table[i].equiv_cpu;
98
99 i++;
100 }
101 return 0;
102} 55}
103 56
104static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) 57static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
@@ -163,7 +116,7 @@ static struct ucode_patch *find_patch(unsigned int cpu)
163{ 116{
164 u16 equiv_id; 117 u16 equiv_id;
165 118
166 equiv_id = find_equiv_id(cpu); 119 equiv_id = __find_equiv_id(cpu);
167 if (!equiv_id) 120 if (!equiv_id)
168 return NULL; 121 return NULL;
169 122
@@ -173,9 +126,20 @@ static struct ucode_patch *find_patch(unsigned int cpu)
173static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 126static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
174{ 127{
175 struct cpuinfo_x86 *c = &cpu_data(cpu); 128 struct cpuinfo_x86 *c = &cpu_data(cpu);
129 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
130 struct ucode_patch *p;
176 131
177 csig->sig = cpuid_eax(0x00000001); 132 csig->sig = cpuid_eax(0x00000001);
178 csig->rev = c->microcode; 133 csig->rev = c->microcode;
134
135 /*
136 * a patch could have been loaded early, set uci->mc so that
137 * mc_bp_resume() can call apply_microcode()
138 */
139 p = find_patch(cpu);
140 if (p && (p->patch_id == csig->rev))
141 uci->mc = p->data;
142
179 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); 143 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
180 144
181 return 0; 145 return 0;
@@ -215,7 +179,21 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
215 return patch_size; 179 return patch_size;
216} 180}
217 181
218static int apply_microcode_amd(int cpu) 182int __apply_microcode_amd(struct microcode_amd *mc_amd)
183{
184 u32 rev, dummy;
185
186 wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
187
188 /* verify patch application was successful */
189 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
190 if (rev != mc_amd->hdr.patch_id)
191 return -1;
192
193 return 0;
194}
195
196int apply_microcode_amd(int cpu)
219{ 197{
220 struct cpuinfo_x86 *c = &cpu_data(cpu); 198 struct cpuinfo_x86 *c = &cpu_data(cpu);
221 struct microcode_amd *mc_amd; 199 struct microcode_amd *mc_amd;
@@ -242,19 +220,16 @@ static int apply_microcode_amd(int cpu)
242 return 0; 220 return 0;
243 } 221 }
244 222
245 wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); 223 if (__apply_microcode_amd(mc_amd)) {
246
247 /* verify patch application was successful */
248 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
249 if (rev != mc_amd->hdr.patch_id) {
250 pr_err("CPU%d: update failed for patch_level=0x%08x\n", 224 pr_err("CPU%d: update failed for patch_level=0x%08x\n",
251 cpu, mc_amd->hdr.patch_id); 225 cpu, mc_amd->hdr.patch_id);
252 return -1; 226 return -1;
253 } 227 }
228 pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
229 mc_amd->hdr.patch_id);
254 230
255 pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); 231 uci->cpu_sig.rev = mc_amd->hdr.patch_id;
256 uci->cpu_sig.rev = rev; 232 c->microcode = mc_amd->hdr.patch_id;
257 c->microcode = rev;
258 233
259 return 0; 234 return 0;
260} 235}
@@ -364,7 +339,7 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
364 return crnt_size; 339 return crnt_size;
365} 340}
366 341
367static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) 342static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t size)
368{ 343{
369 enum ucode_state ret = UCODE_ERROR; 344 enum ucode_state ret = UCODE_ERROR;
370 unsigned int leftover; 345 unsigned int leftover;
@@ -398,6 +373,32 @@ static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
398 return UCODE_OK; 373 return UCODE_OK;
399} 374}
400 375
376enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
377{
378 enum ucode_state ret;
379
380 /* free old equiv table */
381 free_equiv_cpu_table();
382
383 ret = __load_microcode_amd(cpu, data, size);
384
385 if (ret != UCODE_OK)
386 cleanup();
387
388#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
389 /* save BSP's matching patch for early load */
390 if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
391 struct ucode_patch *p = find_patch(cpu);
392 if (p) {
393 memset(amd_bsp_mpb, 0, MPB_MAX_SIZE);
394 memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data),
395 MPB_MAX_SIZE));
396 }
397 }
398#endif
399 return ret;
400}
401
401/* 402/*
402 * AMD microcode firmware naming convention, up to family 15h they are in 403 * AMD microcode firmware naming convention, up to family 15h they are in
403 * the legacy file: 404 * the legacy file:
@@ -440,12 +441,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
440 goto fw_release; 441 goto fw_release;
441 } 442 }
442 443
443 /* free old equiv table */
444 free_equiv_cpu_table();
445
446 ret = load_microcode_amd(cpu, fw->data, fw->size); 444 ret = load_microcode_amd(cpu, fw->data, fw->size);
447 if (ret != UCODE_OK)
448 cleanup();
449 445
450 fw_release: 446 fw_release:
451 release_firmware(fw); 447 release_firmware(fw);
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c
new file mode 100644
index 000000000000..1d14ffee5749
--- /dev/null
+++ b/arch/x86/kernel/microcode_amd_early.c
@@ -0,0 +1,302 @@
1/*
2 * Copyright (C) 2013 Advanced Micro Devices, Inc.
3 *
4 * Author: Jacob Shin <jacob.shin@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/earlycpio.h>
12#include <linux/initrd.h>
13
14#include <asm/cpu.h>
15#include <asm/setup.h>
16#include <asm/microcode_amd.h>
17
18static bool ucode_loaded;
19static u32 ucode_new_rev;
20static unsigned long ucode_offset;
21static size_t ucode_size;
22
23/*
24 * Microcode patch container file is prepended to the initrd in cpio format.
25 * See Documentation/x86/early-microcode.txt
26 */
27static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
28
29static struct cpio_data __init find_ucode_in_initrd(void)
30{
31 long offset = 0;
32 char *path;
33 void *start;
34 size_t size;
35 unsigned long *uoffset;
36 size_t *usize;
37 struct cpio_data cd;
38
39#ifdef CONFIG_X86_32
40 struct boot_params *p;
41
42 /*
43 * On 32-bit, early load occurs before paging is turned on so we need
44 * to use physical addresses.
45 */
46 p = (struct boot_params *)__pa_nodebug(&boot_params);
47 path = (char *)__pa_nodebug(ucode_path);
48 start = (void *)p->hdr.ramdisk_image;
49 size = p->hdr.ramdisk_size;
50 uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
51 usize = (size_t *)__pa_nodebug(&ucode_size);
52#else
53 path = ucode_path;
54 start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
55 size = boot_params.hdr.ramdisk_size;
56 uoffset = &ucode_offset;
57 usize = &ucode_size;
58#endif
59
60 cd = find_cpio_data(path, start, size, &offset);
61 if (!cd.data)
62 return cd;
63
64 if (*(u32 *)cd.data != UCODE_MAGIC) {
65 cd.data = NULL;
66 cd.size = 0;
67 return cd;
68 }
69
70 *uoffset = (u8 *)cd.data - (u8 *)start;
71 *usize = cd.size;
72
73 return cd;
74}
75
76/*
77 * Early load occurs before we can vmalloc(). So we look for the microcode
78 * patch container file in initrd, traverse equivalent cpu table, look for a
79 * matching microcode patch, and update, all in initrd memory in place.
80 * When vmalloc() is available for use later -- on 64-bit during first AP load,
81 * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
82 * load_microcode_amd() to save equivalent cpu table and microcode patches in
83 * kernel heap memory.
84 */
85static void apply_ucode_in_initrd(void *ucode, size_t size)
86{
87 struct equiv_cpu_entry *eq;
88 u32 *header;
89 u8 *data;
90 u16 eq_id = 0;
91 int offset, left;
92 u32 rev, eax;
93 u32 *new_rev;
94 unsigned long *uoffset;
95 size_t *usize;
96
97#ifdef CONFIG_X86_32
98 new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
99 uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
100 usize = (size_t *)__pa_nodebug(&ucode_size);
101#else
102 new_rev = &ucode_new_rev;
103 uoffset = &ucode_offset;
104 usize = &ucode_size;
105#endif
106
107 data = ucode;
108 left = size;
109 header = (u32 *)data;
110
111 /* find equiv cpu table */
112
113 if (header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
114 header[2] == 0) /* size */
115 return;
116
117 eax = cpuid_eax(0x00000001);
118
119 while (left > 0) {
120 eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
121
122 offset = header[2] + CONTAINER_HDR_SZ;
123 data += offset;
124 left -= offset;
125
126 eq_id = find_equiv_id(eq, eax);
127 if (eq_id)
128 break;
129
130 /*
131 * support multiple container files appended together. if this
132 * one does not have a matching equivalent cpu entry, we fast
133 * forward to the next container file.
134 */
135 while (left > 0) {
136 header = (u32 *)data;
137 if (header[0] == UCODE_MAGIC &&
138 header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
139 break;
140
141 offset = header[1] + SECTION_HDR_SIZE;
142 data += offset;
143 left -= offset;
144 }
145
146 /* mark where the next microcode container file starts */
147 offset = data - (u8 *)ucode;
148 *uoffset += offset;
149 *usize -= offset;
150 ucode = data;
151 }
152
153 if (!eq_id) {
154 *usize = 0;
155 return;
156 }
157
158 /* find ucode and update if needed */
159
160 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
161
162 while (left > 0) {
163 struct microcode_amd *mc;
164
165 header = (u32 *)data;
166 if (header[0] != UCODE_UCODE_TYPE || /* type */
167 header[1] == 0) /* size */
168 break;
169
170 mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
171 if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id)
172 if (__apply_microcode_amd(mc) == 0) {
173 rev = mc->hdr.patch_id;
174 *new_rev = rev;
175 }
176
177 offset = header[1] + SECTION_HDR_SIZE;
178 data += offset;
179 left -= offset;
180 }
181
182 /* mark where this microcode container file ends */
183 offset = *usize - (data - (u8 *)ucode);
184 *usize -= offset;
185
186 if (!(*new_rev))
187 *usize = 0;
188}
189
190void __init load_ucode_amd_bsp(void)
191{
192 struct cpio_data cd = find_ucode_in_initrd();
193 if (!cd.data)
194 return;
195
196 apply_ucode_in_initrd(cd.data, cd.size);
197}
198
199#ifdef CONFIG_X86_32
200u8 amd_bsp_mpb[MPB_MAX_SIZE];
201
202/*
203 * On 32-bit, since AP's early load occurs before paging is turned on, we
204 * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
205 * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
206 * save_microcode_in_initrd_amd() BSP's patch is copied to amd_bsp_mpb, which
207 * is used upon resume from suspend.
208 */
209void load_ucode_amd_ap(void)
210{
211 struct microcode_amd *mc;
212 unsigned long *initrd;
213 unsigned long *uoffset;
214 size_t *usize;
215 void *ucode;
216
217 mc = (struct microcode_amd *)__pa(amd_bsp_mpb);
218 if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
219 __apply_microcode_amd(mc);
220 return;
221 }
222
223 initrd = (unsigned long *)__pa(&initrd_start);
224 uoffset = (unsigned long *)__pa(&ucode_offset);
225 usize = (size_t *)__pa(&ucode_size);
226
227 if (!*usize || !*initrd)
228 return;
229
230 ucode = (void *)((unsigned long)__pa(*initrd) + *uoffset);
231 apply_ucode_in_initrd(ucode, *usize);
232}
233
234static void __init collect_cpu_sig_on_bsp(void *arg)
235{
236 unsigned int cpu = smp_processor_id();
237 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
238 uci->cpu_sig.sig = cpuid_eax(0x00000001);
239}
240#else
241static void collect_cpu_info_amd_early(struct cpuinfo_x86 *c,
242 struct ucode_cpu_info *uci)
243{
244 u32 rev, eax;
245
246 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
247 eax = cpuid_eax(0x00000001);
248
249 uci->cpu_sig.sig = eax;
250 uci->cpu_sig.rev = rev;
251 c->microcode = rev;
252 c->x86 = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
253}
254
255void load_ucode_amd_ap(void)
256{
257 unsigned int cpu = smp_processor_id();
258
259 collect_cpu_info_amd_early(&cpu_data(cpu), ucode_cpu_info + cpu);
260
261 if (cpu && !ucode_loaded) {
262 void *ucode;
263
264 if (!ucode_size || !initrd_start)
265 return;
266
267 ucode = (void *)(initrd_start + ucode_offset);
268 if (load_microcode_amd(0, ucode, ucode_size) != UCODE_OK)
269 return;
270 ucode_loaded = true;
271 }
272
273 apply_microcode_amd(cpu);
274}
275#endif
276
277int __init save_microcode_in_initrd_amd(void)
278{
279 enum ucode_state ret;
280 void *ucode;
281#ifdef CONFIG_X86_32
282 unsigned int bsp = boot_cpu_data.cpu_index;
283 struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
284
285 if (!uci->cpu_sig.sig)
286 smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
287#endif
288 if (ucode_new_rev)
289 pr_info("microcode: updated early to new patch_level=0x%08x\n",
290 ucode_new_rev);
291
292 if (ucode_loaded || !ucode_size || !initrd_start)
293 return 0;
294
295 ucode = (void *)(initrd_start + ucode_offset);
296 ret = load_microcode_amd(0, ucode, ucode_size);
297 if (ret != UCODE_OK)
298 return -EINVAL;
299
300 ucode_loaded = true;
301 return 0;
302}
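apply_ucode_in_initrd() walks the AMD container format whose definitions moved into <asm/microcode_amd.h> earlier in this diff: a 12-byte container header, the equivalence table, then a sequence of 8-byte section headers, each followed by one patch. A compact summary of the layout the loop assumes (field grouping is illustrative; the constants match the ones removed from microcode_amd.c above):

	/*
	 * Container layout assumed by apply_ucode_in_initrd() (sketch):
	 *
	 *   u32 magic;          UCODE_MAGIC (0x00414d44)
	 *   u32 table_type;     UCODE_EQUIV_CPU_TABLE_TYPE (0)
	 *   u32 table_size;     size in bytes of the equivalence table
	 *   struct equiv_cpu_entry equiv_table[];
	 *   then, repeated until the container ends:
	 *     u32 section_type; UCODE_UCODE_TYPE (1)
	 *     u32 section_size; size in bytes of the patch that follows
	 *     struct microcode_amd patch;
	 *
	 * Several containers may be concatenated; the loop skips ahead to the
	 * next UCODE_MAGIC when the equivalence table does not match the CPU.
	 */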
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 22db92bbdf1a..15c987698b0f 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -468,7 +468,7 @@ static struct syscore_ops mc_syscore_ops = {
468 .resume = mc_bp_resume, 468 .resume = mc_bp_resume,
469}; 469};
470 470
471static __cpuinit int 471static int
472mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) 472mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
473{ 473{
474 unsigned int cpu = (unsigned long)hcpu; 474 unsigned int cpu = (unsigned long)hcpu;
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
index 833d51d6ee06..be7f8514f577 100644
--- a/arch/x86/kernel/microcode_core_early.c
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -18,6 +18,7 @@
18 */ 18 */
19#include <linux/module.h> 19#include <linux/module.h>
20#include <asm/microcode_intel.h> 20#include <asm/microcode_intel.h>
21#include <asm/microcode_amd.h>
21#include <asm/processor.h> 22#include <asm/processor.h>
22 23
23#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) 24#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
@@ -40,7 +41,7 @@
40 * 41 *
41 * x86_vendor() gets vendor information directly through cpuid. 42 * x86_vendor() gets vendor information directly through cpuid.
42 */ 43 */
43static int __cpuinit x86_vendor(void) 44static int x86_vendor(void)
44{ 45{
45 u32 eax = 0x00000000; 46 u32 eax = 0x00000000;
46 u32 ebx, ecx = 0, edx; 47 u32 ebx, ecx = 0, edx;
@@ -56,7 +57,7 @@ static int __cpuinit x86_vendor(void)
56 return X86_VENDOR_UNKNOWN; 57 return X86_VENDOR_UNKNOWN;
57} 58}
58 59
59static int __cpuinit x86_family(void) 60static int x86_family(void)
60{ 61{
61 u32 eax = 0x00000001; 62 u32 eax = 0x00000001;
62 u32 ebx, ecx = 0, edx; 63 u32 ebx, ecx = 0, edx;
@@ -81,11 +82,21 @@ void __init load_ucode_bsp(void)
81 vendor = x86_vendor(); 82 vendor = x86_vendor();
82 x86 = x86_family(); 83 x86 = x86_family();
83 84
84 if (vendor == X86_VENDOR_INTEL && x86 >= 6) 85 switch (vendor) {
85 load_ucode_intel_bsp(); 86 case X86_VENDOR_INTEL:
87 if (x86 >= 6)
88 load_ucode_intel_bsp();
89 break;
90 case X86_VENDOR_AMD:
91 if (x86 >= 0x10)
92 load_ucode_amd_bsp();
93 break;
94 default:
95 break;
96 }
86} 97}
87 98
88void __cpuinit load_ucode_ap(void) 99void load_ucode_ap(void)
89{ 100{
90 int vendor, x86; 101 int vendor, x86;
91 102
@@ -95,6 +106,36 @@ void __cpuinit load_ucode_ap(void)
95 vendor = x86_vendor(); 106 vendor = x86_vendor();
96 x86 = x86_family(); 107 x86 = x86_family();
97 108
98 if (vendor == X86_VENDOR_INTEL && x86 >= 6) 109 switch (vendor) {
99 load_ucode_intel_ap(); 110 case X86_VENDOR_INTEL:
111 if (x86 >= 6)
112 load_ucode_intel_ap();
113 break;
114 case X86_VENDOR_AMD:
115 if (x86 >= 0x10)
116 load_ucode_amd_ap();
117 break;
118 default:
119 break;
120 }
121}
122
123int __init save_microcode_in_initrd(void)
124{
125 struct cpuinfo_x86 *c = &boot_cpu_data;
126
127 switch (c->x86_vendor) {
128 case X86_VENDOR_INTEL:
129 if (c->x86 >= 6)
130 save_microcode_in_initrd_intel();
131 break;
132 case X86_VENDOR_AMD:
133 if (c->x86 >= 0x10)
134 save_microcode_in_initrd_amd();
135 break;
136 default:
137 break;
138 }
139
140 return 0;
100} 141}
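Early loading is gated per vendor and family: Intel from family 6 up, AMD from family 0x10 (Fam10h) up. Restated as a single predicate, mirroring the switch statements above (sketch only):

	/* Sketch: which CPUs the early loader will even consider. */
	static bool early_ucode_supported(int vendor, int family)
	{
		return (vendor == X86_VENDOR_INTEL && family >= 6) ||
		       (vendor == X86_VENDOR_AMD && family >= 0x10);
	}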
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
index 2e9e12871c2b..1575deb2e636 100644
--- a/arch/x86/kernel/microcode_intel_early.c
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -34,7 +34,7 @@ struct mc_saved_data {
34 struct microcode_intel **mc_saved; 34 struct microcode_intel **mc_saved;
35} mc_saved_data; 35} mc_saved_data;
36 36
37static enum ucode_state __cpuinit 37static enum ucode_state
38generic_load_microcode_early(struct microcode_intel **mc_saved_p, 38generic_load_microcode_early(struct microcode_intel **mc_saved_p,
39 unsigned int mc_saved_count, 39 unsigned int mc_saved_count,
40 struct ucode_cpu_info *uci) 40 struct ucode_cpu_info *uci)
@@ -69,7 +69,7 @@ out:
69 return state; 69 return state;
70} 70}
71 71
72static void __cpuinit 72static void
73microcode_pointer(struct microcode_intel **mc_saved, 73microcode_pointer(struct microcode_intel **mc_saved,
74 unsigned long *mc_saved_in_initrd, 74 unsigned long *mc_saved_in_initrd,
75 unsigned long initrd_start, int mc_saved_count) 75 unsigned long initrd_start, int mc_saved_count)
@@ -82,7 +82,7 @@ microcode_pointer(struct microcode_intel **mc_saved,
82} 82}
83 83
84#ifdef CONFIG_X86_32 84#ifdef CONFIG_X86_32
85static void __cpuinit 85static void
86microcode_phys(struct microcode_intel **mc_saved_tmp, 86microcode_phys(struct microcode_intel **mc_saved_tmp,
87 struct mc_saved_data *mc_saved_data) 87 struct mc_saved_data *mc_saved_data)
88{ 88{
@@ -101,7 +101,7 @@ microcode_phys(struct microcode_intel **mc_saved_tmp,
101} 101}
102#endif 102#endif
103 103
104static enum ucode_state __cpuinit 104static enum ucode_state
105load_microcode(struct mc_saved_data *mc_saved_data, 105load_microcode(struct mc_saved_data *mc_saved_data,
106 unsigned long *mc_saved_in_initrd, 106 unsigned long *mc_saved_in_initrd,
107 unsigned long initrd_start, 107 unsigned long initrd_start,
@@ -375,7 +375,7 @@ do { \
375#define native_wrmsr(msr, low, high) \ 375#define native_wrmsr(msr, low, high) \
376 native_write_msr(msr, low, high); 376 native_write_msr(msr, low, high);
377 377
378static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci) 378static int collect_cpu_info_early(struct ucode_cpu_info *uci)
379{ 379{
380 unsigned int val[2]; 380 unsigned int val[2];
381 u8 x86, x86_model; 381 u8 x86, x86_model;
@@ -529,7 +529,7 @@ int save_mc_for_early(u8 *mc)
529 */ 529 */
530 ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); 530 ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
531 if (ret) { 531 if (ret) {
532 pr_err("Can not save microcode patch.\n"); 532 pr_err("Cannot save microcode patch.\n");
533 goto out; 533 goto out;
534 } 534 }
535 535
@@ -584,7 +584,7 @@ scan_microcode(unsigned long start, unsigned long end,
584/* 584/*
585 * Print ucode update info. 585 * Print ucode update info.
586 */ 586 */
587static void __cpuinit 587static void
588print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) 588print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
589{ 589{
590 int cpu = smp_processor_id(); 590 int cpu = smp_processor_id();
@@ -605,7 +605,7 @@ static int current_mc_date;
605/* 605/*
606 * Print early updated ucode info after printk works. This is delayed info dump. 606 * Print early updated ucode info after printk works. This is delayed info dump.
607 */ 607 */
608void __cpuinit show_ucode_info_early(void) 608void show_ucode_info_early(void)
609{ 609{
610 struct ucode_cpu_info uci; 610 struct ucode_cpu_info uci;
611 611
@@ -621,7 +621,7 @@ void __cpuinit show_ucode_info_early(void)
621 * mc_saved_data.mc_saved and delay printing microcode info in 621 * mc_saved_data.mc_saved and delay printing microcode info in
622 * show_ucode_info_early() until printk() works. 622 * show_ucode_info_early() until printk() works.
623 */ 623 */
624static void __cpuinit print_ucode(struct ucode_cpu_info *uci) 624static void print_ucode(struct ucode_cpu_info *uci)
625{ 625{
626 struct microcode_intel *mc_intel; 626 struct microcode_intel *mc_intel;
627 int *delay_ucode_info_p; 627 int *delay_ucode_info_p;
@@ -643,12 +643,12 @@ static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
643 * Flush global tlb. We only do this in x86_64 where paging has been enabled 643 * Flush global tlb. We only do this in x86_64 where paging has been enabled
644 * already and PGE should be enabled as well. 644 * already and PGE should be enabled as well.
645 */ 645 */
646static inline void __cpuinit flush_tlb_early(void) 646static inline void flush_tlb_early(void)
647{ 647{
648 __native_flush_tlb_global_irq_disabled(); 648 __native_flush_tlb_global_irq_disabled();
649} 649}
650 650
651static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci) 651static inline void print_ucode(struct ucode_cpu_info *uci)
652{ 652{
653 struct microcode_intel *mc_intel; 653 struct microcode_intel *mc_intel;
654 654
@@ -660,8 +660,8 @@ static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
660} 660}
661#endif 661#endif
662 662
663static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data, 663static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
664 struct ucode_cpu_info *uci) 664 struct ucode_cpu_info *uci)
665{ 665{
666 struct microcode_intel *mc_intel; 666 struct microcode_intel *mc_intel;
667 unsigned int val[2]; 667 unsigned int val[2];
@@ -699,7 +699,7 @@ static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data,
699 * This function converts microcode patch offsets previously stored in 699 * This function converts microcode patch offsets previously stored in
700 * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. 700 * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
701 */ 701 */
702int __init save_microcode_in_initrd(void) 702int __init save_microcode_in_initrd_intel(void)
703{ 703{
704 unsigned int count = mc_saved_data.mc_saved_count; 704 unsigned int count = mc_saved_data.mc_saved_count;
705 struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; 705 struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
@@ -711,7 +711,7 @@ int __init save_microcode_in_initrd(void)
711 microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); 711 microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
712 ret = save_microcode(&mc_saved_data, mc_saved, count); 712 ret = save_microcode(&mc_saved_data, mc_saved, count);
713 if (ret) 713 if (ret)
714 pr_err("Can not save microcod patches from initrd"); 714 pr_err("Cannot save microcode patches from initrd.\n");
715 715
716 show_saved_mc(); 716 show_saved_mc();
717 717
@@ -763,7 +763,7 @@ load_ucode_intel_bsp(void)
763#endif 763#endif
764} 764}
765 765
766void __cpuinit load_ucode_intel_ap(void) 766void load_ucode_intel_ap(void)
767{ 767{
768 struct mc_saved_data *mc_saved_data_p; 768 struct mc_saved_data *mc_saved_data_p;
769 struct ucode_cpu_info uci; 769 struct ucode_cpu_info uci;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index ac861b8348e2..f4c886d9165c 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -24,14 +24,14 @@ struct pci_hostbridge_probe {
24 u32 device; 24 u32 device;
25}; 25};
26 26
27static u64 __cpuinitdata fam10h_pci_mmconf_base; 27static u64 fam10h_pci_mmconf_base;
28 28
29static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { 29static struct pci_hostbridge_probe pci_probes[] = {
30 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, 30 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
31 { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, 31 { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
32}; 32};
33 33
34static int __cpuinit cmp_range(const void *x1, const void *x2) 34static int cmp_range(const void *x1, const void *x2)
35{ 35{
36 const struct range *r1 = x1; 36 const struct range *r1 = x1;
37 const struct range *r2 = x2; 37 const struct range *r2 = x2;
@@ -49,7 +49,7 @@ static int __cpuinit cmp_range(const void *x1, const void *x2)
49/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */ 49/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */
50#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) 50#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
51#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40)) 51#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40))
52static void __cpuinit get_fam10h_pci_mmconf_base(void) 52static void get_fam10h_pci_mmconf_base(void)
53{ 53{
54 int i; 54 int i;
55 unsigned bus; 55 unsigned bus;
@@ -166,7 +166,7 @@ out:
166 fam10h_pci_mmconf_base = base; 166 fam10h_pci_mmconf_base = base;
167} 167}
168 168
169void __cpuinit fam10h_check_enable_mmcfg(void) 169void fam10h_check_enable_mmcfg(void)
170{ 170{
171 u64 val; 171 u64 val;
172 u32 address; 172 u32 address;
@@ -230,7 +230,7 @@ static const struct dmi_system_id __initconst mmconf_dmi_table[] = {
230 {} 230 {}
231}; 231};
232 232
233/* Called from a __cpuinit function, but only on the BSP. */ 233/* Called from a non __init function, but only on the BSP. */
234void __ref check_enable_amd_mmconf_dmi(void) 234void __ref check_enable_amd_mmconf_dmi(void)
235{ 235{
236 dmi_check_system(mmconf_dmi_table); 236 dmi_check_system(mmconf_dmi_table);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index ce130493b802..88458faea2f8 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -200,7 +200,7 @@ static const struct file_operations msr_fops = {
200 .compat_ioctl = msr_ioctl, 200 .compat_ioctl = msr_ioctl,
201}; 201};
202 202
203static int __cpuinit msr_device_create(int cpu) 203static int msr_device_create(int cpu)
204{ 204{
205 struct device *dev; 205 struct device *dev;
206 206
@@ -214,8 +214,8 @@ static void msr_device_destroy(int cpu)
214 device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); 214 device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
215} 215}
216 216
217static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, 217static int msr_class_cpu_callback(struct notifier_block *nfb,
218 unsigned long action, void *hcpu) 218 unsigned long action, void *hcpu)
219{ 219{
220 unsigned int cpu = (unsigned long)hcpu; 220 unsigned int cpu = (unsigned long)hcpu;
221 int err = 0; 221 int err = 0;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 60308053fdb2..ba77ebc2c353 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -14,6 +14,7 @@
14#include <linux/kprobes.h> 14#include <linux/kprobes.h>
15#include <linux/kdebug.h> 15#include <linux/kdebug.h>
16#include <linux/nmi.h> 16#include <linux/nmi.h>
17#include <linux/debugfs.h>
17#include <linux/delay.h> 18#include <linux/delay.h>
18#include <linux/hardirq.h> 19#include <linux/hardirq.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
@@ -29,6 +30,9 @@
29#include <asm/nmi.h> 30#include <asm/nmi.h>
30#include <asm/x86_init.h> 31#include <asm/x86_init.h>
31 32
33#define CREATE_TRACE_POINTS
34#include <trace/events/nmi.h>
35
32struct nmi_desc { 36struct nmi_desc {
33 spinlock_t lock; 37 spinlock_t lock;
34 struct list_head head; 38 struct list_head head;
@@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
82 86
83#define nmi_to_desc(type) (&nmi_desc[type]) 87#define nmi_to_desc(type) (&nmi_desc[type])
84 88
89static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
90static int __init nmi_warning_debugfs(void)
91{
92 debugfs_create_u64("nmi_longest_ns", 0644,
93 arch_debugfs_dir, &nmi_longest_ns);
94 return 0;
95}
96fs_initcall(nmi_warning_debugfs);
97
85static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 98static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
86{ 99{
87 struct nmi_desc *desc = nmi_to_desc(type); 100 struct nmi_desc *desc = nmi_to_desc(type);
@@ -96,8 +109,28 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
96 * can be latched at any given time. Walk the whole list 109 * can be latched at any given time. Walk the whole list
97 * to handle those situations. 110 * to handle those situations.
98 */ 111 */
99 list_for_each_entry_rcu(a, &desc->head, list) 112 list_for_each_entry_rcu(a, &desc->head, list) {
100 handled += a->handler(type, regs); 113 u64 before, delta, whole_msecs;
114 int remainder_ns, decimal_msecs, thishandled;
115
116 before = local_clock();
117 thishandled = a->handler(type, regs);
118 handled += thishandled;
119 delta = local_clock() - before;
120 trace_nmi_handler(a->handler, (int)delta, thishandled);
121
122 if (delta < nmi_longest_ns)
123 continue;
124
125 nmi_longest_ns = delta;
126 whole_msecs = delta;
127 remainder_ns = do_div(whole_msecs, (1000 * 1000));
128 decimal_msecs = remainder_ns / 1000;
129 printk_ratelimited(KERN_INFO
130 "INFO: NMI handler (%ps) took too long to run: "
131 "%lld.%03d msecs\n", a->handler, whole_msecs,
132 decimal_msecs);
133 }
101 134
102 rcu_read_unlock(); 135 rcu_read_unlock();
103 136
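The nmi.c hunk above times every registered NMI handler with local_clock(), feeds the duration to a new tracepoint, and rate-limit-warns when a handler exceeds the current nmi_longest_ns record (which is also exposed, writable, in debugfs). A minimal userspace sketch of the same measure-and-ratchet pattern, with clock_gettime() standing in for local_clock(); all names below are illustrative, not kernel APIs:

    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    static uint64_t longest_ns = 1 * 1000 * 1000;   /* 1 ms, like nmi_longest_ns */

    static uint64_t now_ns(void)
    {
            struct timespec ts;
            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    static int run_timed(int (*handler)(void))
    {
            uint64_t before = now_ns();
            int handled = handler();
            uint64_t delta = now_ns() - before;

            if (delta >= longest_ns) {
                    longest_ns = delta;     /* ratchet: only a new record warns again */
                    printf("handler took %llu.%03llu msecs\n",
                           (unsigned long long)(delta / 1000000),
                           (unsigned long long)(delta % 1000000) / 1000);
            }
            return handled;
    }

    static int slow_handler(void)
    {
            struct timespec t = { 0, 2 * 1000 * 1000 };     /* ~2 ms of work */
            nanosleep(&t, NULL);
            return 1;
    }

    int main(void)
    {
            run_timed(slow_handler);        /* exceeds 1 ms: warns and raises the bar */
            run_timed(slow_handler);        /* similar duration: usually below the new bar */
            return 0;
    }

Ratcheting the threshold keeps the log quiet once a slow handler has been reported, which is why the kernel makes nmi_longest_ns tunable through debugfs.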
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 81a5f5e8f142..83369e5a1d27 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -398,7 +398,7 @@ static void amd_e400_idle(void)
398 default_idle(); 398 default_idle();
399} 399}
400 400
401void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 401void select_idle_routine(const struct cpuinfo_x86 *c)
402{ 402{
403#ifdef CONFIG_SMP 403#ifdef CONFIG_SMP
404 if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) 404 if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7305f7dfc7ab..f8adefca71dc 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -110,11 +110,16 @@ void __show_regs(struct pt_regs *regs, int all)
110 get_debugreg(d1, 1); 110 get_debugreg(d1, 1);
111 get_debugreg(d2, 2); 111 get_debugreg(d2, 2);
112 get_debugreg(d3, 3); 112 get_debugreg(d3, 3);
113 printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
114 d0, d1, d2, d3);
115
116 get_debugreg(d6, 6); 113 get_debugreg(d6, 6);
117 get_debugreg(d7, 7); 114 get_debugreg(d7, 7);
115
116 /* Only print out debug registers if they are in their non-default state. */
117 if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
118 (d6 == DR6_RESERVED) && (d7 == 0x400))
119 return;
120
121 printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
122 d0, d1, d2, d3);
118 printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", 123 printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
119 d6, d7); 124 d6, d7);
120} 125}
@@ -147,7 +152,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
147 childregs->bp = arg; 152 childregs->bp = arg;
148 childregs->orig_ax = -1; 153 childregs->orig_ax = -1;
149 childregs->cs = __KERNEL_CS | get_kernel_rpl(); 154 childregs->cs = __KERNEL_CS | get_kernel_rpl();
150 childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; 155 childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
151 p->fpu_counter = 0; 156 p->fpu_counter = 0;
152 p->thread.io_bitmap_ptr = NULL; 157 p->thread.io_bitmap_ptr = NULL;
153 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 158 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 355ae06dbf94..05646bab4ca6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -105,11 +105,18 @@ void __show_regs(struct pt_regs *regs, int all)
105 get_debugreg(d0, 0); 105 get_debugreg(d0, 0);
106 get_debugreg(d1, 1); 106 get_debugreg(d1, 1);
107 get_debugreg(d2, 2); 107 get_debugreg(d2, 2);
108 printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
109 get_debugreg(d3, 3); 108 get_debugreg(d3, 3);
110 get_debugreg(d6, 6); 109 get_debugreg(d6, 6);
111 get_debugreg(d7, 7); 110 get_debugreg(d7, 7);
111
112 /* Only print out debug registers if they are in their non-default state. */
113 if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
114 (d6 == DR6_RESERVED) && (d7 == 0x400))
115 return;
116
117 printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
112 printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); 118 printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
119
113} 120}
114 121
115void release_thread(struct task_struct *dead_task) 122void release_thread(struct task_struct *dead_task)
@@ -176,7 +183,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
176 childregs->bp = arg; 183 childregs->bp = arg;
177 childregs->orig_ax = -1; 184 childregs->orig_ax = -1;
178 childregs->cs = __KERNEL_CS | get_kernel_rpl(); 185 childregs->cs = __KERNEL_CS | get_kernel_rpl();
179 childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; 186 childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
180 return 0; 187 return 0;
181 } 188 }
182 *childregs = *current_pt_regs(); 189 *childregs = *current_pt_regs();
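Both __show_regs() changes above gate the DR0..DR7 dump on whether the registers still hold their reset defaults, so ordinary register dumps stay shorter. A small sketch of that check; the constants mirror the x86 reset state assumed by the patch (DR6 reserved bits set, DR7 == 0x400), and the function itself is illustrative, not kernel code:

    #include <stdio.h>

    #define DR6_RESERVED    0xffff0ff0UL    /* DR6 reads back with these bits set */
    #define DR7_DEFAULT     0x400UL         /* DR7 reset value */

    static void show_debugregs(unsigned long d0, unsigned long d1,
                               unsigned long d2, unsigned long d3,
                               unsigned long d6, unsigned long d7)
    {
            /* All registers still at their reset state: say nothing. */
            if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0 &&
                d6 == DR6_RESERVED && d7 == DR7_DEFAULT)
                    return;

            printf("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
            printf("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
    }

    int main(void)
    {
            show_debugregs(0, 0, 0, 0, DR6_RESERVED, DR7_DEFAULT);  /* silent */
            show_debugregs(0x1000, 0, 0, 0, DR6_RESERVED, 0x401);   /* prints */
            return 0;
    }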
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 29a8120e6fe8..7461f50d5bb1 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -601,30 +601,48 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
601 return dr7; 601 return dr7;
602} 602}
603 603
604static int 604static int ptrace_fill_bp_fields(struct perf_event_attr *attr,
605ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, 605 int len, int type, bool disabled)
606 struct task_struct *tsk, int disabled) 606{
607 int err, bp_len, bp_type;
608
609 err = arch_bp_generic_fields(len, type, &bp_len, &bp_type);
610 if (!err) {
611 attr->bp_len = bp_len;
612 attr->bp_type = bp_type;
613 attr->disabled = disabled;
614 }
615
616 return err;
617}
618
619static struct perf_event *
620ptrace_register_breakpoint(struct task_struct *tsk, int len, int type,
621 unsigned long addr, bool disabled)
607{ 622{
608 int err;
609 int gen_len, gen_type;
610 struct perf_event_attr attr; 623 struct perf_event_attr attr;
624 int err;
611 625
612 /* 626 ptrace_breakpoint_init(&attr);
613 * We should have at least an inactive breakpoint at this 627 attr.bp_addr = addr;
614 * slot. It means the user is writing dr7 without having
615 * written the address register first
616 */
617 if (!bp)
618 return -EINVAL;
619 628
620 err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); 629 err = ptrace_fill_bp_fields(&attr, len, type, disabled);
621 if (err) 630 if (err)
622 return err; 631 return ERR_PTR(err);
632
633 return register_user_hw_breakpoint(&attr, ptrace_triggered,
634 NULL, tsk);
635}
623 636
624 attr = bp->attr; 637static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
625 attr.bp_len = gen_len; 638 int disabled)
626 attr.bp_type = gen_type; 639{
627 attr.disabled = disabled; 640 struct perf_event_attr attr = bp->attr;
641 int err;
642
643 err = ptrace_fill_bp_fields(&attr, len, type, disabled);
644 if (err)
645 return err;
628 646
629 return modify_user_hw_breakpoint(bp, &attr); 647 return modify_user_hw_breakpoint(bp, &attr);
630} 648}
@@ -634,67 +652,50 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
634 */ 652 */
635static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) 653static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
636{ 654{
637 struct thread_struct *thread = &(tsk->thread); 655 struct thread_struct *thread = &tsk->thread;
638 unsigned long old_dr7; 656 unsigned long old_dr7;
639 int i, orig_ret = 0, rc = 0; 657 bool second_pass = false;
640 int enabled, second_pass = 0; 658 int i, rc, ret = 0;
641 unsigned len, type;
642 struct perf_event *bp;
643
644 if (ptrace_get_breakpoints(tsk) < 0)
645 return -ESRCH;
646 659
647 data &= ~DR_CONTROL_RESERVED; 660 data &= ~DR_CONTROL_RESERVED;
648 old_dr7 = ptrace_get_dr7(thread->ptrace_bps); 661 old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
662
649restore: 663restore:
650 /* 664 rc = 0;
651 * Loop through all the hardware breakpoints, making the
652 * appropriate changes to each.
653 */
654 for (i = 0; i < HBP_NUM; i++) { 665 for (i = 0; i < HBP_NUM; i++) {
655 enabled = decode_dr7(data, i, &len, &type); 666 unsigned len, type;
656 bp = thread->ptrace_bps[i]; 667 bool disabled = !decode_dr7(data, i, &len, &type);
657 668 struct perf_event *bp = thread->ptrace_bps[i];
658 if (!enabled) { 669
659 if (bp) { 670 if (!bp) {
660 /* 671 if (disabled)
661 * Don't unregister the breakpoints right-away, 672 continue;
662 * unless all register_user_hw_breakpoint() 673
663 * requests have succeeded. This prevents 674 bp = ptrace_register_breakpoint(tsk,
664 * any window of opportunity for debug 675 len, type, 0, disabled);
665 * register grabbing by other users. 676 if (IS_ERR(bp)) {
666 */ 677 rc = PTR_ERR(bp);
667 if (!second_pass) 678 break;
668 continue;
669
670 rc = ptrace_modify_breakpoint(bp, len, type,
671 tsk, 1);
672 if (rc)
673 break;
674 } 679 }
680
681 thread->ptrace_bps[i] = bp;
675 continue; 682 continue;
676 } 683 }
677 684
678 rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); 685 rc = ptrace_modify_breakpoint(bp, len, type, disabled);
679 if (rc) 686 if (rc)
680 break; 687 break;
681 } 688 }
682 /* 689
683 * Make a second pass to free the remaining unused breakpoints 690 /* Restore if the first pass failed, second_pass shouldn't fail. */
684 * or to restore the original breakpoints if an error occurred. 691 if (rc && !WARN_ON(second_pass)) {
685 */ 692 ret = rc;
686 if (!second_pass) { 693 data = old_dr7;
687 second_pass = 1; 694 second_pass = true;
688 if (rc < 0) {
689 orig_ret = rc;
690 data = old_dr7;
691 }
692 goto restore; 695 goto restore;
693 } 696 }
694 697
695 ptrace_put_breakpoints(tsk); 698 return ret;
696
697 return ((orig_ret < 0) ? orig_ret : rc);
698} 699}
699 700
700/* 701/*
@@ -702,25 +703,17 @@ restore:
702 */ 703 */
703static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) 704static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
704{ 705{
705 struct thread_struct *thread = &(tsk->thread); 706 struct thread_struct *thread = &tsk->thread;
706 unsigned long val = 0; 707 unsigned long val = 0;
707 708
708 if (n < HBP_NUM) { 709 if (n < HBP_NUM) {
709 struct perf_event *bp; 710 struct perf_event *bp = thread->ptrace_bps[n];
710 711
711 if (ptrace_get_breakpoints(tsk) < 0) 712 if (bp)
712 return -ESRCH;
713
714 bp = thread->ptrace_bps[n];
715 if (!bp)
716 val = 0;
717 else
718 val = bp->hw.info.address; 713 val = bp->hw.info.address;
719
720 ptrace_put_breakpoints(tsk);
721 } else if (n == 6) { 714 } else if (n == 6) {
722 val = thread->debugreg6; 715 val = thread->debugreg6;
723 } else if (n == 7) { 716 } else if (n == 7) {
724 val = thread->ptrace_dr7; 717 val = thread->ptrace_dr7;
725 } 718 }
726 return val; 719 return val;
@@ -729,29 +722,14 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
729static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, 722static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
730 unsigned long addr) 723 unsigned long addr)
731{ 724{
732 struct perf_event *bp;
733 struct thread_struct *t = &tsk->thread; 725 struct thread_struct *t = &tsk->thread;
734 struct perf_event_attr attr; 726 struct perf_event *bp = t->ptrace_bps[nr];
735 int err = 0; 727 int err = 0;
736 728
737 if (ptrace_get_breakpoints(tsk) < 0) 729 if (!bp) {
738 return -ESRCH;
739
740 if (!t->ptrace_bps[nr]) {
741 ptrace_breakpoint_init(&attr);
742 /*
743 * Put stub len and type to register (reserve) an inactive but
744 * correct bp
745 */
746 attr.bp_addr = addr;
747 attr.bp_len = HW_BREAKPOINT_LEN_1;
748 attr.bp_type = HW_BREAKPOINT_W;
749 attr.disabled = 1;
750
751 bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
752 NULL, tsk);
753
754 /* 730 /*
731 * Put stub len and type to create an inactive but correct bp.
732 *
755 * CHECKME: the previous code returned -EIO if the addr wasn't 733 * CHECKME: the previous code returned -EIO if the addr wasn't
756 * a valid task virtual addr. The new one will return -EINVAL in 734 * a valid task virtual addr. The new one will return -EINVAL in
757 * this case. 735 * this case.
@@ -760,22 +738,20 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
760 * writing for the user. And anyway this is the previous 738 * writing for the user. And anyway this is the previous
761 * behaviour. 739 * behaviour.
762 */ 740 */
763 if (IS_ERR(bp)) { 741 bp = ptrace_register_breakpoint(tsk,
742 X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE,
743 addr, true);
744 if (IS_ERR(bp))
764 err = PTR_ERR(bp); 745 err = PTR_ERR(bp);
765 goto put; 746 else
766 } 747 t->ptrace_bps[nr] = bp;
767
768 t->ptrace_bps[nr] = bp;
769 } else { 748 } else {
770 bp = t->ptrace_bps[nr]; 749 struct perf_event_attr attr = bp->attr;
771 750
772 attr = bp->attr;
773 attr.bp_addr = addr; 751 attr.bp_addr = addr;
774 err = modify_user_hw_breakpoint(bp, &attr); 752 err = modify_user_hw_breakpoint(bp, &attr);
775 } 753 }
776 754
777put:
778 ptrace_put_breakpoints(tsk);
779 return err; 755 return err;
780} 756}
781 757
@@ -785,30 +761,20 @@ put:
785static int ptrace_set_debugreg(struct task_struct *tsk, int n, 761static int ptrace_set_debugreg(struct task_struct *tsk, int n,
786 unsigned long val) 762 unsigned long val)
787{ 763{
788 struct thread_struct *thread = &(tsk->thread); 764 struct thread_struct *thread = &tsk->thread;
789 int rc = 0;
790
791 /* There are no DR4 or DR5 registers */ 765 /* There are no DR4 or DR5 registers */
792 if (n == 4 || n == 5) 766 int rc = -EIO;
793 return -EIO;
794 767
795 if (n == 6) {
796 thread->debugreg6 = val;
797 goto ret_path;
798 }
799 if (n < HBP_NUM) { 768 if (n < HBP_NUM) {
800 rc = ptrace_set_breakpoint_addr(tsk, n, val); 769 rc = ptrace_set_breakpoint_addr(tsk, n, val);
801 if (rc) 770 } else if (n == 6) {
802 return rc; 771 thread->debugreg6 = val;
803 } 772 rc = 0;
804 /* All that's left is DR7 */ 773 } else if (n == 7) {
805 if (n == 7) {
806 rc = ptrace_write_dr7(tsk, val); 774 rc = ptrace_write_dr7(tsk, val);
807 if (!rc) 775 if (!rc)
808 thread->ptrace_dr7 = val; 776 thread->ptrace_dr7 = val;
809 } 777 }
810
811ret_path:
812 return rc; 778 return rc;
813} 779}
814 780
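The ptrace_write_dr7() rewrite above keeps the original two-pass idea but expresses it with a bool and a single restore label: apply the new dr7 state, and if any slot fails, rerun the same loop with the old value, which is expected to succeed. A compact sketch of that apply-or-roll-back loop, with apply_slot() as an invented stand-in for the breakpoint register/modify calls:

    #include <stdio.h>
    #include <stdbool.h>

    #define NSLOTS 4

    /* Invented stand-in for registering/modifying one breakpoint slot. */
    static int apply_slot(int slot, unsigned long data)
    {
            if (slot == 2 && (data & 1))    /* pretend slot 2 rejects odd values */
                    return -1;
            printf("slot %d <- %#lx\n", slot, data);
            return 0;
    }

    static int write_all(unsigned long data, unsigned long old_data)
    {
            bool second_pass = false;
            int i, rc, ret = 0;

    restore:
            rc = 0;
            for (i = 0; i < NSLOTS; i++) {
                    rc = apply_slot(i, data);
                    if (rc)
                            break;
            }

            /* Restore if the first pass failed; the second pass must not fail. */
            if (rc && !second_pass) {
                    ret = rc;
                    data = old_data;
                    second_pass = true;
                    goto restore;
            }
            return ret;
    }

    int main(void)
    {
            /* The new value trips the failure, so the old one is written back. */
            return write_all(0x3, 0x2) ? 1 : 0;
    }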
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 76fa1e9a2b39..563ed91e6faa 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -36,22 +36,6 @@ void (*pm_power_off)(void);
36EXPORT_SYMBOL(pm_power_off); 36EXPORT_SYMBOL(pm_power_off);
37 37
38static const struct desc_ptr no_idt = {}; 38static const struct desc_ptr no_idt = {};
39static int reboot_mode;
40enum reboot_type reboot_type = BOOT_ACPI;
41int reboot_force;
42
43/*
44 * This variable is used privately to keep track of whether or not
45 * reboot_type is still set to its default value (i.e., reboot= hasn't
46 * been set on the command line). This is needed so that we can
47 * suppress DMI scanning for reboot quirks. Without it, it's
48 * impossible to override a faulty reboot quirk without recompiling.
49 */
50static int reboot_default = 1;
51
52#ifdef CONFIG_SMP
53static int reboot_cpu = -1;
54#endif
55 39
56/* 40/*
57 * This is set if we need to go through the 'emergency' path. 41 * This is set if we need to go through the 'emergency' path.
@@ -64,79 +48,6 @@ static int reboot_emergency;
64bool port_cf9_safe = false; 48bool port_cf9_safe = false;
65 49
66/* 50/*
67 * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
68 * warm Don't set the cold reboot flag
69 * cold Set the cold reboot flag
70 * bios Reboot by jumping through the BIOS
71 * smp Reboot by executing reset on BSP or other CPU
72 * triple Force a triple fault (init)
73 * kbd Use the keyboard controller. cold reset (default)
74 * acpi Use the RESET_REG in the FADT
75 * efi Use efi reset_system runtime service
76 * pci Use the so-called "PCI reset register", CF9
77 * force Avoid anything that could hang.
78 */
79static int __init reboot_setup(char *str)
80{
81 for (;;) {
82 /*
83 * Having anything passed on the command line via
84 * reboot= will cause us to disable DMI checking
85 * below.
86 */
87 reboot_default = 0;
88
89 switch (*str) {
90 case 'w':
91 reboot_mode = 0x1234;
92 break;
93
94 case 'c':
95 reboot_mode = 0;
96 break;
97
98#ifdef CONFIG_SMP
99 case 's':
100 if (isdigit(*(str+1))) {
101 reboot_cpu = (int) (*(str+1) - '0');
102 if (isdigit(*(str+2)))
103 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
104 }
105 /*
106 * We will leave sorting out the final value
107 * when we are ready to reboot, since we might not
108 * have detected BSP APIC ID or smp_num_cpu
109 */
110 break;
111#endif /* CONFIG_SMP */
112
113 case 'b':
114 case 'a':
115 case 'k':
116 case 't':
117 case 'e':
118 case 'p':
119 reboot_type = *str;
120 break;
121
122 case 'f':
123 reboot_force = 1;
124 break;
125 }
126
127 str = strchr(str, ',');
128 if (str)
129 str++;
130 else
131 break;
132 }
133 return 1;
134}
135
136__setup("reboot=", reboot_setup);
137
138
139/*
140 * Reboot options and system auto-detection code provided by 51 * Reboot options and system auto-detection code provided by
141 * Dell Inc. so their systems "just work". :-) 52 * Dell Inc. so their systems "just work". :-)
142 */ 53 */
@@ -536,6 +447,7 @@ static void native_machine_emergency_restart(void)
536 int i; 447 int i;
537 int attempt = 0; 448 int attempt = 0;
538 int orig_reboot_type = reboot_type; 449 int orig_reboot_type = reboot_type;
450 unsigned short mode;
539 451
540 if (reboot_emergency) 452 if (reboot_emergency)
541 emergency_vmx_disable_all(); 453 emergency_vmx_disable_all();
@@ -543,7 +455,8 @@ static void native_machine_emergency_restart(void)
543 tboot_shutdown(TB_SHUTDOWN_REBOOT); 455 tboot_shutdown(TB_SHUTDOWN_REBOOT);
544 456
545 /* Tell the BIOS if we want cold or warm reboot */ 457 /* Tell the BIOS if we want cold or warm reboot */
546 *((unsigned short *)__va(0x472)) = reboot_mode; 458 mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
459 *((unsigned short *)__va(0x472)) = mode;
547 460
548 for (;;) { 461 for (;;) {
549 /* Could also try the reset bit in the Hammer NB */ 462 /* Could also try the reset bit in the Hammer NB */
@@ -585,7 +498,7 @@ static void native_machine_emergency_restart(void)
585 498
586 case BOOT_EFI: 499 case BOOT_EFI:
587 if (efi_enabled(EFI_RUNTIME_SERVICES)) 500 if (efi_enabled(EFI_RUNTIME_SERVICES))
588 efi.reset_system(reboot_mode ? 501 efi.reset_system(reboot_mode == REBOOT_WARM ?
589 EFI_RESET_WARM : 502 EFI_RESET_WARM :
590 EFI_RESET_COLD, 503 EFI_RESET_COLD,
591 EFI_SUCCESS, 0, NULL); 504 EFI_SUCCESS, 0, NULL);
@@ -614,26 +527,10 @@ void native_machine_shutdown(void)
614{ 527{
615 /* Stop the cpus and apics */ 528 /* Stop the cpus and apics */
616#ifdef CONFIG_SMP 529#ifdef CONFIG_SMP
617
618 /* The boot cpu is always logical cpu 0 */
619 int reboot_cpu_id = 0;
620
621 /* See if there has been given a command line override */
622 if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
623 cpu_online(reboot_cpu))
624 reboot_cpu_id = reboot_cpu;
625
626 /* Make certain the cpu I'm about to reboot on is online */
627 if (!cpu_online(reboot_cpu_id))
628 reboot_cpu_id = smp_processor_id();
629
630 /* Make certain I only run on the appropriate processor */
631 set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
632
633 /* 530 /*
634 * O.K Now that I'm on the appropriate processor, stop all of the 531 * Stop all of the others. Also disable the local irq to
635 * others. Also disable the local irq to not receive the per-cpu 532 * not receive the per-cpu timer interrupt which may trigger
636 * timer interrupt which may trigger scheduler's load balance. 533 * scheduler's load balance.
637 */ 534 */
638 local_irq_disable(); 535 local_irq_disable();
639 stop_other_cpus(); 536 stop_other_cpus();
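With the private reboot= parser and reboot_cpu handling removed, reboot.c derives the legacy BIOS warm-boot flag (stored at physical address 0x472) from the generic reboot_mode enum at restart time. A sketch of that translation; the enum is a simplified copy of the generic kernel definition and the rest is illustrative:

    #include <stdio.h>

    /* Simplified version of the generic kernel enum the patch switches to. */
    enum reboot_mode {
            REBOOT_COLD = 0,
            REBOOT_WARM = 1,
    };

    static unsigned short bios_warm_flag(enum reboot_mode mode)
    {
            /* The real code stores this value at physical address 0x472. */
            return mode == REBOOT_WARM ? 0x1234 : 0;
    }

    int main(void)
    {
            printf("warm -> %#x\n", bios_warm_flag(REBOOT_WARM));
            printf("cold -> %#x\n", bios_warm_flag(REBOOT_COLD));
            return 0;
    }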
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 36818f8ec2be..e13f8e7c22a6 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -186,7 +186,7 @@ identity_mapped:
186 movl CP_PA_PGD(%ebx), %eax 186 movl CP_PA_PGD(%ebx), %eax
187 movl %eax, %cr3 187 movl %eax, %cr3
188 movl %cr0, %eax 188 movl %cr0, %eax
189 orl $(1<<31), %eax 189 orl $X86_CR0_PG, %eax
190 movl %eax, %cr0 190 movl %eax, %cr0
191 lea PAGE_SIZE(%edi), %esp 191 lea PAGE_SIZE(%edi), %esp
192 movl %edi, %eax 192 movl %edi, %eax
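The relocate_kernel_32.S change swaps the bare (1<<31) for the named X86_CR0_PG constant, keeping the generated value identical while documenting intent. The same idea expressed in C, assuming only the architectural CR0.PG bit position:

    #include <stdio.h>

    #define X86_CR0_PG (1UL << 31)          /* CR0 paging-enable bit */

    int main(void)
    {
            unsigned long cr0 = 0x11;       /* illustrative starting value */

            cr0 |= X86_CR0_PG;              /* same bits as (1<<31), clearer intent */
            printf("cr0 = %#lx\n", cr0);
            return 0;
    }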
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index f2bb9c96720a..3fd2c693e475 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -151,21 +151,21 @@ identity_mapped:
151 151
152 testq %r11, %r11 152 testq %r11, %r11
153 jnz 1f 153 jnz 1f
154 xorq %rax, %rax 154 xorl %eax, %eax
155 xorq %rbx, %rbx 155 xorl %ebx, %ebx
156 xorq %rcx, %rcx 156 xorl %ecx, %ecx
157 xorq %rdx, %rdx 157 xorl %edx, %edx
158 xorq %rsi, %rsi 158 xorl %esi, %esi
159 xorq %rdi, %rdi 159 xorl %edi, %edi
160 xorq %rbp, %rbp 160 xorl %ebp, %ebp
161 xorq %r8, %r8 161 xorl %r8d, %r8d
162 xorq %r9, %r9 162 xorl %r9d, %r9d
163 xorq %r10, %r10 163 xorl %r10d, %r10d
164 xorq %r11, %r11 164 xorl %r11d, %r11d
165 xorq %r12, %r12 165 xorl %r12d, %r12d
166 xorq %r13, %r13 166 xorl %r13d, %r13d
167 xorq %r14, %r14 167 xorl %r14d, %r14d
168 xorq %r15, %r15 168 xorl %r15d, %r15d
169 169
170 ret 170 ret
171 171
@@ -212,8 +212,8 @@ virtual_mapped:
212 /* Do the copies */ 212 /* Do the copies */
213swap_pages: 213swap_pages:
214 movq %rdi, %rcx /* Put the page_list in %rcx */ 214 movq %rdi, %rcx /* Put the page_list in %rcx */
215 xorq %rdi, %rdi 215 xorl %edi, %edi
216 xorq %rsi, %rsi 216 xorl %esi, %esi
217 jmp 1f 217 jmp 1f
218 218
2190: /* top, read another word for the indirection page */ 2190: /* top, read another word for the indirection page */
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 198eb201ed3b..0aa29394ed6f 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -38,8 +38,9 @@ EXPORT_SYMBOL(rtc_lock);
38 * jump to the next second precisely 500 ms later. Check the Motorola 38 * jump to the next second precisely 500 ms later. Check the Motorola
39 * MC146818A or Dallas DS12887 data sheet for details. 39 * MC146818A or Dallas DS12887 data sheet for details.
40 */ 40 */
41int mach_set_rtc_mmss(unsigned long nowtime) 41int mach_set_rtc_mmss(const struct timespec *now)
42{ 42{
43 unsigned long nowtime = now->tv_sec;
43 struct rtc_time tm; 44 struct rtc_time tm;
44 int retval = 0; 45 int retval = 0;
45 46
@@ -58,7 +59,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
58 return retval; 59 return retval;
59} 60}
60 61
61unsigned long mach_get_cmos_time(void) 62void mach_get_cmos_time(struct timespec *now)
62{ 63{
63 unsigned int status, year, mon, day, hour, min, sec, century = 0; 64 unsigned int status, year, mon, day, hour, min, sec, century = 0;
64 unsigned long flags; 65 unsigned long flags;
@@ -107,7 +108,8 @@ unsigned long mach_get_cmos_time(void)
107 } else 108 } else
108 year += CMOS_YEARS_OFFS; 109 year += CMOS_YEARS_OFFS;
109 110
110 return mktime(year, mon, day, hour, min, sec); 111 now->tv_sec = mktime(year, mon, day, hour, min, sec);
112 now->tv_nsec = 0;
111} 113}
112 114
113/* Routines for accessing the CMOS RAM/RTC. */ 115/* Routines for accessing the CMOS RAM/RTC. */
@@ -135,18 +137,13 @@ EXPORT_SYMBOL(rtc_cmos_write);
135 137
136int update_persistent_clock(struct timespec now) 138int update_persistent_clock(struct timespec now)
137{ 139{
138 return x86_platform.set_wallclock(now.tv_sec); 140 return x86_platform.set_wallclock(&now);
139} 141}
140 142
141/* not static: needed by APM */ 143/* not static: needed by APM */
142void read_persistent_clock(struct timespec *ts) 144void read_persistent_clock(struct timespec *ts)
143{ 145{
144 unsigned long retval; 146 x86_platform.get_wallclock(ts);
145
146 retval = x86_platform.get_wallclock();
147
148 ts->tv_sec = retval;
149 ts->tv_nsec = 0;
150} 147}
151 148
152 149
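The rtc.c conversion moves the wallclock hooks from passing bare seconds to filling a struct timespec, so read_persistent_clock() no longer has to zero tv_nsec by hand. A sketch of the new-style hook, with time() standing in for the CMOS read; the surrounding code is illustrative:

    #include <stdio.h>
    #include <time.h>

    /* New-style hook: fill in a timespec rather than returning seconds. */
    static void get_wallclock(struct timespec *now)
    {
            now->tv_sec = time(NULL);       /* the CMOS read in the real code */
            now->tv_nsec = 0;               /* the RTC only has 1 s resolution */
    }

    int main(void)
    {
            struct timespec ts;

            get_wallclock(&ts);
            printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
            return 0;
    }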
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 56f7fcfe7fa2..f8ec57815c05 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -170,7 +170,7 @@ static struct resource bss_resource = {
170 170
171#ifdef CONFIG_X86_32 171#ifdef CONFIG_X86_32
172/* cpu data as detected by the assembly code in head.S */ 172/* cpu data as detected by the assembly code in head.S */
173struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 173struct cpuinfo_x86 new_cpu_data = {
174 .wp_works_ok = -1, 174 .wp_works_ok = -1,
175}; 175};
176/* common cpu data for all cpus */ 176/* common cpu data for all cpus */
@@ -1040,8 +1040,6 @@ void __init setup_arch(char **cmdline_p)
1040 /* max_low_pfn get updated here */ 1040 /* max_low_pfn get updated here */
1041 find_low_pfn_range(); 1041 find_low_pfn_range();
1042#else 1042#else
1043 num_physpages = max_pfn;
1044
1045 check_x2apic(); 1043 check_x2apic();
1046 1044
1047 /* How many end-of-memory variables you have, grandma! */ 1045 /* How many end-of-memory variables you have, grandma! */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 69562992e457..cf913587d4dd 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -43,12 +43,6 @@
43 43
44#include <asm/sigframe.h> 44#include <asm/sigframe.h>
45 45
46#ifdef CONFIG_X86_32
47# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
48#else
49# define FIX_EFLAGS __FIX_EFLAGS
50#endif
51
52#define COPY(x) do { \ 46#define COPY(x) do { \
53 get_user_ex(regs->x, &sc->x); \ 47 get_user_ex(regs->x, &sc->x); \
54} while (0) 48} while (0)
@@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
668 if (!failed) { 662 if (!failed) {
669 /* 663 /*
670 * Clear the direction flag as per the ABI for function entry. 664 * Clear the direction flag as per the ABI for function entry.
671 */ 665 *
672 regs->flags &= ~X86_EFLAGS_DF; 666 * Clear RF when entering the signal handler, because
673 /* 667 * it might disable possible debug exception from the
668 * signal handler.
669 *
674 * Clear TF when entering the signal handler, but 670 * Clear TF when entering the signal handler, but
675 * notify any tracer that was single-stepping it. 671 * notify any tracer that was single-stepping it.
676 * The tracer may want to single-step inside the 672 * The tracer may want to single-step inside the
677 * handler too. 673 * handler too.
678 */ 674 */
679 regs->flags &= ~X86_EFLAGS_TF; 675 regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
680 } 676 }
681 signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); 677 signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
682} 678}
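The signal.c hunk folds the DF, RF and TF clears on signal handler entry into one mask write, with RF added so a pending debug exception cannot be suppressed inside the handler. A tiny sketch using the architectural EFLAGS bit positions; the starting value is just an example:

    #include <stdio.h>

    #define X86_EFLAGS_TF 0x00100UL         /* trap flag, bit 8 */
    #define X86_EFLAGS_DF 0x00400UL         /* direction flag, bit 10 */
    #define X86_EFLAGS_RF 0x10000UL         /* resume flag, bit 16 */

    int main(void)
    {
            unsigned long flags = 0x10702;  /* example: IF plus TF, DF and RF set */

            /* One combined clear, as handle_signal() now does. */
            flags &= ~(X86_EFLAGS_DF | X86_EFLAGS_RF | X86_EFLAGS_TF);
            printf("flags on handler entry: %#lx\n", flags);
            return 0;
    }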
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 48d2b7ded422..cdaa347dfcad 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -30,6 +30,7 @@
30#include <asm/proto.h> 30#include <asm/proto.h>
31#include <asm/apic.h> 31#include <asm/apic.h>
32#include <asm/nmi.h> 32#include <asm/nmi.h>
33#include <asm/trace/irq_vectors.h>
33/* 34/*
34 * Some notes on x86 processor bugs affecting SMP operation: 35 * Some notes on x86 processor bugs affecting SMP operation:
35 * 36 *
@@ -249,32 +250,87 @@ finish:
249/* 250/*
250 * Reschedule call back. 251 * Reschedule call back.
251 */ 252 */
252void smp_reschedule_interrupt(struct pt_regs *regs) 253static inline void __smp_reschedule_interrupt(void)
253{ 254{
254 ack_APIC_irq();
255 inc_irq_stat(irq_resched_count); 255 inc_irq_stat(irq_resched_count);
256 scheduler_ipi(); 256 scheduler_ipi();
257}
258
259void smp_reschedule_interrupt(struct pt_regs *regs)
260{
261 ack_APIC_irq();
262 __smp_reschedule_interrupt();
257 /* 263 /*
258 * KVM uses this interrupt to force a cpu out of guest mode 264 * KVM uses this interrupt to force a cpu out of guest mode
259 */ 265 */
260} 266}
261 267
262void smp_call_function_interrupt(struct pt_regs *regs) 268static inline void smp_entering_irq(void)
263{ 269{
264 ack_APIC_irq(); 270 ack_APIC_irq();
265 irq_enter(); 271 irq_enter();
272}
273
274void smp_trace_reschedule_interrupt(struct pt_regs *regs)
275{
276 /*
277 * Need to call irq_enter() before calling the trace point.
278 * __smp_reschedule_interrupt() calls irq_enter/exit() too (in
279 * scheduler_ipi(). This is OK, since those functions are allowed
280 * to nest.
281 */
282 smp_entering_irq();
283 trace_reschedule_entry(RESCHEDULE_VECTOR);
284 __smp_reschedule_interrupt();
285 trace_reschedule_exit(RESCHEDULE_VECTOR);
286 exiting_irq();
287 /*
288 * KVM uses this interrupt to force a cpu out of guest mode
289 */
290}
291
292static inline void __smp_call_function_interrupt(void)
293{
266 generic_smp_call_function_interrupt(); 294 generic_smp_call_function_interrupt();
267 inc_irq_stat(irq_call_count); 295 inc_irq_stat(irq_call_count);
268 irq_exit();
269} 296}
270 297
271void smp_call_function_single_interrupt(struct pt_regs *regs) 298void smp_call_function_interrupt(struct pt_regs *regs)
299{
300 smp_entering_irq();
301 __smp_call_function_interrupt();
302 exiting_irq();
303}
304
305void smp_trace_call_function_interrupt(struct pt_regs *regs)
306{
307 smp_entering_irq();
308 trace_call_function_entry(CALL_FUNCTION_VECTOR);
309 __smp_call_function_interrupt();
310 trace_call_function_exit(CALL_FUNCTION_VECTOR);
311 exiting_irq();
312}
313
314static inline void __smp_call_function_single_interrupt(void)
272{ 315{
273 ack_APIC_irq();
274 irq_enter();
275 generic_smp_call_function_single_interrupt(); 316 generic_smp_call_function_single_interrupt();
276 inc_irq_stat(irq_call_count); 317 inc_irq_stat(irq_call_count);
277 irq_exit(); 318}
319
320void smp_call_function_single_interrupt(struct pt_regs *regs)
321{
322 smp_entering_irq();
323 __smp_call_function_single_interrupt();
324 exiting_irq();
325}
326
327void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
328{
329 smp_entering_irq();
330 trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
331 __smp_call_function_single_interrupt();
332 trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
333 exiting_irq();
278} 334}
279 335
280static int __init nonmi_ipi_setup(char *str) 336static int __init nonmi_ipi_setup(char *str)
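The smp.c rework splits each IPI handler into an inline __smp_*() body shared by the normal entry point and a new smp_trace_*() entry point that brackets it with entry/exit tracepoints. A sketch of that shared-body pattern, with trace_entry()/trace_exit() as invented stand-ins for the real tracepoints:

    #include <stdio.h>

    /* Invented stand-ins for the real entry/exit tracepoints. */
    static void trace_entry(const char *name) { printf("-> %s\n", name); }
    static void trace_exit(const char *name)  { printf("<- %s\n", name); }

    /* The shared body: called by both the plain and the traced entry point. */
    static inline void __reschedule_interrupt(void)
    {
            puts("handling the reschedule IPI");
    }

    void reschedule_interrupt(void)
    {
            __reschedule_interrupt();
    }

    void trace_reschedule_interrupt(void)
    {
            trace_entry("reschedule");
            __reschedule_interrupt();
            trace_exit("reschedule");
    }

    int main(void)
    {
            reschedule_interrupt();         /* untraced fast path */
            trace_reschedule_interrupt();   /* same body, bracketed by trace events */
            return 0;
    }

Keeping the body in one place means the traced variant cannot drift out of sync with the fast path; only the trace brackets differ.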
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bfd348e99369..aecc98a93d1b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -130,7 +130,7 @@ atomic_t init_deasserted;
130 * Report back to the Boot Processor during boot time or to the caller processor 130 * Report back to the Boot Processor during boot time or to the caller processor
131 * during CPU online. 131 * during CPU online.
132 */ 132 */
133static void __cpuinit smp_callin(void) 133static void smp_callin(void)
134{ 134{
135 int cpuid, phys_id; 135 int cpuid, phys_id;
136 unsigned long timeout; 136 unsigned long timeout;
@@ -237,7 +237,7 @@ static int enable_start_cpu0;
237/* 237/*
238 * Activate a secondary processor. 238 * Activate a secondary processor.
239 */ 239 */
240notrace static void __cpuinit start_secondary(void *unused) 240static void notrace start_secondary(void *unused)
241{ 241{
242 /* 242 /*
243 * Don't put *anything* before cpu_init(), SMP booting is too 243 * Don't put *anything* before cpu_init(), SMP booting is too
@@ -300,7 +300,7 @@ void __init smp_store_boot_cpu_info(void)
300 * The bootstrap kernel entry code has set these up. Save them for 300 * The bootstrap kernel entry code has set these up. Save them for
301 * a given CPU 301 * a given CPU
302 */ 302 */
303void __cpuinit smp_store_cpu_info(int id) 303void smp_store_cpu_info(int id)
304{ 304{
305 struct cpuinfo_x86 *c = &cpu_data(id); 305 struct cpuinfo_x86 *c = &cpu_data(id);
306 306
@@ -313,7 +313,7 @@ void __cpuinit smp_store_cpu_info(int id)
313 identify_secondary_cpu(c); 313 identify_secondary_cpu(c);
314} 314}
315 315
316static bool __cpuinit 316static bool
317topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) 317topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
318{ 318{
319 int cpu1 = c->cpu_index, cpu2 = o->cpu_index; 319 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
@@ -330,7 +330,7 @@ do { \
330 cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \ 330 cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
331} while (0) 331} while (0)
332 332
333static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) 333static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
334{ 334{
335 if (cpu_has_topoext) { 335 if (cpu_has_topoext) {
336 int cpu1 = c->cpu_index, cpu2 = o->cpu_index; 336 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
@@ -348,7 +348,7 @@ static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
348 return false; 348 return false;
349} 349}
350 350
351static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) 351static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
352{ 352{
353 int cpu1 = c->cpu_index, cpu2 = o->cpu_index; 353 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
354 354
@@ -359,7 +359,7 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
359 return false; 359 return false;
360} 360}
361 361
362static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) 362static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
363{ 363{
364 if (c->phys_proc_id == o->phys_proc_id) { 364 if (c->phys_proc_id == o->phys_proc_id) {
365 if (cpu_has(c, X86_FEATURE_AMD_DCM)) 365 if (cpu_has(c, X86_FEATURE_AMD_DCM))
@@ -370,7 +370,7 @@ static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
370 return false; 370 return false;
371} 371}
372 372
373void __cpuinit set_cpu_sibling_map(int cpu) 373void set_cpu_sibling_map(int cpu)
374{ 374{
375 bool has_smt = smp_num_siblings > 1; 375 bool has_smt = smp_num_siblings > 1;
376 bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; 376 bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
@@ -499,7 +499,7 @@ void __inquire_remote_apic(int apicid)
499 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 499 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
500 * won't ... remember to clear down the APIC, etc later. 500 * won't ... remember to clear down the APIC, etc later.
501 */ 501 */
502int __cpuinit 502int
503wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) 503wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
504{ 504{
505 unsigned long send_status, accept_status = 0; 505 unsigned long send_status, accept_status = 0;
@@ -533,7 +533,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
533 return (send_status | accept_status); 533 return (send_status | accept_status);
534} 534}
535 535
536static int __cpuinit 536static int
537wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) 537wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
538{ 538{
539 unsigned long send_status, accept_status = 0; 539 unsigned long send_status, accept_status = 0;
@@ -649,7 +649,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
649} 649}
650 650
651/* reduce the number of lines printed when booting a large cpu count system */ 651/* reduce the number of lines printed when booting a large cpu count system */
652static void __cpuinit announce_cpu(int cpu, int apicid) 652static void announce_cpu(int cpu, int apicid)
653{ 653{
654 static int current_node = -1; 654 static int current_node = -1;
655 int node = early_cpu_to_node(cpu); 655 int node = early_cpu_to_node(cpu);
@@ -691,7 +691,7 @@ static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
691 * We'll change this code in the future to wake up hard offlined CPU0 if 691 * We'll change this code in the future to wake up hard offlined CPU0 if
692 * real platform and request are available. 692 * real platform and request are available.
693 */ 693 */
694static int __cpuinit 694static int
695wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, 695wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
696 int *cpu0_nmi_registered) 696 int *cpu0_nmi_registered)
697{ 697{
@@ -731,7 +731,7 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
731 * Returns zero if CPU booted OK, else error code from 731 * Returns zero if CPU booted OK, else error code from
732 * ->wakeup_secondary_cpu. 732 * ->wakeup_secondary_cpu.
733 */ 733 */
734static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) 734static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
735{ 735{
736 volatile u32 *trampoline_status = 736 volatile u32 *trampoline_status =
737 (volatile u32 *) __va(real_mode_header->trampoline_status); 737 (volatile u32 *) __va(real_mode_header->trampoline_status);
@@ -872,7 +872,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
872 return boot_error; 872 return boot_error;
873} 873}
874 874
875int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) 875int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
876{ 876{
877 int apicid = apic->cpu_present_to_apicid(cpu); 877 int apicid = apic->cpu_present_to_apicid(cpu);
878 unsigned long flags; 878 unsigned long flags;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index dbded5aedb81..48f8375e4c6b 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
101 *begin = new_begin; 101 *begin = new_begin;
102 } 102 }
103 } else { 103 } else {
104 *begin = TASK_UNMAPPED_BASE; 104 *begin = mmap_legacy_base();
105 *end = TASK_SIZE; 105 *end = TASK_SIZE;
106 } 106 }
107} 107}
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index f84fe00fad48..addf7b58f4e8 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -31,6 +31,7 @@
31#include <linux/pfn.h> 31#include <linux/pfn.h>
32#include <linux/mm.h> 32#include <linux/mm.h>
33#include <linux/tboot.h> 33#include <linux/tboot.h>
34#include <linux/debugfs.h>
34 35
35#include <asm/realmode.h> 36#include <asm/realmode.h>
36#include <asm/processor.h> 37#include <asm/processor.h>
@@ -319,8 +320,8 @@ static int tboot_wait_for_aps(int num_aps)
319 return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); 320 return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
320} 321}
321 322
322static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb, 323static int tboot_cpu_callback(struct notifier_block *nfb, unsigned long action,
323 unsigned long action, void *hcpu) 324 void *hcpu)
324{ 325{
325 switch (action) { 326 switch (action) {
326 case CPU_DYING: 327 case CPU_DYING:
@@ -333,11 +334,78 @@ static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb,
333 return NOTIFY_OK; 334 return NOTIFY_OK;
334} 335}
335 336
336static struct notifier_block tboot_cpu_notifier __cpuinitdata = 337static struct notifier_block tboot_cpu_notifier =
337{ 338{
338 .notifier_call = tboot_cpu_callback, 339 .notifier_call = tboot_cpu_callback,
339}; 340};
340 341
342#ifdef CONFIG_DEBUG_FS
343
344#define TBOOT_LOG_UUID { 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \
345 0x4c, 0x84, 0xa3, 0xe9, 0x53, 0xb8, 0x81, 0x74 }
346
347#define TBOOT_SERIAL_LOG_ADDR 0x60000
348#define TBOOT_SERIAL_LOG_SIZE 0x08000
349#define LOG_MAX_SIZE_OFF 16
350#define LOG_BUF_OFF 24
351
352static uint8_t tboot_log_uuid[16] = TBOOT_LOG_UUID;
353
354static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
355{
356 void __iomem *log_base;
357 u8 log_uuid[16];
358 u32 max_size;
359 void *kbuf;
360 int ret = -EFAULT;
361
362 log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE);
363 if (!log_base)
364 return ret;
365
366 memcpy_fromio(log_uuid, log_base, sizeof(log_uuid));
367 if (memcmp(&tboot_log_uuid, log_uuid, sizeof(log_uuid)))
368 goto err_iounmap;
369
370 max_size = readl(log_base + LOG_MAX_SIZE_OFF);
371 if (*ppos >= max_size) {
372 ret = 0;
373 goto err_iounmap;
374 }
375
376 if (*ppos + count > max_size)
377 count = max_size - *ppos;
378
379 kbuf = kmalloc(count, GFP_KERNEL);
380 if (!kbuf) {
381 ret = -ENOMEM;
382 goto err_iounmap;
383 }
384
385 memcpy_fromio(kbuf, log_base + LOG_BUF_OFF + *ppos, count);
386 if (copy_to_user(user_buf, kbuf, count))
387 goto err_kfree;
388
389 *ppos += count;
390
391 ret = count;
392
393err_kfree:
394 kfree(kbuf);
395
396err_iounmap:
397 iounmap(log_base);
398
399 return ret;
400}
401
402static const struct file_operations tboot_log_fops = {
403 .read = tboot_log_read,
404 .llseek = default_llseek,
405};
406
407#endif /* CONFIG_DEBUG_FS */
408
341static __init int tboot_late_init(void) 409static __init int tboot_late_init(void)
342{ 410{
343 if (!tboot_enabled()) 411 if (!tboot_enabled())
@@ -348,6 +416,11 @@ static __init int tboot_late_init(void)
348 atomic_set(&ap_wfs_count, 0); 416 atomic_set(&ap_wfs_count, 0);
349 register_hotcpu_notifier(&tboot_cpu_notifier); 417 register_hotcpu_notifier(&tboot_cpu_notifier);
350 418
419#ifdef CONFIG_DEBUG_FS
420 debugfs_create_file("tboot_log", S_IRUSR,
421 arch_debugfs_dir, NULL, &tboot_log_fops);
422#endif
423
351 acpi_os_set_prepare_sleep(&tboot_sleep); 424 acpi_os_set_prepare_sleep(&tboot_sleep);
352 return 0; 425 return 0;
353} 426}
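tboot_log_read() above maps the fixed log window, checks the UUID, clamps the request to the advertised log size, and copies the slice at *ppos out to userspace. A userspace sketch of the same bounded-read logic, with plain memory accesses replacing ioremap_nocache(), memcpy_fromio() and copy_to_user(), and a short magic string replacing the UUID; everything here is illustrative:

    #include <stdio.h>
    #include <string.h>

    static const char log_magic[4] = "TBLG";        /* stand-in for the UUID check */

    static long log_read(const char *log, size_t log_size,
                         char *out, size_t count, long *ppos)
    {
            if (memcmp(log, log_magic, sizeof(log_magic)))
                    return -1;                      /* header does not match */

            if ((size_t)*ppos >= log_size)
                    return 0;                       /* nothing left: EOF */
            if ((size_t)*ppos + count > log_size)
                    count = log_size - (size_t)*ppos;       /* clamp to the tail */

            memcpy(out, log + *ppos, count);        /* copy_to_user() in the real code */
            *ppos += (long)count;
            return (long)count;
    }

    int main(void)
    {
            char log[16] = "TBLGhello world";
            char buf[8];
            long pos = 4;                           /* skip past the header */
            long n = log_read(log, sizeof(log), buf, sizeof(buf), &pos);

            printf("read %ld bytes, pos now %ld\n", n, pos);
            return 0;
    }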
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
new file mode 100644
index 000000000000..1c113db9ed57
--- /dev/null
+++ b/arch/x86/kernel/tracepoint.c
@@ -0,0 +1,59 @@
1/*
2 * Code for supporting irq vector tracepoints.
3 *
4 * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
5 *
6 */
7#include <asm/hw_irq.h>
8#include <asm/desc.h>
9#include <linux/atomic.h>
10
11atomic_t trace_idt_ctr = ATOMIC_INIT(0);
12struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
13 (unsigned long) trace_idt_table };
14
15/* No need to be aligned, but done to keep all IDTs defined the same way. */
16gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
17
18static int trace_irq_vector_refcount;
19static DEFINE_MUTEX(irq_vector_mutex);
20
21static void set_trace_idt_ctr(int val)
22{
23 atomic_set(&trace_idt_ctr, val);
24 /* Ensure the trace_idt_ctr is set before sending IPI */
25 wmb();
26}
27
28static void switch_idt(void *arg)
29{
30 unsigned long flags;
31
32 local_irq_save(flags);
33 load_current_idt();
34 local_irq_restore(flags);
35}
36
37void trace_irq_vector_regfunc(void)
38{
39 mutex_lock(&irq_vector_mutex);
40 if (!trace_irq_vector_refcount) {
41 set_trace_idt_ctr(1);
42 smp_call_function(switch_idt, NULL, 0);
43 switch_idt(NULL);
44 }
45 trace_irq_vector_refcount++;
46 mutex_unlock(&irq_vector_mutex);
47}
48
49void trace_irq_vector_unregfunc(void)
50{
51 mutex_lock(&irq_vector_mutex);
52 trace_irq_vector_refcount--;
53 if (!trace_irq_vector_refcount) {
54 set_trace_idt_ctr(0);
55 smp_call_function(switch_idt, NULL, 0);
56 switch_idt(NULL);
57 }
58 mutex_unlock(&irq_vector_mutex);
59}
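The new tracepoint.c uses a mutex-protected refcount so that only the first trace_irq_vector_regfunc() caller installs the trace IDT on all CPUs and only the last trace_irq_vector_unregfunc() caller removes it. A sketch of that refcounted switch, using pthreads in place of the kernel mutex and a printf in place of the IDT switch; the names are stand-ins:

    #include <stdio.h>
    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int refcount;

    /* Stands in for installing/removing the trace IDT on every CPU. */
    static void switch_state(int on)
    {
            printf("trace IDT %s\n", on ? "installed" : "removed");
    }

    void trace_regfunc(void)
    {
            pthread_mutex_lock(&lock);
            if (!refcount)
                    switch_state(1);        /* only the first user flips the switch */
            refcount++;
            pthread_mutex_unlock(&lock);
    }

    void trace_unregfunc(void)
    {
            pthread_mutex_lock(&lock);
            refcount--;
            if (!refcount)
                    switch_state(0);        /* only the last user flips it back */
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            trace_regfunc();
            trace_regfunc();        /* second user: no state change */
            trace_unregfunc();
            trace_unregfunc();      /* last user leaves: state switched off */
            return 0;
    }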
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 772e2a846dec..1b23a1c92746 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -63,19 +63,19 @@
63#include <asm/x86_init.h> 63#include <asm/x86_init.h>
64#include <asm/pgalloc.h> 64#include <asm/pgalloc.h>
65#include <asm/proto.h> 65#include <asm/proto.h>
66
67/* No need to be aligned, but done to keep all IDTs defined the same way. */
68gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
66#else 69#else
67#include <asm/processor-flags.h> 70#include <asm/processor-flags.h>
68#include <asm/setup.h> 71#include <asm/setup.h>
69 72
70asmlinkage int system_call(void); 73asmlinkage int system_call(void);
71
72/*
73 * The IDT has to be page-aligned to simplify the Pentium
74 * F0 0F bug workaround.
75 */
76gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
77#endif 74#endif
78 75
76/* Must be page-aligned because the real IDT is used in a fixmap. */
77gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
78
79DECLARE_BITMAP(used_vectors, NR_VECTORS); 79DECLARE_BITMAP(used_vectors, NR_VECTORS);
80EXPORT_SYMBOL_GPL(used_vectors); 80EXPORT_SYMBOL_GPL(used_vectors);
81 81
@@ -254,6 +254,9 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
254 tsk->thread.error_code = error_code; 254 tsk->thread.error_code = error_code;
255 tsk->thread.trap_nr = X86_TRAP_DF; 255 tsk->thread.trap_nr = X86_TRAP_DF;
256 256
257#ifdef CONFIG_DOUBLEFAULT
258 df_debug(regs, error_code);
259#endif
257 /* 260 /*
258 * This is always a kernel trap and never fixable (and thus must 261 * This is always a kernel trap and never fixable (and thus must
259 * never return). 262 * never return).
@@ -437,7 +440,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
437 /* Store the virtualized DR6 value */ 440 /* Store the virtualized DR6 value */
438 tsk->thread.debugreg6 = dr6; 441 tsk->thread.debugreg6 = dr6;
439 442
440 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, 443 if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
441 SIGTRAP) == NOTIFY_STOP) 444 SIGTRAP) == NOTIFY_STOP)
442 goto exit; 445 goto exit;
443 446
@@ -785,7 +788,7 @@ void __init trap_init(void)
785 x86_init.irqs.trap_init(); 788 x86_init.irqs.trap_init();
786 789
787#ifdef CONFIG_X86_64 790#ifdef CONFIG_X86_64
788 memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); 791 memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
789 set_nmi_gate(X86_TRAP_DB, &debug); 792 set_nmi_gate(X86_TRAP_DB, &debug);
790 set_nmi_gate(X86_TRAP_BP, &int3); 793 set_nmi_gate(X86_TRAP_BP, &int3);
791#endif 794#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 098b3cfda72e..6ff49247edf8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -824,7 +824,7 @@ static void __init check_system_tsc_reliable(void)
824 * Make an educated guess if the TSC is trustworthy and synchronized 824 * Make an educated guess if the TSC is trustworthy and synchronized
825 * over all CPUs. 825 * over all CPUs.
826 */ 826 */
827__cpuinit int unsynchronized_tsc(void) 827int unsynchronized_tsc(void)
828{ 828{
829 if (!cpu_has_tsc || tsc_unstable) 829 if (!cpu_has_tsc || tsc_unstable)
830 return 1; 830 return 1;
@@ -1020,7 +1020,7 @@ void __init tsc_init(void)
1020 * been calibrated. This assumes that CONSTANT_TSC applies to all 1020 * been calibrated. This assumes that CONSTANT_TSC applies to all
1021 * cpus in the socket - this should be a safe assumption. 1021 * cpus in the socket - this should be a safe assumption.
1022 */ 1022 */
1023unsigned long __cpuinit calibrate_delay_is_known(void) 1023unsigned long calibrate_delay_is_known(void)
1024{ 1024{
1025 int i, cpu = smp_processor_id(); 1025 int i, cpu = smp_processor_id();
1026 1026
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index fc25e60a5884..adfdf56a3714 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -25,24 +25,24 @@
25 * Entry/exit counters that make sure that both CPUs 25 * Entry/exit counters that make sure that both CPUs
26 * run the measurement code at once: 26 * run the measurement code at once:
27 */ 27 */
28static __cpuinitdata atomic_t start_count; 28static atomic_t start_count;
29static __cpuinitdata atomic_t stop_count; 29static atomic_t stop_count;
30 30
31/* 31/*
32 * We use a raw spinlock in this exceptional case, because 32 * We use a raw spinlock in this exceptional case, because
33 * we want to have the fastest, inlined, non-debug version 33 * we want to have the fastest, inlined, non-debug version
34 * of a critical section, to be able to prove TSC time-warps: 34 * of a critical section, to be able to prove TSC time-warps:
35 */ 35 */
36static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED; 36static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
37 37
38static __cpuinitdata cycles_t last_tsc; 38static cycles_t last_tsc;
39static __cpuinitdata cycles_t max_warp; 39static cycles_t max_warp;
40static __cpuinitdata int nr_warps; 40static int nr_warps;
41 41
42/* 42/*
43 * TSC-warp measurement loop running on both CPUs: 43 * TSC-warp measurement loop running on both CPUs:
44 */ 44 */
45static __cpuinit void check_tsc_warp(unsigned int timeout) 45static void check_tsc_warp(unsigned int timeout)
46{ 46{
47 cycles_t start, now, prev, end; 47 cycles_t start, now, prev, end;
48 int i; 48 int i;
@@ -121,7 +121,7 @@ static inline unsigned int loop_timeout(int cpu)
121 * Source CPU calls into this - it waits for the freshly booted 121 * Source CPU calls into this - it waits for the freshly booted
122 * target CPU to arrive and then starts the measurement: 122 * target CPU to arrive and then starts the measurement:
123 */ 123 */
124void __cpuinit check_tsc_sync_source(int cpu) 124void check_tsc_sync_source(int cpu)
125{ 125{
126 int cpus = 2; 126 int cpus = 2;
127 127
@@ -187,7 +187,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
187/* 187/*
188 * Freshly booted CPUs call into this: 188 * Freshly booted CPUs call into this:
189 */ 189 */
190void __cpuinit check_tsc_sync_target(void) 190void check_tsc_sync_target(void)
191{ 191{
192 int cpus = 2; 192 int cpus = 2;
193 193
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9a907a67be8f..1f96f9347ed9 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -331,7 +331,7 @@ sigsegv:
331 * Assume __initcall executes before all user space. Hopefully kmod 331 * Assume __initcall executes before all user space. Hopefully kmod
332 * doesn't violate that. We'll find out if it does. 332 * doesn't violate that. We'll find out if it does.
333 */ 333 */
334static void __cpuinit vsyscall_set_cpu(int cpu) 334static void vsyscall_set_cpu(int cpu)
335{ 335{
336 unsigned long d; 336 unsigned long d;
337 unsigned long node = 0; 337 unsigned long node = 0;
@@ -353,13 +353,13 @@ static void __cpuinit vsyscall_set_cpu(int cpu)
353 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); 353 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
354} 354}
355 355
356static void __cpuinit cpu_vsyscall_init(void *arg) 356static void cpu_vsyscall_init(void *arg)
357{ 357{
358 /* preemption should be already off */ 358 /* preemption should be already off */
359 vsyscall_set_cpu(raw_smp_processor_id()); 359 vsyscall_set_cpu(raw_smp_processor_id());
360} 360}
361 361
362static int __cpuinit 362static int
363cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) 363cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
364{ 364{
365 long cpu = (long)arg; 365 long cpu = (long)arg;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 45a14dbbddaf..5f24c71accaa 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -25,7 +25,7 @@
25#include <asm/iommu.h> 25#include <asm/iommu.h>
26#include <asm/mach_traps.h> 26#include <asm/mach_traps.h>
27 27
28void __cpuinit x86_init_noop(void) { } 28void x86_init_noop(void) { }
29void __init x86_init_uint_noop(unsigned int unused) { } 29void __init x86_init_uint_noop(unsigned int unused) { }
30int __init iommu_init_noop(void) { return 0; } 30int __init iommu_init_noop(void) { return 0; }
31void iommu_shutdown_noop(void) { } 31void iommu_shutdown_noop(void) { }
@@ -85,7 +85,7 @@ struct x86_init_ops x86_init __initdata = {
85 }, 85 },
86}; 86};
87 87
88struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 88struct x86_cpuinit_ops x86_cpuinit = {
89 .early_percpu_clock_init = x86_init_noop, 89 .early_percpu_clock_init = x86_init_noop,
90 .setup_percpu_clockev = setup_secondary_APIC_clock, 90 .setup_percpu_clockev = setup_secondary_APIC_clock,
91}; 91};
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index ada87a329edc..422fd8223470 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -243,7 +243,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
243 if (!access_ok(VERIFY_WRITE, buf, size)) 243 if (!access_ok(VERIFY_WRITE, buf, size))
244 return -EACCES; 244 return -EACCES;
245 245
246 if (!HAVE_HWFP) 246 if (!static_cpu_has(X86_FEATURE_FPU))
247 return fpregs_soft_get(current, NULL, 0, 247 return fpregs_soft_get(current, NULL, 0,
248 sizeof(struct user_i387_ia32_struct), NULL, 248 sizeof(struct user_i387_ia32_struct), NULL,
249 (struct _fpstate_ia32 __user *) buf) ? -1 : 1; 249 (struct _fpstate_ia32 __user *) buf) ? -1 : 1;
@@ -350,11 +350,10 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
350 if (!used_math() && init_fpu(tsk)) 350 if (!used_math() && init_fpu(tsk))
351 return -1; 351 return -1;
352 352
353 if (!HAVE_HWFP) { 353 if (!static_cpu_has(X86_FEATURE_FPU))
354 return fpregs_soft_set(current, NULL, 354 return fpregs_soft_set(current, NULL,
355 0, sizeof(struct user_i387_ia32_struct), 355 0, sizeof(struct user_i387_ia32_struct),
356 NULL, buf) != 0; 356 NULL, buf) != 0;
357 }
358 357
359 if (use_xsave()) { 358 if (use_xsave()) {
360 struct _fpx_sw_bytes fx_sw_user; 359 struct _fpx_sw_bytes fx_sw_user;
@@ -574,7 +573,7 @@ static void __init xstate_enable_boot_cpu(void)
574 * This is somewhat obfuscated due to the lack of powerful enough 573 * This is somewhat obfuscated due to the lack of powerful enough
575 * overrides for the section checks. 574 * overrides for the section checks.
576 */ 575 */
577void __cpuinit xsave_init(void) 576void xsave_init(void)
578{ 577{
579 static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; 578 static __refdata void (*next_func)(void) = xstate_enable_boot_cpu;
580 void (*this_func)(void); 579 void (*this_func)(void);
@@ -595,7 +594,7 @@ static inline void __init eager_fpu_init_bp(void)
595 setup_init_fpu_buf(); 594 setup_init_fpu_buf();
596} 595}
597 596
598void __cpuinit eager_fpu_init(void) 597void eager_fpu_init(void)
599{ 598{
600 static __refdata void (*boot_func)(void) = eager_fpu_init_bp; 599 static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
601 600
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d609e1d84048..bf4fb04d0112 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -5,12 +5,13 @@ CFLAGS_x86.o := -I.
5CFLAGS_svm.o := -I. 5CFLAGS_svm.o := -I.
6CFLAGS_vmx.o := -I. 6CFLAGS_vmx.o := -I.
7 7
8kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ 8KVM := ../../../virt/kvm
9 coalesced_mmio.o irq_comm.o eventfd.o \ 9
10 irqchip.o) 10kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
11kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ 11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
12 assigned-dev.o iommu.o) 12 $(KVM)/eventfd.o $(KVM)/irqchip.o
13kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) 13kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o
14kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
14 15
15kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ 16kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
16 i8254.o cpuid.o pmu.o 17 i8254.o cpuid.o pmu.o
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5953dcea752d..2bc1e81045b0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -61,6 +61,8 @@
61#define OpMem8 26ull /* 8-bit zero extended memory operand */ 61#define OpMem8 26ull /* 8-bit zero extended memory operand */
62#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */ 62#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
63#define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */ 63#define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
64#define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
65#define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
64 66
65#define OpBits 5 /* Width of operand field */ 67#define OpBits 5 /* Width of operand field */
66#define OpMask ((1ull << OpBits) - 1) 68#define OpMask ((1ull << OpBits) - 1)
@@ -86,6 +88,7 @@
86#define DstMem64 (OpMem64 << DstShift) 88#define DstMem64 (OpMem64 << DstShift)
87#define DstImmUByte (OpImmUByte << DstShift) 89#define DstImmUByte (OpImmUByte << DstShift)
88#define DstDX (OpDX << DstShift) 90#define DstDX (OpDX << DstShift)
91#define DstAccLo (OpAccLo << DstShift)
89#define DstMask (OpMask << DstShift) 92#define DstMask (OpMask << DstShift)
90/* Source operand type. */ 93/* Source operand type. */
91#define SrcShift 6 94#define SrcShift 6
@@ -108,6 +111,7 @@
108#define SrcImm64 (OpImm64 << SrcShift) 111#define SrcImm64 (OpImm64 << SrcShift)
109#define SrcDX (OpDX << SrcShift) 112#define SrcDX (OpDX << SrcShift)
110#define SrcMem8 (OpMem8 << SrcShift) 113#define SrcMem8 (OpMem8 << SrcShift)
114#define SrcAccHi (OpAccHi << SrcShift)
111#define SrcMask (OpMask << SrcShift) 115#define SrcMask (OpMask << SrcShift)
112#define BitOp (1<<11) 116#define BitOp (1<<11)
113#define MemAbs (1<<12) /* Memory operand is absolute displacement */ 117#define MemAbs (1<<12) /* Memory operand is absolute displacement */
@@ -138,6 +142,7 @@
138/* Source 2 operand type */ 142/* Source 2 operand type */
139#define Src2Shift (31) 143#define Src2Shift (31)
140#define Src2None (OpNone << Src2Shift) 144#define Src2None (OpNone << Src2Shift)
145#define Src2Mem (OpMem << Src2Shift)
141#define Src2CL (OpCL << Src2Shift) 146#define Src2CL (OpCL << Src2Shift)
142#define Src2ImmByte (OpImmByte << Src2Shift) 147#define Src2ImmByte (OpImmByte << Src2Shift)
143#define Src2One (OpOne << Src2Shift) 148#define Src2One (OpOne << Src2Shift)
@@ -155,6 +160,9 @@
155#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ 160#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
156#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ 161#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
157#define NoWrite ((u64)1 << 45) /* No writeback */ 162#define NoWrite ((u64)1 << 45) /* No writeback */
163#define SrcWrite ((u64)1 << 46) /* Write back src operand */
164
165#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
158 166
159#define X2(x...) x, x 167#define X2(x...) x, x
160#define X3(x...) X2(x), x 168#define X3(x...) X2(x), x
@@ -171,10 +179,11 @@
171/* 179/*
172 * fastop functions have a special calling convention: 180 * fastop functions have a special calling convention:
173 * 181 *
174 * dst: [rdx]:rax (in/out) 182 * dst: rax (in/out)
175 * src: rbx (in/out) 183 * src: rdx (in/out)
176 * src2: rcx (in) 184 * src2: rcx (in)
177 * flags: rflags (in/out) 185 * flags: rflags (in/out)
186 * ex: rsi (in:fastop pointer, out:zero if exception)
178 * 187 *
179 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for 188 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
180 * different operand sizes can be reached by calculation, rather than a jump 189 * different operand sizes can be reached by calculation, rather than a jump
@@ -276,174 +285,17 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
276} 285}
277 286
278/* 287/*
279 * Instruction emulation:
280 * Most instructions are emulated directly via a fragment of inline assembly
281 * code. This allows us to save/restore EFLAGS and thus very easily pick up
282 * any modified flags.
283 */
284
285#if defined(CONFIG_X86_64)
286#define _LO32 "k" /* force 32-bit operand */
287#define _STK "%%rsp" /* stack pointer */
288#elif defined(__i386__)
289#define _LO32 "" /* force 32-bit operand */
290#define _STK "%%esp" /* stack pointer */
291#endif
292
293/*
294 * These EFLAGS bits are restored from saved value during emulation, and 288 * These EFLAGS bits are restored from saved value during emulation, and
295 * any changes are written back to the saved value after emulation. 289 * any changes are written back to the saved value after emulation.
296 */ 290 */
297#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) 291#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
298 292
299/* Before executing instruction: restore necessary bits in EFLAGS. */
300#define _PRE_EFLAGS(_sav, _msk, _tmp) \
301 /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
302 "movl %"_sav",%"_LO32 _tmp"; " \
303 "push %"_tmp"; " \
304 "push %"_tmp"; " \
305 "movl %"_msk",%"_LO32 _tmp"; " \
306 "andl %"_LO32 _tmp",("_STK"); " \
307 "pushf; " \
308 "notl %"_LO32 _tmp"; " \
309 "andl %"_LO32 _tmp",("_STK"); " \
310 "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \
311 "pop %"_tmp"; " \
312 "orl %"_LO32 _tmp",("_STK"); " \
313 "popf; " \
314 "pop %"_sav"; "
315
316/* After executing instruction: write-back necessary bits in EFLAGS. */
317#define _POST_EFLAGS(_sav, _msk, _tmp) \
318 /* _sav |= EFLAGS & _msk; */ \
319 "pushf; " \
320 "pop %"_tmp"; " \
321 "andl %"_msk",%"_LO32 _tmp"; " \
322 "orl %"_LO32 _tmp",%"_sav"; "
323
324#ifdef CONFIG_X86_64 293#ifdef CONFIG_X86_64
325#define ON64(x) x 294#define ON64(x) x
326#else 295#else
327#define ON64(x) 296#define ON64(x)
328#endif 297#endif
329 298
330#define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype) \
331 do { \
332 __asm__ __volatile__ ( \
333 _PRE_EFLAGS("0", "4", "2") \
334 _op _suffix " %"_x"3,%1; " \
335 _POST_EFLAGS("0", "4", "2") \
336 : "=m" ((ctxt)->eflags), \
337 "+q" (*(_dsttype*)&(ctxt)->dst.val), \
338 "=&r" (_tmp) \
339 : _y ((ctxt)->src.val), "i" (EFLAGS_MASK)); \
340 } while (0)
341
342
343/* Raw emulation: instruction has two explicit operands. */
344#define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy) \
345 do { \
346 unsigned long _tmp; \
347 \
348 switch ((ctxt)->dst.bytes) { \
349 case 2: \
350 ____emulate_2op(ctxt,_op,_wx,_wy,"w",u16); \
351 break; \
352 case 4: \
353 ____emulate_2op(ctxt,_op,_lx,_ly,"l",u32); \
354 break; \
355 case 8: \
356 ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \
357 break; \
358 } \
359 } while (0)
360
361#define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
362 do { \
363 unsigned long _tmp; \
364 switch ((ctxt)->dst.bytes) { \
365 case 1: \
366 ____emulate_2op(ctxt,_op,_bx,_by,"b",u8); \
367 break; \
368 default: \
369 __emulate_2op_nobyte(ctxt, _op, \
370 _wx, _wy, _lx, _ly, _qx, _qy); \
371 break; \
372 } \
373 } while (0)
374
375/* Source operand is byte-sized and may be restricted to just %cl. */
376#define emulate_2op_SrcB(ctxt, _op) \
377 __emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c")
378
379/* Source operand is byte, word, long or quad sized. */
380#define emulate_2op_SrcV(ctxt, _op) \
381 __emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r")
382
383/* Source operand is word, long or quad sized. */
384#define emulate_2op_SrcV_nobyte(ctxt, _op) \
385 __emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r")
386
387/* Instruction has three operands and one operand is stored in ECX register */
388#define __emulate_2op_cl(ctxt, _op, _suffix, _type) \
389 do { \
390 unsigned long _tmp; \
391 _type _clv = (ctxt)->src2.val; \
392 _type _srcv = (ctxt)->src.val; \
393 _type _dstv = (ctxt)->dst.val; \
394 \
395 __asm__ __volatile__ ( \
396 _PRE_EFLAGS("0", "5", "2") \
397 _op _suffix " %4,%1 \n" \
398 _POST_EFLAGS("0", "5", "2") \
399 : "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \
400 : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
401 ); \
402 \
403 (ctxt)->src2.val = (unsigned long) _clv; \
404 (ctxt)->src2.val = (unsigned long) _srcv; \
405 (ctxt)->dst.val = (unsigned long) _dstv; \
406 } while (0)
407
408#define emulate_2op_cl(ctxt, _op) \
409 do { \
410 switch ((ctxt)->dst.bytes) { \
411 case 2: \
412 __emulate_2op_cl(ctxt, _op, "w", u16); \
413 break; \
414 case 4: \
415 __emulate_2op_cl(ctxt, _op, "l", u32); \
416 break; \
417 case 8: \
418 ON64(__emulate_2op_cl(ctxt, _op, "q", ulong)); \
419 break; \
420 } \
421 } while (0)
422
423#define __emulate_1op(ctxt, _op, _suffix) \
424 do { \
425 unsigned long _tmp; \
426 \
427 __asm__ __volatile__ ( \
428 _PRE_EFLAGS("0", "3", "2") \
429 _op _suffix " %1; " \
430 _POST_EFLAGS("0", "3", "2") \
431 : "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \
432 "=&r" (_tmp) \
433 : "i" (EFLAGS_MASK)); \
434 } while (0)
435
436/* Instruction has only one explicit operand (no source operand). */
437#define emulate_1op(ctxt, _op) \
438 do { \
439 switch ((ctxt)->dst.bytes) { \
440 case 1: __emulate_1op(ctxt, _op, "b"); break; \
441 case 2: __emulate_1op(ctxt, _op, "w"); break; \
442 case 4: __emulate_1op(ctxt, _op, "l"); break; \
443 case 8: ON64(__emulate_1op(ctxt, _op, "q")); break; \
444 } \
445 } while (0)
446
447static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); 299static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
448 300
449#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" 301#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
@@ -462,7 +314,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
462#define FOPNOP() FOP_ALIGN FOP_RET 314#define FOPNOP() FOP_ALIGN FOP_RET
463 315
464#define FOP1E(op, dst) \ 316#define FOP1E(op, dst) \
465 FOP_ALIGN #op " %" #dst " \n\t" FOP_RET 317 FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET
318
319#define FOP1EEX(op, dst) \
320 FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
466 321
467#define FASTOP1(op) \ 322#define FASTOP1(op) \
468 FOP_START(op) \ 323 FOP_START(op) \
@@ -472,24 +327,42 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
472 ON64(FOP1E(op##q, rax)) \ 327 ON64(FOP1E(op##q, rax)) \
473 FOP_END 328 FOP_END
474 329
330/* 1-operand, using src2 (for MUL/DIV r/m) */
331#define FASTOP1SRC2(op, name) \
332 FOP_START(name) \
333 FOP1E(op, cl) \
334 FOP1E(op, cx) \
335 FOP1E(op, ecx) \
336 ON64(FOP1E(op, rcx)) \
337 FOP_END
338
339/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
340#define FASTOP1SRC2EX(op, name) \
341 FOP_START(name) \
342 FOP1EEX(op, cl) \
343 FOP1EEX(op, cx) \
344 FOP1EEX(op, ecx) \
345 ON64(FOP1EEX(op, rcx)) \
346 FOP_END
347
475#define FOP2E(op, dst, src) \ 348#define FOP2E(op, dst, src) \
476 FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET 349 FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET
477 350
478#define FASTOP2(op) \ 351#define FASTOP2(op) \
479 FOP_START(op) \ 352 FOP_START(op) \
480 FOP2E(op##b, al, bl) \ 353 FOP2E(op##b, al, dl) \
481 FOP2E(op##w, ax, bx) \ 354 FOP2E(op##w, ax, dx) \
482 FOP2E(op##l, eax, ebx) \ 355 FOP2E(op##l, eax, edx) \
483 ON64(FOP2E(op##q, rax, rbx)) \ 356 ON64(FOP2E(op##q, rax, rdx)) \
484 FOP_END 357 FOP_END
485 358
486/* 2 operand, word only */ 359/* 2 operand, word only */
487#define FASTOP2W(op) \ 360#define FASTOP2W(op) \
488 FOP_START(op) \ 361 FOP_START(op) \
489 FOPNOP() \ 362 FOPNOP() \
490 FOP2E(op##w, ax, bx) \ 363 FOP2E(op##w, ax, dx) \
491 FOP2E(op##l, eax, ebx) \ 364 FOP2E(op##l, eax, edx) \
492 ON64(FOP2E(op##q, rax, rbx)) \ 365 ON64(FOP2E(op##q, rax, rdx)) \
493 FOP_END 366 FOP_END
494 367
495/* 2 operand, src is CL */ 368/* 2 operand, src is CL */
@@ -508,14 +381,17 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
508#define FASTOP3WCL(op) \ 381#define FASTOP3WCL(op) \
509 FOP_START(op) \ 382 FOP_START(op) \
510 FOPNOP() \ 383 FOPNOP() \
511 FOP3E(op##w, ax, bx, cl) \ 384 FOP3E(op##w, ax, dx, cl) \
512 FOP3E(op##l, eax, ebx, cl) \ 385 FOP3E(op##l, eax, edx, cl) \
513 ON64(FOP3E(op##q, rax, rbx, cl)) \ 386 ON64(FOP3E(op##q, rax, rdx, cl)) \
514 FOP_END 387 FOP_END
515 388
516/* Special case for SETcc - 1 instruction per cc */ 389/* Special case for SETcc - 1 instruction per cc */
517#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" 390#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
518 391
392asm(".global kvm_fastop_exception \n"
393 "kvm_fastop_exception: xor %esi, %esi; ret");
394
519FOP_START(setcc) 395FOP_START(setcc)
520FOP_SETCC(seto) 396FOP_SETCC(seto)
521FOP_SETCC(setno) 397FOP_SETCC(setno)
@@ -538,47 +414,6 @@ FOP_END;
538FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET 414FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
539FOP_END; 415FOP_END;
540 416
541#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \
542 do { \
543 unsigned long _tmp; \
544 ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \
545 ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \
546 \
547 __asm__ __volatile__ ( \
548 _PRE_EFLAGS("0", "5", "1") \
549 "1: \n\t" \
550 _op _suffix " %6; " \
551 "2: \n\t" \
552 _POST_EFLAGS("0", "5", "1") \
553 ".pushsection .fixup,\"ax\" \n\t" \
554 "3: movb $1, %4 \n\t" \
555 "jmp 2b \n\t" \
556 ".popsection \n\t" \
557 _ASM_EXTABLE(1b, 3b) \
558 : "=m" ((ctxt)->eflags), "=&r" (_tmp), \
559 "+a" (*rax), "+d" (*rdx), "+qm"(_ex) \
560 : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val)); \
561 } while (0)
562
563/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
564#define emulate_1op_rax_rdx(ctxt, _op, _ex) \
565 do { \
566 switch((ctxt)->src.bytes) { \
567 case 1: \
568 __emulate_1op_rax_rdx(ctxt, _op, "b", _ex); \
569 break; \
570 case 2: \
571 __emulate_1op_rax_rdx(ctxt, _op, "w", _ex); \
572 break; \
573 case 4: \
574 __emulate_1op_rax_rdx(ctxt, _op, "l", _ex); \
575 break; \
576 case 8: ON64( \
577 __emulate_1op_rax_rdx(ctxt, _op, "q", _ex)); \
578 break; \
579 } \
580 } while (0)
581
582static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, 417static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
583 enum x86_intercept intercept, 418 enum x86_intercept intercept,
584 enum x86_intercept_stage stage) 419 enum x86_intercept_stage stage)
@@ -988,6 +823,11 @@ FASTOP2(xor);
988FASTOP2(cmp); 823FASTOP2(cmp);
989FASTOP2(test); 824FASTOP2(test);
990 825
826FASTOP1SRC2(mul, mul_ex);
827FASTOP1SRC2(imul, imul_ex);
828FASTOP1SRC2EX(div, div_ex);
829FASTOP1SRC2EX(idiv, idiv_ex);
830
991FASTOP3WCL(shld); 831FASTOP3WCL(shld);
992FASTOP3WCL(shrd); 832FASTOP3WCL(shrd);
993 833
@@ -1013,6 +853,8 @@ FASTOP2W(bts);
1013FASTOP2W(btr); 853FASTOP2W(btr);
1014FASTOP2W(btc); 854FASTOP2W(btc);
1015 855
856FASTOP2(xadd);
857
1016static u8 test_cc(unsigned int condition, unsigned long flags) 858static u8 test_cc(unsigned int condition, unsigned long flags)
1017{ 859{
1018 u8 rc; 860 u8 rc;
@@ -1726,45 +1568,42 @@ static void write_register_operand(struct operand *op)
1726 } 1568 }
1727} 1569}
1728 1570
1729static int writeback(struct x86_emulate_ctxt *ctxt) 1571static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1730{ 1572{
1731 int rc; 1573 int rc;
1732 1574
1733 if (ctxt->d & NoWrite) 1575 switch (op->type) {
1734 return X86EMUL_CONTINUE;
1735
1736 switch (ctxt->dst.type) {
1737 case OP_REG: 1576 case OP_REG:
1738 write_register_operand(&ctxt->dst); 1577 write_register_operand(op);
1739 break; 1578 break;
1740 case OP_MEM: 1579 case OP_MEM:
1741 if (ctxt->lock_prefix) 1580 if (ctxt->lock_prefix)
1742 rc = segmented_cmpxchg(ctxt, 1581 rc = segmented_cmpxchg(ctxt,
1743 ctxt->dst.addr.mem, 1582 op->addr.mem,
1744 &ctxt->dst.orig_val, 1583 &op->orig_val,
1745 &ctxt->dst.val, 1584 &op->val,
1746 ctxt->dst.bytes); 1585 op->bytes);
1747 else 1586 else
1748 rc = segmented_write(ctxt, 1587 rc = segmented_write(ctxt,
1749 ctxt->dst.addr.mem, 1588 op->addr.mem,
1750 &ctxt->dst.val, 1589 &op->val,
1751 ctxt->dst.bytes); 1590 op->bytes);
1752 if (rc != X86EMUL_CONTINUE) 1591 if (rc != X86EMUL_CONTINUE)
1753 return rc; 1592 return rc;
1754 break; 1593 break;
1755 case OP_MEM_STR: 1594 case OP_MEM_STR:
1756 rc = segmented_write(ctxt, 1595 rc = segmented_write(ctxt,
1757 ctxt->dst.addr.mem, 1596 op->addr.mem,
1758 ctxt->dst.data, 1597 op->data,
1759 ctxt->dst.bytes * ctxt->dst.count); 1598 op->bytes * op->count);
1760 if (rc != X86EMUL_CONTINUE) 1599 if (rc != X86EMUL_CONTINUE)
1761 return rc; 1600 return rc;
1762 break; 1601 break;
1763 case OP_XMM: 1602 case OP_XMM:
1764 write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); 1603 write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
1765 break; 1604 break;
1766 case OP_MM: 1605 case OP_MM:
1767 write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm); 1606 write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
1768 break; 1607 break;
1769 case OP_NONE: 1608 case OP_NONE:
1770 /* no writeback */ 1609 /* no writeback */
@@ -2117,42 +1956,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2117 return X86EMUL_CONTINUE; 1956 return X86EMUL_CONTINUE;
2118} 1957}
2119 1958
2120static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
2121{
2122 u8 ex = 0;
2123
2124 emulate_1op_rax_rdx(ctxt, "mul", ex);
2125 return X86EMUL_CONTINUE;
2126}
2127
2128static int em_imul_ex(struct x86_emulate_ctxt *ctxt)
2129{
2130 u8 ex = 0;
2131
2132 emulate_1op_rax_rdx(ctxt, "imul", ex);
2133 return X86EMUL_CONTINUE;
2134}
2135
2136static int em_div_ex(struct x86_emulate_ctxt *ctxt)
2137{
2138 u8 de = 0;
2139
2140 emulate_1op_rax_rdx(ctxt, "div", de);
2141 if (de)
2142 return emulate_de(ctxt);
2143 return X86EMUL_CONTINUE;
2144}
2145
2146static int em_idiv_ex(struct x86_emulate_ctxt *ctxt)
2147{
2148 u8 de = 0;
2149
2150 emulate_1op_rax_rdx(ctxt, "idiv", de);
2151 if (de)
2152 return emulate_de(ctxt);
2153 return X86EMUL_CONTINUE;
2154}
2155
2156static int em_grp45(struct x86_emulate_ctxt *ctxt) 1959static int em_grp45(struct x86_emulate_ctxt *ctxt)
2157{ 1960{
2158 int rc = X86EMUL_CONTINUE; 1961 int rc = X86EMUL_CONTINUE;
@@ -3734,10 +3537,10 @@ static const struct opcode group3[] = {
3734 F(DstMem | SrcImm | NoWrite, em_test), 3537 F(DstMem | SrcImm | NoWrite, em_test),
3735 F(DstMem | SrcNone | Lock, em_not), 3538 F(DstMem | SrcNone | Lock, em_not),
3736 F(DstMem | SrcNone | Lock, em_neg), 3539 F(DstMem | SrcNone | Lock, em_neg),
3737 I(SrcMem, em_mul_ex), 3540 F(DstXacc | Src2Mem, em_mul_ex),
3738 I(SrcMem, em_imul_ex), 3541 F(DstXacc | Src2Mem, em_imul_ex),
3739 I(SrcMem, em_div_ex), 3542 F(DstXacc | Src2Mem, em_div_ex),
3740 I(SrcMem, em_idiv_ex), 3543 F(DstXacc | Src2Mem, em_idiv_ex),
3741}; 3544};
3742 3545
3743static const struct opcode group4[] = { 3546static const struct opcode group4[] = {
@@ -4064,7 +3867,7 @@ static const struct opcode twobyte_table[256] = {
4064 F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), 3867 F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
4065 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 3868 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4066 /* 0xC0 - 0xC7 */ 3869 /* 0xC0 - 0xC7 */
4067 D2bv(DstMem | SrcReg | ModRM | Lock), 3870 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4068 N, D(DstMem | SrcReg | ModRM | Mov), 3871 N, D(DstMem | SrcReg | ModRM | Mov),
4069 N, N, N, GD(0, &group9), 3872 N, N, N, GD(0, &group9),
4070 /* 0xC8 - 0xCF */ 3873 /* 0xC8 - 0xCF */
@@ -4172,6 +3975,24 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4172 fetch_register_operand(op); 3975 fetch_register_operand(op);
4173 op->orig_val = op->val; 3976 op->orig_val = op->val;
4174 break; 3977 break;
3978 case OpAccLo:
3979 op->type = OP_REG;
3980 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
3981 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
3982 fetch_register_operand(op);
3983 op->orig_val = op->val;
3984 break;
3985 case OpAccHi:
3986 if (ctxt->d & ByteOp) {
3987 op->type = OP_NONE;
3988 break;
3989 }
3990 op->type = OP_REG;
3991 op->bytes = ctxt->op_bytes;
3992 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3993 fetch_register_operand(op);
3994 op->orig_val = op->val;
3995 break;
4175 case OpDI: 3996 case OpDI:
4176 op->type = OP_MEM; 3997 op->type = OP_MEM;
4177 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 3998 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
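
The OpAccLo/OpAccHi decoding above mirrors how x86 MUL and DIV use the accumulator pair: the low half lives in (R/E)AX and the high half in (R/E)DX, except for byte-sized operations, whose whole result stays in AX. A minimal stand-alone sketch of that split (the sample values are arbitrary, chosen only for illustration):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* 16-bit MUL: the full product lands in the DX:AX pair, which is
         * why the high accumulator half (OpAccHi) decodes to (R/E)DX. */
        uint16_t ax = 0xbeef, src = 0x1234;
        uint32_t product = (uint32_t)ax * src;
        uint16_t lo = product & 0xffff;         /* would end up in AX */
        uint16_t hi = product >> 16;            /* would end up in DX */

        printf("0x%04x * 0x%04x = 0x%08x (DX=0x%04x, AX=0x%04x)\n",
               ax, src, product, hi, lo);

        /* Byte-sized MUL keeps its whole result in AX, which matches
         * OpAccHi decoding to OP_NONE when ByteOp is set. */
        return 0;
}
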
@@ -4553,11 +4374,15 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
4553static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) 4374static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
4554{ 4375{
4555 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; 4376 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
4556 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; 4377 if (!(ctxt->d & ByteOp))
4378 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
4557 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" 4379 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
4558 : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) 4380 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
4559 : "c"(ctxt->src2.val), [fastop]"S"(fop)); 4381 [fastop]"+S"(fop)
4382 : "c"(ctxt->src2.val));
4560 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); 4383 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
4384 if (!fop) /* exception is returned in fop variable */
4385 return emulate_de(ctxt);
4561 return X86EMUL_CONTINUE; 4386 return X86EMUL_CONTINUE;
4562} 4387}
4563 4388
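
The fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE line above works because every fastop stub is padded to FASTOP_SIZE bytes, so the handler for a given operand width sits at a fixed offset from the block start; byte-sized instructions now simply stay at offset 0. A rough user-space analogue of the index calculation, using the GCC/Clang __builtin_ctzl builtin in place of __ffs and illustrative stub names:

#include <stdio.h>

/* Analogue of __ffs(): index of the lowest set bit (bytes is 1, 2, 4 or 8). */
static int lowest_bit(unsigned long bytes)
{
        return __builtin_ctzl(bytes);
}

int main(void)
{
        /* Stand-ins for the b/w/l/q stubs packed FASTOP_SIZE bytes apart. */
        const char *stub[4] = { "op_byte", "op_word", "op_long", "op_quad" };
        unsigned long sizes[] = { 1, 2, 4, 8 };

        for (int i = 0; i < 4; i++)
                printf("%lu-byte operand -> slot %d (%s)\n", sizes[i],
                       lowest_bit(sizes[i]), stub[lowest_bit(sizes[i])]);
        return 0;
}
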
@@ -4773,9 +4598,17 @@ special_insn:
4773 goto done; 4598 goto done;
4774 4599
4775writeback: 4600writeback:
4776 rc = writeback(ctxt); 4601 if (!(ctxt->d & NoWrite)) {
4777 if (rc != X86EMUL_CONTINUE) 4602 rc = writeback(ctxt, &ctxt->dst);
4778 goto done; 4603 if (rc != X86EMUL_CONTINUE)
4604 goto done;
4605 }
4606 if (ctxt->d & SrcWrite) {
4607 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
4608 rc = writeback(ctxt, &ctxt->src);
4609 if (rc != X86EMUL_CONTINUE)
4610 goto done;
4611 }
4779 4612
4780 /* 4613 /*
4781 * restore dst type in case the decoding will be reused 4614 * restore dst type in case the decoding will be reused
@@ -4872,12 +4705,6 @@ twobyte_insn:
4872 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : 4705 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
4873 (s16) ctxt->src.val; 4706 (s16) ctxt->src.val;
4874 break; 4707 break;
4875 case 0xc0 ... 0xc1: /* xadd */
4876 fastop(ctxt, em_add);
4877 /* Write back the register source. */
4878 ctxt->src.val = ctxt->dst.orig_val;
4879 write_register_operand(&ctxt->src);
4880 break;
4881 case 0xc3: /* movnti */ 4708 case 0xc3: /* movnti */
4882 ctxt->dst.bytes = ctxt->op_bytes; 4709 ctxt->dst.bytes = ctxt->op_bytes;
4883 ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : 4710 ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0eee2c8b64d1..afc11245827c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1608,8 +1608,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1608 return; 1608 return;
1609 1609
1610 if (atomic_read(&apic->lapic_timer.pending) > 0) { 1610 if (atomic_read(&apic->lapic_timer.pending) > 0) {
1611 if (kvm_apic_local_deliver(apic, APIC_LVTT)) 1611 kvm_apic_local_deliver(apic, APIC_LVTT);
1612 atomic_dec(&apic->lapic_timer.pending); 1612 atomic_set(&apic->lapic_timer.pending, 0);
1613 } 1613 }
1614} 1614}
1615 1615
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 004cc87b781c..9e9285ae9b94 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -197,15 +197,63 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
197} 197}
198EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); 198EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
199 199
200static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) 200/*
201 * Bits 3 ~ 11 of the spte are used as the low 9 bits of the generation
202 * number, and bits 52 ~ 61 are used as the high 10 bits of the
203 * generation number.
204 */
205#define MMIO_SPTE_GEN_LOW_SHIFT 3
206#define MMIO_SPTE_GEN_HIGH_SHIFT 52
207
208#define MMIO_GEN_SHIFT 19
209#define MMIO_GEN_LOW_SHIFT 9
210#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1)
211#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
212#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
213
214static u64 generation_mmio_spte_mask(unsigned int gen)
201{ 215{
202 struct kvm_mmu_page *sp = page_header(__pa(sptep)); 216 u64 mask;
217
218 WARN_ON(gen > MMIO_MAX_GEN);
219
220 mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
221 mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
222 return mask;
223}
224
225static unsigned int get_mmio_spte_generation(u64 spte)
226{
227 unsigned int gen;
228
229 spte &= ~shadow_mmio_mask;
230
231 gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
232 gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT;
233 return gen;
234}
235
236static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
237{
238 /*
239 * Initialize the kvm generation close to MMIO_MAX_GEN to make it easy
240 * to test the generation-number wrap-around handling code.
241 */
242 return (kvm_memslots(kvm)->generation +
243 MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
244}
245
246static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
247 unsigned access)
248{
249 unsigned int gen = kvm_current_mmio_generation(kvm);
250 u64 mask = generation_mmio_spte_mask(gen);
203 251
204 access &= ACC_WRITE_MASK | ACC_USER_MASK; 252 access &= ACC_WRITE_MASK | ACC_USER_MASK;
253 mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
205 254
206 sp->mmio_cached = true; 255 trace_mark_mmio_spte(sptep, gfn, access, gen);
207 trace_mark_mmio_spte(sptep, gfn, access); 256 mmu_spte_set(sptep, mask);
208 mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
209} 257}
210 258
211static bool is_mmio_spte(u64 spte) 259static bool is_mmio_spte(u64 spte)
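
The generation split introduced above (low 9 bits in spte bits 3–11, high 10 bits in bits 52–61) round-trips cleanly around the MMIO marker bits. A stand-alone sketch using the same constants; shadow_mmio_mask here is assumed to be the EPT value set later in this series and is used purely for illustration:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

#define MMIO_SPTE_GEN_LOW_SHIFT  3
#define MMIO_SPTE_GEN_HIGH_SHIFT 52

#define MMIO_GEN_SHIFT     19
#define MMIO_GEN_LOW_SHIFT 9
#define MMIO_GEN_LOW_MASK  ((1 << MMIO_GEN_LOW_SHIFT) - 1)
#define MMIO_MAX_GEN       ((1 << MMIO_GEN_SHIFT) - 1)

/* Assumed marker bits (the EPT value from this series). */
static const uint64_t shadow_mmio_mask = (0x3ull << 62) | 0x6ull;

static uint64_t encode_gen(unsigned int gen)
{
        uint64_t mask;

        mask  = (uint64_t)(gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
        mask |= ((uint64_t)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
        return mask;
}

static unsigned int decode_gen(uint64_t spte)
{
        unsigned int gen;

        spte &= ~shadow_mmio_mask;              /* strip the MMIO marker bits */
        gen  = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
        gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT;
        return gen;
}

int main(void)
{
        unsigned int samples[] = { 0, 1, 511, 512, 12345, MMIO_MAX_GEN };

        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                uint64_t spte = encode_gen(samples[i]) | shadow_mmio_mask;

                assert(decode_gen(spte) == samples[i]);
                printf("gen %6u -> spte %#018llx\n", samples[i],
                       (unsigned long long)spte);
        }
        return 0;
}
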
@@ -215,24 +263,38 @@ static bool is_mmio_spte(u64 spte)
215 263
216static gfn_t get_mmio_spte_gfn(u64 spte) 264static gfn_t get_mmio_spte_gfn(u64 spte)
217{ 265{
218 return (spte & ~shadow_mmio_mask) >> PAGE_SHIFT; 266 u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
267 return (spte & ~mask) >> PAGE_SHIFT;
219} 268}
220 269
221static unsigned get_mmio_spte_access(u64 spte) 270static unsigned get_mmio_spte_access(u64 spte)
222{ 271{
223 return (spte & ~shadow_mmio_mask) & ~PAGE_MASK; 272 u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
273 return (spte & ~mask) & ~PAGE_MASK;
224} 274}
225 275
226static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access) 276static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
277 pfn_t pfn, unsigned access)
227{ 278{
228 if (unlikely(is_noslot_pfn(pfn))) { 279 if (unlikely(is_noslot_pfn(pfn))) {
229 mark_mmio_spte(sptep, gfn, access); 280 mark_mmio_spte(kvm, sptep, gfn, access);
230 return true; 281 return true;
231 } 282 }
232 283
233 return false; 284 return false;
234} 285}
235 286
287static bool check_mmio_spte(struct kvm *kvm, u64 spte)
288{
289 unsigned int kvm_gen, spte_gen;
290
291 kvm_gen = kvm_current_mmio_generation(kvm);
292 spte_gen = get_mmio_spte_generation(spte);
293
294 trace_check_mmio_spte(spte, kvm_gen, spte_gen);
295 return likely(kvm_gen == spte_gen);
296}
297
236static inline u64 rsvd_bits(int s, int e) 298static inline u64 rsvd_bits(int s, int e)
237{ 299{
238 return ((1ULL << (e - s + 1)) - 1) << s; 300 return ((1ULL << (e - s + 1)) - 1) << s;
@@ -404,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
404/* 466/*
405 * The idea using the light way get the spte on x86_32 guest is from 467 * The idea using the light way get the spte on x86_32 guest is from
406 * gup_get_pte(arch/x86/mm/gup.c). 468 * gup_get_pte(arch/x86/mm/gup.c).
407 * The difference is we can not catch the spte tlb flush if we leave 469 *
408 * guest mode, so we emulate it by increase clear_spte_count when spte 470 * An spte tlb flush may be pending, because kvm_set_pte_rmapp
409 * is cleared. 471 * coalesces them and we are running out of the MMU lock. Therefore
472 * we need to protect against in-progress updates of the spte.
473 *
474 * Reading the spte while an update is in progress may get the old value
475 * for the high part of the spte. The race is fine for a present->non-present
476 * change (because the high part of the spte is ignored for non-present spte),
477 * but for a present->present change we must reread the spte.
478 *
479 * All such changes are done in two steps (present->non-present and
480 * non-present->present), hence it is enough to count the number of
481 * present->non-present updates: if it changed while reading the spte,
482 * we might have hit the race. This is done using clear_spte_count.
410 */ 483 */
411static u64 __get_spte_lockless(u64 *sptep) 484static u64 __get_spte_lockless(u64 *sptep)
412{ 485{
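
The clear_spte_count scheme described above behaves like a tiny sequence counter: a 32-bit reader of a 64-bit spte retries whenever a present->non-present clear happened underneath it. A toy user-space analogue, not kernel code, with memory barriers and ordering deliberately omitted:

#include <stdio.h>
#include <stdint.h>

/* A 64-bit "spte" stored as two 32-bit halves, plus a counter that is
 * bumped on every present->non-present clear. */
struct toy_spte {
        volatile uint32_t lo, hi;
        volatile uint32_t clear_count;
};

static uint64_t get_spte_lockless(struct toy_spte *s)
{
        uint32_t count, lo, hi;

retry:
        count = s->clear_count;
        lo = s->lo;
        hi = s->hi;
        if (count != s->clear_count)    /* a clear raced with us: reread */
                goto retry;
        return ((uint64_t)hi << 32) | lo;
}

static void clear_spte(struct toy_spte *s)
{
        s->clear_count++;               /* announce the present->non-present step */
        s->lo = 0;
        s->hi = 0;
}

int main(void)
{
        struct toy_spte s = { .lo = 0x12345678u, .hi = 0x9abcdef0u };

        printf("spte = %#018llx\n", (unsigned long long)get_spte_lockless(&s));
        clear_spte(&s);
        printf("spte = %#018llx\n", (unsigned long long)get_spte_lockless(&s));
        return 0;
}
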
@@ -1511,6 +1584,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
1511 if (!direct) 1584 if (!direct)
1512 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); 1585 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
1513 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 1586 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
1587
1588 /*
1589 * The active_mmu_pages list is a FIFO list; do not move the
1590 * page until it is zapped. kvm_zap_obsolete_pages depends on
1591 * this feature. See the comments in kvm_zap_obsolete_pages().
1592 */
1514 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 1593 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
1515 sp->parent_ptes = 0; 1594 sp->parent_ptes = 0;
1516 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1595 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
@@ -1648,6 +1727,16 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
1648static void kvm_mmu_commit_zap_page(struct kvm *kvm, 1727static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1649 struct list_head *invalid_list); 1728 struct list_head *invalid_list);
1650 1729
1730/*
1731 * NOTE: we should pay more attention to the zapped-obsolete page
1732 * (is_obsolete_sp(sp) && sp->role.invalid) when doing a hash list walk,
1733 * since it has been deleted from active_mmu_pages but can still be found
1734 * in the hash list.
1735 *
1736 * for_each_gfn_indirect_valid_sp skips that kind of page, and
1737 * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), skips
1738 * all obsolete pages.
1739 */
1651#define for_each_gfn_sp(_kvm, _sp, _gfn) \ 1740#define for_each_gfn_sp(_kvm, _sp, _gfn) \
1652 hlist_for_each_entry(_sp, \ 1741 hlist_for_each_entry(_sp, \
1653 &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ 1742 &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
@@ -1838,6 +1927,11 @@ static void clear_sp_write_flooding_count(u64 *spte)
1838 __clear_sp_write_flooding_count(sp); 1927 __clear_sp_write_flooding_count(sp);
1839} 1928}
1840 1929
1930static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
1931{
1932 return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
1933}
1934
1841static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, 1935static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1842 gfn_t gfn, 1936 gfn_t gfn,
1843 gva_t gaddr, 1937 gva_t gaddr,
@@ -1864,6 +1958,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1864 role.quadrant = quadrant; 1958 role.quadrant = quadrant;
1865 } 1959 }
1866 for_each_gfn_sp(vcpu->kvm, sp, gfn) { 1960 for_each_gfn_sp(vcpu->kvm, sp, gfn) {
1961 if (is_obsolete_sp(vcpu->kvm, sp))
1962 continue;
1963
1867 if (!need_sync && sp->unsync) 1964 if (!need_sync && sp->unsync)
1868 need_sync = true; 1965 need_sync = true;
1869 1966
@@ -1900,6 +1997,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1900 1997
1901 account_shadowed(vcpu->kvm, gfn); 1998 account_shadowed(vcpu->kvm, gfn);
1902 } 1999 }
2000 sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
1903 init_shadow_page_table(sp); 2001 init_shadow_page_table(sp);
1904 trace_kvm_mmu_get_page(sp, true); 2002 trace_kvm_mmu_get_page(sp, true);
1905 return sp; 2003 return sp;
@@ -2070,8 +2168,10 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
2070 ret = mmu_zap_unsync_children(kvm, sp, invalid_list); 2168 ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
2071 kvm_mmu_page_unlink_children(kvm, sp); 2169 kvm_mmu_page_unlink_children(kvm, sp);
2072 kvm_mmu_unlink_parents(kvm, sp); 2170 kvm_mmu_unlink_parents(kvm, sp);
2171
2073 if (!sp->role.invalid && !sp->role.direct) 2172 if (!sp->role.invalid && !sp->role.direct)
2074 unaccount_shadowed(kvm, sp->gfn); 2173 unaccount_shadowed(kvm, sp->gfn);
2174
2075 if (sp->unsync) 2175 if (sp->unsync)
2076 kvm_unlink_unsync_page(kvm, sp); 2176 kvm_unlink_unsync_page(kvm, sp);
2077 if (!sp->root_count) { 2177 if (!sp->root_count) {
@@ -2081,7 +2181,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
2081 kvm_mod_used_mmu_pages(kvm, -1); 2181 kvm_mod_used_mmu_pages(kvm, -1);
2082 } else { 2182 } else {
2083 list_move(&sp->link, &kvm->arch.active_mmu_pages); 2183 list_move(&sp->link, &kvm->arch.active_mmu_pages);
2084 kvm_reload_remote_mmus(kvm); 2184
2185 /*
2186 * Obsolete pages cannot be used by any vcpu.
2187 * See the comments in kvm_mmu_invalidate_zap_all_pages().
2188 */
2189 if (!sp->role.invalid && !is_obsolete_sp(kvm, sp))
2190 kvm_reload_remote_mmus(kvm);
2085 } 2191 }
2086 2192
2087 sp->role.invalid = 1; 2193 sp->role.invalid = 1;
@@ -2331,7 +2437,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2331 u64 spte; 2437 u64 spte;
2332 int ret = 0; 2438 int ret = 0;
2333 2439
2334 if (set_mmio_spte(sptep, gfn, pfn, pte_access)) 2440 if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access))
2335 return 0; 2441 return 0;
2336 2442
2337 spte = PT_PRESENT_MASK; 2443 spte = PT_PRESENT_MASK;
@@ -2705,6 +2811,13 @@ exit:
2705static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) 2811static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
2706{ 2812{
2707 /* 2813 /*
2814 * Do not fix an mmio spte with an invalid generation number; it
2815 * needs to be updated by the slow page fault path.
2816 */
2817 if (unlikely(error_code & PFERR_RSVD_MASK))
2818 return false;
2819
2820 /*
2708 * #PF can be fast only if the shadow page table is present and it 2821 * #PF can be fast only if the shadow page table is present and it
2709 * is caused by write-protect, that means we just need change the 2822 * is caused by write-protect, that means we just need change the
2710 * W bit of the spte which can be done out of mmu-lock. 2823 * W bit of the spte which can be done out of mmu-lock.
@@ -2869,22 +2982,25 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
2869 2982
2870 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 2983 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2871 return; 2984 return;
2872 spin_lock(&vcpu->kvm->mmu_lock); 2985
2873 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && 2986 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL &&
2874 (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || 2987 (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL ||
2875 vcpu->arch.mmu.direct_map)) { 2988 vcpu->arch.mmu.direct_map)) {
2876 hpa_t root = vcpu->arch.mmu.root_hpa; 2989 hpa_t root = vcpu->arch.mmu.root_hpa;
2877 2990
2991 spin_lock(&vcpu->kvm->mmu_lock);
2878 sp = page_header(root); 2992 sp = page_header(root);
2879 --sp->root_count; 2993 --sp->root_count;
2880 if (!sp->root_count && sp->role.invalid) { 2994 if (!sp->root_count && sp->role.invalid) {
2881 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); 2995 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
2882 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 2996 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
2883 } 2997 }
2884 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
2885 spin_unlock(&vcpu->kvm->mmu_lock); 2998 spin_unlock(&vcpu->kvm->mmu_lock);
2999 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
2886 return; 3000 return;
2887 } 3001 }
3002
3003 spin_lock(&vcpu->kvm->mmu_lock);
2888 for (i = 0; i < 4; ++i) { 3004 for (i = 0; i < 4; ++i) {
2889 hpa_t root = vcpu->arch.mmu.pae_root[i]; 3005 hpa_t root = vcpu->arch.mmu.pae_root[i];
2890 3006
@@ -3148,17 +3264,12 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
3148 return spte; 3264 return spte;
3149} 3265}
3150 3266
3151/*
3152 * If it is a real mmio page fault, return 1 and emulat the instruction
3153 * directly, return 0 to let CPU fault again on the address, -1 is
3154 * returned if bug is detected.
3155 */
3156int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) 3267int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3157{ 3268{
3158 u64 spte; 3269 u64 spte;
3159 3270
3160 if (quickly_check_mmio_pf(vcpu, addr, direct)) 3271 if (quickly_check_mmio_pf(vcpu, addr, direct))
3161 return 1; 3272 return RET_MMIO_PF_EMULATE;
3162 3273
3163 spte = walk_shadow_page_get_mmio_spte(vcpu, addr); 3274 spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
3164 3275
@@ -3166,12 +3277,15 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3166 gfn_t gfn = get_mmio_spte_gfn(spte); 3277 gfn_t gfn = get_mmio_spte_gfn(spte);
3167 unsigned access = get_mmio_spte_access(spte); 3278 unsigned access = get_mmio_spte_access(spte);
3168 3279
3280 if (!check_mmio_spte(vcpu->kvm, spte))
3281 return RET_MMIO_PF_INVALID;
3282
3169 if (direct) 3283 if (direct)
3170 addr = 0; 3284 addr = 0;
3171 3285
3172 trace_handle_mmio_page_fault(addr, gfn, access); 3286 trace_handle_mmio_page_fault(addr, gfn, access);
3173 vcpu_cache_mmio_info(vcpu, addr, gfn, access); 3287 vcpu_cache_mmio_info(vcpu, addr, gfn, access);
3174 return 1; 3288 return RET_MMIO_PF_EMULATE;
3175 } 3289 }
3176 3290
3177 /* 3291 /*
@@ -3179,13 +3293,13 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3179 * it's a BUG if the gfn is not a mmio page. 3293 * it's a BUG if the gfn is not a mmio page.
3180 */ 3294 */
3181 if (direct && !check_direct_spte_mmio_pf(spte)) 3295 if (direct && !check_direct_spte_mmio_pf(spte))
3182 return -1; 3296 return RET_MMIO_PF_BUG;
3183 3297
3184 /* 3298 /*
3185 * If the page table is zapped by other cpus, let CPU fault again on 3299 * If the page table is zapped by other cpus, let CPU fault again on
3186 * the address. 3300 * the address.
3187 */ 3301 */
3188 return 0; 3302 return RET_MMIO_PF_RETRY;
3189} 3303}
3190EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); 3304EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common);
3191 3305
@@ -3195,7 +3309,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr,
3195 int ret; 3309 int ret;
3196 3310
3197 ret = handle_mmio_page_fault_common(vcpu, addr, direct); 3311 ret = handle_mmio_page_fault_common(vcpu, addr, direct);
3198 WARN_ON(ret < 0); 3312 WARN_ON(ret == RET_MMIO_PF_BUG);
3199 return ret; 3313 return ret;
3200} 3314}
3201 3315
@@ -3207,8 +3321,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
3207 3321
3208 pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); 3322 pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
3209 3323
3210 if (unlikely(error_code & PFERR_RSVD_MASK)) 3324 if (unlikely(error_code & PFERR_RSVD_MASK)) {
3211 return handle_mmio_page_fault(vcpu, gva, error_code, true); 3325 r = handle_mmio_page_fault(vcpu, gva, error_code, true);
3326
3327 if (likely(r != RET_MMIO_PF_INVALID))
3328 return r;
3329 }
3212 3330
3213 r = mmu_topup_memory_caches(vcpu); 3331 r = mmu_topup_memory_caches(vcpu);
3214 if (r) 3332 if (r)
@@ -3284,8 +3402,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
3284 ASSERT(vcpu); 3402 ASSERT(vcpu);
3285 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3403 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
3286 3404
3287 if (unlikely(error_code & PFERR_RSVD_MASK)) 3405 if (unlikely(error_code & PFERR_RSVD_MASK)) {
3288 return handle_mmio_page_fault(vcpu, gpa, error_code, true); 3406 r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
3407
3408 if (likely(r != RET_MMIO_PF_INVALID))
3409 return r;
3410 }
3289 3411
3290 r = mmu_topup_memory_caches(vcpu); 3412 r = mmu_topup_memory_caches(vcpu);
3291 if (r) 3413 if (r)
@@ -3391,8 +3513,8 @@ static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
3391 *access &= mask; 3513 *access &= mask;
3392} 3514}
3393 3515
3394static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, 3516static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
3395 int *nr_present) 3517 unsigned access, int *nr_present)
3396{ 3518{
3397 if (unlikely(is_mmio_spte(*sptep))) { 3519 if (unlikely(is_mmio_spte(*sptep))) {
3398 if (gfn != get_mmio_spte_gfn(*sptep)) { 3520 if (gfn != get_mmio_spte_gfn(*sptep)) {
@@ -3401,7 +3523,7 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
3401 } 3523 }
3402 3524
3403 (*nr_present)++; 3525 (*nr_present)++;
3404 mark_mmio_spte(sptep, gfn, access); 3526 mark_mmio_spte(kvm, sptep, gfn, access);
3405 return true; 3527 return true;
3406 } 3528 }
3407 3529
@@ -3764,9 +3886,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
3764 if (r) 3886 if (r)
3765 goto out; 3887 goto out;
3766 r = mmu_alloc_roots(vcpu); 3888 r = mmu_alloc_roots(vcpu);
3767 spin_lock(&vcpu->kvm->mmu_lock); 3889 kvm_mmu_sync_roots(vcpu);
3768 mmu_sync_roots(vcpu);
3769 spin_unlock(&vcpu->kvm->mmu_lock);
3770 if (r) 3890 if (r)
3771 goto out; 3891 goto out;
3772 /* set_cr3() should ensure TLB has been flushed */ 3892 /* set_cr3() should ensure TLB has been flushed */
@@ -4179,39 +4299,107 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
4179 spin_unlock(&kvm->mmu_lock); 4299 spin_unlock(&kvm->mmu_lock);
4180} 4300}
4181 4301
4182void kvm_mmu_zap_all(struct kvm *kvm) 4302#define BATCH_ZAP_PAGES 10
4303static void kvm_zap_obsolete_pages(struct kvm *kvm)
4183{ 4304{
4184 struct kvm_mmu_page *sp, *node; 4305 struct kvm_mmu_page *sp, *node;
4185 LIST_HEAD(invalid_list); 4306 int batch = 0;
4186 4307
4187 spin_lock(&kvm->mmu_lock);
4188restart: 4308restart:
4189 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) 4309 list_for_each_entry_safe_reverse(sp, node,
4190 if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) 4310 &kvm->arch.active_mmu_pages, link) {
4311 int ret;
4312
4313 /*
4314 * No obsolete page exists before a newly created page, since
4315 * active_mmu_pages is a FIFO list.
4316 */
4317 if (!is_obsolete_sp(kvm, sp))
4318 break;
4319
4320 /*
4321 * Since we are walking the list in reverse and invalid
4322 * pages will be moved to the head, skipping invalid pages
4323 * helps us avoid walking the list forever.
4324 */
4325 if (sp->role.invalid)
4326 continue;
4327
4328 /*
4329 * No need to flush the TLB, since we only zap shadow pages with
4330 * an invalid generation number.
4331 */
4332 if (batch >= BATCH_ZAP_PAGES &&
4333 cond_resched_lock(&kvm->mmu_lock)) {
4334 batch = 0;
4191 goto restart; 4335 goto restart;
4336 }
4192 4337
4193 kvm_mmu_commit_zap_page(kvm, &invalid_list); 4338 ret = kvm_mmu_prepare_zap_page(kvm, sp,
4194 spin_unlock(&kvm->mmu_lock); 4339 &kvm->arch.zapped_obsolete_pages);
4340 batch += ret;
4341
4342 if (ret)
4343 goto restart;
4344 }
4345
4346 /*
4347 * Should flush the TLB before freeing the page tables, since
4348 * lockless walking may still use the pages.
4349 */
4350 kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
4195} 4351}
4196 4352
4197void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) 4353/*
4354 * Fast-invalidate all shadow pages, using the lock-break technique
4355 * to zap obsolete pages.
4356 *
4357 * This is required when a memslot is being deleted or the VM is being
4358 * destroyed; in these cases, we must ensure that the KVM MMU does
4359 * not use any resource of the slot being deleted (or of any slot)
4360 * after this function has been called.
4361 */
4362void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
4198{ 4363{
4199 struct kvm_mmu_page *sp, *node;
4200 LIST_HEAD(invalid_list);
4201
4202 spin_lock(&kvm->mmu_lock); 4364 spin_lock(&kvm->mmu_lock);
4203restart: 4365 trace_kvm_mmu_invalidate_zap_all_pages(kvm);
4204 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { 4366 kvm->arch.mmu_valid_gen++;
4205 if (!sp->mmio_cached)
4206 continue;
4207 if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
4208 goto restart;
4209 }
4210 4367
4211 kvm_mmu_commit_zap_page(kvm, &invalid_list); 4368 /*
4369 * Notify all vcpus to reload their shadow page tables
4370 * and flush their TLBs. All vcpus will then switch to the new
4371 * shadow page table with the new mmu_valid_gen.
4372 *
4373 * Note: we should do this under the protection of the
4374 * mmu-lock; otherwise a vcpu could purge its shadow pages
4375 * but miss the TLB flush.
4376 */
4377 kvm_reload_remote_mmus(kvm);
4378
4379 kvm_zap_obsolete_pages(kvm);
4212 spin_unlock(&kvm->mmu_lock); 4380 spin_unlock(&kvm->mmu_lock);
4213} 4381}
4214 4382
4383static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
4384{
4385 return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
4386}
4387
4388void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
4389{
4390 /*
4391 * The very rare case: if the generation number has wrapped around,
4392 * zap all shadow pages.
4393 *
4394 * The max value is MMIO_MAX_GEN - 1, since this is not called
4395 * when marking a memslot invalid.
4396 */
4397 if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) {
4398 printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
4399 kvm_mmu_invalidate_zap_all_pages(kvm);
4400 }
4401}
4402
4215static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) 4403static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
4216{ 4404{
4217 struct kvm *kvm; 4405 struct kvm *kvm;
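
The BATCH_ZAP_PAGES loop above is a lock-break pattern: do a bounded amount of work under mmu_lock, then give waiters (and the scheduler) a chance before continuing; the kernel version restarts its list walk after the break because the list may have changed meanwhile. A rough user-space analogue using a mutex and sched_yield(), with the batch size borrowed from the hunk above (compile with -pthread; names are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <sched.h>

#define BATCH 10

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Process items under a mutex, but drop and retake it every BATCH items
 * so that other lock waiters can make progress in between. */
static void process_all(int nitems)
{
        int batch = 0;

        pthread_mutex_lock(&lock);
        for (int i = 0; i < nitems; i++) {
                if (batch >= BATCH) {
                        /* analogue of cond_resched_lock(&kvm->mmu_lock);
                         * the kernel also restarts its list walk here */
                        pthread_mutex_unlock(&lock);
                        sched_yield();
                        pthread_mutex_lock(&lock);
                        batch = 0;
                }
                /* ... zap one page here ... */
                batch++;
        }
        pthread_mutex_unlock(&lock);
        printf("processed %d items in batches of %d\n", nitems, BATCH);
}

int main(void)
{
        process_all(37);
        return 0;
}
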
@@ -4240,15 +4428,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
4240 * want to shrink a VM that only started to populate its MMU 4428 * want to shrink a VM that only started to populate its MMU
4241 * anyway. 4429 * anyway.
4242 */ 4430 */
4243 if (!kvm->arch.n_used_mmu_pages) 4431 if (!kvm->arch.n_used_mmu_pages &&
4432 !kvm_has_zapped_obsolete_pages(kvm))
4244 continue; 4433 continue;
4245 4434
4246 idx = srcu_read_lock(&kvm->srcu); 4435 idx = srcu_read_lock(&kvm->srcu);
4247 spin_lock(&kvm->mmu_lock); 4436 spin_lock(&kvm->mmu_lock);
4248 4437
4438 if (kvm_has_zapped_obsolete_pages(kvm)) {
4439 kvm_mmu_commit_zap_page(kvm,
4440 &kvm->arch.zapped_obsolete_pages);
4441 goto unlock;
4442 }
4443
4249 prepare_zap_oldest_mmu_page(kvm, &invalid_list); 4444 prepare_zap_oldest_mmu_page(kvm, &invalid_list);
4250 kvm_mmu_commit_zap_page(kvm, &invalid_list); 4445 kvm_mmu_commit_zap_page(kvm, &invalid_list);
4251 4446
4447unlock:
4252 spin_unlock(&kvm->mmu_lock); 4448 spin_unlock(&kvm->mmu_lock);
4253 srcu_read_unlock(&kvm->srcu, idx); 4449 srcu_read_unlock(&kvm->srcu, idx);
4254 4450
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2adcbc2cac6d..5b59c573aba7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -52,6 +52,23 @@
52 52
53int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); 53int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
54void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); 54void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
55
56/*
57 * Return values of handle_mmio_page_fault_common:
58 * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the
59 * instruction directly.
60 * RET_MMIO_PF_INVALID: an invalid spte was detected; let the real page
61 * fault path update the mmio spte.
62 * RET_MMIO_PF_RETRY: let the CPU fault again on the address.
63 * RET_MMIO_PF_BUG: a bug was detected.
64 */
65enum {
66 RET_MMIO_PF_EMULATE = 1,
67 RET_MMIO_PF_INVALID = 2,
68 RET_MMIO_PF_RETRY = 0,
69 RET_MMIO_PF_BUG = -1
70};
71
55int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); 72int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
56int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); 73int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
57 74
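
The enum above is consumed by callers such as handle_ept_misconfig() and the page-fault paths later in this series. A hypothetical stand-alone sketch of the intended dispatch; the helper name is made up for illustration and is not a KVM function:

#include <stdio.h>

enum {
        RET_MMIO_PF_EMULATE = 1,
        RET_MMIO_PF_INVALID = 2,
        RET_MMIO_PF_RETRY   = 0,
        RET_MMIO_PF_BUG     = -1,
};

static const char *mmio_pf_action(int ret)
{
        switch (ret) {
        case RET_MMIO_PF_EMULATE: return "emulate the instruction";
        case RET_MMIO_PF_INVALID: return "fall back to the slow #PF path";
        case RET_MMIO_PF_RETRY:   return "let the CPU fault again";
        case RET_MMIO_PF_BUG:     return "internal bug, warn";
        default:                  return "unknown";
        }
}

int main(void)
{
        int codes[] = { 1, 2, 0, -1 };

        for (int i = 0; i < 4; i++)
                printf("%d -> %s\n", codes[i], mmio_pf_action(codes[i]));
        return 0;
}
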
@@ -97,4 +114,5 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
97 return (mmu->permissions[pfec >> 1] >> pte_access) & 1; 114 return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
98} 115}
99 116
117void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
100#endif 118#endif
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index b8f6172f4174..9d2e0ffcb190 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -7,16 +7,18 @@
7#undef TRACE_SYSTEM 7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM kvmmmu 8#define TRACE_SYSTEM kvmmmu
9 9
10#define KVM_MMU_PAGE_FIELDS \ 10#define KVM_MMU_PAGE_FIELDS \
11 __field(__u64, gfn) \ 11 __field(unsigned long, mmu_valid_gen) \
12 __field(__u32, role) \ 12 __field(__u64, gfn) \
13 __field(__u32, root_count) \ 13 __field(__u32, role) \
14 __field(__u32, root_count) \
14 __field(bool, unsync) 15 __field(bool, unsync)
15 16
16#define KVM_MMU_PAGE_ASSIGN(sp) \ 17#define KVM_MMU_PAGE_ASSIGN(sp) \
17 __entry->gfn = sp->gfn; \ 18 __entry->mmu_valid_gen = sp->mmu_valid_gen; \
18 __entry->role = sp->role.word; \ 19 __entry->gfn = sp->gfn; \
19 __entry->root_count = sp->root_count; \ 20 __entry->role = sp->role.word; \
21 __entry->root_count = sp->root_count; \
20 __entry->unsync = sp->unsync; 22 __entry->unsync = sp->unsync;
21 23
22#define KVM_MMU_PAGE_PRINTK() ({ \ 24#define KVM_MMU_PAGE_PRINTK() ({ \
@@ -28,8 +30,8 @@
28 \ 30 \
29 role.word = __entry->role; \ 31 role.word = __entry->role; \
30 \ 32 \
31 trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ 33 trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s" \
32 " %snxe root %u %s%c", \ 34 " %snxe root %u %s%c", __entry->mmu_valid_gen, \
33 __entry->gfn, role.level, \ 35 __entry->gfn, role.level, \
34 role.cr4_pae ? " pae" : "", \ 36 role.cr4_pae ? " pae" : "", \
35 role.quadrant, \ 37 role.quadrant, \
@@ -197,23 +199,25 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
197 199
198TRACE_EVENT( 200TRACE_EVENT(
199 mark_mmio_spte, 201 mark_mmio_spte,
200 TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), 202 TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen),
201 TP_ARGS(sptep, gfn, access), 203 TP_ARGS(sptep, gfn, access, gen),
202 204
203 TP_STRUCT__entry( 205 TP_STRUCT__entry(
204 __field(void *, sptep) 206 __field(void *, sptep)
205 __field(gfn_t, gfn) 207 __field(gfn_t, gfn)
206 __field(unsigned, access) 208 __field(unsigned, access)
209 __field(unsigned int, gen)
207 ), 210 ),
208 211
209 TP_fast_assign( 212 TP_fast_assign(
210 __entry->sptep = sptep; 213 __entry->sptep = sptep;
211 __entry->gfn = gfn; 214 __entry->gfn = gfn;
212 __entry->access = access; 215 __entry->access = access;
216 __entry->gen = gen;
213 ), 217 ),
214 218
215 TP_printk("sptep:%p gfn %llx access %x", __entry->sptep, __entry->gfn, 219 TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep,
216 __entry->access) 220 __entry->gfn, __entry->access, __entry->gen)
217); 221);
218 222
219TRACE_EVENT( 223TRACE_EVENT(
@@ -274,6 +278,50 @@ TRACE_EVENT(
274 __spte_satisfied(old_spte), __spte_satisfied(new_spte) 278 __spte_satisfied(old_spte), __spte_satisfied(new_spte)
275 ) 279 )
276); 280);
281
282TRACE_EVENT(
283 kvm_mmu_invalidate_zap_all_pages,
284 TP_PROTO(struct kvm *kvm),
285 TP_ARGS(kvm),
286
287 TP_STRUCT__entry(
288 __field(unsigned long, mmu_valid_gen)
289 __field(unsigned int, mmu_used_pages)
290 ),
291
292 TP_fast_assign(
293 __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
294 __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
295 ),
296
297 TP_printk("kvm-mmu-valid-gen %lx used_pages %x",
298 __entry->mmu_valid_gen, __entry->mmu_used_pages
299 )
300);
301
302
303TRACE_EVENT(
304 check_mmio_spte,
305 TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
306 TP_ARGS(spte, kvm_gen, spte_gen),
307
308 TP_STRUCT__entry(
309 __field(unsigned int, kvm_gen)
310 __field(unsigned int, spte_gen)
311 __field(u64, spte)
312 ),
313
314 TP_fast_assign(
315 __entry->kvm_gen = kvm_gen;
316 __entry->spte_gen = spte_gen;
317 __entry->spte = spte;
318 ),
319
320 TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte,
321 __entry->kvm_gen, __entry->spte_gen,
322 __entry->kvm_gen == __entry->spte_gen
323 )
324);
277#endif /* _TRACE_KVMMMU_H */ 325#endif /* _TRACE_KVMMMU_H */
278 326
279#undef TRACE_INCLUDE_PATH 327#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index da20860b457a..7769699d48a8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -552,9 +552,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
552 552
553 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); 553 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
554 554
555 if (unlikely(error_code & PFERR_RSVD_MASK)) 555 if (unlikely(error_code & PFERR_RSVD_MASK)) {
556 return handle_mmio_page_fault(vcpu, addr, error_code, 556 r = handle_mmio_page_fault(vcpu, addr, error_code,
557 mmu_is_nested(vcpu)); 557 mmu_is_nested(vcpu));
558 if (likely(r != RET_MMIO_PF_INVALID))
559 return r;
560 }
558 561
559 r = mmu_topup_memory_caches(vcpu); 562 r = mmu_topup_memory_caches(vcpu);
560 if (r) 563 if (r)
@@ -792,7 +795,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
792 pte_access &= gpte_access(vcpu, gpte); 795 pte_access &= gpte_access(vcpu, gpte);
793 protect_clean_gpte(&pte_access, gpte); 796 protect_clean_gpte(&pte_access, gpte);
794 797
795 if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) 798 if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access,
799 &nr_present))
796 continue; 800 continue;
797 801
798 if (gfn != sp->gfns[i]) { 802 if (gfn != sp->gfns[i]) {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a14a6eaf871d..c0bc80391e40 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1026,7 +1026,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1026 g_tsc_offset = svm->vmcb->control.tsc_offset - 1026 g_tsc_offset = svm->vmcb->control.tsc_offset -
1027 svm->nested.hsave->control.tsc_offset; 1027 svm->nested.hsave->control.tsc_offset;
1028 svm->nested.hsave->control.tsc_offset = offset; 1028 svm->nested.hsave->control.tsc_offset = offset;
1029 } 1029 } else
1030 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1031 svm->vmcb->control.tsc_offset,
1032 offset);
1030 1033
1031 svm->vmcb->control.tsc_offset = offset + g_tsc_offset; 1034 svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
1032 1035
@@ -1044,6 +1047,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
1044 svm->vmcb->control.tsc_offset += adjustment; 1047 svm->vmcb->control.tsc_offset += adjustment;
1045 if (is_guest_mode(vcpu)) 1048 if (is_guest_mode(vcpu))
1046 svm->nested.hsave->control.tsc_offset += adjustment; 1049 svm->nested.hsave->control.tsc_offset += adjustment;
1050 else
1051 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1052 svm->vmcb->control.tsc_offset - adjustment,
1053 svm->vmcb->control.tsc_offset);
1054
1047 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 1055 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1048} 1056}
1049 1057
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index fe5e00ed7036..545245d7cc63 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -756,6 +756,27 @@ TRACE_EVENT(
756 __entry->gpa_match ? "GPA" : "GVA") 756 __entry->gpa_match ? "GPA" : "GVA")
757); 757);
758 758
759TRACE_EVENT(kvm_write_tsc_offset,
760 TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset,
761 __u64 next_tsc_offset),
762 TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset),
763
764 TP_STRUCT__entry(
765 __field( unsigned int, vcpu_id )
766 __field( __u64, previous_tsc_offset )
767 __field( __u64, next_tsc_offset )
768 ),
769
770 TP_fast_assign(
771 __entry->vcpu_id = vcpu_id;
772 __entry->previous_tsc_offset = previous_tsc_offset;
773 __entry->next_tsc_offset = next_tsc_offset;
774 ),
775
776 TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id,
777 __entry->previous_tsc_offset, __entry->next_tsc_offset)
778);
779
759#ifdef CONFIG_X86_64 780#ifdef CONFIG_X86_64
760 781
761#define host_clocks \ 782#define host_clocks \
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 260a91939555..064d0be67ecc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2096,6 +2096,8 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
2096 (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? 2096 (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
2097 vmcs12->tsc_offset : 0)); 2097 vmcs12->tsc_offset : 0));
2098 } else { 2098 } else {
2099 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2100 vmcs_read64(TSC_OFFSET), offset);
2099 vmcs_write64(TSC_OFFSET, offset); 2101 vmcs_write64(TSC_OFFSET, offset);
2100 } 2102 }
2101} 2103}
@@ -2103,11 +2105,14 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
2103static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) 2105static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
2104{ 2106{
2105 u64 offset = vmcs_read64(TSC_OFFSET); 2107 u64 offset = vmcs_read64(TSC_OFFSET);
2108
2106 vmcs_write64(TSC_OFFSET, offset + adjustment); 2109 vmcs_write64(TSC_OFFSET, offset + adjustment);
2107 if (is_guest_mode(vcpu)) { 2110 if (is_guest_mode(vcpu)) {
2108 /* Even when running L2, the adjustment needs to apply to L1 */ 2111 /* Even when running L2, the adjustment needs to apply to L1 */
2109 to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; 2112 to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
2110 } 2113 } else
2114 trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
2115 offset + adjustment);
2111} 2116}
2112 2117
2113static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) 2118static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
@@ -3399,15 +3404,22 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
3399 var->limit = vmx_read_guest_seg_limit(vmx, seg); 3404 var->limit = vmx_read_guest_seg_limit(vmx, seg);
3400 var->selector = vmx_read_guest_seg_selector(vmx, seg); 3405 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3401 ar = vmx_read_guest_seg_ar(vmx, seg); 3406 ar = vmx_read_guest_seg_ar(vmx, seg);
3407 var->unusable = (ar >> 16) & 1;
3402 var->type = ar & 15; 3408 var->type = ar & 15;
3403 var->s = (ar >> 4) & 1; 3409 var->s = (ar >> 4) & 1;
3404 var->dpl = (ar >> 5) & 3; 3410 var->dpl = (ar >> 5) & 3;
3405 var->present = (ar >> 7) & 1; 3411 /*
3412 * Some userspaces do not preserve the unusable property. Since a usable
3413 * segment has to be present according to the VMX spec, we can use the
3414 * present property to work around the userspace bug by making an unusable
3415 * segment always nonpresent. vmx_segment_access_rights() already marks a
3416 * nonpresent segment as unusable.
3417 */
3418 var->present = !var->unusable;
3406 var->avl = (ar >> 12) & 1; 3419 var->avl = (ar >> 12) & 1;
3407 var->l = (ar >> 13) & 1; 3420 var->l = (ar >> 13) & 1;
3408 var->db = (ar >> 14) & 1; 3421 var->db = (ar >> 14) & 1;
3409 var->g = (ar >> 15) & 1; 3422 var->g = (ar >> 15) & 1;
3410 var->unusable = (ar >> 16) & 1;
3411} 3423}
3412 3424
3413static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 3425static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
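
A minimal userspace sketch of the access-rights decoding this hunk changes, assuming only the bit layout vmx_get_segment() reads above (type in bits 0-3, S in bit 4, DPL in bits 5-6, AVL in bit 12, L in bit 13, D/B in bit 14, G in bit 15, unusable in bit 16) and forcing present = !unusable as the patch does; the struct is illustrative, not the kernel's struct kvm_segment.

/* Sketch: derive segment attributes from a VMX access-rights (AR) word,
 * forcing present = !unusable as the patched vmx_get_segment() does.
 * The struct is illustrative; it is not the kernel's struct kvm_segment. */
#include <stdint.h>
#include <stdio.h>

struct seg_attrs {
	unsigned type, s, dpl, present, avl, l, db, g, unusable;
};

static void decode_ar(uint32_t ar, struct seg_attrs *var)
{
	var->unusable = (ar >> 16) & 1;	/* read first, as in the patch */
	var->type     = ar & 15;
	var->s        = (ar >> 4) & 1;
	var->dpl      = (ar >> 5) & 3;
	/* Ignore the hardware present bit (bit 7): an unusable segment is
	 * reported as nonpresent, a usable one as present. */
	var->present  = !var->unusable;
	var->avl      = (ar >> 12) & 1;
	var->l        = (ar >> 13) & 1;
	var->db       = (ar >> 14) & 1;
	var->g        = (ar >> 15) & 1;
}

int main(void)
{
	struct seg_attrs a;

	decode_ar(0x1c09b, &a);	/* example AR word with the unusable bit set */
	printf("type=%u present=%u unusable=%u\n", a.type, a.present, a.unusable);
	return 0;
}
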
@@ -4176,10 +4188,10 @@ static void ept_set_mmio_spte_mask(void)
4176 /* 4188 /*
4177 * EPT Misconfigurations can be generated if the value of bits 2:0 4189 * EPT Misconfigurations can be generated if the value of bits 2:0
4178 * of an EPT paging-structure entry is 110b (write/execute). 4190 * of an EPT paging-structure entry is 110b (write/execute).
4179 * Also, magic bits (0xffull << 49) are set to quickly identify mmio 4191 * Also, magic bits (0x3ull << 62) are set to quickly identify mmio
4180 * spte. 4192 * spte.
4181 */ 4193 */
4182 kvm_mmu_set_mmio_spte_mask(0xffull << 49 | 0x6ull); 4194 kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
4183} 4195}
4184 4196
4185/* 4197/*
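
A small, self-contained sketch of the new EPT MMIO mask, assuming only what the hunk states: EPT misconfigurations fire on the 110b low-bit pattern (write/execute without read) and bits 62-63 now serve as the MMIO marker instead of the old 0xffull << 49. The check below is an illustration, not the kernel's is_mmio_spte() helper.

/* Sketch: build the EPT MMIO spte mask used above and test an spte
 * against it. */
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EPT_MMIO_MASK ((0x3ull << 62) | 0x6ull)	/* marker bits + WX, no R */

static bool looks_like_mmio_spte(uint64_t spte)
{
	/* All mask bits must be set for the spte to be treated as MMIO. */
	return (spte & EPT_MMIO_MASK) == EPT_MMIO_MASK;
}

int main(void)
{
	uint64_t mmio  = EPT_MMIO_MASK | (0x1234ull << 12);	/* gfn folded in */
	uint64_t plain = 0x7ull;				/* ordinary RWX spte */

	printf("mask = 0x%" PRIx64 "\n", (uint64_t)EPT_MMIO_MASK);
	printf("mmio spte:  %d\n", looks_like_mmio_spte(mmio));
	printf("plain spte: %d\n", looks_like_mmio_spte(plain));
	return 0;
}
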
@@ -5366,10 +5378,14 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5366 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 5378 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5367 5379
5368 ret = handle_mmio_page_fault_common(vcpu, gpa, true); 5380 ret = handle_mmio_page_fault_common(vcpu, gpa, true);
5369 if (likely(ret == 1)) 5381 if (likely(ret == RET_MMIO_PF_EMULATE))
5370 return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == 5382 return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
5371 EMULATE_DONE; 5383 EMULATE_DONE;
5372 if (unlikely(!ret)) 5384
5385 if (unlikely(ret == RET_MMIO_PF_INVALID))
5386 return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
5387
5388 if (unlikely(ret == RET_MMIO_PF_RETRY))
5373 return 1; 5389 return 1;
5374 5390
5375 /* It is the real ept misconfig */ 5391 /* It is the real ept misconfig */
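
A sketch of the three-way dispatch handle_ept_misconfig() now performs on handle_mmio_page_fault_common()'s return value, replacing the old ret == 1 / !ret checks. Only the enumerator names appear in the hunk; the values and messages below are illustrative assumptions.

/* Sketch: dispatch on the RET_MMIO_PF_* style return codes used above.
 * Enum values are illustrative, not the kernel's definitions. */
#include <stdio.h>

enum mmio_pf_ret {
	RET_MMIO_PF_EMULATE,	/* spte recognized as MMIO: emulate the access */
	RET_MMIO_PF_INVALID,	/* unknown spte: take the normal page-fault path */
	RET_MMIO_PF_RETRY,	/* transient state: re-enter the guest and retry */
};

static void dispatch(enum mmio_pf_ret ret)
{
	switch (ret) {
	case RET_MMIO_PF_EMULATE:
		printf("emulate the MMIO instruction\n");
		break;
	case RET_MMIO_PF_INVALID:
		printf("fall back to kvm_mmu_page_fault()\n");
		break;
	case RET_MMIO_PF_RETRY:
		printf("return 1: resume the guest\n");
		break;
	}
}

int main(void)
{
	dispatch(RET_MMIO_PF_EMULATE);
	dispatch(RET_MMIO_PF_INVALID);
	dispatch(RET_MMIO_PF_RETRY);
	return 0;
}
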
@@ -7942,7 +7958,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
7942 7958
7943 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); 7959 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
7944 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); 7960 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
7945 vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); 7961 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
7946 /* 7962 /*
7947 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't 7963 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
7948 * actually changed, because it depends on the current state of 7964 * actually changed, because it depends on the current state of
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c34180..d21bce505315 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -618,7 +618,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
618 if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP)) 618 if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
619 return 1; 619 return 1;
620 620
621 if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS)) 621 if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
622 return 1; 622 return 1;
623 623
624 if (is_long_mode(vcpu)) { 624 if (is_long_mode(vcpu)) {
@@ -1193,20 +1193,37 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1193 elapsed = ns - kvm->arch.last_tsc_nsec; 1193 elapsed = ns - kvm->arch.last_tsc_nsec;
1194 1194
1195 if (vcpu->arch.virtual_tsc_khz) { 1195 if (vcpu->arch.virtual_tsc_khz) {
1196 int faulted = 0;
1197
1196 /* n.b - signed multiplication and division required */ 1198 /* n.b - signed multiplication and division required */
1197 usdiff = data - kvm->arch.last_tsc_write; 1199 usdiff = data - kvm->arch.last_tsc_write;
1198#ifdef CONFIG_X86_64 1200#ifdef CONFIG_X86_64
1199 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; 1201 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
1200#else 1202#else
1201 /* do_div() only does unsigned */ 1203 /* do_div() only does unsigned */
1202 asm("idivl %2; xor %%edx, %%edx" 1204 asm("1: idivl %[divisor]\n"
1203 : "=A"(usdiff) 1205 "2: xor %%edx, %%edx\n"
1204 : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); 1206 " movl $0, %[faulted]\n"
1207 "3:\n"
1208 ".section .fixup,\"ax\"\n"
1209 "4: movl $1, %[faulted]\n"
1210 " jmp 3b\n"
1211 ".previous\n"
1212
1213 _ASM_EXTABLE(1b, 4b)
1214
1215 : "=A"(usdiff), [faulted] "=r" (faulted)
1216 : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
1217
1205#endif 1218#endif
1206 do_div(elapsed, 1000); 1219 do_div(elapsed, 1000);
1207 usdiff -= elapsed; 1220 usdiff -= elapsed;
1208 if (usdiff < 0) 1221 if (usdiff < 0)
1209 usdiff = -usdiff; 1222 usdiff = -usdiff;
1223
1224 /* idivl overflow => difference is larger than USEC_PER_SEC */
1225 if (faulted)
1226 usdiff = USEC_PER_SEC;
1210 } else 1227 } else
1211 usdiff = USEC_PER_SEC; /* disable TSC match window below */ 1228 usdiff = USEC_PER_SEC; /* disable TSC match window below */
1212 1229
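
The #else branch above open-codes a 32-bit idivl with an exception-table fixup so that a quotient too large for 32 bits sets `faulted` instead of raising a divide error, and the new tail clamps the result to USEC_PER_SEC. A plain C sketch of the same clamping behaviour, with 64-bit arithmetic standing in for the fixup and illustrative names:

/* Sketch: clamp the scaled TSC-write delta to one second when it exceeds
 * USEC_PER_SEC, mirroring what the fixup above achieves on 32-bit hosts. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC 1000000LL

static int64_t scaled_tsc_delta_us(int64_t tsc_delta, uint32_t virtual_tsc_khz)
{
	int64_t us = (tsc_delta * 1000) / (int64_t)virtual_tsc_khz;

	if (us < 0)
		us = -us;
	/* idivl overflow => difference is larger than USEC_PER_SEC */
	if (us > USEC_PER_SEC)
		us = USEC_PER_SEC;
	return us;
}

int main(void)
{
	/* ~2.6 GHz guest TSC: a 1 ms delta and a multi-minute delta */
	printf("%" PRId64 " us\n", scaled_tsc_delta_us(2600000, 2600000));
	printf("%" PRId64 " us\n", scaled_tsc_delta_us((int64_t)1 << 40, 2600000));
	return 0;
}
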
@@ -1587,6 +1604,30 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1587 return 0; 1604 return 0;
1588} 1605}
1589 1606
1607/*
1608 * kvmclock updates which are isolated to a given vcpu, such as
1609 * vcpu->cpu migration, should not allow system_timestamp from
1610 * the rest of the vcpus to remain static. Otherwise ntp frequency
1611 * correction applies to one vcpu's system_timestamp but not
1612 * the others.
1613 *
1614 * So in those cases, request a kvmclock update for all vcpus.
1615 * The worst case for a remote vcpu to update its kvmclock
1616 * is then bounded by maximum nohz sleep latency.
1617 */
1618
1619static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1620{
1621 int i;
1622 struct kvm *kvm = v->kvm;
1623 struct kvm_vcpu *vcpu;
1624
1625 kvm_for_each_vcpu(i, vcpu, kvm) {
1626 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
1627 kvm_vcpu_kick(vcpu);
1628 }
1629}
1630
1590static bool msr_mtrr_valid(unsigned msr) 1631static bool msr_mtrr_valid(unsigned msr)
1591{ 1632{
1592 switch (msr) { 1633 switch (msr) {
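
kvm_gen_kvmclock_update() above uses the per-vcpu request-bit pattern: set a request bit, kick the vcpu, and let the vcpu's entry path notice and clear it. A toy sketch of that pattern follows; the constants and toy_vcpu type are illustrative, not kernel definitions.

/* Sketch: set-and-consume of a per-vcpu request bit, as done with
 * KVM_REQ_CLOCK_UPDATE above. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define REQ_CLOCK_UPDATE 0	/* illustrative request number */

struct toy_vcpu {
	atomic_ulong requests;
};

static void make_request(struct toy_vcpu *vcpu, int req)
{
	atomic_fetch_or(&vcpu->requests, 1UL << req);
	/* a real implementation would now kick the vcpu out of guest mode */
}

static bool check_request(struct toy_vcpu *vcpu, int req)
{
	unsigned long bit = 1UL << req;

	if (!(atomic_load(&vcpu->requests) & bit))
		return false;
	atomic_fetch_and(&vcpu->requests, ~bit);
	return true;
}

int main(void)
{
	struct toy_vcpu vcpu = { .requests = 0 };

	make_request(&vcpu, REQ_CLOCK_UPDATE);
	printf("pending: %d\n", check_request(&vcpu, REQ_CLOCK_UPDATE));
	printf("pending: %d\n", check_request(&vcpu, REQ_CLOCK_UPDATE));
	return 0;
}
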
@@ -1984,7 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1984 kvmclock_reset(vcpu); 2025 kvmclock_reset(vcpu);
1985 2026
1986 vcpu->arch.time = data; 2027 vcpu->arch.time = data;
1987 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 2028 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
1988 2029
1989 /* we verify if the enable bit is set... */ 2030 /* we verify if the enable bit is set... */
1990 if (!(data & 1)) 2031 if (!(data & 1))
@@ -2701,7 +2742,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2701 * kvmclock on vcpu->cpu migration 2742 * kvmclock on vcpu->cpu migration
2702 */ 2743 */
2703 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) 2744 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2704 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 2745 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2705 if (vcpu->cpu != cpu) 2746 if (vcpu->cpu != cpu)
2706 kvm_migrate_timers(vcpu); 2747 kvm_migrate_timers(vcpu);
2707 vcpu->cpu = cpu; 2748 vcpu->cpu = cpu;
@@ -5238,7 +5279,13 @@ static void kvm_set_mmio_spte_mask(void)
5238 * Set the reserved bits and the present bit of a paging-structure 5279
5239 * entry to generate a page fault with PFER.RSV = 1. 5280
5240 */ 5281 */
5241 mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr; 5282 /* Mask the reserved physical address bits. */
5283 mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
5284
5285 /* Bit 62 is always reserved for 32bit host. */
5286 mask |= 0x3ull << 62;
5287
5288 /* Set the present bit. */
5242 mask |= 1ull; 5289 mask |= 1ull;
5243 5290
5244#ifdef CONFIG_X86_64 5291#ifdef CONFIG_X86_64
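
A standalone sketch of the mask construction in the hunk above for a few MAXPHYADDR values. The [maxphyaddr, 51] reserved-bit span, the 0x3ull << 62 marker and the present bit come straight from the hunk; the loop and printing are illustrative, and the CONFIG_X86_64-only tail of the kernel function is not reproduced.

/* Sketch: reproduce kvm_set_mmio_spte_mask()'s new mask for a given
 * MAXPHYADDR. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t mmio_spte_mask(unsigned int maxphyaddr)
{
	uint64_t mask;

	/* Mask the reserved physical address bits [maxphyaddr, 51]. */
	mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;

	/* Bits 62-63 are always reserved for a 32-bit host. */
	mask |= 0x3ull << 62;

	/* Set the present bit. */
	mask |= 1ull;

	return mask;
}

int main(void)
{
	unsigned int widths[] = { 36, 40, 46 };

	for (unsigned int i = 0; i < sizeof(widths) / sizeof(widths[0]); i++)
		printf("maxphyaddr=%u -> mask=0x%016" PRIx64 "\n",
		       widths[i], mmio_spte_mask(widths[i]));
	return 0;
}
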
@@ -5498,13 +5545,6 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5498 char instruction[3]; 5545 char instruction[3];
5499 unsigned long rip = kvm_rip_read(vcpu); 5546 unsigned long rip = kvm_rip_read(vcpu);
5500 5547
5501 /*
5502 * Blow out the MMU to ensure that no other VCPU has an active mapping
5503 * to ensure that the updated hypercall appears atomically across all
5504 * VCPUs.
5505 */
5506 kvm_mmu_zap_all(vcpu->kvm);
5507
5508 kvm_x86_ops->patch_hypercall(vcpu, instruction); 5548 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5509 5549
5510 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); 5550 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
@@ -5702,6 +5742,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5702 __kvm_migrate_timers(vcpu); 5742 __kvm_migrate_timers(vcpu);
5703 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu)) 5743 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
5704 kvm_gen_update_masterclock(vcpu->kvm); 5744 kvm_gen_update_masterclock(vcpu->kvm);
5745 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
5746 kvm_gen_kvmclock_update(vcpu);
5705 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { 5747 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5706 r = kvm_guest_time_update(vcpu); 5748 r = kvm_guest_time_update(vcpu);
5707 if (unlikely(r)) 5749 if (unlikely(r))
@@ -6812,6 +6854,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6812 return -EINVAL; 6854 return -EINVAL;
6813 6855
6814 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 6856 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6857 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
6815 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 6858 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
6816 6859
6817 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 6860 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -7040,22 +7083,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
7040 * If memory slot is created, or moved, we need to clear all 7083 * If memory slot is created, or moved, we need to clear all
7041 * mmio sptes. 7084 * mmio sptes.
7042 */ 7085 */
7043 if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { 7086 kvm_mmu_invalidate_mmio_sptes(kvm);
7044 kvm_mmu_zap_mmio_sptes(kvm);
7045 kvm_reload_remote_mmus(kvm);
7046 }
7047} 7087}
7048 7088
7049void kvm_arch_flush_shadow_all(struct kvm *kvm) 7089void kvm_arch_flush_shadow_all(struct kvm *kvm)
7050{ 7090{
7051 kvm_mmu_zap_all(kvm); 7091 kvm_mmu_invalidate_zap_all_pages(kvm);
7052 kvm_reload_remote_mmus(kvm);
7053} 7092}
7054 7093
7055void kvm_arch_flush_shadow_memslot(struct kvm *kvm, 7094void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
7056 struct kvm_memory_slot *slot) 7095 struct kvm_memory_slot *slot)
7057{ 7096{
7058 kvm_arch_flush_shadow_all(kvm); 7097 kvm_mmu_invalidate_zap_all_pages(kvm);
7059} 7098}
7060 7099
7061int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 7100int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
@@ -7263,3 +7302,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
7263EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); 7302EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
7264EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); 7303EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
7265EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); 7304EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
7305EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile
index 94e0e54056a9..8f38d577a2fa 100644
--- a/arch/x86/lguest/Makefile
+++ b/arch/x86/lguest/Makefile
@@ -1,2 +1,2 @@
1obj-y := i386_head.o boot.o 1obj-y := head_32.o boot.o
2CFLAGS_boot.o := $(call cc-option, -fno-stack-protector) 2CFLAGS_boot.o := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 7114c63f047d..6a22c19da663 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -882,9 +882,9 @@ int lguest_setup_irq(unsigned int irq)
882 * It would be far better for everyone if the Guest had its own clock, but 882 * It would be far better for everyone if the Guest had its own clock, but
883 * until then the Host gives us the time on every interrupt. 883 * until then the Host gives us the time on every interrupt.
884 */ 884 */
885static unsigned long lguest_get_wallclock(void) 885static void lguest_get_wallclock(struct timespec *now)
886{ 886{
887 return lguest_data.time.tv_sec; 887 *now = lguest_data.time;
888} 888}
889 889
890/* 890/*
@@ -1410,7 +1410,7 @@ __init void lguest_init(void)
1410 new_cpu_data.x86_capability[0] = cpuid_edx(1); 1410 new_cpu_data.x86_capability[0] = cpuid_edx(1);
1411 1411
1412 /* Math is always hard! */ 1412 /* Math is always hard! */
1413 new_cpu_data.hard_math = 1; 1413 set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
1414 1414
1415 /* We don't have features. We have puppies! Puppies! */ 1415 /* We don't have features. We have puppies! Puppies! */
1416#ifdef CONFIG_X86_MCE 1416#ifdef CONFIG_X86_MCE
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/head_32.S
index 6ddfe4fc23c3..6ddfe4fc23c3 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/head_32.S
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 252b8f5489ba..4500142bc4aa 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,6 +1,7 @@
1#include <linux/highmem.h> 1#include <linux/highmem.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/swap.h> /* for totalram_pages */ 3#include <linux/swap.h> /* for totalram_pages */
4#include <linux/bootmem.h>
4 5
5void *kmap(struct page *page) 6void *kmap(struct page *page)
6{ 7{
@@ -121,6 +122,11 @@ void __init set_highmem_pages_init(void)
121 struct zone *zone; 122 struct zone *zone;
122 int nid; 123 int nid;
123 124
125 /*
126 * Explicitly reset zone->managed_pages because set_highmem_pages_init()
127 * is invoked before free_all_bootmem()
128 */
129 reset_all_zones_managed_pages();
124 for_each_zone(zone) { 130 for_each_zone(zone) {
125 unsigned long zone_start_pfn, zone_end_pfn; 131 unsigned long zone_start_pfn, zone_end_pfn;
126 132
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index ae1aa71d0115..7e73e8c69096 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -16,169 +16,6 @@
16#include <asm/tlbflush.h> 16#include <asm/tlbflush.h>
17#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
18 18
19static unsigned long page_table_shareable(struct vm_area_struct *svma,
20 struct vm_area_struct *vma,
21 unsigned long addr, pgoff_t idx)
22{
23 unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
24 svma->vm_start;
25 unsigned long sbase = saddr & PUD_MASK;
26 unsigned long s_end = sbase + PUD_SIZE;
27
28 /* Allow segments to share if only one is marked locked */
29 unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
30 unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
31
32 /*
33 * match the virtual addresses, permission and the alignment of the
34 * page table page.
35 */
36 if (pmd_index(addr) != pmd_index(saddr) ||
37 vm_flags != svm_flags ||
38 sbase < svma->vm_start || svma->vm_end < s_end)
39 return 0;
40
41 return saddr;
42}
43
44static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
45{
46 unsigned long base = addr & PUD_MASK;
47 unsigned long end = base + PUD_SIZE;
48
49 /*
50 * check on proper vm_flags and page table alignment
51 */
52 if (vma->vm_flags & VM_MAYSHARE &&
53 vma->vm_start <= base && end <= vma->vm_end)
54 return 1;
55 return 0;
56}
57
58/*
59 * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
60 * and returns the corresponding pte. While this is not necessary for the
61 * !shared pmd case because we can allocate the pmd later as well, it makes the
62 * code much cleaner. pmd allocation is essential for the shared case because
63 * pud has to be populated inside the same i_mmap_mutex section - otherwise
64 * racing tasks could either miss the sharing (see huge_pte_offset) or select a
65 * bad pmd for sharing.
66 */
67static pte_t *
68huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
69{
70 struct vm_area_struct *vma = find_vma(mm, addr);
71 struct address_space *mapping = vma->vm_file->f_mapping;
72 pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
73 vma->vm_pgoff;
74 struct vm_area_struct *svma;
75 unsigned long saddr;
76 pte_t *spte = NULL;
77 pte_t *pte;
78
79 if (!vma_shareable(vma, addr))
80 return (pte_t *)pmd_alloc(mm, pud, addr);
81
82 mutex_lock(&mapping->i_mmap_mutex);
83 vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
84 if (svma == vma)
85 continue;
86
87 saddr = page_table_shareable(svma, vma, addr, idx);
88 if (saddr) {
89 spte = huge_pte_offset(svma->vm_mm, saddr);
90 if (spte) {
91 get_page(virt_to_page(spte));
92 break;
93 }
94 }
95 }
96
97 if (!spte)
98 goto out;
99
100 spin_lock(&mm->page_table_lock);
101 if (pud_none(*pud))
102 pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
103 else
104 put_page(virt_to_page(spte));
105 spin_unlock(&mm->page_table_lock);
106out:
107 pte = (pte_t *)pmd_alloc(mm, pud, addr);
108 mutex_unlock(&mapping->i_mmap_mutex);
109 return pte;
110}
111
112/*
113 * unmap huge page backed by shared pte.
114 *
115 * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
116 * indicated by page_count > 1, unmap is achieved by clearing pud and
117 * decrementing the ref count. If count == 1, the pte page is not shared.
118 *
119 * called with vma->vm_mm->page_table_lock held.
120 *
121 * returns: 1 successfully unmapped a shared pte page
122 * 0 the underlying pte page is not shared, or it is the last user
123 */
124int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
125{
126 pgd_t *pgd = pgd_offset(mm, *addr);
127 pud_t *pud = pud_offset(pgd, *addr);
128
129 BUG_ON(page_count(virt_to_page(ptep)) == 0);
130 if (page_count(virt_to_page(ptep)) == 1)
131 return 0;
132
133 pud_clear(pud);
134 put_page(virt_to_page(ptep));
135 *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
136 return 1;
137}
138
139pte_t *huge_pte_alloc(struct mm_struct *mm,
140 unsigned long addr, unsigned long sz)
141{
142 pgd_t *pgd;
143 pud_t *pud;
144 pte_t *pte = NULL;
145
146 pgd = pgd_offset(mm, addr);
147 pud = pud_alloc(mm, pgd, addr);
148 if (pud) {
149 if (sz == PUD_SIZE) {
150 pte = (pte_t *)pud;
151 } else {
152 BUG_ON(sz != PMD_SIZE);
153 if (pud_none(*pud))
154 pte = huge_pmd_share(mm, addr, pud);
155 else
156 pte = (pte_t *)pmd_alloc(mm, pud, addr);
157 }
158 }
159 BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
160
161 return pte;
162}
163
164pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
165{
166 pgd_t *pgd;
167 pud_t *pud;
168 pmd_t *pmd = NULL;
169
170 pgd = pgd_offset(mm, addr);
171 if (pgd_present(*pgd)) {
172 pud = pud_offset(pgd, addr);
173 if (pud_present(*pud)) {
174 if (pud_large(*pud))
175 return (pte_t *)pud;
176 pmd = pmd_offset(pud, addr);
177 }
178 }
179 return (pte_t *) pmd;
180}
181
182#if 0 /* This is just for testing */ 19#if 0 /* This is just for testing */
183struct page * 20struct page *
184follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 21follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -240,30 +77,6 @@ int pud_huge(pud_t pud)
240 return !!(pud_val(pud) & _PAGE_PSE); 77 return !!(pud_val(pud) & _PAGE_PSE);
241} 78}
242 79
243struct page *
244follow_huge_pmd(struct mm_struct *mm, unsigned long address,
245 pmd_t *pmd, int write)
246{
247 struct page *page;
248
249 page = pte_page(*(pte_t *)pmd);
250 if (page)
251 page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
252 return page;
253}
254
255struct page *
256follow_huge_pud(struct mm_struct *mm, unsigned long address,
257 pud_t *pud, int write)
258{
259 struct page *page;
260
261 page = pte_page(*(pte_t *)pud);
262 if (page)
263 page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
264 return page;
265}
266
267#endif 80#endif
268 81
269/* x86_64 also uses this file */ 82/* x86_64 also uses this file */
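
The block removed from this file carries the rationale for hugetlb PMD sharing: only VM_MAYSHARE mappings that fully cover the PUD-aligned, PUD-sized range around the faulting address may share a PMD page. A compact userspace sketch of just that shareability test, using illustrative constants and a toy vma struct rather than the kernel's definitions:

/* Sketch: the vma_shareable()-style check from the code removed above.
 * PUD_SIZE/PUD_MASK, VM_MAYSHARE and struct toy_vma are stand-ins for the
 * kernel definitions. */
#include <stdbool.h>
#include <stdio.h>

#define PUD_SIZE    (1UL << 30)		/* 1 GiB on x86-64 with 4K pages */
#define PUD_MASK    (~(PUD_SIZE - 1))
#define VM_MAYSHARE 0x00000080UL	/* illustrative flag value */

struct toy_vma {
	unsigned long vm_start, vm_end, vm_flags;
};

static bool pud_range_shareable(const struct toy_vma *vma, unsigned long addr)
{
	unsigned long base = addr & PUD_MASK;
	unsigned long end = base + PUD_SIZE;

	/* Shareable only if the mapping may be shared and fully covers the
	 * PUD-aligned, PUD-sized range containing addr. */
	return (vma->vm_flags & VM_MAYSHARE) &&
	       vma->vm_start <= base && end <= vma->vm_end;
}

int main(void)
{
	struct toy_vma big   = { 0x40000000UL, 0xC0000000UL, VM_MAYSHARE };
	struct toy_vma small = { 0x40000000UL, 0x40200000UL, VM_MAYSHARE };

	printf("big:   %d\n", pud_range_shareable(&big, 0x80000000UL));
	printf("small: %d\n", pud_range_shareable(&small, 0x40100000UL));
	return 0;
}
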
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1f34e9219775..2ec29ac78ae6 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -494,7 +494,6 @@ int devmem_is_allowed(unsigned long pagenr)
494 494
495void free_init_pages(char *what, unsigned long begin, unsigned long end) 495void free_init_pages(char *what, unsigned long begin, unsigned long end)
496{ 496{
497 unsigned long addr;
498 unsigned long begin_aligned, end_aligned; 497 unsigned long begin_aligned, end_aligned;
499 498
500 /* Make sure boundaries are page aligned */ 499 /* Make sure boundaries are page aligned */
@@ -509,8 +508,6 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
509 if (begin >= end) 508 if (begin >= end)
510 return; 509 return;
511 510
512 addr = begin;
513
514 /* 511 /*
515 * If debugging page accesses then do not free this memory but 512 * If debugging page accesses then do not free this memory but
516 * mark them not present - any buggy init-section access will 513 * mark them not present - any buggy init-section access will
@@ -529,18 +526,13 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
529 set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); 526 set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
530 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); 527 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
531 528
532 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); 529 free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what);
533
534 for (; addr < end; addr += PAGE_SIZE) {
535 memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
536 free_reserved_page(virt_to_page(addr));
537 }
538#endif 530#endif
539} 531}
540 532
541void free_initmem(void) 533void free_initmem(void)
542{ 534{
543 free_init_pages("unused kernel memory", 535 free_init_pages("unused kernel",
544 (unsigned long)(&__init_begin), 536 (unsigned long)(&__init_begin),
545 (unsigned long)(&__init_end)); 537 (unsigned long)(&__init_end));
546} 538}
@@ -566,7 +558,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
566 * - relocate_initrd() 558 * - relocate_initrd()
567 * So here we can do PAGE_ALIGN() safely to get the partial page freed 559 * So here we can do PAGE_ALIGN() safely to get the partial page freed
568 */ 560 */
569 free_init_pages("initrd memory", start, PAGE_ALIGN(end)); 561 free_init_pages("initrd", start, PAGE_ALIGN(end));
570} 562}
571#endif 563#endif
572 564
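
The hunk above replaces the open-coded printk plus per-page memset(POISON_FREE_INITMEM)/free_reserved_page() loop with a single free_reserved_area() call. A minimal sketch of the walk that call consolidates; poison_and_release_page() is a hypothetical stand-in, not a kernel API.

/* Sketch: the per-page poison-and-free walk free_reserved_area() now
 * performs for free_init_pages(). */
#include <stdio.h>

#define PAGE_SIZE 4096UL

static void poison_and_release_page(unsigned long addr)
{
	/* In the kernel this would poison the page and hand it back to the
	 * page allocator; here it is a no-op. */
	(void)addr;
}

static void free_area(unsigned long begin, unsigned long end, const char *what)
{
	for (unsigned long addr = begin; addr < end; addr += PAGE_SIZE)
		poison_and_release_page(addr);
	printf("Freeing %s: %luK freed\n", what, (end - begin) >> 10);
}

int main(void)
{
	free_area(0x1000000UL, 0x1200000UL, "unused kernel");	/* 2 MiB */
	return 0;
}
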
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3ac7e319918d..4287f1ffba7e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -660,10 +660,8 @@ void __init initmem_init(void)
660 highstart_pfn = max_low_pfn; 660 highstart_pfn = max_low_pfn;
661 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 661 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
662 pages_to_mb(highend_pfn - highstart_pfn)); 662 pages_to_mb(highend_pfn - highstart_pfn));
663 num_physpages = highend_pfn;
664 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 663 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
665#else 664#else
666 num_physpages = max_low_pfn;
667 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 665 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
668#endif 666#endif
669 667
@@ -671,7 +669,7 @@ void __init initmem_init(void)
671 sparse_memory_present_with_active_regions(0); 669 sparse_memory_present_with_active_regions(0);
672 670
673#ifdef CONFIG_FLATMEM 671#ifdef CONFIG_FLATMEM
674 max_mapnr = num_physpages; 672 max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn;
675#endif 673#endif
676 __vmalloc_start_set = true; 674 __vmalloc_start_set = true;
677 675
@@ -739,9 +737,6 @@ static void __init test_wp_bit(void)
739 737
740void __init mem_init(void) 738void __init mem_init(void)
741{ 739{
742 int codesize, reservedpages, datasize, initsize;
743 int tmp;
744
745 pci_iommu_alloc(); 740 pci_iommu_alloc();
746 741
747#ifdef CONFIG_FLATMEM 742#ifdef CONFIG_FLATMEM
@@ -759,32 +754,11 @@ void __init mem_init(void)
759 set_highmem_pages_init(); 754 set_highmem_pages_init();
760 755
761 /* this will put all low memory onto the freelists */ 756 /* this will put all low memory onto the freelists */
762 totalram_pages += free_all_bootmem(); 757 free_all_bootmem();
763
764 reservedpages = 0;
765 for (tmp = 0; tmp < max_low_pfn; tmp++)
766 /*
767 * Only count reserved RAM pages:
768 */
769 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
770 reservedpages++;
771 758
772 after_bootmem = 1; 759 after_bootmem = 1;
773 760
774 codesize = (unsigned long) &_etext - (unsigned long) &_text; 761 mem_init_print_info(NULL);
775 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
776 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
777
778 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
779 "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
780 nr_free_pages() << (PAGE_SHIFT-10),
781 num_physpages << (PAGE_SHIFT-10),
782 codesize >> 10,
783 reservedpages << (PAGE_SHIFT-10),
784 datasize >> 10,
785 initsize >> 10,
786 totalhigh_pages << (PAGE_SHIFT-10));
787
788 printk(KERN_INFO "virtual kernel memory layout:\n" 762 printk(KERN_INFO "virtual kernel memory layout:\n"
789 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 763 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
790#ifdef CONFIG_HIGHMEM 764#ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bb00c4672ad6..104d56a9245f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -368,7 +368,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
368 * 368 *
369 * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) 369 * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
370 * 370 *
371 * phys_addr holds the negative offset to the kernel, which is added 371 * phys_base holds the negative offset to the kernel, which is added
372 * to the compile time generated pmds. This results in invalid pmds up 372 * to the compile time generated pmds. This results in invalid pmds up
373 * to the point where we hit the physaddr 0 mapping. 373 * to the point where we hit the physaddr 0 mapping.
374 * 374 *
@@ -712,36 +712,22 @@ EXPORT_SYMBOL_GPL(arch_add_memory);
712 712
713static void __meminit free_pagetable(struct page *page, int order) 713static void __meminit free_pagetable(struct page *page, int order)
714{ 714{
715 struct zone *zone;
716 bool bootmem = false;
717 unsigned long magic; 715 unsigned long magic;
718 unsigned int nr_pages = 1 << order; 716 unsigned int nr_pages = 1 << order;
719 717
720 /* bootmem page has reserved flag */ 718 /* bootmem page has reserved flag */
721 if (PageReserved(page)) { 719 if (PageReserved(page)) {
722 __ClearPageReserved(page); 720 __ClearPageReserved(page);
723 bootmem = true;
724 721
725 magic = (unsigned long)page->lru.next; 722 magic = (unsigned long)page->lru.next;
726 if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { 723 if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
727 while (nr_pages--) 724 while (nr_pages--)
728 put_page_bootmem(page++); 725 put_page_bootmem(page++);
729 } else 726 } else
730 __free_pages_bootmem(page, order); 727 while (nr_pages--)
728 free_reserved_page(page++);
731 } else 729 } else
732 free_pages((unsigned long)page_address(page), order); 730 free_pages((unsigned long)page_address(page), order);
733
734 /*
735 * SECTION_INFO pages and MIX_SECTION_INFO pages
736 * are all allocated by bootmem.
737 */
738 if (bootmem) {
739 zone = page_zone(page);
740 zone_span_writelock(zone);
741 zone->present_pages += nr_pages;
742 zone_span_writeunlock(zone);
743 totalram_pages += nr_pages;
744 }
745} 731}
746 732
747static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) 733static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
@@ -1058,9 +1044,6 @@ static void __init register_page_bootmem_info(void)
1058 1044
1059void __init mem_init(void) 1045void __init mem_init(void)
1060{ 1046{
1061 long codesize, reservedpages, datasize, initsize;
1062 unsigned long absent_pages;
1063
1064 pci_iommu_alloc(); 1047 pci_iommu_alloc();
1065 1048
1066 /* clear_bss() already clear the empty_zero_page */ 1049 /* clear_bss() already clear the empty_zero_page */
@@ -1068,29 +1051,14 @@ void __init mem_init(void)
1068 register_page_bootmem_info(); 1051 register_page_bootmem_info();
1069 1052
1070 /* this will put all memory onto the freelists */ 1053 /* this will put all memory onto the freelists */
1071 totalram_pages = free_all_bootmem(); 1054 free_all_bootmem();
1072
1073 absent_pages = absent_pages_in_range(0, max_pfn);
1074 reservedpages = max_pfn - totalram_pages - absent_pages;
1075 after_bootmem = 1; 1055 after_bootmem = 1;
1076 1056
1077 codesize = (unsigned long) &_etext - (unsigned long) &_text;
1078 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
1079 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
1080
1081 /* Register memory areas for /proc/kcore */ 1057 /* Register memory areas for /proc/kcore */
1082 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, 1058 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
1083 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER); 1059 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
1084 1060
1085 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " 1061 mem_init_print_info(NULL);
1086 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
1087 nr_free_pages() << (PAGE_SHIFT-10),
1088 max_pfn << (PAGE_SHIFT-10),
1089 codesize >> 10,
1090 absent_pages << (PAGE_SHIFT-10),
1091 reservedpages << (PAGE_SHIFT-10),
1092 datasize >> 10,
1093 initsize >> 10);
1094} 1062}
1095 1063
1096#ifdef CONFIG_DEBUG_RODATA 1064#ifdef CONFIG_DEBUG_RODATA
@@ -1166,11 +1134,10 @@ void mark_rodata_ro(void)
1166 set_memory_ro(start, (end-start) >> PAGE_SHIFT); 1134 set_memory_ro(start, (end-start) >> PAGE_SHIFT);
1167#endif 1135#endif
1168 1136
1169 free_init_pages("unused kernel memory", 1137 free_init_pages("unused kernel",
1170 (unsigned long) __va(__pa_symbol(text_end)), 1138 (unsigned long) __va(__pa_symbol(text_end)),
1171 (unsigned long) __va(__pa_symbol(rodata_start))); 1139 (unsigned long) __va(__pa_symbol(rodata_start)));
1172 1140 free_init_pages("unused kernel",
1173 free_init_pages("unused kernel memory",
1174 (unsigned long) __va(__pa_symbol(rodata_end)), 1141 (unsigned long) __va(__pa_symbol(rodata_end)),
1175 (unsigned long) __va(__pa_symbol(_sdata))); 1142 (unsigned long) __va(__pa_symbol(_sdata)));
1176} 1143}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 9a1e6583910c..0215e2c563ef 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -501,15 +501,15 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
501 } 501 }
502 502
503 if (slot < 0) { 503 if (slot < 0) {
504 printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n", 504 printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n",
505 (u64)phys_addr, size); 505 __func__, (u64)phys_addr, size);
506 WARN_ON(1); 506 WARN_ON(1);
507 return NULL; 507 return NULL;
508 } 508 }
509 509
510 if (early_ioremap_debug) { 510 if (early_ioremap_debug) {
511 printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ", 511 printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ",
512 (u64)phys_addr, size, slot); 512 __func__, (u64)phys_addr, size, slot);
513 dump_stack(); 513 dump_stack();
514 } 514 }
515 515
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 845df6835f9f..f63778cb2363 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -98,7 +98,7 @@ static unsigned long mmap_base(void)
98 * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 98 * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
99 * does, but not when emulating X86_32 99 * does, but not when emulating X86_32
100 */ 100 */
101static unsigned long mmap_legacy_base(void) 101unsigned long mmap_legacy_base(void)
102{ 102{
103 if (mmap_is_ia32()) 103 if (mmap_is_ia32())
104 return TASK_UNMAPPED_BASE; 104 return TASK_UNMAPPED_BASE;
@@ -115,10 +115,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
115 if (mmap_is_legacy()) { 115 if (mmap_is_legacy()) {
116 mm->mmap_base = mmap_legacy_base(); 116 mm->mmap_base = mmap_legacy_base();
117 mm->get_unmapped_area = arch_get_unmapped_area; 117 mm->get_unmapped_area = arch_get_unmapped_area;
118 mm->unmap_area = arch_unmap_area;
119 } else { 118 } else {
120 mm->mmap_base = mmap_base(); 119 mm->mmap_base = mmap_base();
121 mm->get_unmapped_area = arch_get_unmapped_area_topdown; 120 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
122 mm->unmap_area = arch_unmap_area_topdown;
123 } 121 }
124} 122}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index dc0b727742f4..0057a7accfb1 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -410,9 +410,7 @@ out:
410 pr_warning("multiple CPUs still online, may miss events.\n"); 410 pr_warning("multiple CPUs still online, may miss events.\n");
411} 411}
412 412
413/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, 413static void leave_uniprocessor(void)
414 but this whole function is ifdefed CONFIG_HOTPLUG_CPU */
415static void __ref leave_uniprocessor(void)
416{ 414{
417 int cpu; 415 int cpu;
418 int err; 416 int err;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index a71c4e207679..8bf93bae1f13 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -60,7 +60,7 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = {
60 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE 60 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
61}; 61};
62 62
63int __cpuinit numa_cpu_node(int cpu) 63int numa_cpu_node(int cpu)
64{ 64{
65 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); 65 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
66 66
@@ -691,12 +691,12 @@ void __init init_cpu_to_node(void)
691#ifndef CONFIG_DEBUG_PER_CPU_MAPS 691#ifndef CONFIG_DEBUG_PER_CPU_MAPS
692 692
693# ifndef CONFIG_NUMA_EMU 693# ifndef CONFIG_NUMA_EMU
694void __cpuinit numa_add_cpu(int cpu) 694void numa_add_cpu(int cpu)
695{ 695{
696 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 696 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
697} 697}
698 698
699void __cpuinit numa_remove_cpu(int cpu) 699void numa_remove_cpu(int cpu)
700{ 700{
701 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 701 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
702} 702}
@@ -763,17 +763,17 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable)
763} 763}
764 764
765# ifndef CONFIG_NUMA_EMU 765# ifndef CONFIG_NUMA_EMU
766static void __cpuinit numa_set_cpumask(int cpu, bool enable) 766static void numa_set_cpumask(int cpu, bool enable)
767{ 767{
768 debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); 768 debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
769} 769}
770 770
771void __cpuinit numa_add_cpu(int cpu) 771void numa_add_cpu(int cpu)
772{ 772{
773 numa_set_cpumask(cpu, true); 773 numa_set_cpumask(cpu, true);
774} 774}
775 775
776void __cpuinit numa_remove_cpu(int cpu) 776void numa_remove_cpu(int cpu)
777{ 777{
778 numa_set_cpumask(cpu, false); 778 numa_set_cpumask(cpu, false);
779} 779}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 73a6d7395bd3..0342d27ca798 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -83,10 +83,8 @@ void __init initmem_init(void)
83 highstart_pfn = max_low_pfn; 83 highstart_pfn = max_low_pfn;
84 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 84 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
85 pages_to_mb(highend_pfn - highstart_pfn)); 85 pages_to_mb(highend_pfn - highstart_pfn));
86 num_physpages = highend_pfn;
87 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 86 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
88#else 87#else
89 num_physpages = max_low_pfn;
90 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 88 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
91#endif 89#endif
92 printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 90 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index dbbbb47260cc..a8f90ce3dedf 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -10,7 +10,7 @@
10 10
11#include "numa_internal.h" 11#include "numa_internal.h"
12 12
13static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata; 13static int emu_nid_to_phys[MAX_NUMNODES];
14static char *emu_cmdline __initdata; 14static char *emu_cmdline __initdata;
15 15
16void __init numa_emu_cmdline(char *str) 16void __init numa_emu_cmdline(char *str)
@@ -444,7 +444,7 @@ no_emu:
444} 444}
445 445
446#ifndef CONFIG_DEBUG_PER_CPU_MAPS 446#ifndef CONFIG_DEBUG_PER_CPU_MAPS
447void __cpuinit numa_add_cpu(int cpu) 447void numa_add_cpu(int cpu)
448{ 448{
449 int physnid, nid; 449 int physnid, nid;
450 450
@@ -462,7 +462,7 @@ void __cpuinit numa_add_cpu(int cpu)
462 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); 462 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
463} 463}
464 464
465void __cpuinit numa_remove_cpu(int cpu) 465void numa_remove_cpu(int cpu)
466{ 466{
467 int i; 467 int i;
468 468
@@ -470,7 +470,7 @@ void __cpuinit numa_remove_cpu(int cpu)
470 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); 470 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
471} 471}
472#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ 472#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
473static void __cpuinit numa_set_cpumask(int cpu, bool enable) 473static void numa_set_cpumask(int cpu, bool enable)
474{ 474{
475 int nid, physnid; 475 int nid, physnid;
476 476
@@ -490,12 +490,12 @@ static void __cpuinit numa_set_cpumask(int cpu, bool enable)
490 } 490 }
491} 491}
492 492
493void __cpuinit numa_add_cpu(int cpu) 493void numa_add_cpu(int cpu)
494{ 494{
495 numa_set_cpumask(cpu, true); 495 numa_set_cpumask(cpu, true);
496} 496}
497 497
498void __cpuinit numa_remove_cpu(int cpu) 498void numa_remove_cpu(int cpu)
499{ 499{
500 numa_set_cpumask(cpu, false); 500 numa_set_cpumask(cpu, false);
501} 501}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 17fda6a8b3c2..dfa537a03be1 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -240,7 +240,6 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
240static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) 240static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
241{ 241{
242 pud_t *pud; 242 pud_t *pud;
243 unsigned long addr;
244 int i; 243 int i;
245 244
246 if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ 245 if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
@@ -248,8 +247,7 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
248 247
249 pud = pud_offset(pgd, 0); 248 pud = pud_offset(pgd, 0);
250 249
251 for (addr = i = 0; i < PREALLOCATED_PMDS; 250 for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
252 i++, pud++, addr += PUD_SIZE) {
253 pmd_t *pmd = pmds[i]; 251 pmd_t *pmd = pmds[i];
254 252
255 if (i >= KERNEL_PGD_BOUNDARY) 253 if (i >= KERNEL_PGD_BOUNDARY)
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 410531d3c292..90555bf60aa4 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -5,7 +5,7 @@
5#include <asm/pgtable.h> 5#include <asm/pgtable.h>
6#include <asm/proto.h> 6#include <asm/proto.h>
7 7
8static int disable_nx __cpuinitdata; 8static int disable_nx;
9 9
10/* 10/*
11 * noexec = on|off 11 * noexec = on|off
@@ -29,7 +29,7 @@ static int __init noexec_setup(char *str)
29} 29}
30early_param("noexec", noexec_setup); 30early_param("noexec", noexec_setup);
31 31
32void __cpuinit x86_configure_nx(void) 32void x86_configure_nx(void)
33{ 33{
34 if (cpu_has_nx && !disable_nx) 34 if (cpu_has_nx && !disable_nx)
35 __supported_pte_mask |= _PAGE_NX; 35 __supported_pte_mask |= _PAGE_NX;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f66b54086ce5..79c216aa0e2b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -12,6 +12,7 @@
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/filter.h> 13#include <linux/filter.h>
14#include <linux/if_vlan.h> 14#include <linux/if_vlan.h>
15#include <linux/random.h>
15 16
16/* 17/*
17 * Conventions : 18 * Conventions :
@@ -144,6 +145,39 @@ static int pkt_type_offset(void)
144 return -1; 145 return -1;
145} 146}
146 147
148struct bpf_binary_header {
149 unsigned int pages;
150 /* Note : for security reasons, bpf code will follow a randomly
151 * sized amount of int3 instructions
152 */
153 u8 image[];
154};
155
156static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
157 u8 **image_ptr)
158{
159 unsigned int sz, hole;
160 struct bpf_binary_header *header;
161
 162 /* Most BPF filters are really small,
163 * but if some of them fill a page, allow at least
164 * 128 extra bytes to insert a random section of int3
165 */
166 sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE);
167 header = module_alloc(sz);
168 if (!header)
169 return NULL;
170
171 memset(header, 0xcc, sz); /* fill whole space with int3 instructions */
172
173 header->pages = sz / PAGE_SIZE;
174 hole = sz - (proglen + sizeof(*header));
175
176 /* insert a random number of int3 instructions before BPF code */
177 *image_ptr = &header->image[prandom_u32() % hole];
178 return header;
179}
180
147void bpf_jit_compile(struct sk_filter *fp) 181void bpf_jit_compile(struct sk_filter *fp)
148{ 182{
149 u8 temp[64]; 183 u8 temp[64];
@@ -153,6 +187,7 @@ void bpf_jit_compile(struct sk_filter *fp)
153 int t_offset, f_offset; 187 int t_offset, f_offset;
154 u8 t_op, f_op, seen = 0, pass; 188 u8 t_op, f_op, seen = 0, pass;
155 u8 *image = NULL; 189 u8 *image = NULL;
190 struct bpf_binary_header *header = NULL;
156 u8 *func; 191 u8 *func;
157 int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ 192 int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
158 unsigned int cleanup_addr; /* epilogue code offset */ 193 unsigned int cleanup_addr; /* epilogue code offset */
@@ -693,7 +728,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
693 if (unlikely(proglen + ilen > oldproglen)) { 728 if (unlikely(proglen + ilen > oldproglen)) {
694 pr_err("bpb_jit_compile fatal error\n"); 729 pr_err("bpb_jit_compile fatal error\n");
695 kfree(addrs); 730 kfree(addrs);
696 module_free(NULL, image); 731 module_free(NULL, header);
697 return; 732 return;
698 } 733 }
699 memcpy(image + proglen, temp, ilen); 734 memcpy(image + proglen, temp, ilen);
@@ -717,10 +752,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
717 break; 752 break;
718 } 753 }
719 if (proglen == oldproglen) { 754 if (proglen == oldproglen) {
720 image = module_alloc(max_t(unsigned int, 755 header = bpf_alloc_binary(proglen, &image);
721 proglen, 756 if (!header)
722 sizeof(struct work_struct)));
723 if (!image)
724 goto out; 757 goto out;
725 } 758 }
726 oldproglen = proglen; 759 oldproglen = proglen;
@@ -730,7 +763,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
730 bpf_jit_dump(flen, proglen, pass, image); 763 bpf_jit_dump(flen, proglen, pass, image);
731 764
732 if (image) { 765 if (image) {
733 bpf_flush_icache(image, image + proglen); 766 bpf_flush_icache(header, image + proglen);
767 set_memory_ro((unsigned long)header, header->pages);
734 fp->bpf_func = (void *)image; 768 fp->bpf_func = (void *)image;
735 } 769 }
736out: 770out:
@@ -738,20 +772,13 @@ out:
738 return; 772 return;
739} 773}
740 774
741static void jit_free_defer(struct work_struct *arg)
742{
743 module_free(NULL, arg);
744}
745
746/* run from softirq, we must use a work_struct to call
747 * module_free() from process context
748 */
749void bpf_jit_free(struct sk_filter *fp) 775void bpf_jit_free(struct sk_filter *fp)
750{ 776{
751 if (fp->bpf_func != sk_run_filter) { 777 if (fp->bpf_func != sk_run_filter) {
752 struct work_struct *work = (struct work_struct *)fp->bpf_func; 778 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
779 struct bpf_binary_header *header = (void *)addr;
753 780
754 INIT_WORK(work, jit_free_defer); 781 set_memory_rw(addr, header->pages);
755 schedule_work(work); 782 module_free(NULL, header);
756 } 783 }
757} 784}
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 3e724256dbee..d641897a1f4e 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -324,14 +324,11 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
324 res->start = start; 324 res->start = start;
325 res->end = end; 325 res->end = end;
326 info->res_offset[info->res_num] = addr.translation_offset; 326 info->res_offset[info->res_num] = addr.translation_offset;
327 info->res_num++;
327 328
328 if (!pci_use_crs) { 329 if (!pci_use_crs)
329 dev_printk(KERN_DEBUG, &info->bridge->dev, 330 dev_printk(KERN_DEBUG, &info->bridge->dev,
330 "host bridge window %pR (ignored)\n", res); 331 "host bridge window %pR (ignored)\n", res);
331 return AE_OK;
332 }
333
334 info->res_num++;
335 332
336 return AE_OK; 333 return AE_OK;
337} 334}
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index e9e6ed5cdf94..a48be98e9ded 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -312,7 +312,7 @@ static int __init early_fill_mp_bus_info(void)
312 312
313#define ENABLE_CF8_EXT_CFG (1ULL << 46) 313#define ENABLE_CF8_EXT_CFG (1ULL << 46)
314 314
315static void __cpuinit enable_pci_io_ecs(void *unused) 315static void enable_pci_io_ecs(void *unused)
316{ 316{
317 u64 reg; 317 u64 reg;
318 rdmsrl(MSR_AMD64_NB_CFG, reg); 318 rdmsrl(MSR_AMD64_NB_CFG, reg);
@@ -322,8 +322,8 @@ static void __cpuinit enable_pci_io_ecs(void *unused)
322 } 322 }
323} 323}
324 324
325static int __cpuinit amd_cpu_notify(struct notifier_block *self, 325static int amd_cpu_notify(struct notifier_block *self, unsigned long action,
326 unsigned long action, void *hcpu) 326 void *hcpu)
327{ 327{
328 int cpu = (long)hcpu; 328 int cpu = (long)hcpu;
329 switch (action) { 329 switch (action) {
@@ -337,7 +337,7 @@ static int __cpuinit amd_cpu_notify(struct notifier_block *self,
337 return NOTIFY_OK; 337 return NOTIFY_OK;
338} 338}
339 339
340static struct notifier_block __cpuinitdata amd_cpu_notifier = { 340static struct notifier_block amd_cpu_notifier = {
341 .notifier_call = amd_cpu_notify, 341 .notifier_call = amd_cpu_notify,
342}; 342};
343 343
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index f8ab4945892e..8244f5ec2f4c 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -12,8 +12,10 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/irq.h> 13#include <linux/irq.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/reboot.h>
15#include <linux/serial_reg.h> 16#include <linux/serial_reg.h>
16#include <linux/serial_8250.h> 17#include <linux/serial_8250.h>
18#include <linux/reboot.h>
17 19
18#include <asm/ce4100.h> 20#include <asm/ce4100.h>
19#include <asm/prom.h> 21#include <asm/prom.h>
@@ -134,7 +136,7 @@ static void __init sdv_arch_setup(void)
134} 136}
135 137
136#ifdef CONFIG_X86_IO_APIC 138#ifdef CONFIG_X86_IO_APIC
137static void __cpuinit sdv_pci_init(void) 139static void sdv_pci_init(void)
138{ 140{
139 x86_of_pci_init(); 141 x86_of_pci_init();
140 /* We can't set this earlier, because we need to calibrate the timer */ 142 /* We can't set this earlier, because we need to calibrate the timer */
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index d2fbcedcf6ea..90f6ed127096 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -274,8 +274,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
274 return status; 274 return status;
275} 275}
276 276
277int efi_set_rtc_mmss(unsigned long nowtime) 277int efi_set_rtc_mmss(const struct timespec *now)
278{ 278{
279 unsigned long nowtime = now->tv_sec;
279 efi_status_t status; 280 efi_status_t status;
280 efi_time_t eft; 281 efi_time_t eft;
281 efi_time_cap_t cap; 282 efi_time_cap_t cap;
@@ -310,7 +311,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
310 return 0; 311 return 0;
311} 312}
312 313
313unsigned long efi_get_time(void) 314void efi_get_time(struct timespec *now)
314{ 315{
315 efi_status_t status; 316 efi_status_t status;
316 efi_time_t eft; 317 efi_time_t eft;
@@ -320,8 +321,9 @@ unsigned long efi_get_time(void)
320 if (status != EFI_SUCCESS) 321 if (status != EFI_SUCCESS)
321 pr_err("Oops: efitime: can't read time!\n"); 322 pr_err("Oops: efitime: can't read time!\n");
322 323
323 return mktime(eft.year, eft.month, eft.day, eft.hour, 324 now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour,
324 eft.minute, eft.second); 325 eft.minute, eft.second);
326 now->tv_nsec = 0;
325} 327}
326 328
327/* 329/*
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index a0a0a4389bbd..47fe66fe61f1 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -65,7 +65,7 @@
65 * lapic (always-on,ARAT) ------ 150 65 * lapic (always-on,ARAT) ------ 150
66 */ 66 */
67 67
68__cpuinitdata enum mrst_timer_options mrst_timer_options; 68enum mrst_timer_options mrst_timer_options;
69 69
70static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; 70static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
71static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; 71static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
@@ -248,7 +248,7 @@ static void __init mrst_time_init(void)
248 apbt_time_init(); 248 apbt_time_init();
249} 249}
250 250
251static void __cpuinit mrst_arch_setup(void) 251static void mrst_arch_setup(void)
252{ 252{
253 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) 253 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
254 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; 254 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index d62b0a3b5c14..5e355b134ba4 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -56,7 +56,7 @@ void vrtc_cmos_write(unsigned char val, unsigned char reg)
56} 56}
57EXPORT_SYMBOL_GPL(vrtc_cmos_write); 57EXPORT_SYMBOL_GPL(vrtc_cmos_write);
58 58
59unsigned long vrtc_get_time(void) 59void vrtc_get_time(struct timespec *now)
60{ 60{
61 u8 sec, min, hour, mday, mon; 61 u8 sec, min, hour, mday, mon;
62 unsigned long flags; 62 unsigned long flags;
@@ -82,17 +82,18 @@ unsigned long vrtc_get_time(void)
82 printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d " 82 printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
83 "mon: %d year: %d\n", sec, min, hour, mday, mon, year); 83 "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
84 84
85 return mktime(year, mon, mday, hour, min, sec); 85 now->tv_sec = mktime(year, mon, mday, hour, min, sec);
86 now->tv_nsec = 0;
86} 87}
87 88
88int vrtc_set_mmss(unsigned long nowtime) 89int vrtc_set_mmss(const struct timespec *now)
89{ 90{
90 unsigned long flags; 91 unsigned long flags;
91 struct rtc_time tm; 92 struct rtc_time tm;
92 int year; 93 int year;
93 int retval = 0; 94 int retval = 0;
94 95
95 rtc_time_to_tm(nowtime, &tm); 96 rtc_time_to_tm(now->tv_sec, &tm);
96 if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) { 97 if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
97 /* 98 /*
98 * tm.year is the number of years since 1900, and the 99 * tm.year is the number of years since 1900, and the
@@ -110,7 +111,7 @@ int vrtc_set_mmss(unsigned long nowtime)
110 } else { 111 } else {
111 printk(KERN_ERR 112 printk(KERN_ERR
112 "%s: Invalid vRTC value: write of %lx to vRTC failed\n", 113 "%s: Invalid vRTC value: write of %lx to vRTC failed\n",
113 __FUNCTION__, nowtime); 114 __FUNCTION__, now->tv_sec);
114 retval = -EINVAL; 115 retval = -EINVAL;
115 } 116 }
116 return retval; 117 return retval;
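
The lguest, EFI and vRTC hunks in this series all switch the wallclock hooks from returning seconds to filling a struct timespec. A minimal sketch of the new read-side shape; read_platform_seconds() is a hypothetical stand-in for the platform-specific RTC read.

/* Sketch: the struct timespec-based get_wallclock() convention used by
 * the hunks above. */
#include <stdio.h>
#include <time.h>

static unsigned long read_platform_seconds(void)
{
	return 1376570000UL;	/* arbitrary example value */
}

static void toy_get_wallclock(struct timespec *now)
{
	now->tv_sec = (time_t)read_platform_seconds();
	now->tv_nsec = 0;	/* these RTCs have whole-second resolution */
}

int main(void)
{
	struct timespec ts;

	toy_get_wallclock(&ts);
	printf("wallclock: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
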
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index ae7319db18ee..5e04a1c899fa 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -508,7 +508,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
508{ 508{
509 struct rt_sigframe __user *frame; 509 struct rt_sigframe __user *frame;
510 int err = 0; 510 int err = 0;
511 struct task_struct *me = current;
512 511
513 frame = (struct rt_sigframe __user *) 512 frame = (struct rt_sigframe __user *)
514 round_down(stack_top - sizeof(struct rt_sigframe), 16); 513 round_down(stack_top - sizeof(struct rt_sigframe), 16);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 0faad646f5fd..d6bfb876cfb0 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -372,7 +372,7 @@ subsys_initcall(sysenter_setup);
372/* Register vsyscall32 into the ABI table */ 372/* Register vsyscall32 into the ABI table */
373#include <linux/sysctl.h> 373#include <linux/sysctl.h>
374 374
375static ctl_table abi_table2[] = { 375static struct ctl_table abi_table2[] = {
376 { 376 {
377 .procname = "vsyscall32", 377 .procname = "vsyscall32",
378 .data = &sysctl_vsyscall32, 378 .data = &sysctl_vsyscall32,
@@ -383,7 +383,7 @@ static ctl_table abi_table2[] = {
383 {} 383 {}
384}; 384};
385 385
386static ctl_table abi_root_table2[] = { 386static struct ctl_table abi_root_table2[] = {
387 { 387 {
388 .procname = "abi", 388 .procname = "abi",
389 .mode = 0555, 389 .mode = 0555,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a492be2635ac..193097ef3d7d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1557,7 +1557,7 @@ asmlinkage void __init xen_start_kernel(void)
 #ifdef CONFIG_X86_32
         /* set up basic CPUID stuff */
         cpu_detect(&new_cpu_data);
-        new_cpu_data.hard_math = 1;
+        set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
         new_cpu_data.wp_works_ok = 1;
         new_cpu_data.x86_capability[0] = cpuid_edx(1);
 #endif
@@ -1681,8 +1681,8 @@ static void __init init_hvm_pv_info(void)
         xen_domain_type = XEN_HVM_DOMAIN;
 }
 
-static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
-                                        unsigned long action, void *hcpu)
+static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action,
+                              void *hcpu)
 {
         int cpu = (long)hcpu;
         switch (action) {
@@ -1700,7 +1700,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
         return NOTIFY_OK;
 }
 
-static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
+static struct notifier_block xen_hvm_cpu_notifier = {
         .notifier_call = xen_hvm_cpu_notify,
 };
 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 94eac5c85cdc..056d11faef21 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -475,7 +475,7 @@ static void __init fiddle_vdso(void)
 #endif
 }
 
-static int __cpuinit register_callback(unsigned type, const void *func)
+static int register_callback(unsigned type, const void *func)
 {
         struct callback_register callback = {
                 .type = type,
@@ -486,7 +486,7 @@ static int __cpuinit register_callback(unsigned type, const void *func)
         return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
 }
 
-void __cpuinit xen_enable_sysenter(void)
+void xen_enable_sysenter(void)
 {
         int ret;
         unsigned sysenter_feature;
@@ -505,7 +505,7 @@ void __cpuinit xen_enable_sysenter(void)
         setup_clear_cpu_cap(sysenter_feature);
 }
 
-void __cpuinit xen_enable_syscall(void)
+void xen_enable_syscall(void)
 {
 #ifdef CONFIG_X86_64
         int ret;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d99cae8147d1..ca92754eb846 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -40,11 +40,15 @@
 
 cpumask_var_t xen_cpu_initialized_map;
 
-static DEFINE_PER_CPU(int, xen_resched_irq);
-static DEFINE_PER_CPU(int, xen_callfunc_irq);
-static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
-static DEFINE_PER_CPU(int, xen_irq_work);
-static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
+struct xen_common_irq {
+        int irq;
+        char *name;
+};
+static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -61,7 +65,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
         return IRQ_HANDLED;
 }
 
-static void __cpuinit cpu_bringup(void)
+static void cpu_bringup(void)
 {
         int cpu;
 
@@ -93,16 +97,53 @@ static void __cpuinit cpu_bringup(void)
         wmb(); /* make sure everything is out */
 }
 
-static void __cpuinit cpu_bringup_and_idle(void)
+static void cpu_bringup_and_idle(void)
 {
         cpu_bringup();
         cpu_startup_entry(CPUHP_ONLINE);
 }
 
+static void xen_smp_intr_free(unsigned int cpu)
+{
+        if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
+                unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL);
+                per_cpu(xen_resched_irq, cpu).irq = -1;
+                kfree(per_cpu(xen_resched_irq, cpu).name);
+                per_cpu(xen_resched_irq, cpu).name = NULL;
+        }
+        if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) {
+                unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL);
+                per_cpu(xen_callfunc_irq, cpu).irq = -1;
+                kfree(per_cpu(xen_callfunc_irq, cpu).name);
+                per_cpu(xen_callfunc_irq, cpu).name = NULL;
+        }
+        if (per_cpu(xen_debug_irq, cpu).irq >= 0) {
+                unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL);
+                per_cpu(xen_debug_irq, cpu).irq = -1;
+                kfree(per_cpu(xen_debug_irq, cpu).name);
+                per_cpu(xen_debug_irq, cpu).name = NULL;
+        }
+        if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) {
+                unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq,
+                                       NULL);
+                per_cpu(xen_callfuncsingle_irq, cpu).irq = -1;
+                kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
+                per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
+        }
+        if (xen_hvm_domain())
+                return;
+
+        if (per_cpu(xen_irq_work, cpu).irq >= 0) {
+                unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
+                per_cpu(xen_irq_work, cpu).irq = -1;
+                kfree(per_cpu(xen_irq_work, cpu).name);
+                per_cpu(xen_irq_work, cpu).name = NULL;
+        }
+};
 static int xen_smp_intr_init(unsigned int cpu)
 {
         int rc;
-        const char *resched_name, *callfunc_name, *debug_name;
+        char *resched_name, *callfunc_name, *debug_name;
 
         resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
         rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -113,7 +154,8 @@ static int xen_smp_intr_init(unsigned int cpu)
                                     NULL);
         if (rc < 0)
                 goto fail;
-        per_cpu(xen_resched_irq, cpu) = rc;
+        per_cpu(xen_resched_irq, cpu).irq = rc;
+        per_cpu(xen_resched_irq, cpu).name = resched_name;
 
         callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
@@ -124,7 +166,8 @@ static int xen_smp_intr_init(unsigned int cpu)
                                     NULL);
         if (rc < 0)
                 goto fail;
-        per_cpu(xen_callfunc_irq, cpu) = rc;
+        per_cpu(xen_callfunc_irq, cpu).irq = rc;
+        per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
 
         debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
         rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
@@ -132,7 +175,8 @@ static int xen_smp_intr_init(unsigned int cpu)
                                      debug_name, NULL);
         if (rc < 0)
                 goto fail;
-        per_cpu(xen_debug_irq, cpu) = rc;
+        per_cpu(xen_debug_irq, cpu).irq = rc;
+        per_cpu(xen_debug_irq, cpu).name = debug_name;
 
         callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
@@ -143,7 +187,8 @@ static int xen_smp_intr_init(unsigned int cpu)
                                     NULL);
         if (rc < 0)
                 goto fail;
-        per_cpu(xen_callfuncsingle_irq, cpu) = rc;
+        per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
+        per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
 
         /*
          * The IRQ worker on PVHVM goes through the native path and uses the
@@ -161,26 +206,13 @@ static int xen_smp_intr_init(unsigned int cpu)
                                     NULL);
         if (rc < 0)
                 goto fail;
-        per_cpu(xen_irq_work, cpu) = rc;
+        per_cpu(xen_irq_work, cpu).irq = rc;
+        per_cpu(xen_irq_work, cpu).name = callfunc_name;
 
         return 0;
 
  fail:
-        if (per_cpu(xen_resched_irq, cpu) >= 0)
-                unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
-        if (per_cpu(xen_callfunc_irq, cpu) >= 0)
-                unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
-        if (per_cpu(xen_debug_irq, cpu) >= 0)
-                unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
-        if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
-                unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
-                                       NULL);
-        if (xen_hvm_domain())
-                return rc;
-
-        if (per_cpu(xen_irq_work, cpu) >= 0)
-                unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
-
+        xen_smp_intr_free(cpu);
         return rc;
 }
 
@@ -294,7 +326,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
         set_cpu_present(cpu, true);
 }
 
-static int __cpuinit
+static int
 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 {
         struct vcpu_guest_context *ctxt;
@@ -365,7 +397,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
         return 0;
 }
 
-static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
+static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 {
         int rc;
 
@@ -433,17 +465,12 @@ static void xen_cpu_die(unsigned int cpu)
                 current->state = TASK_UNINTERRUPTIBLE;
                 schedule_timeout(HZ/10);
         }
-        unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
-        unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
-        unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
-        unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
-        if (!xen_hvm_domain())
-                unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
+        xen_smp_intr_free(cpu);
         xen_uninit_lock_cpu(cpu);
         xen_teardown_timer(cpu);
 }
 
-static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
+static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
 {
         play_dead_common();
         HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
@@ -664,7 +691,7 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
         xen_init_lock_cpu(0);
 }
 
-static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
+static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
         int rc;
         rc = native_cpu_up(cpu, tidle);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 3002ec1bb71a..cf3caee356b3 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -7,6 +7,7 @@
 #include <linux/debugfs.h>
 #include <linux/log2.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 #include <asm/paravirt.h>
 
@@ -165,6 +166,7 @@ static int xen_spin_trylock(struct arch_spinlock *lock)
         return old == 0;
 }
 
+static DEFINE_PER_CPU(char *, irq_name);
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
 
@@ -359,10 +361,10 @@ static irqreturn_t dummy_handler(int irq, void *dev_id)
         return IRQ_HANDLED;
 }
 
-void __cpuinit xen_init_lock_cpu(int cpu)
+void xen_init_lock_cpu(int cpu)
 {
         int irq;
-        const char *name;
+        char *name;
 
         WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
              cpu, per_cpu(lock_kicker_irq, cpu));
@@ -385,6 +387,7 @@ void __cpuinit xen_init_lock_cpu(int cpu)
         if (irq >= 0) {
                 disable_irq(irq); /* make sure it's never delivered */
                 per_cpu(lock_kicker_irq, cpu) = irq;
+                per_cpu(irq_name, cpu) = name;
         }
 
         printk("cpu %d spinlock event irq %d\n", cpu, irq);
@@ -401,6 +404,8 @@ void xen_uninit_lock_cpu(int cpu)
 
         unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
         per_cpu(lock_kicker_irq, cpu) = -1;
+        kfree(per_cpu(irq_name, cpu));
+        per_cpu(irq_name, cpu) = NULL;
 }
 
 void __init xen_init_spinlocks(void)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 3d88bfdf9e1c..ee365895b06b 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -14,6 +14,8 @@
 #include <linux/kernel_stat.h>
 #include <linux/math64.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/pvclock_gtod.h>
 
 #include <asm/pvclock.h>
 #include <asm/xen/hypervisor.h>
@@ -36,9 +38,8 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
 /* snapshots of runstate info */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
 
-/* unused ns of stolen and blocked time */
+/* unused ns of stolen time */
 static DEFINE_PER_CPU(u64, xen_residual_stolen);
-static DEFINE_PER_CPU(u64, xen_residual_blocked);
 
 /* return an consistent snapshot of 64-bit time/counter value */
 static u64 get64(const u64 *p)
@@ -115,7 +116,7 @@ static void do_stolen_accounting(void)
 {
         struct vcpu_runstate_info state;
         struct vcpu_runstate_info *snap;
-        s64 blocked, runnable, offline, stolen;
+        s64 runnable, offline, stolen;
         cputime_t ticks;
 
         get_runstate_snapshot(&state);
@@ -125,7 +126,6 @@ static void do_stolen_accounting(void)
         snap = &__get_cpu_var(xen_runstate_snapshot);
 
         /* work out how much time the VCPU has not been runn*ing* */
-        blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
         runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
         offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
 
@@ -141,17 +141,6 @@ static void do_stolen_accounting(void)
         ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
         __this_cpu_write(xen_residual_stolen, stolen);
         account_steal_ticks(ticks);
-
-        /* Add the appropriate number of ticks of blocked time,
-           including any left-overs from last time. */
-        blocked += __this_cpu_read(xen_residual_blocked);
-
-        if (blocked < 0)
-                blocked = 0;
-
-        ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
-        __this_cpu_write(xen_residual_blocked, blocked);
-        account_idle_ticks(ticks);
 }
 
 /* Get the TSC speed from Xen */
@@ -191,34 +180,56 @@ static void xen_read_wallclock(struct timespec *ts)
         put_cpu_var(xen_vcpu);
 }
 
-static unsigned long xen_get_wallclock(void)
+static void xen_get_wallclock(struct timespec *now)
 {
-        struct timespec ts;
+        xen_read_wallclock(now);
+}
 
-        xen_read_wallclock(&ts);
-        return ts.tv_sec;
+static int xen_set_wallclock(const struct timespec *now)
+{
+        return -1;
 }
 
-static int xen_set_wallclock(unsigned long now)
+static int xen_pvclock_gtod_notify(struct notifier_block *nb,
+                                   unsigned long was_set, void *priv)
 {
+        /* Protected by the calling core code serialization */
+        static struct timespec next_sync;
+
         struct xen_platform_op op;
-        int rc;
+        struct timespec now;
 
-        /* do nothing for domU */
-        if (!xen_initial_domain())
-                return -1;
+        now = __current_kernel_time();
+
+        /*
+         * We only take the expensive HV call when the clock was set
+         * or when the 11 minutes RTC synchronization time elapsed.
+         */
+        if (!was_set && timespec_compare(&now, &next_sync) < 0)
+                return NOTIFY_OK;
 
         op.cmd = XENPF_settime;
-        op.u.settime.secs = now;
-        op.u.settime.nsecs = 0;
+        op.u.settime.secs = now.tv_sec;
+        op.u.settime.nsecs = now.tv_nsec;
         op.u.settime.system_time = xen_clocksource_read();
 
-        rc = HYPERVISOR_dom0_op(&op);
-        WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now);
+        (void)HYPERVISOR_dom0_op(&op);
+
+        /*
+         * Move the next drift compensation time 11 minutes
+         * ahead. That's emulating the sync_cmos_clock() update for
+         * the hardware RTC.
+         */
+        next_sync = now;
+        next_sync.tv_sec += 11 * 60;
 
-        return rc;
+        return NOTIFY_OK;
 }
 
+static struct notifier_block xen_pvclock_gtod_notifier = {
+        .notifier_call = xen_pvclock_gtod_notify,
+};
+
 static struct clocksource xen_clocksource __read_mostly = {
         .name = "xen",
         .rating = 400,
@@ -377,11 +388,16 @@ static const struct clock_event_device xen_vcpuop_clockevent = {
 
 static const struct clock_event_device *xen_clockevent =
         &xen_timerop_clockevent;
-static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events) = { .irq = -1 };
+
+struct xen_clock_event_device {
+        struct clock_event_device evt;
+        char *name;
+};
+static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
 
 static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
 {
-        struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
+        struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt;
         irqreturn_t ret;
 
         ret = IRQ_NONE;
@@ -395,14 +411,30 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
         return ret;
 }
 
+void xen_teardown_timer(int cpu)
+{
+        struct clock_event_device *evt;
+        BUG_ON(cpu == 0);
+        evt = &per_cpu(xen_clock_events, cpu).evt;
+
+        if (evt->irq >= 0) {
+                unbind_from_irqhandler(evt->irq, NULL);
+                evt->irq = -1;
+                kfree(per_cpu(xen_clock_events, cpu).name);
+                per_cpu(xen_clock_events, cpu).name = NULL;
+        }
+}
+
 void xen_setup_timer(int cpu)
 {
-        const char *name;
+        char *name;
         struct clock_event_device *evt;
         int irq;
 
-        evt = &per_cpu(xen_clock_events, cpu);
+        evt = &per_cpu(xen_clock_events, cpu).evt;
         WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
+        if (evt->irq >= 0)
+                xen_teardown_timer(cpu);
 
         printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
 
@@ -420,22 +452,15 @@ void xen_setup_timer(int cpu)
 
         evt->cpumask = cpumask_of(cpu);
         evt->irq = irq;
+        per_cpu(xen_clock_events, cpu).name = name;
 }
 
-void xen_teardown_timer(int cpu)
-{
-        struct clock_event_device *evt;
-        BUG_ON(cpu == 0);
-        evt = &per_cpu(xen_clock_events, cpu);
-        unbind_from_irqhandler(evt->irq, NULL);
-        evt->irq = -1;
-}
 
 void xen_setup_cpu_clockevents(void)
 {
         BUG_ON(preemptible());
 
-        clockevents_register_device(&__get_cpu_var(xen_clock_events));
+        clockevents_register_device(&__get_cpu_var(xen_clock_events).evt);
 }
 
 void xen_timer_resume(void)
@@ -480,6 +505,9 @@ static void __init xen_time_init(void)
         xen_setup_runstate_info(cpu);
         xen_setup_timer(cpu);
         xen_setup_cpu_clockevents();
+
+        if (xen_initial_domain())
+                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
 
 void __init xen_init_time_ops(void)
@@ -492,7 +520,9 @@ void __init xen_init_time_ops(void)
 
         x86_platform.calibrate_tsc = xen_tsc_khz;
         x86_platform.get_wallclock = xen_get_wallclock;
-        x86_platform.set_wallclock = xen_set_wallclock;
+        /* Dom0 uses the native method to set the hardware RTC. */
+        if (!xen_initial_domain())
+                x86_platform.set_wallclock = xen_set_wallclock;
 }
 
 #ifdef CONFIG_XEN_PVHVM
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index a95b41744ad0..86782c5d7e2a 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -73,7 +73,7 @@ static inline void xen_hvm_smp_init(void) {}
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 void __init xen_init_spinlocks(void);
-void __cpuinit xen_init_lock_cpu(int cpu);
+void xen_init_lock_cpu(int cpu);
 void xen_uninit_lock_cpu(int cpu);
 #else
 static inline void xen_init_spinlocks(void)