aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-04-14 07:18:27 -0400
committerIngo Molnar <mingo@kernel.org>2012-04-14 07:19:04 -0400
commit6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch)
tree021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /arch/x86
parent682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff)
parenta385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff)
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree), to prepare for tooling changes, and also to pick up v3.4 MM changes that the uprobes code needs to take care of. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig43
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/Makefile.um4
-rw-r--r--arch/x86/boot/Makefile3
-rw-r--r--arch/x86/boot/boot.h2
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/eboot.c8
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c11
-rw-r--r--arch/x86/boot/compressed/relocs.c6
-rw-r--r--arch/x86/boot/tools/build.c40
-rw-r--r--arch/x86/configs/i386_defconfig64
-rw-r--r--arch/x86/configs/x86_64_defconfig67
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c36
-rw-r--r--arch/x86/crypto/blowfish_glue.c191
-rw-r--r--arch/x86/crypto/camellia-x86_64-asm_64.S520
-rw-r--r--arch/x86/crypto/camellia_glue.c1952
-rw-r--r--arch/x86/crypto/crc32c-intel.c11
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c12
-rw-r--r--arch/x86/crypto/serpent-sse2-i586-asm_32.S29
-rw-r--r--arch/x86/crypto/serpent-sse2-x86_64-asm_64.S29
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c394
-rw-r--r--arch/x86/crypto/twofish_glue.c2
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c269
-rw-r--r--arch/x86/ia32/ia32_aout.c19
-rw-r--r--arch/x86/ia32/ia32_signal.c3
-rw-r--r--arch/x86/include/asm/alternative.h6
-rw-r--r--arch/x86/include/asm/apic.h7
-rw-r--r--arch/x86/include/asm/atomic64_32.h146
-rw-r--r--arch/x86/include/asm/auxvec.h7
-rw-r--r--arch/x86/include/asm/barrier.h116
-rw-r--r--arch/x86/include/asm/bug.h4
-rw-r--r--arch/x86/include/asm/cacheflush.h1
-rw-r--r--arch/x86/include/asm/cpu_device_id.h13
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/debugreg.h67
-rw-r--r--arch/x86/include/asm/dma-mapping.h26
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/elf.h1
-rw-r--r--arch/x86/include/asm/exec.h1
-rw-r--r--arch/x86/include/asm/fpu-internal.h520
-rw-r--r--arch/x86/include/asm/futex.h1
-rw-r--r--arch/x86/include/asm/hardirq.h1
-rw-r--r--arch/x86/include/asm/highmem.h2
-rw-r--r--arch/x86/include/asm/i387.h589
-rw-r--r--arch/x86/include/asm/ia32.h4
-rw-r--r--arch/x86/include/asm/idle.h1
-rw-r--r--arch/x86/include/asm/io_apic.h9
-rw-r--r--arch/x86/include/asm/irq_controller.h12
-rw-r--r--arch/x86/include/asm/jump_label.h6
-rw-r--r--arch/x86/include/asm/kgdb.h10
-rw-r--r--arch/x86/include/asm/kvm.h4
-rw-r--r--arch/x86/include/asm/kvm_emulate.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h63
-rw-r--r--arch/x86/include/asm/local.h1
-rw-r--r--arch/x86/include/asm/mc146818rtc.h1
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mrst.h4
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/page_types.h1
-rw-r--r--arch/x86/include/asm/paravirt.h7
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/processor.h85
-rw-r--r--arch/x86/include/asm/prom.h10
-rw-r--r--arch/x86/include/asm/segment.h58
-rw-r--r--arch/x86/include/asm/special_insns.h199
-rw-r--r--arch/x86/include/asm/spinlock.h4
-rw-r--r--arch/x86/include/asm/spinlock_types.h1
-rw-r--r--arch/x86/include/asm/stackprotector.h1
-rw-r--r--arch/x86/include/asm/switch_to.h129
-rw-r--r--arch/x86/include/asm/system.h523
-rw-r--r--arch/x86/include/asm/timer.h8
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/include/asm/tsc.h4
-rw-r--r--arch/x86/include/asm/vgtod.h17
-rw-r--r--arch/x86/include/asm/virtext.h1
-rw-r--r--arch/x86/include/asm/word-at-a-time.h46
-rw-r--r--arch/x86/include/asm/x2apic.h5
-rw-r--r--arch/x86/include/asm/x86_init.h6
-rw-r--r--arch/x86/include/asm/xen/interface.h1
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c7
-rw-r--r--arch/x86/kernel/acpi/cstate.c1
-rw-r--r--arch/x86/kernel/amd_gart_64.c11
-rw-r--r--arch/x86/kernel/apic/apic.c13
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c1
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c7
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c1
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c199
-rw-r--r--arch/x86/kernel/apic/numaq_32.c1
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/summit_32.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c6
-rw-r--r--arch/x86/kernel/apm_32.c12
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c18
-rw-r--r--arch/x86/kernel/cpu/match.c91
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c195
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c9
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c106
-rw-r--r--arch/x86/kernel/cpu/perf_event.h43
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c21
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c194
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c22
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c526
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c19
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpuid.c1
-rw-r--r--arch/x86/kernel/crash_dump_32.c6
-rw-r--r--arch/x86/kernel/devicetree.c101
-rw-r--r--arch/x86/kernel/dumpstack.c7
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/entry_32.S17
-rw-r--r--arch/x86/kernel/entry_64.S73
-rw-r--r--arch/x86/kernel/i387.c83
-rw-r--r--arch/x86/kernel/i8259.c1
-rw-r--r--arch/x86/kernel/irq.c7
-rw-r--r--arch/x86/kernel/irq_32.c11
-rw-r--r--arch/x86/kernel/irqinit.c7
-rw-r--r--arch/x86/kernel/kdebugfs.c9
-rw-r--r--arch/x86/kernel/kgdb.c67
-rw-r--r--arch/x86/kernel/kprobes-common.h102
-rw-r--r--arch/x86/kernel/kprobes-opt.c512
-rw-r--r--arch/x86/kernel/kprobes.c664
-rw-r--r--arch/x86/kernel/kvm.c8
-rw-r--r--arch/x86/kernel/kvmclock.c15
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/machine_kexec_32.c1
-rw-r--r--arch/x86/kernel/mca_32.c1
-rw-r--r--arch/x86/kernel/microcode_core.c15
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/msr.c1
-rw-r--r--arch/x86/kernel/nmi_selftest.c37
-rw-r--r--arch/x86/kernel/paravirt.c6
-rw-r--r--arch/x86/kernel/pci-calgary_64.c10
-rw-r--r--arch/x86/kernel/pci-dma.c8
-rw-r--r--arch/x86/kernel/pci-nommu.c6
-rw-r--r--arch/x86/kernel/pci-swiotlb.c17
-rw-r--r--arch/x86/kernel/probe_roms.c1
-rw-r--r--arch/x86/kernel/process.c150
-rw-r--r--arch/x86/kernel/process_32.c63
-rw-r--r--arch/x86/kernel/process_64.c113
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/setup.c22
-rw-r--r--arch/x86/kernel/signal.c1
-rw-r--r--arch/x86/kernel/smpboot.c32
-rw-r--r--arch/x86/kernel/sys_x86_64.c34
-rw-r--r--arch/x86/kernel/tboot.c9
-rw-r--r--arch/x86/kernel/tce_64.c1
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/tls.c5
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/tsc.c17
-rw-r--r--arch/x86/kernel/tsc_sync.c29
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vsyscall_64.c25
-rw-r--r--arch/x86/kernel/x86_init.c5
-rw-r--r--arch/x86/kernel/xsave.c1
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c112
-rw-r--r--arch/x86/kvm/i8259.c1
-rw-r--r--arch/x86/kvm/lapic.c12
-rw-r--r--arch/x86/kvm/mmu.c85
-rw-r--r--arch/x86/kvm/mmu_audit.c12
-rw-r--r--arch/x86/kvm/paging_tmpl.h4
-rw-r--r--arch/x86/kvm/pmu.c12
-rw-r--r--arch/x86/kvm/svm.c119
-rw-r--r--arch/x86/kvm/vmx.c57
-rw-r--r--arch/x86/kvm/x86.c412
-rw-r--r--arch/x86/lib/atomic64_32.c59
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S29
-rw-r--r--arch/x86/lib/copy_page_64.S12
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/lib/memcpy_64.S44
-rw-r--r--arch/x86/lib/memset_64.S33
-rw-r--r--arch/x86/lib/usercopy_32.c4
-rw-r--r--arch/x86/mm/highmem_32.c4
-rw-r--r--arch/x86/mm/hugetlbpage.c30
-rw-r--r--arch/x86/mm/init.c1
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/init_64.c1
-rw-r--r--arch/x86/mm/kmemcheck/selftest.c1
-rw-r--r--arch/x86/mm/numa_emulation.c4
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/srat.c2
-rw-r--r--arch/x86/net/bpf_jit.S122
-rw-r--r--arch/x86/net/bpf_jit_comp.c47
-rw-r--r--arch/x86/pci/acpi.c29
-rw-r--r--arch/x86/pci/fixup.c12
-rw-r--r--arch/x86/pci/i386.c85
-rw-r--r--arch/x86/pci/mrst.c40
-rw-r--r--arch/x86/pci/xen.c27
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts7
-rw-r--r--arch/x86/platform/efi/efi.c377
-rw-r--r--arch/x86/platform/geode/Makefile2
-rw-r--r--arch/x86/platform/geode/alix.c76
-rw-r--r--arch/x86/platform/geode/geos.c128
-rw-r--r--arch/x86/platform/geode/net5501.c154
-rw-r--r--arch/x86/platform/mrst/Makefile1
-rw-r--r--arch/x86/platform/mrst/mrst.c86
-rw-r--r--arch/x86/platform/mrst/pmu.c817
-rw-r--r--arch/x86/platform/mrst/pmu.h234
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c72
-rw-r--r--arch/x86/platform/olpc/olpc.c97
-rw-r--r--arch/x86/platform/scx200/scx200_32.c24
-rw-r--r--arch/x86/platform/uv/uv_time.c6
-rw-r--r--arch/x86/power/cpu.c5
-rw-r--r--arch/x86/power/hibernate_32.c1
-rw-r--r--arch/x86/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/um/Kconfig4
-rw-r--r--arch/x86/um/asm/processor.h10
-rw-r--r--arch/x86/um/asm/processor_32.h10
-rw-r--r--arch/x86/um/asm/processor_64.h10
-rw-r--r--arch/x86/um/bugs_32.c4
-rw-r--r--arch/x86/um/mem_32.c8
-rw-r--r--arch/x86/um/vdso/vma.c3
-rw-r--r--arch/x86/vdso/vclock_gettime.c135
-rw-r--r--arch/x86/vdso/vdso32-setup.c17
-rw-r--r--arch/x86/vdso/vma.c3
-rw-r--r--arch/x86/xen/enlighten.c101
-rw-r--r--arch/x86/xen/irq.c8
-rw-r--r--arch/x86/xen/mmu.c24
-rw-r--r--arch/x86/xen/multicalls.h2
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c4
-rw-r--r--arch/x86/xen/setup.c3
-rw-r--r--arch/x86/xen/smp.c10
239 files changed, 9047 insertions, 5177 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d2a540f7d6cb..76f5a466547a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,7 +69,6 @@ config X86
69 select HAVE_ARCH_JUMP_LABEL 69 select HAVE_ARCH_JUMP_LABEL
70 select HAVE_TEXT_POKE_SMP 70 select HAVE_TEXT_POKE_SMP
71 select HAVE_GENERIC_HARDIRQS 71 select HAVE_GENERIC_HARDIRQS
72 select HAVE_SPARSE_IRQ
73 select SPARSE_IRQ 72 select SPARSE_IRQ
74 select GENERIC_FIND_FIRST_BIT 73 select GENERIC_FIND_FIRST_BIT
75 select GENERIC_IRQ_PROBE 74 select GENERIC_IRQ_PROBE
@@ -82,6 +81,7 @@ config X86
82 select CLKEVT_I8253 81 select CLKEVT_I8253
83 select ARCH_HAVE_NMI_SAFE_CMPXCHG 82 select ARCH_HAVE_NMI_SAFE_CMPXCHG
84 select GENERIC_IOMAP 83 select GENERIC_IOMAP
84 select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
85 85
86config INSTRUCTION_DECODER 86config INSTRUCTION_DECODER
87 def_bool (KPROBES || PERF_EVENTS || UPROBES) 87 def_bool (KPROBES || PERF_EVENTS || UPROBES)
@@ -179,6 +179,9 @@ config ARCH_HAS_DEFAULT_IDLE
179config ARCH_HAS_CACHE_LINE_SIZE 179config ARCH_HAS_CACHE_LINE_SIZE
180 def_bool y 180 def_bool y
181 181
182config ARCH_HAS_CPU_AUTOPROBE
183 def_bool y
184
182config HAVE_SETUP_PER_CPU_AREA 185config HAVE_SETUP_PER_CPU_AREA
183 def_bool y 186 def_bool y
184 187
@@ -401,6 +404,7 @@ config X86_INTEL_CE
401 select X86_REBOOTFIXUPS 404 select X86_REBOOTFIXUPS
402 select OF 405 select OF
403 select OF_EARLY_FLATTREE 406 select OF_EARLY_FLATTREE
407 select IRQ_DOMAIN
404 ---help--- 408 ---help---
405 Select for the Intel CE media processor (CE4100) SOC. 409 Select for the Intel CE media processor (CE4100) SOC.
406 This option compiles in support for the CE4100 SOC for settop 410 This option compiles in support for the CE4100 SOC for settop
@@ -420,27 +424,6 @@ if X86_WANT_INTEL_MID
420config X86_INTEL_MID 424config X86_INTEL_MID
421 bool 425 bool
422 426
423config X86_MRST
424 bool "Moorestown MID platform"
425 depends on PCI
426 depends on PCI_GOANY
427 depends on X86_IO_APIC
428 select X86_INTEL_MID
429 select SFI
430 select DW_APB_TIMER
431 select APB_TIMER
432 select I2C
433 select SPI
434 select INTEL_SCU_IPC
435 select X86_PLATFORM_DEVICES
436 ---help---
437 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
438 Internet Device(MID) platform. Moorestown consists of two chips:
439 Lincroft (CPU core, graphics, and memory controller) and Langwell IOH.
440 Unlike standard x86 PCs, Moorestown does not have many legacy devices
441 nor standard legacy replacement devices/features. e.g. Moorestown does
442 not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
443
444config X86_MDFLD 427config X86_MDFLD
445 bool "Medfield MID platform" 428 bool "Medfield MID platform"
446 depends on PCI 429 depends on PCI
@@ -454,6 +437,7 @@ config X86_MDFLD
454 select SPI 437 select SPI
455 select INTEL_SCU_IPC 438 select INTEL_SCU_IPC
456 select X86_PLATFORM_DEVICES 439 select X86_PLATFORM_DEVICES
440 select MFD_INTEL_MSIC
457 ---help--- 441 ---help---
458 Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin 442 Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
459 Internet Device(MID) platform. 443 Internet Device(MID) platform.
@@ -2079,6 +2063,7 @@ config OLPC
2079 select GPIOLIB 2063 select GPIOLIB
2080 select OF 2064 select OF
2081 select OF_PROMTREE 2065 select OF_PROMTREE
2066 select IRQ_DOMAIN
2082 ---help--- 2067 ---help---
2083 Add support for detecting the unique features of the OLPC 2068 Add support for detecting the unique features of the OLPC
2084 XO hardware. 2069 XO hardware.
@@ -2136,6 +2121,19 @@ config ALIX
2136 2121
2137 Note: You have to set alix.force=1 for boards with Award BIOS. 2122 Note: You have to set alix.force=1 for boards with Award BIOS.
2138 2123
2124config NET5501
2125 bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)"
2126 select GPIOLIB
2127 ---help---
2128 This option enables system support for the Soekris Engineering net5501.
2129
2130config GEOS
2131 bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)"
2132 select GPIOLIB
2133 depends on DMI
2134 ---help---
2135 This option enables system support for the Traverse Technologies GEOS.
2136
2139endif # X86_32 2137endif # X86_32
2140 2138
2141config AMD_NB 2139config AMD_NB
@@ -2194,6 +2192,7 @@ config X86_X32
2194config COMPAT 2192config COMPAT
2195 def_bool y 2193 def_bool y
2196 depends on IA32_EMULATION || X86_X32 2194 depends on IA32_EMULATION || X86_X32
2195 select ARCH_WANT_OLD_COMPAT_IPC
2197 2196
2198config COMPAT_FOR_U64_ALIGNMENT 2197config COMPAT_FOR_U64_ALIGNMENT
2199 def_bool COMPAT 2198 def_bool COMPAT
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 3c57033e2211..706e12e9984b 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -303,7 +303,6 @@ config X86_GENERIC
303config X86_INTERNODE_CACHE_SHIFT 303config X86_INTERNODE_CACHE_SHIFT
304 int 304 int
305 default "12" if X86_VSMP 305 default "12" if X86_VSMP
306 default "7" if NUMA
307 default X86_L1_CACHE_SHIFT 306 default X86_L1_CACHE_SHIFT
308 307
309config X86_CMPXCHG 308config X86_CMPXCHG
@@ -441,7 +440,7 @@ config CPU_SUP_INTEL
441config CPU_SUP_CYRIX_32 440config CPU_SUP_CYRIX_32
442 default y 441 default y
443 bool "Support Cyrix processors" if PROCESSOR_SELECT 442 bool "Support Cyrix processors" if PROCESSOR_SELECT
444 depends on !64BIT 443 depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
445 ---help--- 444 ---help---
446 This enables detection, tunings and quirks for Cyrix processors 445 This enables detection, tunings and quirks for Cyrix processors
447 446
@@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
495config CPU_SUP_UMC_32 494config CPU_SUP_UMC_32
496 default y 495 default y
497 bool "Support UMC processors" if PROCESSOR_SELECT 496 bool "Support UMC processors" if PROCESSOR_SELECT
498 depends on !64BIT 497 depends on M386 || M486 || (EXPERT && !64BIT)
499 ---help--- 498 ---help---
500 This enables detection, tunings and quirks for UMC processors 499 This enables detection, tunings and quirks for UMC processors
501 500
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 968dbe24a255..41a7237606a3 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -129,6 +129,7 @@ KBUILD_CFLAGS += -Wno-sign-compare
129KBUILD_CFLAGS += -fno-asynchronous-unwind-tables 129KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
130# prevent gcc from generating any FP code by mistake 130# prevent gcc from generating any FP code by mistake
131KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) 131KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
132KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
132 133
133KBUILD_CFLAGS += $(mflags-y) 134KBUILD_CFLAGS += $(mflags-y)
134KBUILD_AFLAGS += $(mflags-y) 135KBUILD_AFLAGS += $(mflags-y)
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 36ddec6a41c9..4be406abeefd 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -8,15 +8,11 @@ ELF_ARCH := i386
8ELF_FORMAT := elf32-i386 8ELF_FORMAT := elf32-i386
9CHECKFLAGS += -D__i386__ 9CHECKFLAGS += -D__i386__
10 10
11ifeq ("$(origin SUBARCH)", "command line")
12ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)")
13KBUILD_CFLAGS += $(call cc-option,-m32) 11KBUILD_CFLAGS += $(call cc-option,-m32)
14KBUILD_AFLAGS += $(call cc-option,-m32) 12KBUILD_AFLAGS += $(call cc-option,-m32)
15LINK-y += $(call cc-option,-m32) 13LINK-y += $(call cc-option,-m32)
16 14
17export LDFLAGS 15export LDFLAGS
18endif
19endif
20 16
21# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. 17# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
22include $(srctree)/arch/x86/Makefile_32.cpu 18include $(srctree)/arch/x86/Makefile_32.cpu
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 95365a82b6a0..5a747dd884db 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,8 @@ setup-y += video-bios.o
37targets += $(setup-y) 37targets += $(setup-y)
38hostprogs-y := mkcpustr tools/build 38hostprogs-y := mkcpustr tools/build
39 39
40HOST_EXTRACFLAGS += $(LINUXINCLUDE) 40HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \
41 -D__EXPORTED_HEADERS__
41 42
42$(obj)/cpu.o: $(obj)/cpustr.h 43$(obj)/cpu.o: $(obj)/cpustr.h
43 44
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index c7093bd9f2d3..18997e5a1053 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -67,7 +67,7 @@ static inline void outl(u32 v, u16 port)
67{ 67{
68 asm volatile("outl %0,%1" : : "a" (v), "dN" (port)); 68 asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
69} 69}
70static inline u32 inl(u32 port) 70static inline u32 inl(u16 port)
71{ 71{
72 u32 v; 72 u32 v;
73 asm volatile("inl %1,%0" : "=a" (v) : "dN" (port)); 73 asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index b123b9a8f5b3..fd55a2ff3ad8 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,6 +22,7 @@ LDFLAGS := -m elf_$(UTS_MACHINE)
22LDFLAGS_vmlinux := -T 22LDFLAGS_vmlinux := -T
23 23
24hostprogs-y := mkpiggy 24hostprogs-y := mkpiggy
25HOST_EXTRACFLAGS += -I$(srctree)/tools/include
25 26
26VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ 27VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
27 $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ 28 $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index fec216f4fbc3..0cdfc0d2315e 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -539,7 +539,7 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
539 struct initrd *initrd; 539 struct initrd *initrd;
540 efi_file_handle_t *h; 540 efi_file_handle_t *h;
541 efi_file_info_t *info; 541 efi_file_info_t *info;
542 efi_char16_t filename[256]; 542 efi_char16_t filename_16[256];
543 unsigned long info_sz; 543 unsigned long info_sz;
544 efi_guid_t info_guid = EFI_FILE_INFO_ID; 544 efi_guid_t info_guid = EFI_FILE_INFO_ID;
545 efi_char16_t *p; 545 efi_char16_t *p;
@@ -552,14 +552,14 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
552 str += 7; 552 str += 7;
553 553
554 initrd = &initrds[i]; 554 initrd = &initrds[i];
555 p = filename; 555 p = filename_16;
556 556
557 /* Skip any leading slashes */ 557 /* Skip any leading slashes */
558 while (*str == '/' || *str == '\\') 558 while (*str == '/' || *str == '\\')
559 str++; 559 str++;
560 560
561 while (*str && *str != ' ' && *str != '\n') { 561 while (*str && *str != ' ' && *str != '\n') {
562 if (p >= filename + sizeof(filename)) 562 if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16))
563 break; 563 break;
564 564
565 *p++ = *str++; 565 *p++ = *str++;
@@ -583,7 +583,7 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
583 goto free_initrds; 583 goto free_initrds;
584 } 584 }
585 585
586 status = efi_call_phys5(fh->open, fh, &h, filename, 586 status = efi_call_phys5(fh->open, fh, &h, filename_16,
587 EFI_FILE_MODE_READ, (u64)0); 587 EFI_FILE_MODE_READ, (u64)0);
588 if (status != EFI_SUCCESS) 588 if (status != EFI_SUCCESS)
589 goto close_handles; 589 goto close_handles;
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 46a823882437..958a641483dd 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -29,14 +29,7 @@
29#include <stdio.h> 29#include <stdio.h>
30#include <string.h> 30#include <string.h>
31#include <inttypes.h> 31#include <inttypes.h>
32 32#include <tools/le_byteshift.h>
33static uint32_t getle32(const void *p)
34{
35 const uint8_t *cp = p;
36
37 return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) +
38 ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24);
39}
40 33
41int main(int argc, char *argv[]) 34int main(int argc, char *argv[])
42{ 35{
@@ -69,7 +62,7 @@ int main(int argc, char *argv[])
69 } 62 }
70 63
71 ilen = ftell(f); 64 ilen = ftell(f);
72 olen = getle32(&olen); 65 olen = get_unaligned_le32(&olen);
73 fclose(f); 66 fclose(f);
74 67
75 /* 68 /*
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 89bbf4e4d05d..d3c0b0277666 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -10,6 +10,7 @@
10#define USE_BSD 10#define USE_BSD
11#include <endian.h> 11#include <endian.h>
12#include <regex.h> 12#include <regex.h>
13#include <tools/le_byteshift.h>
13 14
14static void die(char *fmt, ...); 15static void die(char *fmt, ...);
15 16
@@ -605,10 +606,7 @@ static void emit_relocs(int as_text)
605 fwrite("\0\0\0\0", 4, 1, stdout); 606 fwrite("\0\0\0\0", 4, 1, stdout);
606 /* Now print each relocation */ 607 /* Now print each relocation */
607 for (i = 0; i < reloc_count; i++) { 608 for (i = 0; i < reloc_count; i++) {
608 buf[0] = (relocs[i] >> 0) & 0xff; 609 put_unaligned_le32(relocs[i], buf);
609 buf[1] = (relocs[i] >> 8) & 0xff;
610 buf[2] = (relocs[i] >> 16) & 0xff;
611 buf[3] = (relocs[i] >> 24) & 0xff;
612 fwrite(buf, 4, 1, stdout); 610 fwrite(buf, 4, 1, stdout);
613 } 611 }
614 } 612 }
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 4e9bd6bcafa6..ed549767a231 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -29,18 +29,18 @@
29#include <stdarg.h> 29#include <stdarg.h>
30#include <sys/types.h> 30#include <sys/types.h>
31#include <sys/stat.h> 31#include <sys/stat.h>
32#include <sys/sysmacros.h>
33#include <unistd.h> 32#include <unistd.h>
34#include <fcntl.h> 33#include <fcntl.h>
35#include <sys/mman.h> 34#include <sys/mman.h>
36#include <asm/boot.h> 35#include <tools/le_byteshift.h>
37 36
38typedef unsigned char u8; 37typedef unsigned char u8;
39typedef unsigned short u16; 38typedef unsigned short u16;
40typedef unsigned long u32; 39typedef unsigned int u32;
41 40
42#define DEFAULT_MAJOR_ROOT 0 41#define DEFAULT_MAJOR_ROOT 0
43#define DEFAULT_MINOR_ROOT 0 42#define DEFAULT_MINOR_ROOT 0
43#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
44 44
45/* Minimal number of setup sectors */ 45/* Minimal number of setup sectors */
46#define SETUP_SECT_MIN 5 46#define SETUP_SECT_MIN 5
@@ -159,7 +159,7 @@ int main(int argc, char ** argv)
159 die("read-error on `setup'"); 159 die("read-error on `setup'");
160 if (c < 1024) 160 if (c < 1024)
161 die("The setup must be at least 1024 bytes"); 161 die("The setup must be at least 1024 bytes");
162 if (buf[510] != 0x55 || buf[511] != 0xaa) 162 if (get_unaligned_le16(&buf[510]) != 0xAA55)
163 die("Boot block hasn't got boot flag (0xAA55)"); 163 die("Boot block hasn't got boot flag (0xAA55)");
164 fclose(file); 164 fclose(file);
165 165
@@ -171,8 +171,7 @@ int main(int argc, char ** argv)
171 memset(buf+c, 0, i-c); 171 memset(buf+c, 0, i-c);
172 172
173 /* Set the default root device */ 173 /* Set the default root device */
174 buf[508] = DEFAULT_MINOR_ROOT; 174 put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
175 buf[509] = DEFAULT_MAJOR_ROOT;
176 175
177 fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i); 176 fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i);
178 177
@@ -192,44 +191,42 @@ int main(int argc, char ** argv)
192 191
193 /* Patch the setup code with the appropriate size parameters */ 192 /* Patch the setup code with the appropriate size parameters */
194 buf[0x1f1] = setup_sectors-1; 193 buf[0x1f1] = setup_sectors-1;
195 buf[0x1f4] = sys_size; 194 put_unaligned_le32(sys_size, &buf[0x1f4]);
196 buf[0x1f5] = sys_size >> 8;
197 buf[0x1f6] = sys_size >> 16;
198 buf[0x1f7] = sys_size >> 24;
199 195
200#ifdef CONFIG_EFI_STUB 196#ifdef CONFIG_EFI_STUB
201 file_sz = sz + i + ((sys_size * 16) - sz); 197 file_sz = sz + i + ((sys_size * 16) - sz);
202 198
203 pe_header = *(unsigned int *)&buf[0x3c]; 199 pe_header = get_unaligned_le32(&buf[0x3c]);
204 200
205 /* Size of code */ 201 /* Size of code */
206 *(unsigned int *)&buf[pe_header + 0x1c] = file_sz; 202 put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
207 203
208 /* Size of image */ 204 /* Size of image */
209 *(unsigned int *)&buf[pe_header + 0x50] = file_sz; 205 put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
210 206
211#ifdef CONFIG_X86_32 207#ifdef CONFIG_X86_32
212 /* Address of entry point */ 208 /* Address of entry point */
213 *(unsigned int *)&buf[pe_header + 0x28] = i; 209 put_unaligned_le32(i, &buf[pe_header + 0x28]);
214 210
215 /* .text size */ 211 /* .text size */
216 *(unsigned int *)&buf[pe_header + 0xb0] = file_sz; 212 put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
217 213
218 /* .text size of initialised data */ 214 /* .text size of initialised data */
219 *(unsigned int *)&buf[pe_header + 0xb8] = file_sz; 215 put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
220#else 216#else
221 /* 217 /*
222 * Address of entry point. startup_32 is at the beginning and 218 * Address of entry point. startup_32 is at the beginning and
223 * the 64-bit entry point (startup_64) is always 512 bytes 219 * the 64-bit entry point (startup_64) is always 512 bytes
224 * after. 220 * after.
225 */ 221 */
226 *(unsigned int *)&buf[pe_header + 0x28] = i + 512; 222 put_unaligned_le32(i + 512, &buf[pe_header + 0x28]);
227 223
228 /* .text size */ 224 /* .text size */
229 *(unsigned int *)&buf[pe_header + 0xc0] = file_sz; 225 put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
230 226
231 /* .text size of initialised data */ 227 /* .text size of initialised data */
232 *(unsigned int *)&buf[pe_header + 0xc8] = file_sz; 228 put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
229
233#endif /* CONFIG_X86_32 */ 230#endif /* CONFIG_X86_32 */
234#endif /* CONFIG_EFI_STUB */ 231#endif /* CONFIG_EFI_STUB */
235 232
@@ -250,8 +247,9 @@ int main(int argc, char ** argv)
250 } 247 }
251 248
252 /* Write the CRC */ 249 /* Write the CRC */
253 fprintf(stderr, "CRC %lx\n", crc); 250 fprintf(stderr, "CRC %x\n", crc);
254 if (fwrite(&crc, 1, 4, stdout) != 4) 251 put_unaligned_le32(crc, buf);
252 if (fwrite(buf, 1, 4, stdout) != 4)
255 die("Writing CRC failed"); 253 die("Writing CRC failed");
256 254
257 close(fd); 255 close(fd);
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 2bf18059fbea..119db67dcb03 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -15,23 +15,28 @@ CONFIG_CPUSETS=y
15CONFIG_CGROUP_CPUACCT=y 15CONFIG_CGROUP_CPUACCT=y
16CONFIG_RESOURCE_COUNTERS=y 16CONFIG_RESOURCE_COUNTERS=y
17CONFIG_CGROUP_SCHED=y 17CONFIG_CGROUP_SCHED=y
18CONFIG_UTS_NS=y
19CONFIG_IPC_NS=y
20CONFIG_USER_NS=y
21CONFIG_PID_NS=y
22CONFIG_NET_NS=y
23CONFIG_BLK_DEV_INITRD=y 18CONFIG_BLK_DEV_INITRD=y
24CONFIG_KALLSYMS_EXTRA_PASS=y
25# CONFIG_COMPAT_BRK is not set 19# CONFIG_COMPAT_BRK is not set
26CONFIG_PROFILING=y 20CONFIG_PROFILING=y
27CONFIG_KPROBES=y 21CONFIG_KPROBES=y
28CONFIG_MODULES=y 22CONFIG_MODULES=y
29CONFIG_MODULE_UNLOAD=y 23CONFIG_MODULE_UNLOAD=y
30CONFIG_MODULE_FORCE_UNLOAD=y 24CONFIG_MODULE_FORCE_UNLOAD=y
25CONFIG_PARTITION_ADVANCED=y
26CONFIG_OSF_PARTITION=y
27CONFIG_AMIGA_PARTITION=y
28CONFIG_MAC_PARTITION=y
29CONFIG_BSD_DISKLABEL=y
30CONFIG_MINIX_SUBPARTITION=y
31CONFIG_SOLARIS_X86_PARTITION=y
32CONFIG_UNIXWARE_DISKLABEL=y
33CONFIG_SGI_PARTITION=y
34CONFIG_SUN_PARTITION=y
35CONFIG_KARMA_PARTITION=y
36CONFIG_EFI_PARTITION=y
31CONFIG_NO_HZ=y 37CONFIG_NO_HZ=y
32CONFIG_HIGH_RES_TIMERS=y 38CONFIG_HIGH_RES_TIMERS=y
33CONFIG_SMP=y 39CONFIG_SMP=y
34CONFIG_SPARSE_IRQ=y
35CONFIG_X86_GENERIC=y 40CONFIG_X86_GENERIC=y
36CONFIG_HPET_TIMER=y 41CONFIG_HPET_TIMER=y
37CONFIG_SCHED_SMT=y 42CONFIG_SCHED_SMT=y
@@ -51,14 +56,12 @@ CONFIG_HZ_1000=y
51CONFIG_KEXEC=y 56CONFIG_KEXEC=y
52CONFIG_CRASH_DUMP=y 57CONFIG_CRASH_DUMP=y
53# CONFIG_COMPAT_VDSO is not set 58# CONFIG_COMPAT_VDSO is not set
54CONFIG_PM=y 59CONFIG_HIBERNATION=y
55CONFIG_PM_DEBUG=y 60CONFIG_PM_DEBUG=y
56CONFIG_PM_TRACE_RTC=y 61CONFIG_PM_TRACE_RTC=y
57CONFIG_HIBERNATION=y
58CONFIG_ACPI_PROCFS=y 62CONFIG_ACPI_PROCFS=y
59CONFIG_ACPI_DOCK=y 63CONFIG_ACPI_DOCK=y
60CONFIG_CPU_FREQ=y 64CONFIG_CPU_FREQ=y
61CONFIG_CPU_FREQ_DEBUG=y
62# CONFIG_CPU_FREQ_STAT is not set 65# CONFIG_CPU_FREQ_STAT is not set
63CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y 66CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
64CONFIG_CPU_FREQ_GOV_PERFORMANCE=y 67CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
@@ -69,7 +72,6 @@ CONFIG_PCI_MSI=y
69CONFIG_PCCARD=y 72CONFIG_PCCARD=y
70CONFIG_YENTA=y 73CONFIG_YENTA=y
71CONFIG_HOTPLUG_PCI=y 74CONFIG_HOTPLUG_PCI=y
72CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
73CONFIG_BINFMT_MISC=y 75CONFIG_BINFMT_MISC=y
74CONFIG_NET=y 76CONFIG_NET=y
75CONFIG_PACKET=y 77CONFIG_PACKET=y
@@ -120,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
120CONFIG_IP_NF_IPTABLES=y 122CONFIG_IP_NF_IPTABLES=y
121CONFIG_IP_NF_FILTER=y 123CONFIG_IP_NF_FILTER=y
122CONFIG_IP_NF_TARGET_REJECT=y 124CONFIG_IP_NF_TARGET_REJECT=y
123CONFIG_IP_NF_TARGET_LOG=y
124CONFIG_IP_NF_TARGET_ULOG=y 125CONFIG_IP_NF_TARGET_ULOG=y
125CONFIG_NF_NAT=y 126CONFIG_NF_NAT=y
126CONFIG_IP_NF_TARGET_MASQUERADE=y 127CONFIG_IP_NF_TARGET_MASQUERADE=y
@@ -128,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y
128CONFIG_NF_CONNTRACK_IPV6=y 129CONFIG_NF_CONNTRACK_IPV6=y
129CONFIG_IP6_NF_IPTABLES=y 130CONFIG_IP6_NF_IPTABLES=y
130CONFIG_IP6_NF_MATCH_IPV6HEADER=y 131CONFIG_IP6_NF_MATCH_IPV6HEADER=y
131CONFIG_IP6_NF_TARGET_LOG=y
132CONFIG_IP6_NF_FILTER=y 132CONFIG_IP6_NF_FILTER=y
133CONFIG_IP6_NF_TARGET_REJECT=y 133CONFIG_IP6_NF_TARGET_REJECT=y
134CONFIG_IP6_NF_MANGLE=y 134CONFIG_IP6_NF_MANGLE=y
@@ -169,25 +169,20 @@ CONFIG_DM_ZERO=y
169CONFIG_MACINTOSH_DRIVERS=y 169CONFIG_MACINTOSH_DRIVERS=y
170CONFIG_MAC_EMUMOUSEBTN=y 170CONFIG_MAC_EMUMOUSEBTN=y
171CONFIG_NETDEVICES=y 171CONFIG_NETDEVICES=y
172CONFIG_NET_ETHERNET=y 172CONFIG_NETCONSOLE=y
173CONFIG_NET_VENDOR_3COM=y 173CONFIG_BNX2=y
174CONFIG_TIGON3=y
174CONFIG_NET_TULIP=y 175CONFIG_NET_TULIP=y
175CONFIG_NET_PCI=y
176CONFIG_FORCEDETH=y
177CONFIG_E100=y 176CONFIG_E100=y
177CONFIG_E1000=y
178CONFIG_E1000E=y
179CONFIG_SKY2=y
178CONFIG_NE2K_PCI=y 180CONFIG_NE2K_PCI=y
181CONFIG_FORCEDETH=y
179CONFIG_8139TOO=y 182CONFIG_8139TOO=y
180# CONFIG_8139TOO_PIO is not set 183# CONFIG_8139TOO_PIO is not set
181CONFIG_E1000=y
182CONFIG_E1000E=y
183CONFIG_R8169=y 184CONFIG_R8169=y
184CONFIG_SKY2=y
185CONFIG_TIGON3=y
186CONFIG_BNX2=y
187CONFIG_TR=y
188CONFIG_NET_PCMCIA=y
189CONFIG_FDDI=y 185CONFIG_FDDI=y
190CONFIG_NETCONSOLE=y
191CONFIG_INPUT_POLLDEV=y 186CONFIG_INPUT_POLLDEV=y
192# CONFIG_INPUT_MOUSEDEV_PSAUX is not set 187# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
193CONFIG_INPUT_EVDEV=y 188CONFIG_INPUT_EVDEV=y
@@ -196,6 +191,7 @@ CONFIG_INPUT_TABLET=y
196CONFIG_INPUT_TOUCHSCREEN=y 191CONFIG_INPUT_TOUCHSCREEN=y
197CONFIG_INPUT_MISC=y 192CONFIG_INPUT_MISC=y
198CONFIG_VT_HW_CONSOLE_BINDING=y 193CONFIG_VT_HW_CONSOLE_BINDING=y
194# CONFIG_LEGACY_PTYS is not set
199CONFIG_SERIAL_NONSTANDARD=y 195CONFIG_SERIAL_NONSTANDARD=y
200CONFIG_SERIAL_8250=y 196CONFIG_SERIAL_8250=y
201CONFIG_SERIAL_8250_CONSOLE=y 197CONFIG_SERIAL_8250_CONSOLE=y
@@ -205,7 +201,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
205CONFIG_SERIAL_8250_SHARE_IRQ=y 201CONFIG_SERIAL_8250_SHARE_IRQ=y
206CONFIG_SERIAL_8250_DETECT_IRQ=y 202CONFIG_SERIAL_8250_DETECT_IRQ=y
207CONFIG_SERIAL_8250_RSA=y 203CONFIG_SERIAL_8250_RSA=y
208# CONFIG_LEGACY_PTYS is not set
209CONFIG_HW_RANDOM=y 204CONFIG_HW_RANDOM=y
210CONFIG_NVRAM=y 205CONFIG_NVRAM=y
211CONFIG_HPET=y 206CONFIG_HPET=y
@@ -220,7 +215,6 @@ CONFIG_DRM_I915=y
220CONFIG_FB_MODE_HELPERS=y 215CONFIG_FB_MODE_HELPERS=y
221CONFIG_FB_TILEBLITTING=y 216CONFIG_FB_TILEBLITTING=y
222CONFIG_FB_EFI=y 217CONFIG_FB_EFI=y
223CONFIG_BACKLIGHT_LCD_SUPPORT=y
224# CONFIG_LCD_CLASS_DEVICE is not set 218# CONFIG_LCD_CLASS_DEVICE is not set
225CONFIG_VGACON_SOFT_SCROLLBACK=y 219CONFIG_VGACON_SOFT_SCROLLBACK=y
226CONFIG_LOGO=y 220CONFIG_LOGO=y
@@ -283,7 +277,6 @@ CONFIG_ZISOFS=y
283CONFIG_MSDOS_FS=y 277CONFIG_MSDOS_FS=y
284CONFIG_VFAT_FS=y 278CONFIG_VFAT_FS=y
285CONFIG_PROC_KCORE=y 279CONFIG_PROC_KCORE=y
286CONFIG_TMPFS=y
287CONFIG_TMPFS_POSIX_ACL=y 280CONFIG_TMPFS_POSIX_ACL=y
288CONFIG_HUGETLBFS=y 281CONFIG_HUGETLBFS=y
289CONFIG_NFS_FS=y 282CONFIG_NFS_FS=y
@@ -291,18 +284,6 @@ CONFIG_NFS_V3=y
291CONFIG_NFS_V3_ACL=y 284CONFIG_NFS_V3_ACL=y
292CONFIG_NFS_V4=y 285CONFIG_NFS_V4=y
293CONFIG_ROOT_NFS=y 286CONFIG_ROOT_NFS=y
294CONFIG_PARTITION_ADVANCED=y
295CONFIG_OSF_PARTITION=y
296CONFIG_AMIGA_PARTITION=y
297CONFIG_MAC_PARTITION=y
298CONFIG_BSD_DISKLABEL=y
299CONFIG_MINIX_SUBPARTITION=y
300CONFIG_SOLARIS_X86_PARTITION=y
301CONFIG_UNIXWARE_DISKLABEL=y
302CONFIG_SGI_PARTITION=y
303CONFIG_SUN_PARTITION=y
304CONFIG_KARMA_PARTITION=y
305CONFIG_EFI_PARTITION=y
306CONFIG_NLS_DEFAULT="utf8" 287CONFIG_NLS_DEFAULT="utf8"
307CONFIG_NLS_CODEPAGE_437=y 288CONFIG_NLS_CODEPAGE_437=y
308CONFIG_NLS_ASCII=y 289CONFIG_NLS_ASCII=y
@@ -317,13 +298,12 @@ CONFIG_DEBUG_KERNEL=y
317# CONFIG_SCHED_DEBUG is not set 298# CONFIG_SCHED_DEBUG is not set
318CONFIG_SCHEDSTATS=y 299CONFIG_SCHEDSTATS=y
319CONFIG_TIMER_STATS=y 300CONFIG_TIMER_STATS=y
320# CONFIG_RCU_CPU_STALL_DETECTOR is not set 301CONFIG_DEBUG_STACK_USAGE=y
321CONFIG_SYSCTL_SYSCALL_CHECK=y 302CONFIG_SYSCTL_SYSCALL_CHECK=y
322CONFIG_BLK_DEV_IO_TRACE=y 303CONFIG_BLK_DEV_IO_TRACE=y
323CONFIG_PROVIDE_OHCI1394_DMA_INIT=y 304CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
324CONFIG_EARLY_PRINTK_DBGP=y 305CONFIG_EARLY_PRINTK_DBGP=y
325CONFIG_DEBUG_STACKOVERFLOW=y 306CONFIG_DEBUG_STACKOVERFLOW=y
326CONFIG_DEBUG_STACK_USAGE=y
327# CONFIG_DEBUG_RODATA_TEST is not set 307# CONFIG_DEBUG_RODATA_TEST is not set
328CONFIG_DEBUG_NX_TEST=m 308CONFIG_DEBUG_NX_TEST=m
329CONFIG_DEBUG_BOOT_PARAMS=y 309CONFIG_DEBUG_BOOT_PARAMS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 058a35b8286c..76eb2903809f 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,4 +1,3 @@
1CONFIG_64BIT=y
2CONFIG_EXPERIMENTAL=y 1CONFIG_EXPERIMENTAL=y
3# CONFIG_LOCALVERSION_AUTO is not set 2# CONFIG_LOCALVERSION_AUTO is not set
4CONFIG_SYSVIPC=y 3CONFIG_SYSVIPC=y
@@ -16,26 +15,29 @@ CONFIG_CPUSETS=y
16CONFIG_CGROUP_CPUACCT=y 15CONFIG_CGROUP_CPUACCT=y
17CONFIG_RESOURCE_COUNTERS=y 16CONFIG_RESOURCE_COUNTERS=y
18CONFIG_CGROUP_SCHED=y 17CONFIG_CGROUP_SCHED=y
19CONFIG_UTS_NS=y
20CONFIG_IPC_NS=y
21CONFIG_USER_NS=y
22CONFIG_PID_NS=y
23CONFIG_NET_NS=y
24CONFIG_BLK_DEV_INITRD=y 18CONFIG_BLK_DEV_INITRD=y
25CONFIG_KALLSYMS_EXTRA_PASS=y
26# CONFIG_COMPAT_BRK is not set 19# CONFIG_COMPAT_BRK is not set
27CONFIG_PROFILING=y 20CONFIG_PROFILING=y
28CONFIG_KPROBES=y 21CONFIG_KPROBES=y
29CONFIG_MODULES=y 22CONFIG_MODULES=y
30CONFIG_MODULE_UNLOAD=y 23CONFIG_MODULE_UNLOAD=y
31CONFIG_MODULE_FORCE_UNLOAD=y 24CONFIG_MODULE_FORCE_UNLOAD=y
25CONFIG_PARTITION_ADVANCED=y
26CONFIG_OSF_PARTITION=y
27CONFIG_AMIGA_PARTITION=y
28CONFIG_MAC_PARTITION=y
29CONFIG_BSD_DISKLABEL=y
30CONFIG_MINIX_SUBPARTITION=y
31CONFIG_SOLARIS_X86_PARTITION=y
32CONFIG_UNIXWARE_DISKLABEL=y
33CONFIG_SGI_PARTITION=y
34CONFIG_SUN_PARTITION=y
35CONFIG_KARMA_PARTITION=y
36CONFIG_EFI_PARTITION=y
32CONFIG_NO_HZ=y 37CONFIG_NO_HZ=y
33CONFIG_HIGH_RES_TIMERS=y 38CONFIG_HIGH_RES_TIMERS=y
34CONFIG_SMP=y 39CONFIG_SMP=y
35CONFIG_SPARSE_IRQ=y
36CONFIG_CALGARY_IOMMU=y 40CONFIG_CALGARY_IOMMU=y
37CONFIG_AMD_IOMMU=y
38CONFIG_AMD_IOMMU_STATS=y
39CONFIG_NR_CPUS=64 41CONFIG_NR_CPUS=64
40CONFIG_SCHED_SMT=y 42CONFIG_SCHED_SMT=y
41CONFIG_PREEMPT_VOLUNTARY=y 43CONFIG_PREEMPT_VOLUNTARY=y
@@ -53,27 +55,22 @@ CONFIG_HZ_1000=y
53CONFIG_KEXEC=y 55CONFIG_KEXEC=y
54CONFIG_CRASH_DUMP=y 56CONFIG_CRASH_DUMP=y
55# CONFIG_COMPAT_VDSO is not set 57# CONFIG_COMPAT_VDSO is not set
56CONFIG_PM=y 58CONFIG_HIBERNATION=y
57CONFIG_PM_DEBUG=y 59CONFIG_PM_DEBUG=y
58CONFIG_PM_TRACE_RTC=y 60CONFIG_PM_TRACE_RTC=y
59CONFIG_HIBERNATION=y
60CONFIG_ACPI_PROCFS=y 61CONFIG_ACPI_PROCFS=y
61CONFIG_ACPI_DOCK=y 62CONFIG_ACPI_DOCK=y
62CONFIG_CPU_FREQ=y 63CONFIG_CPU_FREQ=y
63CONFIG_CPU_FREQ_DEBUG=y
64# CONFIG_CPU_FREQ_STAT is not set 64# CONFIG_CPU_FREQ_STAT is not set
65CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y 65CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
66CONFIG_CPU_FREQ_GOV_PERFORMANCE=y 66CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
67CONFIG_CPU_FREQ_GOV_ONDEMAND=y 67CONFIG_CPU_FREQ_GOV_ONDEMAND=y
68CONFIG_X86_ACPI_CPUFREQ=y 68CONFIG_X86_ACPI_CPUFREQ=y
69CONFIG_PCI_MMCONFIG=y 69CONFIG_PCI_MMCONFIG=y
70CONFIG_INTEL_IOMMU=y
71# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
72CONFIG_PCIEPORTBUS=y 70CONFIG_PCIEPORTBUS=y
73CONFIG_PCCARD=y 71CONFIG_PCCARD=y
74CONFIG_YENTA=y 72CONFIG_YENTA=y
75CONFIG_HOTPLUG_PCI=y 73CONFIG_HOTPLUG_PCI=y
76CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
77CONFIG_BINFMT_MISC=y 74CONFIG_BINFMT_MISC=y
78CONFIG_IA32_EMULATION=y 75CONFIG_IA32_EMULATION=y
79CONFIG_NET=y 76CONFIG_NET=y
@@ -125,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
125CONFIG_IP_NF_IPTABLES=y 122CONFIG_IP_NF_IPTABLES=y
126CONFIG_IP_NF_FILTER=y 123CONFIG_IP_NF_FILTER=y
127CONFIG_IP_NF_TARGET_REJECT=y 124CONFIG_IP_NF_TARGET_REJECT=y
128CONFIG_IP_NF_TARGET_LOG=y
129CONFIG_IP_NF_TARGET_ULOG=y 125CONFIG_IP_NF_TARGET_ULOG=y
130CONFIG_NF_NAT=y 126CONFIG_NF_NAT=y
131CONFIG_IP_NF_TARGET_MASQUERADE=y 127CONFIG_IP_NF_TARGET_MASQUERADE=y
@@ -133,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y
133CONFIG_NF_CONNTRACK_IPV6=y 129CONFIG_NF_CONNTRACK_IPV6=y
134CONFIG_IP6_NF_IPTABLES=y 130CONFIG_IP6_NF_IPTABLES=y
135CONFIG_IP6_NF_MATCH_IPV6HEADER=y 131CONFIG_IP6_NF_MATCH_IPV6HEADER=y
136CONFIG_IP6_NF_TARGET_LOG=y
137CONFIG_IP6_NF_FILTER=y 132CONFIG_IP6_NF_FILTER=y
138CONFIG_IP6_NF_TARGET_REJECT=y 133CONFIG_IP6_NF_TARGET_REJECT=y
139CONFIG_IP6_NF_MANGLE=y 134CONFIG_IP6_NF_MANGLE=y
@@ -172,20 +167,15 @@ CONFIG_DM_ZERO=y
172CONFIG_MACINTOSH_DRIVERS=y 167CONFIG_MACINTOSH_DRIVERS=y
173CONFIG_MAC_EMUMOUSEBTN=y 168CONFIG_MAC_EMUMOUSEBTN=y
174CONFIG_NETDEVICES=y 169CONFIG_NETDEVICES=y
175CONFIG_NET_ETHERNET=y 170CONFIG_NETCONSOLE=y
176CONFIG_NET_VENDOR_3COM=y 171CONFIG_TIGON3=y
177CONFIG_NET_TULIP=y 172CONFIG_NET_TULIP=y
178CONFIG_NET_PCI=y
179CONFIG_FORCEDETH=y
180CONFIG_E100=y 173CONFIG_E100=y
181CONFIG_8139TOO=y
182CONFIG_E1000=y 174CONFIG_E1000=y
183CONFIG_SKY2=y 175CONFIG_SKY2=y
184CONFIG_TIGON3=y 176CONFIG_FORCEDETH=y
185CONFIG_TR=y 177CONFIG_8139TOO=y
186CONFIG_NET_PCMCIA=y
187CONFIG_FDDI=y 178CONFIG_FDDI=y
188CONFIG_NETCONSOLE=y
189CONFIG_INPUT_POLLDEV=y 179CONFIG_INPUT_POLLDEV=y
190# CONFIG_INPUT_MOUSEDEV_PSAUX is not set 180# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
191CONFIG_INPUT_EVDEV=y 181CONFIG_INPUT_EVDEV=y
@@ -194,6 +184,7 @@ CONFIG_INPUT_TABLET=y
194CONFIG_INPUT_TOUCHSCREEN=y 184CONFIG_INPUT_TOUCHSCREEN=y
195CONFIG_INPUT_MISC=y 185CONFIG_INPUT_MISC=y
196CONFIG_VT_HW_CONSOLE_BINDING=y 186CONFIG_VT_HW_CONSOLE_BINDING=y
187# CONFIG_LEGACY_PTYS is not set
197CONFIG_SERIAL_NONSTANDARD=y 188CONFIG_SERIAL_NONSTANDARD=y
198CONFIG_SERIAL_8250=y 189CONFIG_SERIAL_8250=y
199CONFIG_SERIAL_8250_CONSOLE=y 190CONFIG_SERIAL_8250_CONSOLE=y
@@ -203,7 +194,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
203CONFIG_SERIAL_8250_SHARE_IRQ=y 194CONFIG_SERIAL_8250_SHARE_IRQ=y
204CONFIG_SERIAL_8250_DETECT_IRQ=y 195CONFIG_SERIAL_8250_DETECT_IRQ=y
205CONFIG_SERIAL_8250_RSA=y 196CONFIG_SERIAL_8250_RSA=y
206# CONFIG_LEGACY_PTYS is not set
207CONFIG_HW_RANDOM=y 197CONFIG_HW_RANDOM=y
208# CONFIG_HW_RANDOM_INTEL is not set 198# CONFIG_HW_RANDOM_INTEL is not set
209# CONFIG_HW_RANDOM_AMD is not set 199# CONFIG_HW_RANDOM_AMD is not set
@@ -221,7 +211,6 @@ CONFIG_DRM_I915_KMS=y
221CONFIG_FB_MODE_HELPERS=y 211CONFIG_FB_MODE_HELPERS=y
222CONFIG_FB_TILEBLITTING=y 212CONFIG_FB_TILEBLITTING=y
223CONFIG_FB_EFI=y 213CONFIG_FB_EFI=y
224CONFIG_BACKLIGHT_LCD_SUPPORT=y
225# CONFIG_LCD_CLASS_DEVICE is not set 214# CONFIG_LCD_CLASS_DEVICE is not set
226CONFIG_VGACON_SOFT_SCROLLBACK=y 215CONFIG_VGACON_SOFT_SCROLLBACK=y
227CONFIG_LOGO=y 216CONFIG_LOGO=y
@@ -268,6 +257,10 @@ CONFIG_RTC_CLASS=y
268# CONFIG_RTC_HCTOSYS is not set 257# CONFIG_RTC_HCTOSYS is not set
269CONFIG_DMADEVICES=y 258CONFIG_DMADEVICES=y
270CONFIG_EEEPC_LAPTOP=y 259CONFIG_EEEPC_LAPTOP=y
260CONFIG_AMD_IOMMU=y
261CONFIG_AMD_IOMMU_STATS=y
262CONFIG_INTEL_IOMMU=y
263# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
271CONFIG_EFI_VARS=y 264CONFIG_EFI_VARS=y
272CONFIG_EXT3_FS=y 265CONFIG_EXT3_FS=y
273# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set 266# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
@@ -284,7 +277,6 @@ CONFIG_ZISOFS=y
284CONFIG_MSDOS_FS=y 277CONFIG_MSDOS_FS=y
285CONFIG_VFAT_FS=y 278CONFIG_VFAT_FS=y
286CONFIG_PROC_KCORE=y 279CONFIG_PROC_KCORE=y
287CONFIG_TMPFS=y
288CONFIG_TMPFS_POSIX_ACL=y 280CONFIG_TMPFS_POSIX_ACL=y
289CONFIG_HUGETLBFS=y 281CONFIG_HUGETLBFS=y
290CONFIG_NFS_FS=y 282CONFIG_NFS_FS=y
@@ -292,18 +284,6 @@ CONFIG_NFS_V3=y
292CONFIG_NFS_V3_ACL=y 284CONFIG_NFS_V3_ACL=y
293CONFIG_NFS_V4=y 285CONFIG_NFS_V4=y
294CONFIG_ROOT_NFS=y 286CONFIG_ROOT_NFS=y
295CONFIG_PARTITION_ADVANCED=y
296CONFIG_OSF_PARTITION=y
297CONFIG_AMIGA_PARTITION=y
298CONFIG_MAC_PARTITION=y
299CONFIG_BSD_DISKLABEL=y
300CONFIG_MINIX_SUBPARTITION=y
301CONFIG_SOLARIS_X86_PARTITION=y
302CONFIG_UNIXWARE_DISKLABEL=y
303CONFIG_SGI_PARTITION=y
304CONFIG_SUN_PARTITION=y
305CONFIG_KARMA_PARTITION=y
306CONFIG_EFI_PARTITION=y
307CONFIG_NLS_DEFAULT="utf8" 287CONFIG_NLS_DEFAULT="utf8"
308CONFIG_NLS_CODEPAGE_437=y 288CONFIG_NLS_CODEPAGE_437=y
309CONFIG_NLS_ASCII=y 289CONFIG_NLS_ASCII=y
@@ -317,13 +297,12 @@ CONFIG_DEBUG_KERNEL=y
317# CONFIG_SCHED_DEBUG is not set 297# CONFIG_SCHED_DEBUG is not set
318CONFIG_SCHEDSTATS=y 298CONFIG_SCHEDSTATS=y
319CONFIG_TIMER_STATS=y 299CONFIG_TIMER_STATS=y
320# CONFIG_RCU_CPU_STALL_DETECTOR is not set 300CONFIG_DEBUG_STACK_USAGE=y
321CONFIG_SYSCTL_SYSCALL_CHECK=y 301CONFIG_SYSCTL_SYSCALL_CHECK=y
322CONFIG_BLK_DEV_IO_TRACE=y 302CONFIG_BLK_DEV_IO_TRACE=y
323CONFIG_PROVIDE_OHCI1394_DMA_INIT=y 303CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
324CONFIG_EARLY_PRINTK_DBGP=y 304CONFIG_EARLY_PRINTK_DBGP=y
325CONFIG_DEBUG_STACKOVERFLOW=y 305CONFIG_DEBUG_STACKOVERFLOW=y
326CONFIG_DEBUG_STACK_USAGE=y
327# CONFIG_DEBUG_RODATA_TEST is not set 306# CONFIG_DEBUG_RODATA_TEST is not set
328CONFIG_DEBUG_NX_TEST=m 307CONFIG_DEBUG_NX_TEST=m
329CONFIG_DEBUG_BOOT_PARAMS=y 308CONFIG_DEBUG_BOOT_PARAMS=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 2b0b9631474b..e191ac048b59 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
8obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o 8obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
9 9
10obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o 10obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
11obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
11obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o 12obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
12obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 13obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
13obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o 14obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -25,6 +26,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
25serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o 26serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
26 27
27aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o 28aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
29camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
28blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o 30blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
29twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o 31twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
30twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o 32twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 545d0ce59818..c799352e24fc 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -28,6 +28,7 @@
28#include <crypto/aes.h> 28#include <crypto/aes.h>
29#include <crypto/cryptd.h> 29#include <crypto/cryptd.h>
30#include <crypto/ctr.h> 30#include <crypto/ctr.h>
31#include <asm/cpu_device_id.h>
31#include <asm/i387.h> 32#include <asm/i387.h>
32#include <asm/aes.h> 33#include <asm/aes.h>
33#include <crypto/scatterwalk.h> 34#include <crypto/scatterwalk.h>
@@ -1107,12 +1108,12 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
1107 one_entry_in_sg = 1; 1108 one_entry_in_sg = 1;
1108 scatterwalk_start(&src_sg_walk, req->src); 1109 scatterwalk_start(&src_sg_walk, req->src);
1109 scatterwalk_start(&assoc_sg_walk, req->assoc); 1110 scatterwalk_start(&assoc_sg_walk, req->assoc);
1110 src = scatterwalk_map(&src_sg_walk, 0); 1111 src = scatterwalk_map(&src_sg_walk);
1111 assoc = scatterwalk_map(&assoc_sg_walk, 0); 1112 assoc = scatterwalk_map(&assoc_sg_walk);
1112 dst = src; 1113 dst = src;
1113 if (unlikely(req->src != req->dst)) { 1114 if (unlikely(req->src != req->dst)) {
1114 scatterwalk_start(&dst_sg_walk, req->dst); 1115 scatterwalk_start(&dst_sg_walk, req->dst);
1115 dst = scatterwalk_map(&dst_sg_walk, 0); 1116 dst = scatterwalk_map(&dst_sg_walk);
1116 } 1117 }
1117 1118
1118 } else { 1119 } else {
@@ -1136,11 +1137,11 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
1136 * back to the packet. */ 1137 * back to the packet. */
1137 if (one_entry_in_sg) { 1138 if (one_entry_in_sg) {
1138 if (unlikely(req->src != req->dst)) { 1139 if (unlikely(req->src != req->dst)) {
1139 scatterwalk_unmap(dst, 0); 1140 scatterwalk_unmap(dst);
1140 scatterwalk_done(&dst_sg_walk, 0, 0); 1141 scatterwalk_done(&dst_sg_walk, 0, 0);
1141 } 1142 }
1142 scatterwalk_unmap(src, 0); 1143 scatterwalk_unmap(src);
1143 scatterwalk_unmap(assoc, 0); 1144 scatterwalk_unmap(assoc);
1144 scatterwalk_done(&src_sg_walk, 0, 0); 1145 scatterwalk_done(&src_sg_walk, 0, 0);
1145 scatterwalk_done(&assoc_sg_walk, 0, 0); 1146 scatterwalk_done(&assoc_sg_walk, 0, 0);
1146 } else { 1147 } else {
@@ -1189,12 +1190,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
1189 one_entry_in_sg = 1; 1190 one_entry_in_sg = 1;
1190 scatterwalk_start(&src_sg_walk, req->src); 1191 scatterwalk_start(&src_sg_walk, req->src);
1191 scatterwalk_start(&assoc_sg_walk, req->assoc); 1192 scatterwalk_start(&assoc_sg_walk, req->assoc);
1192 src = scatterwalk_map(&src_sg_walk, 0); 1193 src = scatterwalk_map(&src_sg_walk);
1193 assoc = scatterwalk_map(&assoc_sg_walk, 0); 1194 assoc = scatterwalk_map(&assoc_sg_walk);
1194 dst = src; 1195 dst = src;
1195 if (unlikely(req->src != req->dst)) { 1196 if (unlikely(req->src != req->dst)) {
1196 scatterwalk_start(&dst_sg_walk, req->dst); 1197 scatterwalk_start(&dst_sg_walk, req->dst);
1197 dst = scatterwalk_map(&dst_sg_walk, 0); 1198 dst = scatterwalk_map(&dst_sg_walk);
1198 } 1199 }
1199 1200
1200 } else { 1201 } else {
@@ -1219,11 +1220,11 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
1219 1220
1220 if (one_entry_in_sg) { 1221 if (one_entry_in_sg) {
1221 if (unlikely(req->src != req->dst)) { 1222 if (unlikely(req->src != req->dst)) {
1222 scatterwalk_unmap(dst, 0); 1223 scatterwalk_unmap(dst);
1223 scatterwalk_done(&dst_sg_walk, 0, 0); 1224 scatterwalk_done(&dst_sg_walk, 0, 0);
1224 } 1225 }
1225 scatterwalk_unmap(src, 0); 1226 scatterwalk_unmap(src);
1226 scatterwalk_unmap(assoc, 0); 1227 scatterwalk_unmap(assoc);
1227 scatterwalk_done(&src_sg_walk, 0, 0); 1228 scatterwalk_done(&src_sg_walk, 0, 0);
1228 scatterwalk_done(&assoc_sg_walk, 0, 0); 1229 scatterwalk_done(&assoc_sg_walk, 0, 0);
1229 } else { 1230 } else {
@@ -1253,14 +1254,19 @@ static struct crypto_alg __rfc4106_alg = {
1253}; 1254};
1254#endif 1255#endif
1255 1256
1257
1258static const struct x86_cpu_id aesni_cpu_id[] = {
1259 X86_FEATURE_MATCH(X86_FEATURE_AES),
1260 {}
1261};
1262MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
1263
1256static int __init aesni_init(void) 1264static int __init aesni_init(void)
1257{ 1265{
1258 int err; 1266 int err;
1259 1267
1260 if (!cpu_has_aes) { 1268 if (!x86_match_cpu(aesni_cpu_id))
1261 printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
1262 return -ENODEV; 1269 return -ENODEV;
1263 }
1264 1270
1265 if ((err = crypto_fpu_init())) 1271 if ((err = crypto_fpu_init()))
1266 goto fpu_err; 1272 goto fpu_err;
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index b05aa163d55a..7967474de8f7 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -25,6 +25,7 @@
25 * 25 *
26 */ 26 */
27 27
28#include <asm/processor.h>
28#include <crypto/blowfish.h> 29#include <crypto/blowfish.h>
29#include <linux/crypto.h> 30#include <linux/crypto.h>
30#include <linux/init.h> 31#include <linux/init.h>
@@ -76,27 +77,6 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
76 blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); 77 blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
77} 78}
78 79
79static struct crypto_alg bf_alg = {
80 .cra_name = "blowfish",
81 .cra_driver_name = "blowfish-asm",
82 .cra_priority = 200,
83 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
84 .cra_blocksize = BF_BLOCK_SIZE,
85 .cra_ctxsize = sizeof(struct bf_ctx),
86 .cra_alignmask = 3,
87 .cra_module = THIS_MODULE,
88 .cra_list = LIST_HEAD_INIT(bf_alg.cra_list),
89 .cra_u = {
90 .cipher = {
91 .cia_min_keysize = BF_MIN_KEY_SIZE,
92 .cia_max_keysize = BF_MAX_KEY_SIZE,
93 .cia_setkey = blowfish_setkey,
94 .cia_encrypt = blowfish_encrypt,
95 .cia_decrypt = blowfish_decrypt,
96 }
97 }
98};
99
100static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, 80static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
101 void (*fn)(struct bf_ctx *, u8 *, const u8 *), 81 void (*fn)(struct bf_ctx *, u8 *, const u8 *),
102 void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) 82 void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
@@ -160,28 +140,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
160 return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); 140 return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
161} 141}
162 142
163static struct crypto_alg blk_ecb_alg = {
164 .cra_name = "ecb(blowfish)",
165 .cra_driver_name = "ecb-blowfish-asm",
166 .cra_priority = 300,
167 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
168 .cra_blocksize = BF_BLOCK_SIZE,
169 .cra_ctxsize = sizeof(struct bf_ctx),
170 .cra_alignmask = 0,
171 .cra_type = &crypto_blkcipher_type,
172 .cra_module = THIS_MODULE,
173 .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
174 .cra_u = {
175 .blkcipher = {
176 .min_keysize = BF_MIN_KEY_SIZE,
177 .max_keysize = BF_MAX_KEY_SIZE,
178 .setkey = blowfish_setkey,
179 .encrypt = ecb_encrypt,
180 .decrypt = ecb_decrypt,
181 },
182 },
183};
184
185static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, 143static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
186 struct blkcipher_walk *walk) 144 struct blkcipher_walk *walk)
187{ 145{
@@ -307,29 +265,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
307 return err; 265 return err;
308} 266}
309 267
310static struct crypto_alg blk_cbc_alg = {
311 .cra_name = "cbc(blowfish)",
312 .cra_driver_name = "cbc-blowfish-asm",
313 .cra_priority = 300,
314 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
315 .cra_blocksize = BF_BLOCK_SIZE,
316 .cra_ctxsize = sizeof(struct bf_ctx),
317 .cra_alignmask = 0,
318 .cra_type = &crypto_blkcipher_type,
319 .cra_module = THIS_MODULE,
320 .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
321 .cra_u = {
322 .blkcipher = {
323 .min_keysize = BF_MIN_KEY_SIZE,
324 .max_keysize = BF_MAX_KEY_SIZE,
325 .ivsize = BF_BLOCK_SIZE,
326 .setkey = blowfish_setkey,
327 .encrypt = cbc_encrypt,
328 .decrypt = cbc_decrypt,
329 },
330 },
331};
332
333static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) 268static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
334{ 269{
335 u8 *ctrblk = walk->iv; 270 u8 *ctrblk = walk->iv;
@@ -423,7 +358,67 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
423 return err; 358 return err;
424} 359}
425 360
426static struct crypto_alg blk_ctr_alg = { 361static struct crypto_alg bf_algs[4] = { {
362 .cra_name = "blowfish",
363 .cra_driver_name = "blowfish-asm",
364 .cra_priority = 200,
365 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
366 .cra_blocksize = BF_BLOCK_SIZE,
367 .cra_ctxsize = sizeof(struct bf_ctx),
368 .cra_alignmask = 0,
369 .cra_module = THIS_MODULE,
370 .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list),
371 .cra_u = {
372 .cipher = {
373 .cia_min_keysize = BF_MIN_KEY_SIZE,
374 .cia_max_keysize = BF_MAX_KEY_SIZE,
375 .cia_setkey = blowfish_setkey,
376 .cia_encrypt = blowfish_encrypt,
377 .cia_decrypt = blowfish_decrypt,
378 }
379 }
380}, {
381 .cra_name = "ecb(blowfish)",
382 .cra_driver_name = "ecb-blowfish-asm",
383 .cra_priority = 300,
384 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
385 .cra_blocksize = BF_BLOCK_SIZE,
386 .cra_ctxsize = sizeof(struct bf_ctx),
387 .cra_alignmask = 0,
388 .cra_type = &crypto_blkcipher_type,
389 .cra_module = THIS_MODULE,
390 .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list),
391 .cra_u = {
392 .blkcipher = {
393 .min_keysize = BF_MIN_KEY_SIZE,
394 .max_keysize = BF_MAX_KEY_SIZE,
395 .setkey = blowfish_setkey,
396 .encrypt = ecb_encrypt,
397 .decrypt = ecb_decrypt,
398 },
399 },
400}, {
401 .cra_name = "cbc(blowfish)",
402 .cra_driver_name = "cbc-blowfish-asm",
403 .cra_priority = 300,
404 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
405 .cra_blocksize = BF_BLOCK_SIZE,
406 .cra_ctxsize = sizeof(struct bf_ctx),
407 .cra_alignmask = 0,
408 .cra_type = &crypto_blkcipher_type,
409 .cra_module = THIS_MODULE,
410 .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list),
411 .cra_u = {
412 .blkcipher = {
413 .min_keysize = BF_MIN_KEY_SIZE,
414 .max_keysize = BF_MAX_KEY_SIZE,
415 .ivsize = BF_BLOCK_SIZE,
416 .setkey = blowfish_setkey,
417 .encrypt = cbc_encrypt,
418 .decrypt = cbc_decrypt,
419 },
420 },
421}, {
427 .cra_name = "ctr(blowfish)", 422 .cra_name = "ctr(blowfish)",
428 .cra_driver_name = "ctr-blowfish-asm", 423 .cra_driver_name = "ctr-blowfish-asm",
429 .cra_priority = 300, 424 .cra_priority = 300,
@@ -433,7 +428,7 @@ static struct crypto_alg blk_ctr_alg = {
433 .cra_alignmask = 0, 428 .cra_alignmask = 0,
434 .cra_type = &crypto_blkcipher_type, 429 .cra_type = &crypto_blkcipher_type,
435 .cra_module = THIS_MODULE, 430 .cra_module = THIS_MODULE,
436 .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), 431 .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list),
437 .cra_u = { 432 .cra_u = {
438 .blkcipher = { 433 .blkcipher = {
439 .min_keysize = BF_MIN_KEY_SIZE, 434 .min_keysize = BF_MIN_KEY_SIZE,
@@ -444,43 +439,45 @@ static struct crypto_alg blk_ctr_alg = {
444 .decrypt = ctr_crypt, 439 .decrypt = ctr_crypt,
445 }, 440 },
446 }, 441 },
447}; 442} };
443
444static bool is_blacklisted_cpu(void)
445{
446 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
447 return false;
448
449 if (boot_cpu_data.x86 == 0x0f) {
450 /*
451 * On Pentium 4, blowfish-x86_64 is slower than generic C
452 * implementation because use of 64bit rotates (which are really
453 * slow on P4). Therefore blacklist P4s.
454 */
455 return true;
456 }
457
458 return false;
459}
460
461static int force;
462module_param(force, int, 0);
463MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
448 464
449static int __init init(void) 465static int __init init(void)
450{ 466{
451 int err; 467 if (!force && is_blacklisted_cpu()) {
468 printk(KERN_INFO
469 "blowfish-x86_64: performance on this CPU "
470 "would be suboptimal: disabling "
471 "blowfish-x86_64.\n");
472 return -ENODEV;
473 }
452 474
453 err = crypto_register_alg(&bf_alg); 475 return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
454 if (err)
455 goto bf_err;
456 err = crypto_register_alg(&blk_ecb_alg);
457 if (err)
458 goto ecb_err;
459 err = crypto_register_alg(&blk_cbc_alg);
460 if (err)
461 goto cbc_err;
462 err = crypto_register_alg(&blk_ctr_alg);
463 if (err)
464 goto ctr_err;
465
466 return 0;
467
468ctr_err:
469 crypto_unregister_alg(&blk_cbc_alg);
470cbc_err:
471 crypto_unregister_alg(&blk_ecb_alg);
472ecb_err:
473 crypto_unregister_alg(&bf_alg);
474bf_err:
475 return err;
476} 476}
477 477
478static void __exit fini(void) 478static void __exit fini(void)
479{ 479{
480 crypto_unregister_alg(&blk_ctr_alg); 480 crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
481 crypto_unregister_alg(&blk_cbc_alg);
482 crypto_unregister_alg(&blk_ecb_alg);
483 crypto_unregister_alg(&bf_alg);
484} 481}
485 482
486module_init(init); 483module_init(init);
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
new file mode 100644
index 000000000000..0b3374335fdc
--- /dev/null
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -0,0 +1,520 @@
1/*
2 * Camellia Cipher Algorithm (x86_64)
3 *
4 * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
19 * USA
20 *
21 */
22
23.file "camellia-x86_64-asm_64.S"
24.text
25
26.extern camellia_sp10011110;
27.extern camellia_sp22000222;
28.extern camellia_sp03303033;
29.extern camellia_sp00444404;
30.extern camellia_sp02220222;
31.extern camellia_sp30333033;
32.extern camellia_sp44044404;
33.extern camellia_sp11101110;
34
35#define sp10011110 camellia_sp10011110
36#define sp22000222 camellia_sp22000222
37#define sp03303033 camellia_sp03303033
38#define sp00444404 camellia_sp00444404
39#define sp02220222 camellia_sp02220222
40#define sp30333033 camellia_sp30333033
41#define sp44044404 camellia_sp44044404
42#define sp11101110 camellia_sp11101110
43
44#define CAMELLIA_TABLE_BYTE_LEN 272
45
46/* struct camellia_ctx: */
47#define key_table 0
48#define key_length CAMELLIA_TABLE_BYTE_LEN
49
50/* register macros */
51#define CTX %rdi
52#define RIO %rsi
53#define RIOd %esi
54
55#define RAB0 %rax
56#define RCD0 %rcx
57#define RAB1 %rbx
58#define RCD1 %rdx
59
60#define RAB0d %eax
61#define RCD0d %ecx
62#define RAB1d %ebx
63#define RCD1d %edx
64
65#define RAB0bl %al
66#define RCD0bl %cl
67#define RAB1bl %bl
68#define RCD1bl %dl
69
70#define RAB0bh %ah
71#define RCD0bh %ch
72#define RAB1bh %bh
73#define RCD1bh %dh
74
75#define RT0 %rsi
76#define RT1 %rbp
77#define RT2 %r8
78
79#define RT0d %esi
80#define RT1d %ebp
81#define RT2d %r8d
82
83#define RT2bl %r8b
84
85#define RXOR %r9
86#define RRBP %r10
87#define RDST %r11
88
89#define RXORd %r9d
90#define RXORbl %r9b
91
92#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
93 movzbl ab ## bl, tmp2 ## d; \
94 movzbl ab ## bh, tmp1 ## d; \
95 rorq $16, ab; \
96 xorq T0(, tmp2, 8), dst; \
97 xorq T1(, tmp1, 8), dst;
98
99/**********************************************************************
100 1-way camellia
101 **********************************************************************/
102#define roundsm(ab, subkey, cd) \
103 movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
104 \
105 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
106 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
107 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
108 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
109 \
110 xorq RT2, cd ## 0;
111
112#define fls(l, r, kl, kr) \
113 movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
114 andl l ## 0d, RT0d; \
115 roll $1, RT0d; \
116 shlq $32, RT0; \
117 xorq RT0, l ## 0; \
118 movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
119 orq r ## 0, RT1; \
120 shrq $32, RT1; \
121 xorq RT1, r ## 0; \
122 \
123 movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
124 orq l ## 0, RT2; \
125 shrq $32, RT2; \
126 xorq RT2, l ## 0; \
127 movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
128 andl r ## 0d, RT0d; \
129 roll $1, RT0d; \
130 shlq $32, RT0; \
131 xorq RT0, r ## 0;
132
133#define enc_rounds(i) \
134 roundsm(RAB, i + 2, RCD); \
135 roundsm(RCD, i + 3, RAB); \
136 roundsm(RAB, i + 4, RCD); \
137 roundsm(RCD, i + 5, RAB); \
138 roundsm(RAB, i + 6, RCD); \
139 roundsm(RCD, i + 7, RAB);
140
141#define enc_fls(i) \
142 fls(RAB, RCD, i + 0, i + 1);
143
144#define enc_inpack() \
145 movq (RIO), RAB0; \
146 bswapq RAB0; \
147 rolq $32, RAB0; \
148 movq 4*2(RIO), RCD0; \
149 bswapq RCD0; \
150 rorq $32, RCD0; \
151 xorq key_table(CTX), RAB0;
152
153#define enc_outunpack(op, max) \
154 xorq key_table(CTX, max, 8), RCD0; \
155 rorq $32, RCD0; \
156 bswapq RCD0; \
157 op ## q RCD0, (RIO); \
158 rolq $32, RAB0; \
159 bswapq RAB0; \
160 op ## q RAB0, 4*2(RIO);
161
162#define dec_rounds(i) \
163 roundsm(RAB, i + 7, RCD); \
164 roundsm(RCD, i + 6, RAB); \
165 roundsm(RAB, i + 5, RCD); \
166 roundsm(RCD, i + 4, RAB); \
167 roundsm(RAB, i + 3, RCD); \
168 roundsm(RCD, i + 2, RAB);
169
170#define dec_fls(i) \
171 fls(RAB, RCD, i + 1, i + 0);
172
173#define dec_inpack(max) \
174 movq (RIO), RAB0; \
175 bswapq RAB0; \
176 rolq $32, RAB0; \
177 movq 4*2(RIO), RCD0; \
178 bswapq RCD0; \
179 rorq $32, RCD0; \
180 xorq key_table(CTX, max, 8), RAB0;
181
182#define dec_outunpack() \
183 xorq key_table(CTX), RCD0; \
184 rorq $32, RCD0; \
185 bswapq RCD0; \
186 movq RCD0, (RIO); \
187 rolq $32, RAB0; \
188 bswapq RAB0; \
189 movq RAB0, 4*2(RIO);
190
191.global __camellia_enc_blk;
192.type __camellia_enc_blk,@function;
193
194__camellia_enc_blk:
195 /* input:
196 * %rdi: ctx, CTX
197 * %rsi: dst
198 * %rdx: src
199 * %rcx: bool xor
200 */
201 movq %rbp, RRBP;
202
203 movq %rcx, RXOR;
204 movq %rsi, RDST;
205 movq %rdx, RIO;
206
207 enc_inpack();
208
209 enc_rounds(0);
210 enc_fls(8);
211 enc_rounds(8);
212 enc_fls(16);
213 enc_rounds(16);
214 movl $24, RT1d; /* max */
215
216 cmpb $16, key_length(CTX);
217 je __enc_done;
218
219 enc_fls(24);
220 enc_rounds(24);
221 movl $32, RT1d; /* max */
222
223__enc_done:
224 testb RXORbl, RXORbl;
225 movq RDST, RIO;
226
227 jnz __enc_xor;
228
229 enc_outunpack(mov, RT1);
230
231 movq RRBP, %rbp;
232 ret;
233
234__enc_xor:
235 enc_outunpack(xor, RT1);
236
237 movq RRBP, %rbp;
238 ret;
239
240.global camellia_dec_blk;
241.type camellia_dec_blk,@function;
242
243camellia_dec_blk:
244 /* input:
245 * %rdi: ctx, CTX
246 * %rsi: dst
247 * %rdx: src
248 */
249 cmpl $16, key_length(CTX);
250 movl $32, RT2d;
251 movl $24, RXORd;
252 cmovel RXORd, RT2d; /* max */
253
254 movq %rbp, RRBP;
255 movq %rsi, RDST;
256 movq %rdx, RIO;
257
258 dec_inpack(RT2);
259
260 cmpb $24, RT2bl;
261 je __dec_rounds16;
262
263 dec_rounds(24);
264 dec_fls(24);
265
266__dec_rounds16:
267 dec_rounds(16);
268 dec_fls(16);
269 dec_rounds(8);
270 dec_fls(8);
271 dec_rounds(0);
272
273 movq RDST, RIO;
274
275 dec_outunpack();
276
277 movq RRBP, %rbp;
278 ret;
279
280/**********************************************************************
281 2-way camellia
282 **********************************************************************/
283#define roundsm2(ab, subkey, cd) \
284 movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
285 xorq RT2, cd ## 1; \
286 \
287 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
288 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
289 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
290 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
291 \
292 xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
293 xorq RT2, cd ## 0; \
294 xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
295 xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
296 xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
297
298#define fls2(l, r, kl, kr) \
299 movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
300 andl l ## 0d, RT0d; \
301 roll $1, RT0d; \
302 shlq $32, RT0; \
303 xorq RT0, l ## 0; \
304 movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
305 orq r ## 0, RT1; \
306 shrq $32, RT1; \
307 xorq RT1, r ## 0; \
308 \
309 movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
310 andl l ## 1d, RT2d; \
311 roll $1, RT2d; \
312 shlq $32, RT2; \
313 xorq RT2, l ## 1; \
314 movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
315 orq r ## 1, RT0; \
316 shrq $32, RT0; \
317 xorq RT0, r ## 1; \
318 \
319 movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
320 orq l ## 0, RT1; \
321 shrq $32, RT1; \
322 xorq RT1, l ## 0; \
323 movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
324 andl r ## 0d, RT2d; \
325 roll $1, RT2d; \
326 shlq $32, RT2; \
327 xorq RT2, r ## 0; \
328 \
329 movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
330 orq l ## 1, RT0; \
331 shrq $32, RT0; \
332 xorq RT0, l ## 1; \
333 movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
334 andl r ## 1d, RT1d; \
335 roll $1, RT1d; \
336 shlq $32, RT1; \
337 xorq RT1, r ## 1;
338
339#define enc_rounds2(i) \
340 roundsm2(RAB, i + 2, RCD); \
341 roundsm2(RCD, i + 3, RAB); \
342 roundsm2(RAB, i + 4, RCD); \
343 roundsm2(RCD, i + 5, RAB); \
344 roundsm2(RAB, i + 6, RCD); \
345 roundsm2(RCD, i + 7, RAB);
346
347#define enc_fls2(i) \
348 fls2(RAB, RCD, i + 0, i + 1);
349
350#define enc_inpack2() \
351 movq (RIO), RAB0; \
352 bswapq RAB0; \
353 rorq $32, RAB0; \
354 movq 4*2(RIO), RCD0; \
355 bswapq RCD0; \
356 rolq $32, RCD0; \
357 xorq key_table(CTX), RAB0; \
358 \
359 movq 8*2(RIO), RAB1; \
360 bswapq RAB1; \
361 rorq $32, RAB1; \
362 movq 12*2(RIO), RCD1; \
363 bswapq RCD1; \
364 rolq $32, RCD1; \
365 xorq key_table(CTX), RAB1;
366
367#define enc_outunpack2(op, max) \
368 xorq key_table(CTX, max, 8), RCD0; \
369 rolq $32, RCD0; \
370 bswapq RCD0; \
371 op ## q RCD0, (RIO); \
372 rorq $32, RAB0; \
373 bswapq RAB0; \
374 op ## q RAB0, 4*2(RIO); \
375 \
376 xorq key_table(CTX, max, 8), RCD1; \
377 rolq $32, RCD1; \
378 bswapq RCD1; \
379 op ## q RCD1, 8*2(RIO); \
380 rorq $32, RAB1; \
381 bswapq RAB1; \
382 op ## q RAB1, 12*2(RIO);
383
384#define dec_rounds2(i) \
385 roundsm2(RAB, i + 7, RCD); \
386 roundsm2(RCD, i + 6, RAB); \
387 roundsm2(RAB, i + 5, RCD); \
388 roundsm2(RCD, i + 4, RAB); \
389 roundsm2(RAB, i + 3, RCD); \
390 roundsm2(RCD, i + 2, RAB);
391
392#define dec_fls2(i) \
393 fls2(RAB, RCD, i + 1, i + 0);
394
395#define dec_inpack2(max) \
396 movq (RIO), RAB0; \
397 bswapq RAB0; \
398 rorq $32, RAB0; \
399 movq 4*2(RIO), RCD0; \
400 bswapq RCD0; \
401 rolq $32, RCD0; \
402 xorq key_table(CTX, max, 8), RAB0; \
403 \
404 movq 8*2(RIO), RAB1; \
405 bswapq RAB1; \
406 rorq $32, RAB1; \
407 movq 12*2(RIO), RCD1; \
408 bswapq RCD1; \
409 rolq $32, RCD1; \
410 xorq key_table(CTX, max, 8), RAB1;
411
412#define dec_outunpack2() \
413 xorq key_table(CTX), RCD0; \
414 rolq $32, RCD0; \
415 bswapq RCD0; \
416 movq RCD0, (RIO); \
417 rorq $32, RAB0; \
418 bswapq RAB0; \
419 movq RAB0, 4*2(RIO); \
420 \
421 xorq key_table(CTX), RCD1; \
422 rolq $32, RCD1; \
423 bswapq RCD1; \
424 movq RCD1, 8*2(RIO); \
425 rorq $32, RAB1; \
426 bswapq RAB1; \
427 movq RAB1, 12*2(RIO);
428
429.global __camellia_enc_blk_2way;
430.type __camellia_enc_blk_2way,@function;
431
432__camellia_enc_blk_2way:
433 /* input:
434 * %rdi: ctx, CTX
435 * %rsi: dst
436 * %rdx: src
437 * %rcx: bool xor
438 */
439 pushq %rbx;
440
441 movq %rbp, RRBP;
442 movq %rcx, RXOR;
443 movq %rsi, RDST;
444 movq %rdx, RIO;
445
446 enc_inpack2();
447
448 enc_rounds2(0);
449 enc_fls2(8);
450 enc_rounds2(8);
451 enc_fls2(16);
452 enc_rounds2(16);
453 movl $24, RT2d; /* max */
454
455 cmpb $16, key_length(CTX);
456 je __enc2_done;
457
458 enc_fls2(24);
459 enc_rounds2(24);
460 movl $32, RT2d; /* max */
461
462__enc2_done:
463 test RXORbl, RXORbl;
464 movq RDST, RIO;
465 jnz __enc2_xor;
466
467 enc_outunpack2(mov, RT2);
468
469 movq RRBP, %rbp;
470 popq %rbx;
471 ret;
472
473__enc2_xor:
474 enc_outunpack2(xor, RT2);
475
476 movq RRBP, %rbp;
477 popq %rbx;
478 ret;
479
480.global camellia_dec_blk_2way;
481.type camellia_dec_blk_2way,@function;
482
483camellia_dec_blk_2way:
484 /* input:
485 * %rdi: ctx, CTX
486 * %rsi: dst
487 * %rdx: src
488 */
489 cmpl $16, key_length(CTX);
490 movl $32, RT2d;
491 movl $24, RXORd;
492 cmovel RXORd, RT2d; /* max */
493
494 movq %rbx, RXOR;
495 movq %rbp, RRBP;
496 movq %rsi, RDST;
497 movq %rdx, RIO;
498
499 dec_inpack2(RT2);
500
501 cmpb $24, RT2bl;
502 je __dec2_rounds16;
503
504 dec_rounds2(24);
505 dec_fls2(24);
506
507__dec2_rounds16:
508 dec_rounds2(16);
509 dec_fls2(16);
510 dec_rounds2(8);
511 dec_fls2(8);
512 dec_rounds2(0);
513
514 movq RDST, RIO;
515
516 dec_outunpack2();
517
518 movq RRBP, %rbp;
519 movq RXOR, %rbx;
520 ret;
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
new file mode 100644
index 000000000000..3306dc0b139e
--- /dev/null
+++ b/arch/x86/crypto/camellia_glue.c
@@ -0,0 +1,1952 @@
1/*
2 * Glue Code for assembler optimized version of Camellia
3 *
4 * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * Camellia parts based on code by:
7 * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
8 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
9 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
10 * CTR part based on code (crypto/ctr.c) by:
11 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
26 * USA
27 *
28 */
29
30#include <asm/processor.h>
31#include <asm/unaligned.h>
32#include <linux/crypto.h>
33#include <linux/init.h>
34#include <linux/module.h>
35#include <linux/types.h>
36#include <crypto/algapi.h>
37#include <crypto/b128ops.h>
38#include <crypto/lrw.h>
39#include <crypto/xts.h>
40
41#define CAMELLIA_MIN_KEY_SIZE 16
42#define CAMELLIA_MAX_KEY_SIZE 32
43#define CAMELLIA_BLOCK_SIZE 16
44#define CAMELLIA_TABLE_BYTE_LEN 272
45
46struct camellia_ctx {
47 u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
48 u32 key_length;
49};
50
51/* regular block cipher functions */
52asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
53 const u8 *src, bool xor);
54asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
55 const u8 *src);
56
57/* 2-way parallel cipher functions */
58asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
59 const u8 *src, bool xor);
60asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
61 const u8 *src);
62
63static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
64 const u8 *src)
65{
66 __camellia_enc_blk(ctx, dst, src, false);
67}
68
69static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
70 const u8 *src)
71{
72 __camellia_enc_blk(ctx, dst, src, true);
73}
74
75static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
76 const u8 *src)
77{
78 __camellia_enc_blk_2way(ctx, dst, src, false);
79}
80
81static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
82 const u8 *src)
83{
84 __camellia_enc_blk_2way(ctx, dst, src, true);
85}
86
87static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
88{
89 camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src);
90}
91
92static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
93{
94 camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src);
95}
96
97/* camellia sboxes */
98const u64 camellia_sp10011110[256] = {
99 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
100 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
101 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
102 0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
103 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
104 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
105 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
106 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
107 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
108 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
109 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
110 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
111 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
112 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
113 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
114 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
115 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
116 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
117 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
118 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
119 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
120 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
121 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
122 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
123 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
124 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100,
125 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00,
126 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200,
127 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600,
128 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700,
129 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100,
130 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700,
131 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00,
132 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00,
133 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200,
134 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600,
135 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200,
136 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400,
137 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300,
138 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100,
139 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800,
140 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00,
141 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00,
142 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00,
143 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200,
144 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00,
145 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000,
146 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200,
147 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000,
148 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700,
149 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00,
150 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00,
151 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00,
152 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00,
153 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00,
154 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300,
155 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600,
156 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00,
157 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200,
158 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600,
159 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600,
160 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600,
161 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300,
162 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00,
163 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700,
164 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00,
165 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900,
166 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600,
167 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400,
168 0x5900005959595900, 0x7800007878787800, 0x9800009898989800,
169 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700,
170 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00,
171 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00,
172 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200,
173 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200,
174 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500,
175 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00,
176 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900,
177 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00,
178 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400,
179 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300,
180 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900,
181 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500,
182 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400,
183 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000,
184 0x9e00009e9e9e9e00,
185};
186
187const u64 camellia_sp22000222[256] = {
188 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858,
189 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e,
190 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9,
191 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a,
192 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d,
193 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf,
194 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a,
195 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242,
196 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e,
197 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca,
198 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d,
199 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f,
200 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e,
201 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060,
202 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc,
203 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434,
204 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272,
205 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e,
206 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3,
207 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad,
208 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8,
209 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a,
210 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7,
211 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a,
212 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656,
213 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363,
214 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf,
215 0x9898000000989898, 0x9797000000979797, 0x8585000000858585,
216 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec,
217 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f,
218 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3,
219 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf,
220 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474,
221 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636,
222 0x2222000000222222, 0x3838000000383838, 0x6464000000646464,
223 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c,
224 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5,
225 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888,
226 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787,
227 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323,
228 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1,
229 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9,
230 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555,
231 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa,
232 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4,
233 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6,
234 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1,
235 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5,
236 0x2020000000202020, 0x8989000000898989, 0x0000000000000000,
237 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef,
238 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515,
239 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5,
240 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb,
241 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8,
242 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b,
243 0x9494000000949494, 0x2121000000212121, 0x6666000000666666,
244 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed,
245 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe,
246 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4,
247 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c,
248 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d,
249 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d,
250 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626,
251 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c,
252 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f,
253 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc,
254 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252,
255 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d,
256 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969,
257 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131,
258 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf,
259 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575,
260 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757,
261 0x8484000000848484, 0x1111000000111111, 0x4545000000454545,
262 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4,
263 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa,
264 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959,
265 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292,
266 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878,
267 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949,
268 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7,
269 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393,
270 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a,
271 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9,
272 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101,
273 0x3d3d0000003d3d3d,
274};
275
276const u64 camellia_sp03303033[256] = {
277 0x0038380038003838, 0x0041410041004141, 0x0016160016001616,
278 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393,
279 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272,
280 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a,
281 0x0075750075007575, 0x0006060006000606, 0x0057570057005757,
282 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7,
283 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2,
284 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090,
285 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7,
286 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2,
287 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343,
288 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7,
289 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f,
290 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818,
291 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f,
292 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d,
293 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c,
294 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3,
295 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec,
296 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b,
297 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636,
298 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686,
299 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd,
300 0x0066660066006666, 0x0058580058005858, 0x0096960096009696,
301 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595,
302 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8,
303 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef,
304 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161,
305 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b,
306 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb,
307 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8,
308 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb,
309 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d,
310 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d,
311 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919,
312 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b,
313 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979,
314 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222,
315 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1,
316 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8,
317 0x0012120012001212, 0x0004040004000404, 0x0074740074007474,
318 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e,
319 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555,
320 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe,
321 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131,
322 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad,
323 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070,
324 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969,
325 0x0008080008000808, 0x0062620062006262, 0x0000000000000000,
326 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb,
327 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545,
328 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d,
329 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee,
330 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e,
331 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6,
332 0x0025250025002525, 0x0048480048004848, 0x0099990099009999,
333 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b,
334 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf,
335 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929,
336 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313,
337 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363,
338 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b,
339 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989,
340 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717,
341 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3,
342 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737,
343 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494,
344 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b,
345 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a,
346 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c,
347 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3,
348 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d,
349 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5,
350 0x0021210021002121, 0x0044440044004444, 0x0051510051005151,
351 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939,
352 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa,
353 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656,
354 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4,
355 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e,
356 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252,
357 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9,
358 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4,
359 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a,
360 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a,
361 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040,
362 0x004f4f004f004f4f,
363};
364
365const u64 camellia_sp00444404[256] = {
366 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3,
367 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057,
368 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023,
369 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5,
370 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d,
371 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af,
372 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e,
373 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b,
374 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5,
375 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a,
376 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b,
377 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0,
378 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0,
379 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb,
380 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d,
381 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004,
382 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de,
383 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c,
384 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe,
385 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a,
386 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060,
387 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0,
388 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054,
389 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064,
390 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3,
391 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6,
392 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087,
393 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090,
394 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d,
395 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8,
396 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081,
397 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063,
398 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f,
399 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9,
400 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078,
401 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071,
402 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088,
403 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9,
404 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036,
405 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1,
406 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb,
407 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad,
408 0x0000777777770077, 0x0000808080800080, 0x0000828282820082,
409 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5,
410 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c,
411 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093,
412 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e,
413 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd,
414 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb,
415 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f,
416 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1,
417 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d,
418 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056,
419 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066,
420 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012,
421 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099,
422 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e,
423 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031,
424 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058,
425 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c,
426 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018,
427 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2,
428 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008,
429 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050,
430 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089,
431 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095,
432 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4,
433 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db,
434 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f,
435 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002,
436 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067,
437 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2,
438 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037,
439 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b,
440 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079,
441 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e,
442 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd,
443 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a,
444 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025,
445 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa,
446 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee,
447 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068,
448 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028,
449 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1,
450 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7,
451 0x00009e9e9e9e009e,
452};
453
454const u64 camellia_sp02220222[256] = {
455 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858,
456 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e,
457 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9,
458 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a,
459 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d,
460 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf,
461 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a,
462 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242,
463 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e,
464 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca,
465 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d,
466 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f,
467 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e,
468 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060,
469 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc,
470 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434,
471 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272,
472 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e,
473 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3,
474 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad,
475 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8,
476 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a,
477 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7,
478 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a,
479 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656,
480 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363,
481 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf,
482 0x0098989800989898, 0x0097979700979797, 0x0085858500858585,
483 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec,
484 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f,
485 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3,
486 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf,
487 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474,
488 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636,
489 0x0022222200222222, 0x0038383800383838, 0x0064646400646464,
490 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c,
491 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5,
492 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888,
493 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787,
494 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323,
495 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1,
496 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9,
497 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555,
498 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa,
499 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4,
500 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6,
501 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1,
502 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5,
503 0x0020202000202020, 0x0089898900898989, 0x0000000000000000,
504 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef,
505 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515,
506 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5,
507 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb,
508 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8,
509 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b,
510 0x0094949400949494, 0x0021212100212121, 0x0066666600666666,
511 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed,
512 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe,
513 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4,
514 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c,
515 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d,
516 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d,
517 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626,
518 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c,
519 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f,
520 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc,
521 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252,
522 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d,
523 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969,
524 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131,
525 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf,
526 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575,
527 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757,
528 0x0084848400848484, 0x0011111100111111, 0x0045454500454545,
529 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4,
530 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa,
531 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959,
532 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292,
533 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878,
534 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949,
535 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7,
536 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393,
537 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a,
538 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9,
539 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101,
540 0x003d3d3d003d3d3d,
541};
542
543const u64 camellia_sp30333033[256] = {
544 0x3800383838003838, 0x4100414141004141, 0x1600161616001616,
545 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393,
546 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272,
547 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a,
548 0x7500757575007575, 0x0600060606000606, 0x5700575757005757,
549 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7,
550 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2,
551 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090,
552 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7,
553 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2,
554 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343,
555 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7,
556 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f,
557 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818,
558 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f,
559 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d,
560 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c,
561 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3,
562 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec,
563 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b,
564 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636,
565 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686,
566 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd,
567 0x6600666666006666, 0x5800585858005858, 0x9600969696009696,
568 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595,
569 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8,
570 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef,
571 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161,
572 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b,
573 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb,
574 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8,
575 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb,
576 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d,
577 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d,
578 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919,
579 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b,
580 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979,
581 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222,
582 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1,
583 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8,
584 0x1200121212001212, 0x0400040404000404, 0x7400747474007474,
585 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e,
586 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555,
587 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe,
588 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131,
589 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad,
590 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070,
591 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969,
592 0x0800080808000808, 0x6200626262006262, 0x0000000000000000,
593 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb,
594 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545,
595 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d,
596 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee,
597 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e,
598 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6,
599 0x2500252525002525, 0x4800484848004848, 0x9900999999009999,
600 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b,
601 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf,
602 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929,
603 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313,
604 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363,
605 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b,
606 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989,
607 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717,
608 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3,
609 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737,
610 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494,
611 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b,
612 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a,
613 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c,
614 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3,
615 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d,
616 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5,
617 0x2100212121002121, 0x4400444444004444, 0x5100515151005151,
618 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939,
619 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa,
620 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656,
621 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4,
622 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e,
623 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252,
624 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9,
625 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4,
626 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a,
627 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a,
628 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040,
629 0x4f004f4f4f004f4f,
630};
631
632const u64 camellia_sp44044404[256] = {
633 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3,
634 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057,
635 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023,
636 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5,
637 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d,
638 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af,
639 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e,
640 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b,
641 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5,
642 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a,
643 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b,
644 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0,
645 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0,
646 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb,
647 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d,
648 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004,
649 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de,
650 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c,
651 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe,
652 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a,
653 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060,
654 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0,
655 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054,
656 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064,
657 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3,
658 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6,
659 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087,
660 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090,
661 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d,
662 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8,
663 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081,
664 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063,
665 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f,
666 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9,
667 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078,
668 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071,
669 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088,
670 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9,
671 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036,
672 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1,
673 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb,
674 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad,
675 0x7777007777770077, 0x8080008080800080, 0x8282008282820082,
676 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5,
677 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c,
678 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093,
679 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e,
680 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd,
681 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb,
682 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f,
683 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1,
684 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d,
685 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056,
686 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066,
687 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012,
688 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099,
689 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e,
690 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031,
691 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058,
692 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c,
693 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018,
694 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2,
695 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008,
696 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050,
697 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089,
698 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095,
699 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4,
700 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db,
701 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f,
702 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002,
703 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067,
704 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2,
705 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037,
706 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b,
707 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079,
708 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e,
709 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd,
710 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a,
711 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025,
712 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa,
713 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee,
714 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068,
715 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028,
716 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1,
717 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7,
718 0x9e9e009e9e9e009e,
719};
720
721const u64 camellia_sp11101110[256] = {
722 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00,
723 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700,
724 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400,
725 0x8585850085858500, 0x5757570057575700, 0x3535350035353500,
726 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00,
727 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00,
728 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500,
729 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100,
730 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00,
731 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500,
732 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600,
733 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00,
734 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00,
735 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000,
736 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00,
737 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00,
738 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900,
739 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700,
740 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900,
741 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600,
742 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00,
743 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00,
744 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00,
745 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00,
746 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00,
747 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100,
748 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00,
749 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200,
750 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600,
751 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700,
752 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100,
753 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700,
754 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00,
755 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00,
756 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200,
757 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600,
758 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200,
759 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400,
760 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300,
761 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100,
762 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800,
763 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00,
764 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00,
765 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00,
766 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200,
767 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00,
768 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000,
769 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200,
770 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000,
771 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700,
772 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00,
773 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00,
774 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00,
775 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00,
776 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00,
777 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300,
778 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600,
779 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00,
780 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200,
781 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600,
782 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600,
783 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600,
784 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300,
785 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00,
786 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700,
787 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00,
788 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900,
789 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600,
790 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400,
791 0x5959590059595900, 0x7878780078787800, 0x9898980098989800,
792 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700,
793 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00,
794 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00,
795 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200,
796 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200,
797 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500,
798 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00,
799 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900,
800 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00,
801 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400,
802 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300,
803 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900,
804 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500,
805 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400,
806 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000,
807 0x9e9e9e009e9e9e00,
808};
809
810/* key constants */
811#define CAMELLIA_SIGMA1L (0xA09E667FL)
812#define CAMELLIA_SIGMA1R (0x3BCC908BL)
813#define CAMELLIA_SIGMA2L (0xB67AE858L)
814#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
815#define CAMELLIA_SIGMA3L (0xC6EF372FL)
816#define CAMELLIA_SIGMA3R (0xE94F82BEL)
817#define CAMELLIA_SIGMA4L (0x54FF53A5L)
818#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
819#define CAMELLIA_SIGMA5L (0x10E527FAL)
820#define CAMELLIA_SIGMA5R (0xDE682D1DL)
821#define CAMELLIA_SIGMA6L (0xB05688C2L)
822#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
823
824/* macros */
825#define ROLDQ(l, r, bits) ({ \
826 u64 t = l; \
827 l = (l << bits) | (r >> (64 - bits)); \
828 r = (r << bits) | (t >> (64 - bits)); \
829})
830
831#define CAMELLIA_F(x, kl, kr, y) ({ \
832 u64 ii = x ^ (((u64)kl << 32) | kr); \
833 y = camellia_sp11101110[(uint8_t)ii]; \
834 y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \
835 ii >>= 16; \
836 y ^= camellia_sp30333033[(uint8_t)ii]; \
837 y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \
838 ii >>= 16; \
839 y ^= camellia_sp00444404[(uint8_t)ii]; \
840 y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \
841 ii >>= 16; \
842 y ^= camellia_sp22000222[(uint8_t)ii]; \
843 y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \
844 y = ror64(y, 32); \
845})
846
847#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32))
848
849static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
850{
851 u64 kw4, tt;
852 u32 dw, tl, tr;
853
854 /* absorb kw2 to other subkeys */
855 /* round 2 */
856 subRL[3] ^= subRL[1];
857 /* round 4 */
858 subRL[5] ^= subRL[1];
859 /* round 6 */
860 subRL[7] ^= subRL[1];
861
862 subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
863 /* modified for FLinv(kl2) */
864 dw = (subRL[1] & subRL[9]) >> 32,
865 subRL[1] ^= rol32(dw, 1);
866
867 /* round 8 */
868 subRL[11] ^= subRL[1];
869 /* round 10 */
870 subRL[13] ^= subRL[1];
871 /* round 12 */
872 subRL[15] ^= subRL[1];
873
874 subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
875 /* modified for FLinv(kl4) */
876 dw = (subRL[1] & subRL[17]) >> 32,
877 subRL[1] ^= rol32(dw, 1);
878
879 /* round 14 */
880 subRL[19] ^= subRL[1];
881 /* round 16 */
882 subRL[21] ^= subRL[1];
883 /* round 18 */
884 subRL[23] ^= subRL[1];
885
886 if (max == 24) {
887 /* kw3 */
888 subRL[24] ^= subRL[1];
889
890 /* absorb kw4 to other subkeys */
891 kw4 = subRL[25];
892 } else {
893 subRL[1] ^= (subRL[1] & ~subRL[25]) << 32;
894 /* modified for FLinv(kl6) */
895 dw = (subRL[1] & subRL[25]) >> 32,
896 subRL[1] ^= rol32(dw, 1);
897
898 /* round 20 */
899 subRL[27] ^= subRL[1];
900 /* round 22 */
901 subRL[29] ^= subRL[1];
902 /* round 24 */
903 subRL[31] ^= subRL[1];
904 /* kw3 */
905 subRL[32] ^= subRL[1];
906
907 /* absorb kw4 to other subkeys */
908 kw4 = subRL[33];
909 /* round 23 */
910 subRL[30] ^= kw4;
911 /* round 21 */
912 subRL[28] ^= kw4;
913 /* round 19 */
914 subRL[26] ^= kw4;
915
916 kw4 ^= (kw4 & ~subRL[24]) << 32;
917 /* modified for FL(kl5) */
918 dw = (kw4 & subRL[24]) >> 32,
919 kw4 ^= rol32(dw, 1);
920 }
921
922 /* round 17 */
923 subRL[22] ^= kw4;
924 /* round 15 */
925 subRL[20] ^= kw4;
926 /* round 13 */
927 subRL[18] ^= kw4;
928
929 kw4 ^= (kw4 & ~subRL[16]) << 32;
930 /* modified for FL(kl3) */
931 dw = (kw4 & subRL[16]) >> 32,
932 kw4 ^= rol32(dw, 1);
933
934 /* round 11 */
935 subRL[14] ^= kw4;
936 /* round 9 */
937 subRL[12] ^= kw4;
938 /* round 7 */
939 subRL[10] ^= kw4;
940
941 kw4 ^= (kw4 & ~subRL[8]) << 32;
942 /* modified for FL(kl1) */
943 dw = (kw4 & subRL[8]) >> 32,
944 kw4 ^= rol32(dw, 1);
945
946 /* round 5 */
947 subRL[6] ^= kw4;
948 /* round 3 */
949 subRL[4] ^= kw4;
950 /* round 1 */
951 subRL[2] ^= kw4;
952 /* kw1 */
953 subRL[0] ^= kw4;
954
955 /* key XOR is end of F-function */
956 SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */
957 SET_SUBKEY_LR(2, subRL[3]); /* round 1 */
958 SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */
959 SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */
960 SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */
961 SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */
962
963 tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
964 dw = tl & (subRL[8] >> 32), /* FL(kl1) */
965 tr = subRL[10] ^ rol32(dw, 1);
966 tt = (tr | ((u64)tl << 32));
967
968 SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */
969 SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */
970 SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */
971
972 tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
973 dw = tl & (subRL[9] >> 32), /* FLinv(kl2) */
974 tr = subRL[7] ^ rol32(dw, 1);
975 tt = (tr | ((u64)tl << 32));
976
977 SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */
978 SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */
979 SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */
980 SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */
981 SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */
982
983 tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
984 dw = tl & (subRL[16] >> 32), /* FL(kl3) */
985 tr = subRL[18] ^ rol32(dw, 1);
986 tt = (tr | ((u64)tl << 32));
987
988 SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */
989 SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */
990 SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */
991
992 tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
993 dw = tl & (subRL[17] >> 32), /* FLinv(kl4) */
994 tr = subRL[15] ^ rol32(dw, 1);
995 tt = (tr | ((u64)tl << 32));
996
997 SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */
998 SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */
999 SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */
1000 SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */
1001 SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */
1002
1003 if (max == 24) {
1004 SET_SUBKEY_LR(23, subRL[22]); /* round 18 */
1005 SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */
1006 } else {
1007 tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]);
1008 dw = tl & (subRL[24] >> 32), /* FL(kl5) */
1009 tr = subRL[26] ^ rol32(dw, 1);
1010 tt = (tr | ((u64)tl << 32));
1011
1012 SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */
1013 SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */
1014 SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */
1015
1016 tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
1017 dw = tl & (subRL[25] >> 32), /* FLinv(kl6) */
1018 tr = subRL[23] ^ rol32(dw, 1);
1019 tt = (tr | ((u64)tl << 32));
1020
1021 SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */
1022 SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */
1023 SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */
1024 SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */
1025 SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */
1026 SET_SUBKEY_LR(31, subRL[30]); /* round 24 */
1027 SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */
1028 }
1029}
1030
1031static void camellia_setup128(const unsigned char *key, u64 *subkey)
1032{
1033 u64 kl, kr, ww;
1034 u64 subRL[26];
1035
1036 /**
1037 * k == kl || kr (|| is concatenation)
1038 */
1039 kl = get_unaligned_be64(key);
1040 kr = get_unaligned_be64(key + 8);
1041
1042 /* generate KL dependent subkeys */
1043 /* kw1 */
1044 subRL[0] = kl;
1045 /* kw2 */
1046 subRL[1] = kr;
1047
1048 /* rotation left shift 15bit */
1049 ROLDQ(kl, kr, 15);
1050
1051 /* k3 */
1052 subRL[4] = kl;
1053 /* k4 */
1054 subRL[5] = kr;
1055
1056 /* rotation left shift 15+30bit */
1057 ROLDQ(kl, kr, 30);
1058
1059 /* k7 */
1060 subRL[10] = kl;
1061 /* k8 */
1062 subRL[11] = kr;
1063
1064 /* rotation left shift 15+30+15bit */
1065 ROLDQ(kl, kr, 15);
1066
1067 /* k10 */
1068 subRL[13] = kr;
1069 /* rotation left shift 15+30+15+17 bit */
1070 ROLDQ(kl, kr, 17);
1071
1072 /* kl3 */
1073 subRL[16] = kl;
1074 /* kl4 */
1075 subRL[17] = kr;
1076
1077 /* rotation left shift 15+30+15+17+17 bit */
1078 ROLDQ(kl, kr, 17);
1079
1080 /* k13 */
1081 subRL[18] = kl;
1082 /* k14 */
1083 subRL[19] = kr;
1084
1085 /* rotation left shift 15+30+15+17+17+17 bit */
1086 ROLDQ(kl, kr, 17);
1087
1088 /* k17 */
1089 subRL[22] = kl;
1090 /* k18 */
1091 subRL[23] = kr;
1092
1093 /* generate KA */
1094 kl = subRL[0];
1095 kr = subRL[1];
1096 CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
1097 kr ^= ww;
1098 CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
1099
1100 /* current status == (kll, klr, w0, w1) */
1101 CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
1102 kr ^= ww;
1103 CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
1104 kl ^= ww;
1105
1106 /* generate KA dependent subkeys */
1107 /* k1, k2 */
1108 subRL[2] = kl;
1109 subRL[3] = kr;
1110 ROLDQ(kl, kr, 15);
1111 /* k5,k6 */
1112 subRL[6] = kl;
1113 subRL[7] = kr;
1114 ROLDQ(kl, kr, 15);
1115 /* kl1, kl2 */
1116 subRL[8] = kl;
1117 subRL[9] = kr;
1118 ROLDQ(kl, kr, 15);
1119 /* k9 */
1120 subRL[12] = kl;
1121 ROLDQ(kl, kr, 15);
1122 /* k11, k12 */
1123 subRL[14] = kl;
1124 subRL[15] = kr;
1125 ROLDQ(kl, kr, 34);
1126 /* k15, k16 */
1127 subRL[20] = kl;
1128 subRL[21] = kr;
1129 ROLDQ(kl, kr, 17);
1130 /* kw3, kw4 */
1131 subRL[24] = kl;
1132 subRL[25] = kr;
1133
1134 camellia_setup_tail(subkey, subRL, 24);
1135}
1136
1137static void camellia_setup256(const unsigned char *key, u64 *subkey)
1138{
1139 u64 kl, kr; /* left half of key */
1140 u64 krl, krr; /* right half of key */
1141 u64 ww; /* temporary variables */
1142 u64 subRL[34];
1143
1144 /**
1145 * key = (kl || kr || krl || krr) (|| is concatenation)
1146 */
1147 kl = get_unaligned_be64(key);
1148 kr = get_unaligned_be64(key + 8);
1149 krl = get_unaligned_be64(key + 16);
1150 krr = get_unaligned_be64(key + 24);
1151
1152 /* generate KL dependent subkeys */
1153 /* kw1 */
1154 subRL[0] = kl;
1155 /* kw2 */
1156 subRL[1] = kr;
1157 ROLDQ(kl, kr, 45);
1158 /* k9 */
1159 subRL[12] = kl;
1160 /* k10 */
1161 subRL[13] = kr;
1162 ROLDQ(kl, kr, 15);
1163 /* kl3 */
1164 subRL[16] = kl;
1165 /* kl4 */
1166 subRL[17] = kr;
1167 ROLDQ(kl, kr, 17);
1168 /* k17 */
1169 subRL[22] = kl;
1170 /* k18 */
1171 subRL[23] = kr;
1172 ROLDQ(kl, kr, 34);
1173 /* k23 */
1174 subRL[30] = kl;
1175 /* k24 */
1176 subRL[31] = kr;
1177
1178 /* generate KR dependent subkeys */
1179 ROLDQ(krl, krr, 15);
1180 /* k3 */
1181 subRL[4] = krl;
1182 /* k4 */
1183 subRL[5] = krr;
1184 ROLDQ(krl, krr, 15);
1185 /* kl1 */
1186 subRL[8] = krl;
1187 /* kl2 */
1188 subRL[9] = krr;
1189 ROLDQ(krl, krr, 30);
1190 /* k13 */
1191 subRL[18] = krl;
1192 /* k14 */
1193 subRL[19] = krr;
1194 ROLDQ(krl, krr, 34);
1195 /* k19 */
1196 subRL[26] = krl;
1197 /* k20 */
1198 subRL[27] = krr;
1199 ROLDQ(krl, krr, 34);
1200
1201 /* generate KA */
1202 kl = subRL[0] ^ krl;
1203 kr = subRL[1] ^ krr;
1204
1205 CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
1206 kr ^= ww;
1207 CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
1208 kl ^= krl;
1209 CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
1210 kr ^= ww ^ krr;
1211 CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
1212 kl ^= ww;
1213
1214 /* generate KB */
1215 krl ^= kl;
1216 krr ^= kr;
1217 CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
1218 krr ^= ww;
1219 CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
1220 krl ^= ww;
1221
1222 /* generate KA dependent subkeys */
1223 ROLDQ(kl, kr, 15);
1224 /* k5 */
1225 subRL[6] = kl;
1226 /* k6 */
1227 subRL[7] = kr;
1228 ROLDQ(kl, kr, 30);
1229 /* k11 */
1230 subRL[14] = kl;
1231 /* k12 */
1232 subRL[15] = kr;
1233 /* rotation left shift 32bit */
1234 ROLDQ(kl, kr, 32);
1235 /* kl5 */
1236 subRL[24] = kl;
1237 /* kl6 */
1238 subRL[25] = kr;
1239 /* rotation left shift 17 from k11,k12 -> k21,k22 */
1240 ROLDQ(kl, kr, 17);
1241 /* k21 */
1242 subRL[28] = kl;
1243 /* k22 */
1244 subRL[29] = kr;
1245
1246 /* generate KB dependent subkeys */
1247 /* k1 */
1248 subRL[2] = krl;
1249 /* k2 */
1250 subRL[3] = krr;
1251 ROLDQ(krl, krr, 30);
1252 /* k7 */
1253 subRL[10] = krl;
1254 /* k8 */
1255 subRL[11] = krr;
1256 ROLDQ(krl, krr, 30);
1257 /* k15 */
1258 subRL[20] = krl;
1259 /* k16 */
1260 subRL[21] = krr;
1261 ROLDQ(krl, krr, 51);
1262 /* kw3 */
1263 subRL[32] = krl;
1264 /* kw4 */
1265 subRL[33] = krr;
1266
1267 camellia_setup_tail(subkey, subRL, 32);
1268}
1269
1270static void camellia_setup192(const unsigned char *key, u64 *subkey)
1271{
1272 unsigned char kk[32];
1273 u64 krl, krr;
1274
1275 memcpy(kk, key, 24);
1276 memcpy((unsigned char *)&krl, key+16, 8);
1277 krr = ~krl;
1278 memcpy(kk+24, (unsigned char *)&krr, 8);
1279 camellia_setup256(kk, subkey);
1280}
1281
1282static int __camellia_setkey(struct camellia_ctx *cctx,
1283 const unsigned char *key,
1284 unsigned int key_len, u32 *flags)
1285{
1286 if (key_len != 16 && key_len != 24 && key_len != 32) {
1287 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
1288 return -EINVAL;
1289 }
1290
1291 cctx->key_length = key_len;
1292
1293 switch (key_len) {
1294 case 16:
1295 camellia_setup128(key, cctx->key_table);
1296 break;
1297 case 24:
1298 camellia_setup192(key, cctx->key_table);
1299 break;
1300 case 32:
1301 camellia_setup256(key, cctx->key_table);
1302 break;
1303 }
1304
1305 return 0;
1306}
1307
1308static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
1309 unsigned int key_len)
1310{
1311 return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
1312 &tfm->crt_flags);
1313}
1314
1315static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
1316 void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
1317 void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
1318{
1319 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1320 unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1321 unsigned int nbytes;
1322 int err;
1323
1324 err = blkcipher_walk_virt(desc, walk);
1325
1326 while ((nbytes = walk->nbytes)) {
1327 u8 *wsrc = walk->src.virt.addr;
1328 u8 *wdst = walk->dst.virt.addr;
1329
1330 /* Process two block batch */
1331 if (nbytes >= bsize * 2) {
1332 do {
1333 fn_2way(ctx, wdst, wsrc);
1334
1335 wsrc += bsize * 2;
1336 wdst += bsize * 2;
1337 nbytes -= bsize * 2;
1338 } while (nbytes >= bsize * 2);
1339
1340 if (nbytes < bsize)
1341 goto done;
1342 }
1343
1344 /* Handle leftovers */
1345 do {
1346 fn(ctx, wdst, wsrc);
1347
1348 wsrc += bsize;
1349 wdst += bsize;
1350 nbytes -= bsize;
1351 } while (nbytes >= bsize);
1352
1353done:
1354 err = blkcipher_walk_done(desc, walk, nbytes);
1355 }
1356
1357 return err;
1358}
1359
1360static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1361 struct scatterlist *src, unsigned int nbytes)
1362{
1363 struct blkcipher_walk walk;
1364
1365 blkcipher_walk_init(&walk, dst, src, nbytes);
1366 return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
1367}
1368
1369static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1370 struct scatterlist *src, unsigned int nbytes)
1371{
1372 struct blkcipher_walk walk;
1373
1374 blkcipher_walk_init(&walk, dst, src, nbytes);
1375 return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
1376}
1377
1378static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
1379 struct blkcipher_walk *walk)
1380{
1381 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1382 unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1383 unsigned int nbytes = walk->nbytes;
1384 u128 *src = (u128 *)walk->src.virt.addr;
1385 u128 *dst = (u128 *)walk->dst.virt.addr;
1386 u128 *iv = (u128 *)walk->iv;
1387
1388 do {
1389 u128_xor(dst, src, iv);
1390 camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
1391 iv = dst;
1392
1393 src += 1;
1394 dst += 1;
1395 nbytes -= bsize;
1396 } while (nbytes >= bsize);
1397
1398 u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
1399 return nbytes;
1400}
1401
1402static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1403 struct scatterlist *src, unsigned int nbytes)
1404{
1405 struct blkcipher_walk walk;
1406 int err;
1407
1408 blkcipher_walk_init(&walk, dst, src, nbytes);
1409 err = blkcipher_walk_virt(desc, &walk);
1410
1411 while ((nbytes = walk.nbytes)) {
1412 nbytes = __cbc_encrypt(desc, &walk);
1413 err = blkcipher_walk_done(desc, &walk, nbytes);
1414 }
1415
1416 return err;
1417}
1418
1419static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
1420 struct blkcipher_walk *walk)
1421{
1422 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1423 unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1424 unsigned int nbytes = walk->nbytes;
1425 u128 *src = (u128 *)walk->src.virt.addr;
1426 u128 *dst = (u128 *)walk->dst.virt.addr;
1427 u128 ivs[2 - 1];
1428 u128 last_iv;
1429
1430 /* Start of the last block. */
1431 src += nbytes / bsize - 1;
1432 dst += nbytes / bsize - 1;
1433
1434 last_iv = *src;
1435
1436 /* Process two block batch */
1437 if (nbytes >= bsize * 2) {
1438 do {
1439 nbytes -= bsize * (2 - 1);
1440 src -= 2 - 1;
1441 dst -= 2 - 1;
1442
1443 ivs[0] = src[0];
1444
1445 camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
1446
1447 u128_xor(dst + 1, dst + 1, ivs + 0);
1448
1449 nbytes -= bsize;
1450 if (nbytes < bsize)
1451 goto done;
1452
1453 u128_xor(dst, dst, src - 1);
1454 src -= 1;
1455 dst -= 1;
1456 } while (nbytes >= bsize * 2);
1457
1458 if (nbytes < bsize)
1459 goto done;
1460 }
1461
1462 /* Handle leftovers */
1463 for (;;) {
1464 camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
1465
1466 nbytes -= bsize;
1467 if (nbytes < bsize)
1468 break;
1469
1470 u128_xor(dst, dst, src - 1);
1471 src -= 1;
1472 dst -= 1;
1473 }
1474
1475done:
1476 u128_xor(dst, dst, (u128 *)walk->iv);
1477 *(u128 *)walk->iv = last_iv;
1478
1479 return nbytes;
1480}
1481
1482static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1483 struct scatterlist *src, unsigned int nbytes)
1484{
1485 struct blkcipher_walk walk;
1486 int err;
1487
1488 blkcipher_walk_init(&walk, dst, src, nbytes);
1489 err = blkcipher_walk_virt(desc, &walk);
1490
1491 while ((nbytes = walk.nbytes)) {
1492 nbytes = __cbc_decrypt(desc, &walk);
1493 err = blkcipher_walk_done(desc, &walk, nbytes);
1494 }
1495
1496 return err;
1497}
1498
1499static inline void u128_to_be128(be128 *dst, const u128 *src)
1500{
1501 dst->a = cpu_to_be64(src->a);
1502 dst->b = cpu_to_be64(src->b);
1503}
1504
1505static inline void be128_to_u128(u128 *dst, const be128 *src)
1506{
1507 dst->a = be64_to_cpu(src->a);
1508 dst->b = be64_to_cpu(src->b);
1509}
1510
1511static inline void u128_inc(u128 *i)
1512{
1513 i->b++;
1514 if (!i->b)
1515 i->a++;
1516}
1517
1518static void ctr_crypt_final(struct blkcipher_desc *desc,
1519 struct blkcipher_walk *walk)
1520{
1521 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1522 u8 keystream[CAMELLIA_BLOCK_SIZE];
1523 u8 *src = walk->src.virt.addr;
1524 u8 *dst = walk->dst.virt.addr;
1525 unsigned int nbytes = walk->nbytes;
1526 u128 ctrblk;
1527
1528 memcpy(keystream, src, nbytes);
1529 camellia_enc_blk_xor(ctx, keystream, walk->iv);
1530 memcpy(dst, keystream, nbytes);
1531
1532 be128_to_u128(&ctrblk, (be128 *)walk->iv);
1533 u128_inc(&ctrblk);
1534 u128_to_be128((be128 *)walk->iv, &ctrblk);
1535}
1536
1537static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
1538 struct blkcipher_walk *walk)
1539{
1540 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1541 unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1542 unsigned int nbytes = walk->nbytes;
1543 u128 *src = (u128 *)walk->src.virt.addr;
1544 u128 *dst = (u128 *)walk->dst.virt.addr;
1545 u128 ctrblk;
1546 be128 ctrblocks[2];
1547
1548 be128_to_u128(&ctrblk, (be128 *)walk->iv);
1549
1550 /* Process two block batch */
1551 if (nbytes >= bsize * 2) {
1552 do {
1553 if (dst != src) {
1554 dst[0] = src[0];
1555 dst[1] = src[1];
1556 }
1557
1558 /* create ctrblks for parallel encrypt */
1559 u128_to_be128(&ctrblocks[0], &ctrblk);
1560 u128_inc(&ctrblk);
1561 u128_to_be128(&ctrblocks[1], &ctrblk);
1562 u128_inc(&ctrblk);
1563
1564 camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
1565 (u8 *)ctrblocks);
1566
1567 src += 2;
1568 dst += 2;
1569 nbytes -= bsize * 2;
1570 } while (nbytes >= bsize * 2);
1571
1572 if (nbytes < bsize)
1573 goto done;
1574 }
1575
1576 /* Handle leftovers */
1577 do {
1578 if (dst != src)
1579 *dst = *src;
1580
1581 u128_to_be128(&ctrblocks[0], &ctrblk);
1582 u128_inc(&ctrblk);
1583
1584 camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
1585
1586 src += 1;
1587 dst += 1;
1588 nbytes -= bsize;
1589 } while (nbytes >= bsize);
1590
1591done:
1592 u128_to_be128((be128 *)walk->iv, &ctrblk);
1593 return nbytes;
1594}
1595
1596static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1597 struct scatterlist *src, unsigned int nbytes)
1598{
1599 struct blkcipher_walk walk;
1600 int err;
1601
1602 blkcipher_walk_init(&walk, dst, src, nbytes);
1603 err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
1604
1605 while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
1606 nbytes = __ctr_crypt(desc, &walk);
1607 err = blkcipher_walk_done(desc, &walk, nbytes);
1608 }
1609
1610 if (walk.nbytes) {
1611 ctr_crypt_final(desc, &walk);
1612 err = blkcipher_walk_done(desc, &walk, 0);
1613 }
1614
1615 return err;
1616}
1617
1618static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
1619{
1620 const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1621 struct camellia_ctx *ctx = priv;
1622 int i;
1623
1624 while (nbytes >= 2 * bsize) {
1625 camellia_enc_blk_2way(ctx, srcdst, srcdst);
1626 srcdst += bsize * 2;
1627 nbytes -= bsize * 2;
1628 }
1629
1630 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
1631 camellia_enc_blk(ctx, srcdst, srcdst);
1632}
1633
1634static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
1635{
1636 const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1637 struct camellia_ctx *ctx = priv;
1638 int i;
1639
1640 while (nbytes >= 2 * bsize) {
1641 camellia_dec_blk_2way(ctx, srcdst, srcdst);
1642 srcdst += bsize * 2;
1643 nbytes -= bsize * 2;
1644 }
1645
1646 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
1647 camellia_dec_blk(ctx, srcdst, srcdst);
1648}
1649
1650struct camellia_lrw_ctx {
1651 struct lrw_table_ctx lrw_table;
1652 struct camellia_ctx camellia_ctx;
1653};
1654
1655static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
1656 unsigned int keylen)
1657{
1658 struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
1659 int err;
1660
1661 err = __camellia_setkey(&ctx->camellia_ctx, key,
1662 keylen - CAMELLIA_BLOCK_SIZE,
1663 &tfm->crt_flags);
1664 if (err)
1665 return err;
1666
1667 return lrw_init_table(&ctx->lrw_table,
1668 key + keylen - CAMELLIA_BLOCK_SIZE);
1669}
1670
1671static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1672 struct scatterlist *src, unsigned int nbytes)
1673{
1674 struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1675 be128 buf[2 * 4];
1676 struct lrw_crypt_req req = {
1677 .tbuf = buf,
1678 .tbuflen = sizeof(buf),
1679
1680 .table_ctx = &ctx->lrw_table,
1681 .crypt_ctx = &ctx->camellia_ctx,
1682 .crypt_fn = encrypt_callback,
1683 };
1684
1685 return lrw_crypt(desc, dst, src, nbytes, &req);
1686}
1687
1688static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1689 struct scatterlist *src, unsigned int nbytes)
1690{
1691 struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1692 be128 buf[2 * 4];
1693 struct lrw_crypt_req req = {
1694 .tbuf = buf,
1695 .tbuflen = sizeof(buf),
1696
1697 .table_ctx = &ctx->lrw_table,
1698 .crypt_ctx = &ctx->camellia_ctx,
1699 .crypt_fn = decrypt_callback,
1700 };
1701
1702 return lrw_crypt(desc, dst, src, nbytes, &req);
1703}
1704
1705static void lrw_exit_tfm(struct crypto_tfm *tfm)
1706{
1707 struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
1708
1709 lrw_free_table(&ctx->lrw_table);
1710}
1711
1712struct camellia_xts_ctx {
1713 struct camellia_ctx tweak_ctx;
1714 struct camellia_ctx crypt_ctx;
1715};
1716
1717static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
1718 unsigned int keylen)
1719{
1720 struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
1721 u32 *flags = &tfm->crt_flags;
1722 int err;
1723
1724 /* key consists of keys of equal size concatenated, therefore
1725 * the length must be even
1726 */
1727 if (keylen % 2) {
1728 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
1729 return -EINVAL;
1730 }
1731
1732 /* first half of xts-key is for crypt */
1733 err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
1734 if (err)
1735 return err;
1736
1737 /* second half of xts-key is for tweak */
1738 return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
1739 flags);
1740}
1741
1742static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1743 struct scatterlist *src, unsigned int nbytes)
1744{
1745 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1746 be128 buf[2 * 4];
1747 struct xts_crypt_req req = {
1748 .tbuf = buf,
1749 .tbuflen = sizeof(buf),
1750
1751 .tweak_ctx = &ctx->tweak_ctx,
1752 .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
1753 .crypt_ctx = &ctx->crypt_ctx,
1754 .crypt_fn = encrypt_callback,
1755 };
1756
1757 return xts_crypt(desc, dst, src, nbytes, &req);
1758}
1759
1760static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1761 struct scatterlist *src, unsigned int nbytes)
1762{
1763 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1764 be128 buf[2 * 4];
1765 struct xts_crypt_req req = {
1766 .tbuf = buf,
1767 .tbuflen = sizeof(buf),
1768
1769 .tweak_ctx = &ctx->tweak_ctx,
1770 .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
1771 .crypt_ctx = &ctx->crypt_ctx,
1772 .crypt_fn = decrypt_callback,
1773 };
1774
1775 return xts_crypt(desc, dst, src, nbytes, &req);
1776}
1777
1778static struct crypto_alg camellia_algs[6] = { {
1779 .cra_name = "camellia",
1780 .cra_driver_name = "camellia-asm",
1781 .cra_priority = 200,
1782 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
1783 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
1784 .cra_ctxsize = sizeof(struct camellia_ctx),
1785 .cra_alignmask = 0,
1786 .cra_module = THIS_MODULE,
1787 .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list),
1788 .cra_u = {
1789 .cipher = {
1790 .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
1791 .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
1792 .cia_setkey = camellia_setkey,
1793 .cia_encrypt = camellia_encrypt,
1794 .cia_decrypt = camellia_decrypt
1795 }
1796 }
1797}, {
1798 .cra_name = "ecb(camellia)",
1799 .cra_driver_name = "ecb-camellia-asm",
1800 .cra_priority = 300,
1801 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
1802 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
1803 .cra_ctxsize = sizeof(struct camellia_ctx),
1804 .cra_alignmask = 0,
1805 .cra_type = &crypto_blkcipher_type,
1806 .cra_module = THIS_MODULE,
1807 .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list),
1808 .cra_u = {
1809 .blkcipher = {
1810 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
1811 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
1812 .setkey = camellia_setkey,
1813 .encrypt = ecb_encrypt,
1814 .decrypt = ecb_decrypt,
1815 },
1816 },
1817}, {
1818 .cra_name = "cbc(camellia)",
1819 .cra_driver_name = "cbc-camellia-asm",
1820 .cra_priority = 300,
1821 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
1822 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
1823 .cra_ctxsize = sizeof(struct camellia_ctx),
1824 .cra_alignmask = 0,
1825 .cra_type = &crypto_blkcipher_type,
1826 .cra_module = THIS_MODULE,
1827 .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list),
1828 .cra_u = {
1829 .blkcipher = {
1830 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
1831 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
1832 .ivsize = CAMELLIA_BLOCK_SIZE,
1833 .setkey = camellia_setkey,
1834 .encrypt = cbc_encrypt,
1835 .decrypt = cbc_decrypt,
1836 },
1837 },
1838}, {
1839 .cra_name = "ctr(camellia)",
1840 .cra_driver_name = "ctr-camellia-asm",
1841 .cra_priority = 300,
1842 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
1843 .cra_blocksize = 1,
1844 .cra_ctxsize = sizeof(struct camellia_ctx),
1845 .cra_alignmask = 0,
1846 .cra_type = &crypto_blkcipher_type,
1847 .cra_module = THIS_MODULE,
1848 .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list),
1849 .cra_u = {
1850 .blkcipher = {
1851 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
1852 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
1853 .ivsize = CAMELLIA_BLOCK_SIZE,
1854 .setkey = camellia_setkey,
1855 .encrypt = ctr_crypt,
1856 .decrypt = ctr_crypt,
1857 },
1858 },
1859}, {
1860 .cra_name = "lrw(camellia)",
1861 .cra_driver_name = "lrw-camellia-asm",
1862 .cra_priority = 300,
1863 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
1864 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
1865 .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
1866 .cra_alignmask = 0,
1867 .cra_type = &crypto_blkcipher_type,
1868 .cra_module = THIS_MODULE,
1869 .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list),
1870 .cra_exit = lrw_exit_tfm,
1871 .cra_u = {
1872 .blkcipher = {
1873 .min_keysize = CAMELLIA_MIN_KEY_SIZE +
1874 CAMELLIA_BLOCK_SIZE,
1875 .max_keysize = CAMELLIA_MAX_KEY_SIZE +
1876 CAMELLIA_BLOCK_SIZE,
1877 .ivsize = CAMELLIA_BLOCK_SIZE,
1878 .setkey = lrw_camellia_setkey,
1879 .encrypt = lrw_encrypt,
1880 .decrypt = lrw_decrypt,
1881 },
1882 },
1883}, {
1884 .cra_name = "xts(camellia)",
1885 .cra_driver_name = "xts-camellia-asm",
1886 .cra_priority = 300,
1887 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
1888 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
1889 .cra_ctxsize = sizeof(struct camellia_xts_ctx),
1890 .cra_alignmask = 0,
1891 .cra_type = &crypto_blkcipher_type,
1892 .cra_module = THIS_MODULE,
1893 .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list),
1894 .cra_u = {
1895 .blkcipher = {
1896 .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
1897 .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
1898 .ivsize = CAMELLIA_BLOCK_SIZE,
1899 .setkey = xts_camellia_setkey,
1900 .encrypt = xts_encrypt,
1901 .decrypt = xts_decrypt,
1902 },
1903 },
1904} };
1905
1906static bool is_blacklisted_cpu(void)
1907{
1908 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
1909 return false;
1910
1911 if (boot_cpu_data.x86 == 0x0f) {
1912 /*
1913 * On Pentium 4, camellia-asm is slower than original assembler
1914 * implementation because excessive uses of 64bit rotate and
1915 * left-shifts (which are really slow on P4) needed to store and
1916 * handle 128bit block in two 64bit registers.
1917 */
1918 return true;
1919 }
1920
1921 return false;
1922}
1923
1924static int force;
1925module_param(force, int, 0);
1926MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
1927
1928static int __init init(void)
1929{
1930 if (!force && is_blacklisted_cpu()) {
1931 printk(KERN_INFO
1932 "camellia-x86_64: performance on this CPU "
1933 "would be suboptimal: disabling "
1934 "camellia-x86_64.\n");
1935 return -ENODEV;
1936 }
1937
1938 return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
1939}
1940
1941static void __exit fini(void)
1942{
1943 crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
1944}
1945
1946module_init(init);
1947module_exit(fini);
1948
1949MODULE_LICENSE("GPL");
1950MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
1951MODULE_ALIAS("camellia");
1952MODULE_ALIAS("camellia-asm");
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c
index b9d00261703c..493f959261f7 100644
--- a/arch/x86/crypto/crc32c-intel.c
+++ b/arch/x86/crypto/crc32c-intel.c
@@ -31,6 +31,7 @@
31#include <crypto/internal/hash.h> 31#include <crypto/internal/hash.h>
32 32
33#include <asm/cpufeature.h> 33#include <asm/cpufeature.h>
34#include <asm/cpu_device_id.h>
34 35
35#define CHKSUM_BLOCK_SIZE 1 36#define CHKSUM_BLOCK_SIZE 1
36#define CHKSUM_DIGEST_SIZE 4 37#define CHKSUM_DIGEST_SIZE 4
@@ -173,13 +174,17 @@ static struct shash_alg alg = {
173 } 174 }
174}; 175};
175 176
177static const struct x86_cpu_id crc32c_cpu_id[] = {
178 X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
179 {}
180};
181MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
176 182
177static int __init crc32c_intel_mod_init(void) 183static int __init crc32c_intel_mod_init(void)
178{ 184{
179 if (cpu_has_xmm4_2) 185 if (!x86_match_cpu(crc32c_cpu_id))
180 return crypto_register_shash(&alg);
181 else
182 return -ENODEV; 186 return -ENODEV;
187 return crypto_register_shash(&alg);
183} 188}
184 189
185static void __exit crc32c_intel_mod_fini(void) 190static void __exit crc32c_intel_mod_fini(void)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 976aa64d9a20..b4bf0a63b520 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -20,6 +20,7 @@
20#include <crypto/gf128mul.h> 20#include <crypto/gf128mul.h>
21#include <crypto/internal/hash.h> 21#include <crypto/internal/hash.h>
22#include <asm/i387.h> 22#include <asm/i387.h>
23#include <asm/cpu_device_id.h>
23 24
24#define GHASH_BLOCK_SIZE 16 25#define GHASH_BLOCK_SIZE 16
25#define GHASH_DIGEST_SIZE 16 26#define GHASH_DIGEST_SIZE 16
@@ -294,15 +295,18 @@ static struct ahash_alg ghash_async_alg = {
294 }, 295 },
295}; 296};
296 297
298static const struct x86_cpu_id pcmul_cpu_id[] = {
299 X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
300 {}
301};
302MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
303
297static int __init ghash_pclmulqdqni_mod_init(void) 304static int __init ghash_pclmulqdqni_mod_init(void)
298{ 305{
299 int err; 306 int err;
300 307
301 if (!cpu_has_pclmulqdq) { 308 if (!x86_match_cpu(pcmul_cpu_id))
302 printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not"
303 " detected.\n");
304 return -ENODEV; 309 return -ENODEV;
305 }
306 310
307 err = crypto_register_shash(&ghash_alg); 311 err = crypto_register_shash(&ghash_alg);
308 if (err) 312 if (err)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index 4e37677ca851..c00053d42f99 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -463,23 +463,20 @@
463 pand x0, x4; \ 463 pand x0, x4; \
464 pxor x2, x4; 464 pxor x2, x4;
465 465
466#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ 466#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
467 movdqa x2, t3; \
468 movdqa x0, t1; \
469 unpcklps x3, t3; \
470 movdqa x0, t2; \ 467 movdqa x0, t2; \
471 unpcklps x1, t1; \ 468 punpckldq x1, x0; \
472 unpckhps x1, t2; \ 469 punpckhdq x1, t2; \
473 movdqa t3, x1; \ 470 movdqa x2, t1; \
474 unpckhps x3, x2; \ 471 punpckhdq x3, x2; \
475 movdqa t1, x0; \ 472 punpckldq x3, t1; \
476 movhlps t1, x1; \ 473 movdqa x0, x1; \
477 movdqa t2, t1; \ 474 punpcklqdq t1, x0; \
478 movlhps t3, x0; \ 475 punpckhqdq t1, x1; \
479 movlhps x2, t1; \ 476 movdqa t2, x3; \
480 movhlps t2, x2; \ 477 punpcklqdq x2, t2; \
481 movdqa x2, x3; \ 478 punpckhqdq x2, x3; \
482 movdqa t1, x2; 479 movdqa t2, x2;
483 480
484#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ 481#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
485 movdqu (0*4*4)(in), x0; \ 482 movdqu (0*4*4)(in), x0; \
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 7f24a1540821..3ee1ff04d3e9 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -585,23 +585,20 @@
585 get_key(i, 1, RK1); \ 585 get_key(i, 1, RK1); \
586 SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ 586 SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
587 587
588#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ 588#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
589 movdqa x2, t3; \
590 movdqa x0, t1; \
591 unpcklps x3, t3; \
592 movdqa x0, t2; \ 589 movdqa x0, t2; \
593 unpcklps x1, t1; \ 590 punpckldq x1, x0; \
594 unpckhps x1, t2; \ 591 punpckhdq x1, t2; \
595 movdqa t3, x1; \ 592 movdqa x2, t1; \
596 unpckhps x3, x2; \ 593 punpckhdq x3, x2; \
597 movdqa t1, x0; \ 594 punpckldq x3, t1; \
598 movhlps t1, x1; \ 595 movdqa x0, x1; \
599 movdqa t2, t1; \ 596 punpcklqdq t1, x0; \
600 movlhps t3, x0; \ 597 punpckhqdq t1, x1; \
601 movlhps x2, t1; \ 598 movdqa t2, x3; \
602 movhlps t2, x2; \ 599 punpcklqdq x2, t2; \
603 movdqa x2, x3; \ 600 punpckhqdq x2, x3; \
604 movdqa t1, x2; 601 movdqa t2, x2;
605 602
606#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ 603#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
607 movdqu (0*4*4)(in), x0; \ 604 movdqu (0*4*4)(in), x0; \
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 7955a9b76b91..4b21be85e0a1 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -145,28 +145,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
145 return ecb_crypt(desc, &walk, false); 145 return ecb_crypt(desc, &walk, false);
146} 146}
147 147
148static struct crypto_alg blk_ecb_alg = {
149 .cra_name = "__ecb-serpent-sse2",
150 .cra_driver_name = "__driver-ecb-serpent-sse2",
151 .cra_priority = 0,
152 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
153 .cra_blocksize = SERPENT_BLOCK_SIZE,
154 .cra_ctxsize = sizeof(struct serpent_ctx),
155 .cra_alignmask = 0,
156 .cra_type = &crypto_blkcipher_type,
157 .cra_module = THIS_MODULE,
158 .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
159 .cra_u = {
160 .blkcipher = {
161 .min_keysize = SERPENT_MIN_KEY_SIZE,
162 .max_keysize = SERPENT_MAX_KEY_SIZE,
163 .setkey = serpent_setkey,
164 .encrypt = ecb_encrypt,
165 .decrypt = ecb_decrypt,
166 },
167 },
168};
169
170static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, 148static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
171 struct blkcipher_walk *walk) 149 struct blkcipher_walk *walk)
172{ 150{
@@ -295,28 +273,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
295 return err; 273 return err;
296} 274}
297 275
298static struct crypto_alg blk_cbc_alg = {
299 .cra_name = "__cbc-serpent-sse2",
300 .cra_driver_name = "__driver-cbc-serpent-sse2",
301 .cra_priority = 0,
302 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
303 .cra_blocksize = SERPENT_BLOCK_SIZE,
304 .cra_ctxsize = sizeof(struct serpent_ctx),
305 .cra_alignmask = 0,
306 .cra_type = &crypto_blkcipher_type,
307 .cra_module = THIS_MODULE,
308 .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
309 .cra_u = {
310 .blkcipher = {
311 .min_keysize = SERPENT_MIN_KEY_SIZE,
312 .max_keysize = SERPENT_MAX_KEY_SIZE,
313 .setkey = serpent_setkey,
314 .encrypt = cbc_encrypt,
315 .decrypt = cbc_decrypt,
316 },
317 },
318};
319
320static inline void u128_to_be128(be128 *dst, const u128 *src) 276static inline void u128_to_be128(be128 *dst, const u128 *src)
321{ 277{
322 dst->a = cpu_to_be64(src->a); 278 dst->a = cpu_to_be64(src->a);
@@ -439,29 +395,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
439 return err; 395 return err;
440} 396}
441 397
442static struct crypto_alg blk_ctr_alg = {
443 .cra_name = "__ctr-serpent-sse2",
444 .cra_driver_name = "__driver-ctr-serpent-sse2",
445 .cra_priority = 0,
446 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
447 .cra_blocksize = 1,
448 .cra_ctxsize = sizeof(struct serpent_ctx),
449 .cra_alignmask = 0,
450 .cra_type = &crypto_blkcipher_type,
451 .cra_module = THIS_MODULE,
452 .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
453 .cra_u = {
454 .blkcipher = {
455 .min_keysize = SERPENT_MIN_KEY_SIZE,
456 .max_keysize = SERPENT_MAX_KEY_SIZE,
457 .ivsize = SERPENT_BLOCK_SIZE,
458 .setkey = serpent_setkey,
459 .encrypt = ctr_crypt,
460 .decrypt = ctr_crypt,
461 },
462 },
463};
464
465struct crypt_priv { 398struct crypt_priv {
466 struct serpent_ctx *ctx; 399 struct serpent_ctx *ctx;
467 bool fpu_enabled; 400 bool fpu_enabled;
@@ -580,32 +513,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
580 lrw_free_table(&ctx->lrw_table); 513 lrw_free_table(&ctx->lrw_table);
581} 514}
582 515
583static struct crypto_alg blk_lrw_alg = {
584 .cra_name = "__lrw-serpent-sse2",
585 .cra_driver_name = "__driver-lrw-serpent-sse2",
586 .cra_priority = 0,
587 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
588 .cra_blocksize = SERPENT_BLOCK_SIZE,
589 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
590 .cra_alignmask = 0,
591 .cra_type = &crypto_blkcipher_type,
592 .cra_module = THIS_MODULE,
593 .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list),
594 .cra_exit = lrw_exit_tfm,
595 .cra_u = {
596 .blkcipher = {
597 .min_keysize = SERPENT_MIN_KEY_SIZE +
598 SERPENT_BLOCK_SIZE,
599 .max_keysize = SERPENT_MAX_KEY_SIZE +
600 SERPENT_BLOCK_SIZE,
601 .ivsize = SERPENT_BLOCK_SIZE,
602 .setkey = lrw_serpent_setkey,
603 .encrypt = lrw_encrypt,
604 .decrypt = lrw_decrypt,
605 },
606 },
607};
608
609struct serpent_xts_ctx { 516struct serpent_xts_ctx {
610 struct serpent_ctx tweak_ctx; 517 struct serpent_ctx tweak_ctx;
611 struct serpent_ctx crypt_ctx; 518 struct serpent_ctx crypt_ctx;
@@ -689,29 +596,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
689 return ret; 596 return ret;
690} 597}
691 598
692static struct crypto_alg blk_xts_alg = {
693 .cra_name = "__xts-serpent-sse2",
694 .cra_driver_name = "__driver-xts-serpent-sse2",
695 .cra_priority = 0,
696 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
697 .cra_blocksize = SERPENT_BLOCK_SIZE,
698 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
699 .cra_alignmask = 0,
700 .cra_type = &crypto_blkcipher_type,
701 .cra_module = THIS_MODULE,
702 .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list),
703 .cra_u = {
704 .blkcipher = {
705 .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
706 .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
707 .ivsize = SERPENT_BLOCK_SIZE,
708 .setkey = xts_serpent_setkey,
709 .encrypt = xts_encrypt,
710 .decrypt = xts_decrypt,
711 },
712 },
713};
714
715static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, 599static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
716 unsigned int key_len) 600 unsigned int key_len)
717{ 601{
@@ -792,28 +676,133 @@ static void ablk_exit(struct crypto_tfm *tfm)
792 cryptd_free_ablkcipher(ctx->cryptd_tfm); 676 cryptd_free_ablkcipher(ctx->cryptd_tfm);
793} 677}
794 678
795static void ablk_init_common(struct crypto_tfm *tfm, 679static int ablk_init(struct crypto_tfm *tfm)
796 struct cryptd_ablkcipher *cryptd_tfm)
797{ 680{
798 struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); 681 struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
682 struct cryptd_ablkcipher *cryptd_tfm;
683 char drv_name[CRYPTO_MAX_ALG_NAME];
684
685 snprintf(drv_name, sizeof(drv_name), "__driver-%s",
686 crypto_tfm_alg_driver_name(tfm));
687
688 cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
689 if (IS_ERR(cryptd_tfm))
690 return PTR_ERR(cryptd_tfm);
799 691
800 ctx->cryptd_tfm = cryptd_tfm; 692 ctx->cryptd_tfm = cryptd_tfm;
801 tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + 693 tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
802 crypto_ablkcipher_reqsize(&cryptd_tfm->base); 694 crypto_ablkcipher_reqsize(&cryptd_tfm->base);
803}
804
805static int ablk_ecb_init(struct crypto_tfm *tfm)
806{
807 struct cryptd_ablkcipher *cryptd_tfm;
808 695
809 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0);
810 if (IS_ERR(cryptd_tfm))
811 return PTR_ERR(cryptd_tfm);
812 ablk_init_common(tfm, cryptd_tfm);
813 return 0; 696 return 0;
814} 697}
815 698
816static struct crypto_alg ablk_ecb_alg = { 699static struct crypto_alg serpent_algs[10] = { {
700 .cra_name = "__ecb-serpent-sse2",
701 .cra_driver_name = "__driver-ecb-serpent-sse2",
702 .cra_priority = 0,
703 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
704 .cra_blocksize = SERPENT_BLOCK_SIZE,
705 .cra_ctxsize = sizeof(struct serpent_ctx),
706 .cra_alignmask = 0,
707 .cra_type = &crypto_blkcipher_type,
708 .cra_module = THIS_MODULE,
709 .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list),
710 .cra_u = {
711 .blkcipher = {
712 .min_keysize = SERPENT_MIN_KEY_SIZE,
713 .max_keysize = SERPENT_MAX_KEY_SIZE,
714 .setkey = serpent_setkey,
715 .encrypt = ecb_encrypt,
716 .decrypt = ecb_decrypt,
717 },
718 },
719}, {
720 .cra_name = "__cbc-serpent-sse2",
721 .cra_driver_name = "__driver-cbc-serpent-sse2",
722 .cra_priority = 0,
723 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
724 .cra_blocksize = SERPENT_BLOCK_SIZE,
725 .cra_ctxsize = sizeof(struct serpent_ctx),
726 .cra_alignmask = 0,
727 .cra_type = &crypto_blkcipher_type,
728 .cra_module = THIS_MODULE,
729 .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list),
730 .cra_u = {
731 .blkcipher = {
732 .min_keysize = SERPENT_MIN_KEY_SIZE,
733 .max_keysize = SERPENT_MAX_KEY_SIZE,
734 .setkey = serpent_setkey,
735 .encrypt = cbc_encrypt,
736 .decrypt = cbc_decrypt,
737 },
738 },
739}, {
740 .cra_name = "__ctr-serpent-sse2",
741 .cra_driver_name = "__driver-ctr-serpent-sse2",
742 .cra_priority = 0,
743 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
744 .cra_blocksize = 1,
745 .cra_ctxsize = sizeof(struct serpent_ctx),
746 .cra_alignmask = 0,
747 .cra_type = &crypto_blkcipher_type,
748 .cra_module = THIS_MODULE,
749 .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list),
750 .cra_u = {
751 .blkcipher = {
752 .min_keysize = SERPENT_MIN_KEY_SIZE,
753 .max_keysize = SERPENT_MAX_KEY_SIZE,
754 .ivsize = SERPENT_BLOCK_SIZE,
755 .setkey = serpent_setkey,
756 .encrypt = ctr_crypt,
757 .decrypt = ctr_crypt,
758 },
759 },
760}, {
761 .cra_name = "__lrw-serpent-sse2",
762 .cra_driver_name = "__driver-lrw-serpent-sse2",
763 .cra_priority = 0,
764 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
765 .cra_blocksize = SERPENT_BLOCK_SIZE,
766 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
767 .cra_alignmask = 0,
768 .cra_type = &crypto_blkcipher_type,
769 .cra_module = THIS_MODULE,
770 .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list),
771 .cra_exit = lrw_exit_tfm,
772 .cra_u = {
773 .blkcipher = {
774 .min_keysize = SERPENT_MIN_KEY_SIZE +
775 SERPENT_BLOCK_SIZE,
776 .max_keysize = SERPENT_MAX_KEY_SIZE +
777 SERPENT_BLOCK_SIZE,
778 .ivsize = SERPENT_BLOCK_SIZE,
779 .setkey = lrw_serpent_setkey,
780 .encrypt = lrw_encrypt,
781 .decrypt = lrw_decrypt,
782 },
783 },
784}, {
785 .cra_name = "__xts-serpent-sse2",
786 .cra_driver_name = "__driver-xts-serpent-sse2",
787 .cra_priority = 0,
788 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
789 .cra_blocksize = SERPENT_BLOCK_SIZE,
790 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
791 .cra_alignmask = 0,
792 .cra_type = &crypto_blkcipher_type,
793 .cra_module = THIS_MODULE,
794 .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list),
795 .cra_u = {
796 .blkcipher = {
797 .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
798 .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
799 .ivsize = SERPENT_BLOCK_SIZE,
800 .setkey = xts_serpent_setkey,
801 .encrypt = xts_encrypt,
802 .decrypt = xts_decrypt,
803 },
804 },
805}, {
817 .cra_name = "ecb(serpent)", 806 .cra_name = "ecb(serpent)",
818 .cra_driver_name = "ecb-serpent-sse2", 807 .cra_driver_name = "ecb-serpent-sse2",
819 .cra_priority = 400, 808 .cra_priority = 400,
@@ -823,8 +812,8 @@ static struct crypto_alg ablk_ecb_alg = {
823 .cra_alignmask = 0, 812 .cra_alignmask = 0,
824 .cra_type = &crypto_ablkcipher_type, 813 .cra_type = &crypto_ablkcipher_type,
825 .cra_module = THIS_MODULE, 814 .cra_module = THIS_MODULE,
826 .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), 815 .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list),
827 .cra_init = ablk_ecb_init, 816 .cra_init = ablk_init,
828 .cra_exit = ablk_exit, 817 .cra_exit = ablk_exit,
829 .cra_u = { 818 .cra_u = {
830 .ablkcipher = { 819 .ablkcipher = {
@@ -835,20 +824,7 @@ static struct crypto_alg ablk_ecb_alg = {
835 .decrypt = ablk_decrypt, 824 .decrypt = ablk_decrypt,
836 }, 825 },
837 }, 826 },
838}; 827}, {
839
840static int ablk_cbc_init(struct crypto_tfm *tfm)
841{
842 struct cryptd_ablkcipher *cryptd_tfm;
843
844 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0);
845 if (IS_ERR(cryptd_tfm))
846 return PTR_ERR(cryptd_tfm);
847 ablk_init_common(tfm, cryptd_tfm);
848 return 0;
849}
850
851static struct crypto_alg ablk_cbc_alg = {
852 .cra_name = "cbc(serpent)", 828 .cra_name = "cbc(serpent)",
853 .cra_driver_name = "cbc-serpent-sse2", 829 .cra_driver_name = "cbc-serpent-sse2",
854 .cra_priority = 400, 830 .cra_priority = 400,
@@ -858,8 +834,8 @@ static struct crypto_alg ablk_cbc_alg = {
858 .cra_alignmask = 0, 834 .cra_alignmask = 0,
859 .cra_type = &crypto_ablkcipher_type, 835 .cra_type = &crypto_ablkcipher_type,
860 .cra_module = THIS_MODULE, 836 .cra_module = THIS_MODULE,
861 .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), 837 .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list),
862 .cra_init = ablk_cbc_init, 838 .cra_init = ablk_init,
863 .cra_exit = ablk_exit, 839 .cra_exit = ablk_exit,
864 .cra_u = { 840 .cra_u = {
865 .ablkcipher = { 841 .ablkcipher = {
@@ -871,20 +847,7 @@ static struct crypto_alg ablk_cbc_alg = {
871 .decrypt = ablk_decrypt, 847 .decrypt = ablk_decrypt,
872 }, 848 },
873 }, 849 },
874}; 850}, {
875
876static int ablk_ctr_init(struct crypto_tfm *tfm)
877{
878 struct cryptd_ablkcipher *cryptd_tfm;
879
880 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0);
881 if (IS_ERR(cryptd_tfm))
882 return PTR_ERR(cryptd_tfm);
883 ablk_init_common(tfm, cryptd_tfm);
884 return 0;
885}
886
887static struct crypto_alg ablk_ctr_alg = {
888 .cra_name = "ctr(serpent)", 851 .cra_name = "ctr(serpent)",
889 .cra_driver_name = "ctr-serpent-sse2", 852 .cra_driver_name = "ctr-serpent-sse2",
890 .cra_priority = 400, 853 .cra_priority = 400,
@@ -894,8 +857,8 @@ static struct crypto_alg ablk_ctr_alg = {
894 .cra_alignmask = 0, 857 .cra_alignmask = 0,
895 .cra_type = &crypto_ablkcipher_type, 858 .cra_type = &crypto_ablkcipher_type,
896 .cra_module = THIS_MODULE, 859 .cra_module = THIS_MODULE,
897 .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), 860 .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list),
898 .cra_init = ablk_ctr_init, 861 .cra_init = ablk_init,
899 .cra_exit = ablk_exit, 862 .cra_exit = ablk_exit,
900 .cra_u = { 863 .cra_u = {
901 .ablkcipher = { 864 .ablkcipher = {
@@ -908,20 +871,7 @@ static struct crypto_alg ablk_ctr_alg = {
908 .geniv = "chainiv", 871 .geniv = "chainiv",
909 }, 872 },
910 }, 873 },
911}; 874}, {
912
913static int ablk_lrw_init(struct crypto_tfm *tfm)
914{
915 struct cryptd_ablkcipher *cryptd_tfm;
916
917 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0);
918 if (IS_ERR(cryptd_tfm))
919 return PTR_ERR(cryptd_tfm);
920 ablk_init_common(tfm, cryptd_tfm);
921 return 0;
922}
923
924static struct crypto_alg ablk_lrw_alg = {
925 .cra_name = "lrw(serpent)", 875 .cra_name = "lrw(serpent)",
926 .cra_driver_name = "lrw-serpent-sse2", 876 .cra_driver_name = "lrw-serpent-sse2",
927 .cra_priority = 400, 877 .cra_priority = 400,
@@ -931,8 +881,8 @@ static struct crypto_alg ablk_lrw_alg = {
931 .cra_alignmask = 0, 881 .cra_alignmask = 0,
932 .cra_type = &crypto_ablkcipher_type, 882 .cra_type = &crypto_ablkcipher_type,
933 .cra_module = THIS_MODULE, 883 .cra_module = THIS_MODULE,
934 .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), 884 .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list),
935 .cra_init = ablk_lrw_init, 885 .cra_init = ablk_init,
936 .cra_exit = ablk_exit, 886 .cra_exit = ablk_exit,
937 .cra_u = { 887 .cra_u = {
938 .ablkcipher = { 888 .ablkcipher = {
@@ -946,20 +896,7 @@ static struct crypto_alg ablk_lrw_alg = {
946 .decrypt = ablk_decrypt, 896 .decrypt = ablk_decrypt,
947 }, 897 },
948 }, 898 },
949}; 899}, {
950
951static int ablk_xts_init(struct crypto_tfm *tfm)
952{
953 struct cryptd_ablkcipher *cryptd_tfm;
954
955 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0);
956 if (IS_ERR(cryptd_tfm))
957 return PTR_ERR(cryptd_tfm);
958 ablk_init_common(tfm, cryptd_tfm);
959 return 0;
960}
961
962static struct crypto_alg ablk_xts_alg = {
963 .cra_name = "xts(serpent)", 900 .cra_name = "xts(serpent)",
964 .cra_driver_name = "xts-serpent-sse2", 901 .cra_driver_name = "xts-serpent-sse2",
965 .cra_priority = 400, 902 .cra_priority = 400,
@@ -969,8 +906,8 @@ static struct crypto_alg ablk_xts_alg = {
969 .cra_alignmask = 0, 906 .cra_alignmask = 0,
970 .cra_type = &crypto_ablkcipher_type, 907 .cra_type = &crypto_ablkcipher_type,
971 .cra_module = THIS_MODULE, 908 .cra_module = THIS_MODULE,
972 .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), 909 .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list),
973 .cra_init = ablk_xts_init, 910 .cra_init = ablk_init,
974 .cra_exit = ablk_exit, 911 .cra_exit = ablk_exit,
975 .cra_u = { 912 .cra_u = {
976 .ablkcipher = { 913 .ablkcipher = {
@@ -982,84 +919,21 @@ static struct crypto_alg ablk_xts_alg = {
982 .decrypt = ablk_decrypt, 919 .decrypt = ablk_decrypt,
983 }, 920 },
984 }, 921 },
985}; 922} };
986 923
987static int __init serpent_sse2_init(void) 924static int __init serpent_sse2_init(void)
988{ 925{
989 int err;
990
991 if (!cpu_has_xmm2) { 926 if (!cpu_has_xmm2) {
992 printk(KERN_INFO "SSE2 instructions are not detected.\n"); 927 printk(KERN_INFO "SSE2 instructions are not detected.\n");
993 return -ENODEV; 928 return -ENODEV;
994 } 929 }
995 930
996 err = crypto_register_alg(&blk_ecb_alg); 931 return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
997 if (err)
998 goto blk_ecb_err;
999 err = crypto_register_alg(&blk_cbc_alg);
1000 if (err)
1001 goto blk_cbc_err;
1002 err = crypto_register_alg(&blk_ctr_alg);
1003 if (err)
1004 goto blk_ctr_err;
1005 err = crypto_register_alg(&ablk_ecb_alg);
1006 if (err)
1007 goto ablk_ecb_err;
1008 err = crypto_register_alg(&ablk_cbc_alg);
1009 if (err)
1010 goto ablk_cbc_err;
1011 err = crypto_register_alg(&ablk_ctr_alg);
1012 if (err)
1013 goto ablk_ctr_err;
1014 err = crypto_register_alg(&blk_lrw_alg);
1015 if (err)
1016 goto blk_lrw_err;
1017 err = crypto_register_alg(&ablk_lrw_alg);
1018 if (err)
1019 goto ablk_lrw_err;
1020 err = crypto_register_alg(&blk_xts_alg);
1021 if (err)
1022 goto blk_xts_err;
1023 err = crypto_register_alg(&ablk_xts_alg);
1024 if (err)
1025 goto ablk_xts_err;
1026 return err;
1027
1028 crypto_unregister_alg(&ablk_xts_alg);
1029ablk_xts_err:
1030 crypto_unregister_alg(&blk_xts_alg);
1031blk_xts_err:
1032 crypto_unregister_alg(&ablk_lrw_alg);
1033ablk_lrw_err:
1034 crypto_unregister_alg(&blk_lrw_alg);
1035blk_lrw_err:
1036 crypto_unregister_alg(&ablk_ctr_alg);
1037ablk_ctr_err:
1038 crypto_unregister_alg(&ablk_cbc_alg);
1039ablk_cbc_err:
1040 crypto_unregister_alg(&ablk_ecb_alg);
1041ablk_ecb_err:
1042 crypto_unregister_alg(&blk_ctr_alg);
1043blk_ctr_err:
1044 crypto_unregister_alg(&blk_cbc_alg);
1045blk_cbc_err:
1046 crypto_unregister_alg(&blk_ecb_alg);
1047blk_ecb_err:
1048 return err;
1049} 932}
1050 933
1051static void __exit serpent_sse2_exit(void) 934static void __exit serpent_sse2_exit(void)
1052{ 935{
1053 crypto_unregister_alg(&ablk_xts_alg); 936 crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
1054 crypto_unregister_alg(&blk_xts_alg);
1055 crypto_unregister_alg(&ablk_lrw_alg);
1056 crypto_unregister_alg(&blk_lrw_alg);
1057 crypto_unregister_alg(&ablk_ctr_alg);
1058 crypto_unregister_alg(&ablk_cbc_alg);
1059 crypto_unregister_alg(&ablk_ecb_alg);
1060 crypto_unregister_alg(&blk_ctr_alg);
1061 crypto_unregister_alg(&blk_cbc_alg);
1062 crypto_unregister_alg(&blk_ecb_alg);
1063} 937}
1064 938
1065module_init(serpent_sse2_init); 939module_init(serpent_sse2_init);
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index dc6b3fb817fc..359ae084275c 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -68,7 +68,7 @@ static struct crypto_alg alg = {
68 .cra_flags = CRYPTO_ALG_TYPE_CIPHER, 68 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
69 .cra_blocksize = TF_BLOCK_SIZE, 69 .cra_blocksize = TF_BLOCK_SIZE,
70 .cra_ctxsize = sizeof(struct twofish_ctx), 70 .cra_ctxsize = sizeof(struct twofish_ctx),
71 .cra_alignmask = 3, 71 .cra_alignmask = 0,
72 .cra_module = THIS_MODULE, 72 .cra_module = THIS_MODULE,
73 .cra_list = LIST_HEAD_INIT(alg.cra_list), 73 .cra_list = LIST_HEAD_INIT(alg.cra_list),
74 .cra_u = { 74 .cra_u = {
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 7fee8c152f93..922ab24cce31 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -25,6 +25,7 @@
25 * 25 *
26 */ 26 */
27 27
28#include <asm/processor.h>
28#include <linux/crypto.h> 29#include <linux/crypto.h>
29#include <linux/init.h> 30#include <linux/init.h>
30#include <linux/module.h> 31#include <linux/module.h>
@@ -122,28 +123,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
122 return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); 123 return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
123} 124}
124 125
125static struct crypto_alg blk_ecb_alg = {
126 .cra_name = "ecb(twofish)",
127 .cra_driver_name = "ecb-twofish-3way",
128 .cra_priority = 300,
129 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
130 .cra_blocksize = TF_BLOCK_SIZE,
131 .cra_ctxsize = sizeof(struct twofish_ctx),
132 .cra_alignmask = 0,
133 .cra_type = &crypto_blkcipher_type,
134 .cra_module = THIS_MODULE,
135 .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
136 .cra_u = {
137 .blkcipher = {
138 .min_keysize = TF_MIN_KEY_SIZE,
139 .max_keysize = TF_MAX_KEY_SIZE,
140 .setkey = twofish_setkey,
141 .encrypt = ecb_encrypt,
142 .decrypt = ecb_decrypt,
143 },
144 },
145};
146
147static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, 126static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
148 struct blkcipher_walk *walk) 127 struct blkcipher_walk *walk)
149{ 128{
@@ -267,29 +246,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
267 return err; 246 return err;
268} 247}
269 248
270static struct crypto_alg blk_cbc_alg = {
271 .cra_name = "cbc(twofish)",
272 .cra_driver_name = "cbc-twofish-3way",
273 .cra_priority = 300,
274 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
275 .cra_blocksize = TF_BLOCK_SIZE,
276 .cra_ctxsize = sizeof(struct twofish_ctx),
277 .cra_alignmask = 0,
278 .cra_type = &crypto_blkcipher_type,
279 .cra_module = THIS_MODULE,
280 .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
281 .cra_u = {
282 .blkcipher = {
283 .min_keysize = TF_MIN_KEY_SIZE,
284 .max_keysize = TF_MAX_KEY_SIZE,
285 .ivsize = TF_BLOCK_SIZE,
286 .setkey = twofish_setkey,
287 .encrypt = cbc_encrypt,
288 .decrypt = cbc_decrypt,
289 },
290 },
291};
292
293static inline void u128_to_be128(be128 *dst, const u128 *src) 249static inline void u128_to_be128(be128 *dst, const u128 *src)
294{ 250{
295 dst->a = cpu_to_be64(src->a); 251 dst->a = cpu_to_be64(src->a);
@@ -411,29 +367,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
411 return err; 367 return err;
412} 368}
413 369
414static struct crypto_alg blk_ctr_alg = {
415 .cra_name = "ctr(twofish)",
416 .cra_driver_name = "ctr-twofish-3way",
417 .cra_priority = 300,
418 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
419 .cra_blocksize = 1,
420 .cra_ctxsize = sizeof(struct twofish_ctx),
421 .cra_alignmask = 0,
422 .cra_type = &crypto_blkcipher_type,
423 .cra_module = THIS_MODULE,
424 .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
425 .cra_u = {
426 .blkcipher = {
427 .min_keysize = TF_MIN_KEY_SIZE,
428 .max_keysize = TF_MAX_KEY_SIZE,
429 .ivsize = TF_BLOCK_SIZE,
430 .setkey = twofish_setkey,
431 .encrypt = ctr_crypt,
432 .decrypt = ctr_crypt,
433 },
434 },
435};
436
437static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) 370static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
438{ 371{
439 const unsigned int bsize = TF_BLOCK_SIZE; 372 const unsigned int bsize = TF_BLOCK_SIZE;
@@ -524,30 +457,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
524 lrw_free_table(&ctx->lrw_table); 457 lrw_free_table(&ctx->lrw_table);
525} 458}
526 459
527static struct crypto_alg blk_lrw_alg = {
528 .cra_name = "lrw(twofish)",
529 .cra_driver_name = "lrw-twofish-3way",
530 .cra_priority = 300,
531 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
532 .cra_blocksize = TF_BLOCK_SIZE,
533 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
534 .cra_alignmask = 0,
535 .cra_type = &crypto_blkcipher_type,
536 .cra_module = THIS_MODULE,
537 .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list),
538 .cra_exit = lrw_exit_tfm,
539 .cra_u = {
540 .blkcipher = {
541 .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
542 .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
543 .ivsize = TF_BLOCK_SIZE,
544 .setkey = lrw_twofish_setkey,
545 .encrypt = lrw_encrypt,
546 .decrypt = lrw_decrypt,
547 },
548 },
549};
550
551struct twofish_xts_ctx { 460struct twofish_xts_ctx {
552 struct twofish_ctx tweak_ctx; 461 struct twofish_ctx tweak_ctx;
553 struct twofish_ctx crypt_ctx; 462 struct twofish_ctx crypt_ctx;
@@ -614,7 +523,91 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
614 return xts_crypt(desc, dst, src, nbytes, &req); 523 return xts_crypt(desc, dst, src, nbytes, &req);
615} 524}
616 525
617static struct crypto_alg blk_xts_alg = { 526static struct crypto_alg tf_algs[5] = { {
527 .cra_name = "ecb(twofish)",
528 .cra_driver_name = "ecb-twofish-3way",
529 .cra_priority = 300,
530 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
531 .cra_blocksize = TF_BLOCK_SIZE,
532 .cra_ctxsize = sizeof(struct twofish_ctx),
533 .cra_alignmask = 0,
534 .cra_type = &crypto_blkcipher_type,
535 .cra_module = THIS_MODULE,
536 .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list),
537 .cra_u = {
538 .blkcipher = {
539 .min_keysize = TF_MIN_KEY_SIZE,
540 .max_keysize = TF_MAX_KEY_SIZE,
541 .setkey = twofish_setkey,
542 .encrypt = ecb_encrypt,
543 .decrypt = ecb_decrypt,
544 },
545 },
546}, {
547 .cra_name = "cbc(twofish)",
548 .cra_driver_name = "cbc-twofish-3way",
549 .cra_priority = 300,
550 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
551 .cra_blocksize = TF_BLOCK_SIZE,
552 .cra_ctxsize = sizeof(struct twofish_ctx),
553 .cra_alignmask = 0,
554 .cra_type = &crypto_blkcipher_type,
555 .cra_module = THIS_MODULE,
556 .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list),
557 .cra_u = {
558 .blkcipher = {
559 .min_keysize = TF_MIN_KEY_SIZE,
560 .max_keysize = TF_MAX_KEY_SIZE,
561 .ivsize = TF_BLOCK_SIZE,
562 .setkey = twofish_setkey,
563 .encrypt = cbc_encrypt,
564 .decrypt = cbc_decrypt,
565 },
566 },
567}, {
568 .cra_name = "ctr(twofish)",
569 .cra_driver_name = "ctr-twofish-3way",
570 .cra_priority = 300,
571 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
572 .cra_blocksize = 1,
573 .cra_ctxsize = sizeof(struct twofish_ctx),
574 .cra_alignmask = 0,
575 .cra_type = &crypto_blkcipher_type,
576 .cra_module = THIS_MODULE,
577 .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list),
578 .cra_u = {
579 .blkcipher = {
580 .min_keysize = TF_MIN_KEY_SIZE,
581 .max_keysize = TF_MAX_KEY_SIZE,
582 .ivsize = TF_BLOCK_SIZE,
583 .setkey = twofish_setkey,
584 .encrypt = ctr_crypt,
585 .decrypt = ctr_crypt,
586 },
587 },
588}, {
589 .cra_name = "lrw(twofish)",
590 .cra_driver_name = "lrw-twofish-3way",
591 .cra_priority = 300,
592 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
593 .cra_blocksize = TF_BLOCK_SIZE,
594 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
595 .cra_alignmask = 0,
596 .cra_type = &crypto_blkcipher_type,
597 .cra_module = THIS_MODULE,
598 .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
599 .cra_exit = lrw_exit_tfm,
600 .cra_u = {
601 .blkcipher = {
602 .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
603 .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
604 .ivsize = TF_BLOCK_SIZE,
605 .setkey = lrw_twofish_setkey,
606 .encrypt = lrw_encrypt,
607 .decrypt = lrw_decrypt,
608 },
609 },
610}, {
618 .cra_name = "xts(twofish)", 611 .cra_name = "xts(twofish)",
619 .cra_driver_name = "xts-twofish-3way", 612 .cra_driver_name = "xts-twofish-3way",
620 .cra_priority = 300, 613 .cra_priority = 300,
@@ -624,7 +617,7 @@ static struct crypto_alg blk_xts_alg = {
624 .cra_alignmask = 0, 617 .cra_alignmask = 0,
625 .cra_type = &crypto_blkcipher_type, 618 .cra_type = &crypto_blkcipher_type,
626 .cra_module = THIS_MODULE, 619 .cra_module = THIS_MODULE,
627 .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list), 620 .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list),
628 .cra_u = { 621 .cra_u = {
629 .blkcipher = { 622 .blkcipher = {
630 .min_keysize = TF_MIN_KEY_SIZE * 2, 623 .min_keysize = TF_MIN_KEY_SIZE * 2,
@@ -635,50 +628,62 @@ static struct crypto_alg blk_xts_alg = {
635 .decrypt = xts_decrypt, 628 .decrypt = xts_decrypt,
636 }, 629 },
637 }, 630 },
638}; 631} };
639 632
640int __init init(void) 633static bool is_blacklisted_cpu(void)
641{ 634{
642 int err; 635 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
636 return false;
637
638 if (boot_cpu_data.x86 == 0x06 &&
639 (boot_cpu_data.x86_model == 0x1c ||
640 boot_cpu_data.x86_model == 0x26 ||
641 boot_cpu_data.x86_model == 0x36)) {
642 /*
643 * On Atom, twofish-3way is slower than original assembler
644 * implementation. Twofish-3way trades off some performance in
645 * storing blocks in 64bit registers to allow three blocks to
646 * be processed parallel. Parallel operation then allows gaining
647 * more performance than was trade off, on out-of-order CPUs.
648 * However Atom does not benefit from this parallellism and
649 * should be blacklisted.
650 */
651 return true;
652 }
643 653
644 err = crypto_register_alg(&blk_ecb_alg); 654 if (boot_cpu_data.x86 == 0x0f) {
645 if (err) 655 /*
646 goto ecb_err; 656 * On Pentium 4, twofish-3way is slower than original assembler
647 err = crypto_register_alg(&blk_cbc_alg); 657 * implementation because excessive uses of 64bit rotate and
648 if (err) 658 * left-shifts (which are really slow on P4) needed to store and
649 goto cbc_err; 659 * handle 128bit block in two 64bit registers.
650 err = crypto_register_alg(&blk_ctr_alg); 660 */
651 if (err) 661 return true;
652 goto ctr_err; 662 }
653 err = crypto_register_alg(&blk_lrw_alg); 663
654 if (err) 664 return false;
655 goto blk_lrw_err; 665}
656 err = crypto_register_alg(&blk_xts_alg); 666
657 if (err) 667static int force;
658 goto blk_xts_err; 668module_param(force, int, 0);
659 669MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
660 return 0; 670
661 671static int __init init(void)
662 crypto_unregister_alg(&blk_xts_alg); 672{
663blk_xts_err: 673 if (!force && is_blacklisted_cpu()) {
664 crypto_unregister_alg(&blk_lrw_alg); 674 printk(KERN_INFO
665blk_lrw_err: 675 "twofish-x86_64-3way: performance on this CPU "
666 crypto_unregister_alg(&blk_ctr_alg); 676 "would be suboptimal: disabling "
667ctr_err: 677 "twofish-x86_64-3way.\n");
668 crypto_unregister_alg(&blk_cbc_alg); 678 return -ENODEV;
669cbc_err: 679 }
670 crypto_unregister_alg(&blk_ecb_alg); 680
671ecb_err: 681 return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
672 return err;
673} 682}
674 683
675void __exit fini(void) 684static void __exit fini(void)
676{ 685{
677 crypto_unregister_alg(&blk_xts_alg); 686 crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
678 crypto_unregister_alg(&blk_lrw_alg);
679 crypto_unregister_alg(&blk_ctr_alg);
680 crypto_unregister_alg(&blk_cbc_alg);
681 crypto_unregister_alg(&blk_ecb_alg);
682} 687}
683 688
684module_init(init); 689module_init(init);
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index fd843877e841..d511d951a052 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -26,7 +26,6 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28 28
29#include <asm/system.h>
30#include <asm/uaccess.h> 29#include <asm/uaccess.h>
31#include <asm/pgalloc.h> 30#include <asm/pgalloc.h>
32#include <asm/cacheflush.h> 31#include <asm/cacheflush.h>
@@ -315,8 +314,14 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
315 current->mm->free_area_cache = TASK_UNMAPPED_BASE; 314 current->mm->free_area_cache = TASK_UNMAPPED_BASE;
316 current->mm->cached_hole_size = 0; 315 current->mm->cached_hole_size = 0;
317 316
317 retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
318 if (retval < 0) {
319 /* Someone check-me: is this error path enough? */
320 send_sig(SIGKILL, current, 0);
321 return retval;
322 }
323
318 install_exec_creds(bprm); 324 install_exec_creds(bprm);
319 current->flags &= ~PF_FORKNOEXEC;
320 325
321 if (N_MAGIC(ex) == OMAGIC) { 326 if (N_MAGIC(ex) == OMAGIC) {
322 unsigned long text_addr, map_size; 327 unsigned long text_addr, map_size;
@@ -410,13 +415,6 @@ beyond_if:
410 415
411 set_brk(current->mm->start_brk, current->mm->brk); 416 set_brk(current->mm->start_brk, current->mm->brk);
412 417
413 retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
414 if (retval < 0) {
415 /* Someone check-me: is this error path enough? */
416 send_sig(SIGKILL, current, 0);
417 return retval;
418 }
419
420 current->mm->start_stack = 418 current->mm->start_stack =
421 (unsigned long)create_aout_tables((char __user *)bprm->p, bprm); 419 (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
422 /* start thread */ 420 /* start thread */
@@ -519,7 +517,8 @@ out:
519 517
520static int __init init_aout_binfmt(void) 518static int __init init_aout_binfmt(void)
521{ 519{
522 return register_binfmt(&aout_format); 520 register_binfmt(&aout_format);
521 return 0;
523} 522}
524 523
525static void __exit exit_aout_binfmt(void) 524static void __exit exit_aout_binfmt(void)
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 45b4fdd4e1da..a69245ba27e3 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -22,6 +22,7 @@
22#include <asm/ucontext.h> 22#include <asm/ucontext.h>
23#include <asm/uaccess.h> 23#include <asm/uaccess.h>
24#include <asm/i387.h> 24#include <asm/i387.h>
25#include <asm/fpu-internal.h>
25#include <asm/ptrace.h> 26#include <asm/ptrace.h>
26#include <asm/ia32_unistd.h> 27#include <asm/ia32_unistd.h>
27#include <asm/user32.h> 28#include <asm/user32.h>
@@ -37,7 +38,7 @@
37int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) 38int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
38{ 39{
39 int err = 0; 40 int err = 0;
40 bool ia32 = !is_ia32_task(); 41 bool ia32 = is_ia32_task();
41 42
42 if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) 43 if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
43 return -EFAULT; 44 return -EFAULT;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 37ad100a2210..49331bedc158 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -145,6 +145,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
145 */ 145 */
146#define ASM_OUTPUT2(a...) a 146#define ASM_OUTPUT2(a...) a
147 147
148/*
149 * use this macro if you need clobbers but no inputs in
150 * alternative_{input,io,call}()
151 */
152#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
153
148struct paravirt_patch_site; 154struct paravirt_patch_site;
149#ifdef CONFIG_PARAVIRT 155#ifdef CONFIG_PARAVIRT
150void apply_paravirt(struct paravirt_patch_site *start, 156void apply_paravirt(struct paravirt_patch_site *start,
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3ab9bdd87e79..d85410171260 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -11,7 +11,6 @@
11#include <linux/atomic.h> 11#include <linux/atomic.h>
12#include <asm/fixmap.h> 12#include <asm/fixmap.h>
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/system.h>
15#include <asm/msr.h> 14#include <asm/msr.h>
16 15
17#define ARCH_APICTIMER_STOPS_ON_C3 1 16#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -288,6 +287,7 @@ struct apic {
288 287
289 int (*probe)(void); 288 int (*probe)(void);
290 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); 289 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
290 int (*apic_id_valid)(int apicid);
291 int (*apic_id_registered)(void); 291 int (*apic_id_registered)(void);
292 292
293 u32 irq_delivery_mode; 293 u32 irq_delivery_mode;
@@ -532,6 +532,11 @@ static inline unsigned int read_apic_id(void)
532 return apic->get_apic_id(reg); 532 return apic->get_apic_id(reg);
533} 533}
534 534
535static inline int default_apic_id_valid(int apicid)
536{
537 return (apicid < 255);
538}
539
535extern void default_setup_apic_routing(void); 540extern void default_setup_apic_routing(void);
536 541
537extern struct apic apic_noop; 542extern struct apic apic_noop;
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index fa13f0ec2874..198119910da5 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,13 +14,52 @@ typedef struct {
14 14
15#define ATOMIC64_INIT(val) { (val) } 15#define ATOMIC64_INIT(val) { (val) }
16 16
17#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
18#ifndef ATOMIC64_EXPORT
19#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
20#else
21#define ATOMIC64_DECL_ONE(sym) __ATOMIC64_DECL(sym); \
22 ATOMIC64_EXPORT(atomic64_##sym)
23#endif
24
17#ifdef CONFIG_X86_CMPXCHG64 25#ifdef CONFIG_X86_CMPXCHG64
18#define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" 26#define __alternative_atomic64(f, g, out, in...) \
27 asm volatile("call %P[func]" \
28 : out : [func] "i" (atomic64_##g##_cx8), ## in)
29
30#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
19#else 31#else
20#define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) 32#define __alternative_atomic64(f, g, out, in...) \
33 alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
34 X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
35
36#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
37 ATOMIC64_DECL_ONE(sym##_386)
38
39ATOMIC64_DECL_ONE(add_386);
40ATOMIC64_DECL_ONE(sub_386);
41ATOMIC64_DECL_ONE(inc_386);
42ATOMIC64_DECL_ONE(dec_386);
21#endif 43#endif
22 44
23#define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) 45#define alternative_atomic64(f, out, in...) \
46 __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
47
48ATOMIC64_DECL(read);
49ATOMIC64_DECL(set);
50ATOMIC64_DECL(xchg);
51ATOMIC64_DECL(add_return);
52ATOMIC64_DECL(sub_return);
53ATOMIC64_DECL(inc_return);
54ATOMIC64_DECL(dec_return);
55ATOMIC64_DECL(dec_if_positive);
56ATOMIC64_DECL(inc_not_zero);
57ATOMIC64_DECL(add_unless);
58
59#undef ATOMIC64_DECL
60#undef ATOMIC64_DECL_ONE
61#undef __ATOMIC64_DECL
62#undef ATOMIC64_EXPORT
24 63
25/** 64/**
26 * atomic64_cmpxchg - cmpxchg atomic64 variable 65 * atomic64_cmpxchg - cmpxchg atomic64 variable
@@ -50,11 +89,9 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
50 long long o; 89 long long o;
51 unsigned high = (unsigned)(n >> 32); 90 unsigned high = (unsigned)(n >> 32);
52 unsigned low = (unsigned)n; 91 unsigned low = (unsigned)n;
53 asm volatile(ATOMIC64_ALTERNATIVE(xchg) 92 alternative_atomic64(xchg, "=&A" (o),
54 : "=A" (o), "+b" (low), "+c" (high) 93 "S" (v), "b" (low), "c" (high)
55 : "S" (v) 94 : "memory");
56 : "memory"
57 );
58 return o; 95 return o;
59} 96}
60 97
@@ -69,11 +106,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
69{ 106{
70 unsigned high = (unsigned)(i >> 32); 107 unsigned high = (unsigned)(i >> 32);
71 unsigned low = (unsigned)i; 108 unsigned low = (unsigned)i;
72 asm volatile(ATOMIC64_ALTERNATIVE(set) 109 alternative_atomic64(set, /* no output */,
73 : "+b" (low), "+c" (high) 110 "S" (v), "b" (low), "c" (high)
74 : "S" (v) 111 : "eax", "edx", "memory");
75 : "eax", "edx", "memory"
76 );
77} 112}
78 113
79/** 114/**
@@ -85,10 +120,7 @@ static inline void atomic64_set(atomic64_t *v, long long i)
85static inline long long atomic64_read(const atomic64_t *v) 120static inline long long atomic64_read(const atomic64_t *v)
86{ 121{
87 long long r; 122 long long r;
88 asm volatile(ATOMIC64_ALTERNATIVE(read) 123 alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
89 : "=A" (r), "+c" (v)
90 : : "memory"
91 );
92 return r; 124 return r;
93 } 125 }
94 126
@@ -101,10 +133,9 @@ static inline long long atomic64_read(const atomic64_t *v)
101 */ 133 */
102static inline long long atomic64_add_return(long long i, atomic64_t *v) 134static inline long long atomic64_add_return(long long i, atomic64_t *v)
103{ 135{
104 asm volatile(ATOMIC64_ALTERNATIVE(add_return) 136 alternative_atomic64(add_return,
105 : "+A" (i), "+c" (v) 137 ASM_OUTPUT2("+A" (i), "+c" (v)),
106 : : "memory" 138 ASM_NO_INPUT_CLOBBER("memory"));
107 );
108 return i; 139 return i;
109} 140}
110 141
@@ -113,32 +144,25 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
113 */ 144 */
114static inline long long atomic64_sub_return(long long i, atomic64_t *v) 145static inline long long atomic64_sub_return(long long i, atomic64_t *v)
115{ 146{
116 asm volatile(ATOMIC64_ALTERNATIVE(sub_return) 147 alternative_atomic64(sub_return,
117 : "+A" (i), "+c" (v) 148 ASM_OUTPUT2("+A" (i), "+c" (v)),
118 : : "memory" 149 ASM_NO_INPUT_CLOBBER("memory"));
119 );
120 return i; 150 return i;
121} 151}
122 152
123static inline long long atomic64_inc_return(atomic64_t *v) 153static inline long long atomic64_inc_return(atomic64_t *v)
124{ 154{
125 long long a; 155 long long a;
126 asm volatile(ATOMIC64_ALTERNATIVE(inc_return) 156 alternative_atomic64(inc_return, "=&A" (a),
127 : "=A" (a) 157 "S" (v) : "memory", "ecx");
128 : "S" (v)
129 : "memory", "ecx"
130 );
131 return a; 158 return a;
132} 159}
133 160
134static inline long long atomic64_dec_return(atomic64_t *v) 161static inline long long atomic64_dec_return(atomic64_t *v)
135{ 162{
136 long long a; 163 long long a;
137 asm volatile(ATOMIC64_ALTERNATIVE(dec_return) 164 alternative_atomic64(dec_return, "=&A" (a),
138 : "=A" (a) 165 "S" (v) : "memory", "ecx");
139 : "S" (v)
140 : "memory", "ecx"
141 );
142 return a; 166 return a;
143} 167}
144 168
@@ -151,10 +175,9 @@ static inline long long atomic64_dec_return(atomic64_t *v)
151 */ 175 */
152static inline long long atomic64_add(long long i, atomic64_t *v) 176static inline long long atomic64_add(long long i, atomic64_t *v)
153{ 177{
154 asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) 178 __alternative_atomic64(add, add_return,
155 : "+A" (i), "+c" (v) 179 ASM_OUTPUT2("+A" (i), "+c" (v)),
156 : : "memory" 180 ASM_NO_INPUT_CLOBBER("memory"));
157 );
158 return i; 181 return i;
159} 182}
160 183
@@ -167,10 +190,9 @@ static inline long long atomic64_add(long long i, atomic64_t *v)
167 */ 190 */
168static inline long long atomic64_sub(long long i, atomic64_t *v) 191static inline long long atomic64_sub(long long i, atomic64_t *v)
169{ 192{
170 asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) 193 __alternative_atomic64(sub, sub_return,
171 : "+A" (i), "+c" (v) 194 ASM_OUTPUT2("+A" (i), "+c" (v)),
172 : : "memory" 195 ASM_NO_INPUT_CLOBBER("memory"));
173 );
174 return i; 196 return i;
175} 197}
176 198
@@ -196,10 +218,8 @@ static inline int atomic64_sub_and_test(long long i, atomic64_t *v)
196 */ 218 */
197static inline void atomic64_inc(atomic64_t *v) 219static inline void atomic64_inc(atomic64_t *v)
198{ 220{
199 asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) 221 __alternative_atomic64(inc, inc_return, /* no output */,
200 : : "S" (v) 222 "S" (v) : "memory", "eax", "ecx", "edx");
201 : "memory", "eax", "ecx", "edx"
202 );
203} 223}
204 224
205/** 225/**
@@ -210,10 +230,8 @@ static inline void atomic64_inc(atomic64_t *v)
210 */ 230 */
211static inline void atomic64_dec(atomic64_t *v) 231static inline void atomic64_dec(atomic64_t *v)
212{ 232{
213 asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) 233 __alternative_atomic64(dec, dec_return, /* no output */,
214 : : "S" (v) 234 "S" (v) : "memory", "eax", "ecx", "edx");
215 : "memory", "eax", "ecx", "edx"
216 );
217} 235}
218 236
219/** 237/**
@@ -263,15 +281,15 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v)
263 * @u: ...unless v is equal to u. 281 * @u: ...unless v is equal to u.
264 * 282 *
265 * Atomically adds @a to @v, so long as it was not @u. 283 * Atomically adds @a to @v, so long as it was not @u.
266 * Returns the old value of @v. 284 * Returns non-zero if the add was done, zero otherwise.
267 */ 285 */
268static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) 286static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
269{ 287{
270 unsigned low = (unsigned)u; 288 unsigned low = (unsigned)u;
271 unsigned high = (unsigned)(u >> 32); 289 unsigned high = (unsigned)(u >> 32);
272 asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" 290 alternative_atomic64(add_unless,
273 : "+A" (a), "+c" (v), "+S" (low), "+D" (high) 291 ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
274 : : "memory"); 292 "S" (v) : "memory");
275 return (int)a; 293 return (int)a;
276} 294}
277 295
@@ -279,26 +297,20 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
279static inline int atomic64_inc_not_zero(atomic64_t *v) 297static inline int atomic64_inc_not_zero(atomic64_t *v)
280{ 298{
281 int r; 299 int r;
282 asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) 300 alternative_atomic64(inc_not_zero, "=&a" (r),
283 : "=a" (r) 301 "S" (v) : "ecx", "edx", "memory");
284 : "S" (v)
285 : "ecx", "edx", "memory"
286 );
287 return r; 302 return r;
288} 303}
289 304
290static inline long long atomic64_dec_if_positive(atomic64_t *v) 305static inline long long atomic64_dec_if_positive(atomic64_t *v)
291{ 306{
292 long long r; 307 long long r;
293 asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) 308 alternative_atomic64(dec_if_positive, "=&A" (r),
294 : "=A" (r) 309 "S" (v) : "ecx", "memory");
295 : "S" (v)
296 : "ecx", "memory"
297 );
298 return r; 310 return r;
299} 311}
300 312
301#undef ATOMIC64_ALTERNATIVE 313#undef alternative_atomic64
302#undef ATOMIC64_ALTERNATIVE_ 314#undef __alternative_atomic64
303 315
304#endif /* _ASM_X86_ATOMIC64_32_H */ 316#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/asm/auxvec.h
index 1316b4c35425..77203ac352de 100644
--- a/arch/x86/include/asm/auxvec.h
+++ b/arch/x86/include/asm/auxvec.h
@@ -9,4 +9,11 @@
9#endif 9#endif
10#define AT_SYSINFO_EHDR 33 10#define AT_SYSINFO_EHDR 33
11 11
12/* entries in ARCH_DLINFO: */
13#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
14# define AT_VECTOR_SIZE_ARCH 2
15#else /* else it's non-compat x86-64 */
16# define AT_VECTOR_SIZE_ARCH 1
17#endif
18
12#endif /* _ASM_X86_AUXVEC_H */ 19#endif /* _ASM_X86_AUXVEC_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
new file mode 100644
index 000000000000..c6cd358a1eec
--- /dev/null
+++ b/arch/x86/include/asm/barrier.h
@@ -0,0 +1,116 @@
1#ifndef _ASM_X86_BARRIER_H
2#define _ASM_X86_BARRIER_H
3
4#include <asm/alternative.h>
5#include <asm/nops.h>
6
7/*
8 * Force strict CPU ordering.
9 * And yes, this is required on UP too when we're talking
10 * to devices.
11 */
12
13#ifdef CONFIG_X86_32
14/*
15 * Some non-Intel clones support out of order store. wmb() ceases to be a
16 * nop for these.
17 */
18#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
19#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
20#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
21#else
22#define mb() asm volatile("mfence":::"memory")
23#define rmb() asm volatile("lfence":::"memory")
24#define wmb() asm volatile("sfence" ::: "memory")
25#endif
26
27/**
28 * read_barrier_depends - Flush all pending reads that subsequents reads
29 * depend on.
30 *
31 * No data-dependent reads from memory-like regions are ever reordered
32 * over this barrier. All reads preceding this primitive are guaranteed
33 * to access memory (but not necessarily other CPUs' caches) before any
34 * reads following this primitive that depend on the data return by
35 * any of the preceding reads. This primitive is much lighter weight than
36 * rmb() on most CPUs, and is never heavier weight than is
37 * rmb().
38 *
39 * These ordering constraints are respected by both the local CPU
40 * and the compiler.
41 *
42 * Ordering is not guaranteed by anything other than these primitives,
43 * not even by data dependencies. See the documentation for
44 * memory_barrier() for examples and URLs to more information.
45 *
46 * For example, the following code would force ordering (the initial
47 * value of "a" is zero, "b" is one, and "p" is "&a"):
48 *
49 * <programlisting>
50 * CPU 0 CPU 1
51 *
52 * b = 2;
53 * memory_barrier();
54 * p = &b; q = p;
55 * read_barrier_depends();
56 * d = *q;
57 * </programlisting>
58 *
59 * because the read of "*q" depends on the read of "p" and these
60 * two reads are separated by a read_barrier_depends(). However,
61 * the following code, with the same initial values for "a" and "b":
62 *
63 * <programlisting>
64 * CPU 0 CPU 1
65 *
66 * a = 2;
67 * memory_barrier();
68 * b = 3; y = b;
69 * read_barrier_depends();
70 * x = a;
71 * </programlisting>
72 *
73 * does not enforce ordering, since there is no data dependency between
74 * the read of "a" and the read of "b". Therefore, on some CPUs, such
75 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
76 * in cases like this where there are no data dependencies.
77 **/
78
79#define read_barrier_depends() do { } while (0)
80
81#ifdef CONFIG_SMP
82#define smp_mb() mb()
83#ifdef CONFIG_X86_PPRO_FENCE
84# define smp_rmb() rmb()
85#else
86# define smp_rmb() barrier()
87#endif
88#ifdef CONFIG_X86_OOSTORE
89# define smp_wmb() wmb()
90#else
91# define smp_wmb() barrier()
92#endif
93#define smp_read_barrier_depends() read_barrier_depends()
94#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
95#else
96#define smp_mb() barrier()
97#define smp_rmb() barrier()
98#define smp_wmb() barrier()
99#define smp_read_barrier_depends() do { } while (0)
100#define set_mb(var, value) do { var = value; barrier(); } while (0)
101#endif
102
103/*
104 * Stop RDTSC speculation. This is needed when you need to use RDTSC
105 * (or get_cycles or vread that possibly accesses the TSC) in a defined
106 * code region.
107 *
108 * (Could use an alternative three way for this if there was one.)
109 */
110static __always_inline void rdtsc_barrier(void)
111{
112 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
113 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
114}
115
116#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index f654d1bb17fb..11e1152222d0 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -36,4 +36,8 @@ do { \
36#endif /* !CONFIG_BUG */ 36#endif /* !CONFIG_BUG */
37 37
38#include <asm-generic/bug.h> 38#include <asm-generic/bug.h>
39
40
41extern void show_regs_common(void);
42
39#endif /* _ASM_X86_BUG_H */ 43#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 4e12668711e5..9863ee3747da 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -3,6 +3,7 @@
3 3
4/* Caches aren't brain-dead on the intel. */ 4/* Caches aren't brain-dead on the intel. */
5#include <asm-generic/cacheflush.h> 5#include <asm-generic/cacheflush.h>
6#include <asm/special_insns.h>
6 7
7#ifdef CONFIG_X86_PAT 8#ifdef CONFIG_X86_PAT
8/* 9/*
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
new file mode 100644
index 000000000000..ff501e511d91
--- /dev/null
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -0,0 +1,13 @@
1#ifndef _CPU_DEVICE_ID
2#define _CPU_DEVICE_ID 1
3
4/*
5 * Declare drivers belonging to specific x86 CPUs
6 * Similar in spirit to pci_device_id and related PCI functions
7 */
8
9#include <linux/mod_devicetable.h>
10
11extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
12
13#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8d67d428b0f9..340ee49961a6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -177,6 +177,7 @@
177#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ 177#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
178#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ 178#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
179#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ 179#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
180#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
180 181
181/* Virtualization flags: Linux defined, word 8 */ 182/* Virtualization flags: Linux defined, word 8 */
182#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ 183#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
@@ -199,10 +200,13 @@
199/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 200/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
200#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ 201#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
201#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ 202#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
203#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
202#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ 204#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
203#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ 205#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */
204#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ 206#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */
205#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ 207#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
208#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
209#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
206 210
207#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 211#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
208 212
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index b903d5ea3941..2d91580bf228 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -78,8 +78,75 @@
78 */ 78 */
79#ifdef __KERNEL__ 79#ifdef __KERNEL__
80 80
81#include <linux/bug.h>
82
81DECLARE_PER_CPU(unsigned long, cpu_dr7); 83DECLARE_PER_CPU(unsigned long, cpu_dr7);
82 84
85#ifndef CONFIG_PARAVIRT
86/*
87 * These special macros can be used to get or set a debugging register
88 */
89#define get_debugreg(var, register) \
90 (var) = native_get_debugreg(register)
91#define set_debugreg(value, register) \
92 native_set_debugreg(register, value)
93#endif
94
95static inline unsigned long native_get_debugreg(int regno)
96{
97 unsigned long val = 0; /* Damn you, gcc! */
98
99 switch (regno) {
100 case 0:
101 asm("mov %%db0, %0" :"=r" (val));
102 break;
103 case 1:
104 asm("mov %%db1, %0" :"=r" (val));
105 break;
106 case 2:
107 asm("mov %%db2, %0" :"=r" (val));
108 break;
109 case 3:
110 asm("mov %%db3, %0" :"=r" (val));
111 break;
112 case 6:
113 asm("mov %%db6, %0" :"=r" (val));
114 break;
115 case 7:
116 asm("mov %%db7, %0" :"=r" (val));
117 break;
118 default:
119 BUG();
120 }
121 return val;
122}
123
124static inline void native_set_debugreg(int regno, unsigned long value)
125{
126 switch (regno) {
127 case 0:
128 asm("mov %0, %%db0" ::"r" (value));
129 break;
130 case 1:
131 asm("mov %0, %%db1" ::"r" (value));
132 break;
133 case 2:
134 asm("mov %0, %%db2" ::"r" (value));
135 break;
136 case 3:
137 asm("mov %0, %%db3" ::"r" (value));
138 break;
139 case 6:
140 asm("mov %0, %%db6" ::"r" (value));
141 break;
142 case 7:
143 asm("mov %0, %%db7" ::"r" (value));
144 break;
145 default:
146 BUG();
147 }
148}
149
83static inline void hw_breakpoint_disable(void) 150static inline void hw_breakpoint_disable(void)
84{ 151{
85 /* Zero the control register for HW Breakpoint */ 152 /* Zero the control register for HW Breakpoint */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index ed3065fd6314..4b4331d71935 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -59,7 +59,8 @@ extern int dma_supported(struct device *hwdev, u64 mask);
59extern int dma_set_mask(struct device *dev, u64 mask); 59extern int dma_set_mask(struct device *dev, u64 mask);
60 60
61extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, 61extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
62 dma_addr_t *dma_addr, gfp_t flag); 62 dma_addr_t *dma_addr, gfp_t flag,
63 struct dma_attrs *attrs);
63 64
64static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) 65static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
65{ 66{
@@ -111,9 +112,11 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
111 return gfp; 112 return gfp;
112} 113}
113 114
115#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
116
114static inline void * 117static inline void *
115dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 118dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
116 gfp_t gfp) 119 gfp_t gfp, struct dma_attrs *attrs)
117{ 120{
118 struct dma_map_ops *ops = get_dma_ops(dev); 121 struct dma_map_ops *ops = get_dma_ops(dev);
119 void *memory; 122 void *memory;
@@ -129,18 +132,21 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
129 if (!is_device_dma_capable(dev)) 132 if (!is_device_dma_capable(dev))
130 return NULL; 133 return NULL;
131 134
132 if (!ops->alloc_coherent) 135 if (!ops->alloc)
133 return NULL; 136 return NULL;
134 137
135 memory = ops->alloc_coherent(dev, size, dma_handle, 138 memory = ops->alloc(dev, size, dma_handle,
136 dma_alloc_coherent_gfp_flags(dev, gfp)); 139 dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
137 debug_dma_alloc_coherent(dev, size, *dma_handle, memory); 140 debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
138 141
139 return memory; 142 return memory;
140} 143}
141 144
142static inline void dma_free_coherent(struct device *dev, size_t size, 145#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
143 void *vaddr, dma_addr_t bus) 146
147static inline void dma_free_attrs(struct device *dev, size_t size,
148 void *vaddr, dma_addr_t bus,
149 struct dma_attrs *attrs)
144{ 150{
145 struct dma_map_ops *ops = get_dma_ops(dev); 151 struct dma_map_ops *ops = get_dma_ops(dev);
146 152
@@ -150,8 +156,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
150 return; 156 return;
151 157
152 debug_dma_free_coherent(dev, size, vaddr, bus); 158 debug_dma_free_coherent(dev, size, vaddr, bus);
153 if (ops->free_coherent) 159 if (ops->free)
154 ops->free_coherent(dev, size, vaddr, bus); 160 ops->free(dev, size, vaddr, bus, attrs);
155} 161}
156 162
157#endif 163#endif
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 844f735fd63a..c9dcc181d4d1 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -95,7 +95,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
95 95
96extern int add_efi_memmap; 96extern int add_efi_memmap;
97extern void efi_set_executable(efi_memory_desc_t *md, bool executable); 97extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
98extern void efi_memblock_x86_reserve_range(void); 98extern int efi_memblock_x86_reserve_range(void);
99extern void efi_call_phys_prelog(void); 99extern void efi_call_phys_prelog(void);
100extern void efi_call_phys_epilog(void); 100extern void efi_call_phys_epilog(void);
101 101
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1e40634591a4..5939f44fe0c0 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -84,7 +84,6 @@ extern unsigned int vdso_enabled;
84 (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) 84 (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
85 85
86#include <asm/processor.h> 86#include <asm/processor.h>
87#include <asm/system.h>
88 87
89#ifdef CONFIG_X86_32 88#ifdef CONFIG_X86_32
90#include <asm/desc.h> 89#include <asm/desc.h>
diff --git a/arch/x86/include/asm/exec.h b/arch/x86/include/asm/exec.h
new file mode 100644
index 000000000000..54c2e1db274a
--- /dev/null
+++ b/arch/x86/include/asm/exec.h
@@ -0,0 +1 @@
/* define arch_align_stack() here */
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
new file mode 100644
index 000000000000..4fa88154e4de
--- /dev/null
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -0,0 +1,520 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * x86-64 work by Andi Kleen 2002
8 */
9
10#ifndef _FPU_INTERNAL_H
11#define _FPU_INTERNAL_H
12
13#include <linux/kernel_stat.h>
14#include <linux/regset.h>
15#include <linux/slab.h>
16#include <asm/asm.h>
17#include <asm/cpufeature.h>
18#include <asm/processor.h>
19#include <asm/sigcontext.h>
20#include <asm/user.h>
21#include <asm/uaccess.h>
22#include <asm/xsave.h>
23
24extern unsigned int sig_xstate_size;
25extern void fpu_init(void);
26
27DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
28
29extern user_regset_active_fn fpregs_active, xfpregs_active;
30extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
31 xstateregs_get;
32extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
33 xstateregs_set;
34
35
36/*
37 * xstateregs_active == fpregs_active. Please refer to the comment
38 * at the definition of fpregs_active.
39 */
40#define xstateregs_active fpregs_active
41
42extern struct _fpx_sw_bytes fx_sw_reserved;
43#ifdef CONFIG_IA32_EMULATION
44extern unsigned int sig_xstate_ia32_size;
45extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
46struct _fpstate_ia32;
47struct _xstate_ia32;
48extern int save_i387_xstate_ia32(void __user *buf);
49extern int restore_i387_xstate_ia32(void __user *buf);
50#endif
51
52#ifdef CONFIG_MATH_EMULATION
53extern void finit_soft_fpu(struct i387_soft_struct *soft);
54#else
55static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
56#endif
57
58#define X87_FSW_ES (1 << 7) /* Exception Summary */
59
60static __always_inline __pure bool use_xsaveopt(void)
61{
62 return static_cpu_has(X86_FEATURE_XSAVEOPT);
63}
64
65static __always_inline __pure bool use_xsave(void)
66{
67 return static_cpu_has(X86_FEATURE_XSAVE);
68}
69
70static __always_inline __pure bool use_fxsr(void)
71{
72 return static_cpu_has(X86_FEATURE_FXSR);
73}
74
75extern void __sanitize_i387_state(struct task_struct *);
76
77static inline void sanitize_i387_state(struct task_struct *tsk)
78{
79 if (!use_xsaveopt())
80 return;
81 __sanitize_i387_state(tsk);
82}
83
84#ifdef CONFIG_X86_64
85static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
86{
87 int err;
88
89 /* See comment in fxsave() below. */
90#ifdef CONFIG_AS_FXSAVEQ
91 asm volatile("1: fxrstorq %[fx]\n\t"
92 "2:\n"
93 ".section .fixup,\"ax\"\n"
94 "3: movl $-1,%[err]\n"
95 " jmp 2b\n"
96 ".previous\n"
97 _ASM_EXTABLE(1b, 3b)
98 : [err] "=r" (err)
99 : [fx] "m" (*fx), "0" (0));
100#else
101 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
102 "2:\n"
103 ".section .fixup,\"ax\"\n"
104 "3: movl $-1,%[err]\n"
105 " jmp 2b\n"
106 ".previous\n"
107 _ASM_EXTABLE(1b, 3b)
108 : [err] "=r" (err)
109 : [fx] "R" (fx), "m" (*fx), "0" (0));
110#endif
111 return err;
112}
113
114static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
115{
116 int err;
117
118 /*
119 * Clear the bytes not touched by the fxsave and reserved
120 * for the SW usage.
121 */
122 err = __clear_user(&fx->sw_reserved,
123 sizeof(struct _fpx_sw_bytes));
124 if (unlikely(err))
125 return -EFAULT;
126
127 /* See comment in fxsave() below. */
128#ifdef CONFIG_AS_FXSAVEQ
129 asm volatile("1: fxsaveq %[fx]\n\t"
130 "2:\n"
131 ".section .fixup,\"ax\"\n"
132 "3: movl $-1,%[err]\n"
133 " jmp 2b\n"
134 ".previous\n"
135 _ASM_EXTABLE(1b, 3b)
136 : [err] "=r" (err), [fx] "=m" (*fx)
137 : "0" (0));
138#else
139 asm volatile("1: rex64/fxsave (%[fx])\n\t"
140 "2:\n"
141 ".section .fixup,\"ax\"\n"
142 "3: movl $-1,%[err]\n"
143 " jmp 2b\n"
144 ".previous\n"
145 _ASM_EXTABLE(1b, 3b)
146 : [err] "=r" (err), "=m" (*fx)
147 : [fx] "R" (fx), "0" (0));
148#endif
149 if (unlikely(err) &&
150 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
151 err = -EFAULT;
152 /* No need to clear here because the caller clears USED_MATH */
153 return err;
154}
155
156static inline void fpu_fxsave(struct fpu *fpu)
157{
158 /* Using "rex64; fxsave %0" is broken because, if the memory operand
159 uses any extended registers for addressing, a second REX prefix
160 will be generated (to the assembler, rex64 followed by semicolon
161 is a separate instruction), and hence the 64-bitness is lost. */
162
163#ifdef CONFIG_AS_FXSAVEQ
164 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
165 starting with gas 2.16. */
166 __asm__ __volatile__("fxsaveq %0"
167 : "=m" (fpu->state->fxsave));
168#else
169 /* Using, as a workaround, the properly prefixed form below isn't
170 accepted by any binutils version so far released, complaining that
171 the same type of prefix is used twice if an extended register is
172 needed for addressing (fix submitted to mainline 2005-11-21).
173 asm volatile("rex64/fxsave %0"
174 : "=m" (fpu->state->fxsave));
175 This, however, we can work around by forcing the compiler to select
176 an addressing mode that doesn't require extended registers. */
177 asm volatile("rex64/fxsave (%[fx])"
178 : "=m" (fpu->state->fxsave)
179 : [fx] "R" (&fpu->state->fxsave));
180#endif
181}
182
183#else /* CONFIG_X86_32 */
184
185/* perform fxrstor iff the processor has extended states, otherwise frstor */
186static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
187{
188 /*
189 * The "nop" is needed to make the instructions the same
190 * length.
191 */
192 alternative_input(
193 "nop ; frstor %1",
194 "fxrstor %1",
195 X86_FEATURE_FXSR,
196 "m" (*fx));
197
198 return 0;
199}
200
201static inline void fpu_fxsave(struct fpu *fpu)
202{
203 asm volatile("fxsave %[fx]"
204 : [fx] "=m" (fpu->state->fxsave));
205}
206
207#endif /* CONFIG_X86_64 */
208
209/*
210 * These must be called with preempt disabled. Returns
211 * 'true' if the FPU state is still intact.
212 */
213static inline int fpu_save_init(struct fpu *fpu)
214{
215 if (use_xsave()) {
216 fpu_xsave(fpu);
217
218 /*
219 * xsave header may indicate the init state of the FP.
220 */
221 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
222 return 1;
223 } else if (use_fxsr()) {
224 fpu_fxsave(fpu);
225 } else {
226 asm volatile("fnsave %[fx]; fwait"
227 : [fx] "=m" (fpu->state->fsave));
228 return 0;
229 }
230
231 /*
232 * If exceptions are pending, we need to clear them so
233 * that we don't randomly get exceptions later.
234 *
235 * FIXME! Is this perhaps only true for the old-style
236 * irq13 case? Maybe we could leave the x87 state
237 * intact otherwise?
238 */
239 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
240 asm volatile("fnclex");
241 return 0;
242 }
243 return 1;
244}
245
246static inline int __save_init_fpu(struct task_struct *tsk)
247{
248 return fpu_save_init(&tsk->thread.fpu);
249}
250
251static inline int fpu_fxrstor_checking(struct fpu *fpu)
252{
253 return fxrstor_checking(&fpu->state->fxsave);
254}
255
256static inline int fpu_restore_checking(struct fpu *fpu)
257{
258 if (use_xsave())
259 return fpu_xrstor_checking(fpu);
260 else
261 return fpu_fxrstor_checking(fpu);
262}
263
264static inline int restore_fpu_checking(struct task_struct *tsk)
265{
266 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
267 is pending. Clear the x87 state here by setting it to fixed
268 values. "m" is a random variable that should be in L1 */
269 alternative_input(
270 ASM_NOP8 ASM_NOP2,
271 "emms\n\t" /* clear stack tags */
272 "fildl %P[addr]", /* set F?P to defined value */
273 X86_FEATURE_FXSAVE_LEAK,
274 [addr] "m" (tsk->thread.fpu.has_fpu));
275
276 return fpu_restore_checking(&tsk->thread.fpu);
277}
278
279/*
280 * Software FPU state helpers. Careful: these need to
281 * be preemption protection *and* they need to be
282 * properly paired with the CR0.TS changes!
283 */
284static inline int __thread_has_fpu(struct task_struct *tsk)
285{
286 return tsk->thread.fpu.has_fpu;
287}
288
289/* Must be paired with an 'stts' after! */
290static inline void __thread_clear_has_fpu(struct task_struct *tsk)
291{
292 tsk->thread.fpu.has_fpu = 0;
293 percpu_write(fpu_owner_task, NULL);
294}
295
296/* Must be paired with a 'clts' before! */
297static inline void __thread_set_has_fpu(struct task_struct *tsk)
298{
299 tsk->thread.fpu.has_fpu = 1;
300 percpu_write(fpu_owner_task, tsk);
301}
302
303/*
304 * Encapsulate the CR0.TS handling together with the
305 * software flag.
306 *
307 * These generally need preemption protection to work,
308 * do try to avoid using these on their own.
309 */
310static inline void __thread_fpu_end(struct task_struct *tsk)
311{
312 __thread_clear_has_fpu(tsk);
313 stts();
314}
315
316static inline void __thread_fpu_begin(struct task_struct *tsk)
317{
318 clts();
319 __thread_set_has_fpu(tsk);
320}
321
322/*
323 * FPU state switching for scheduling.
324 *
325 * This is a two-stage process:
326 *
327 * - switch_fpu_prepare() saves the old state and
328 * sets the new state of the CR0.TS bit. This is
329 * done within the context of the old process.
330 *
331 * - switch_fpu_finish() restores the new state as
332 * necessary.
333 */
334typedef struct { int preload; } fpu_switch_t;
335
336/*
337 * FIXME! We could do a totally lazy restore, but we need to
338 * add a per-cpu "this was the task that last touched the FPU
339 * on this CPU" variable, and the task needs to have a "I last
340 * touched the FPU on this CPU" and check them.
341 *
342 * We don't do that yet, so "fpu_lazy_restore()" always returns
343 * false, but some day..
344 */
345static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
346{
347 return new == percpu_read_stable(fpu_owner_task) &&
348 cpu == new->thread.fpu.last_cpu;
349}
350
351static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
352{
353 fpu_switch_t fpu;
354
355 fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
356 if (__thread_has_fpu(old)) {
357 if (!__save_init_fpu(old))
358 cpu = ~0;
359 old->thread.fpu.last_cpu = cpu;
360 old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
361
362 /* Don't change CR0.TS if we just switch! */
363 if (fpu.preload) {
364 new->fpu_counter++;
365 __thread_set_has_fpu(new);
366 prefetch(new->thread.fpu.state);
367 } else
368 stts();
369 } else {
370 old->fpu_counter = 0;
371 old->thread.fpu.last_cpu = ~0;
372 if (fpu.preload) {
373 new->fpu_counter++;
374 if (fpu_lazy_restore(new, cpu))
375 fpu.preload = 0;
376 else
377 prefetch(new->thread.fpu.state);
378 __thread_fpu_begin(new);
379 }
380 }
381 return fpu;
382}
383
384/*
385 * By the time this gets called, we've already cleared CR0.TS and
386 * given the process the FPU if we are going to preload the FPU
387 * state - all we need to do is to conditionally restore the register
388 * state itself.
389 */
390static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
391{
392 if (fpu.preload) {
393 if (unlikely(restore_fpu_checking(new)))
394 __thread_fpu_end(new);
395 }
396}
397
398/*
399 * Signal frame handlers...
400 */
401extern int save_i387_xstate(void __user *buf);
402extern int restore_i387_xstate(void __user *buf);
403
404static inline void __clear_fpu(struct task_struct *tsk)
405{
406 if (__thread_has_fpu(tsk)) {
407 /* Ignore delayed exceptions from user space */
408 asm volatile("1: fwait\n"
409 "2:\n"
410 _ASM_EXTABLE(1b, 2b));
411 __thread_fpu_end(tsk);
412 }
413}
414
415/*
416 * The actual user_fpu_begin/end() functions
417 * need to be preemption-safe.
418 *
419 * NOTE! user_fpu_end() must be used only after you
420 * have saved the FP state, and user_fpu_begin() must
421 * be used only immediately before restoring it.
422 * These functions do not do any save/restore on
423 * their own.
424 */
425static inline void user_fpu_end(void)
426{
427 preempt_disable();
428 __thread_fpu_end(current);
429 preempt_enable();
430}
431
432static inline void user_fpu_begin(void)
433{
434 preempt_disable();
435 if (!user_has_fpu())
436 __thread_fpu_begin(current);
437 preempt_enable();
438}
439
440/*
441 * These disable preemption on their own and are safe
442 */
443static inline void save_init_fpu(struct task_struct *tsk)
444{
445 WARN_ON_ONCE(!__thread_has_fpu(tsk));
446 preempt_disable();
447 __save_init_fpu(tsk);
448 __thread_fpu_end(tsk);
449 preempt_enable();
450}
451
452static inline void clear_fpu(struct task_struct *tsk)
453{
454 preempt_disable();
455 __clear_fpu(tsk);
456 preempt_enable();
457}
458
459/*
460 * i387 state interaction
461 */
462static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
463{
464 if (cpu_has_fxsr) {
465 return tsk->thread.fpu.state->fxsave.cwd;
466 } else {
467 return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
468 }
469}
470
471static inline unsigned short get_fpu_swd(struct task_struct *tsk)
472{
473 if (cpu_has_fxsr) {
474 return tsk->thread.fpu.state->fxsave.swd;
475 } else {
476 return (unsigned short)tsk->thread.fpu.state->fsave.swd;
477 }
478}
479
480static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
481{
482 if (cpu_has_xmm) {
483 return tsk->thread.fpu.state->fxsave.mxcsr;
484 } else {
485 return MXCSR_DEFAULT;
486 }
487}
488
489static bool fpu_allocated(struct fpu *fpu)
490{
491 return fpu->state != NULL;
492}
493
494static inline int fpu_alloc(struct fpu *fpu)
495{
496 if (fpu_allocated(fpu))
497 return 0;
498 fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
499 if (!fpu->state)
500 return -ENOMEM;
501 WARN_ON((unsigned long)fpu->state & 15);
502 return 0;
503}
504
505static inline void fpu_free(struct fpu *fpu)
506{
507 if (fpu->state) {
508 kmem_cache_free(task_xstate_cachep, fpu->state);
509 fpu->state = NULL;
510 }
511}
512
513static inline void fpu_copy(struct fpu *dst, struct fpu *src)
514{
515 memcpy(dst->state, src->state, xstate_size);
516}
517
518extern void fpu_finit(struct fpu *fpu);
519
520#endif
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index d09bb03653f0..71ecbcba1a4e 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -9,7 +9,6 @@
9#include <asm/asm.h> 9#include <asm/asm.h>
10#include <asm/errno.h> 10#include <asm/errno.h>
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h>
13 12
14#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ 13#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
15 asm volatile("1:\t" insn "\n" \ 14 asm volatile("1:\t" insn "\n" \
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index da0b3ca815b7..382f75d735f3 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -7,7 +7,6 @@
7typedef struct { 7typedef struct {
8 unsigned int __softirq_pending; 8 unsigned int __softirq_pending;
9 unsigned int __nmi_count; /* arch dependent */ 9 unsigned int __nmi_count; /* arch dependent */
10 unsigned int irq0_irqs;
11#ifdef CONFIG_X86_LOCAL_APIC 10#ifdef CONFIG_X86_LOCAL_APIC
12 unsigned int apic_timer_irqs; /* arch dependent */ 11 unsigned int apic_timer_irqs; /* arch dependent */
13 unsigned int irq_spurious_count; 12 unsigned int irq_spurious_count;
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 3bd04022fd0c..302a323b3f67 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -61,7 +61,7 @@ void *kmap(struct page *page);
61void kunmap(struct page *page); 61void kunmap(struct page *page);
62 62
63void *kmap_atomic_prot(struct page *page, pgprot_t prot); 63void *kmap_atomic_prot(struct page *page, pgprot_t prot);
64void *__kmap_atomic(struct page *page); 64void *kmap_atomic(struct page *page);
65void __kunmap_atomic(void *kvaddr); 65void __kunmap_atomic(void *kvaddr);
66void *kmap_atomic_pfn(unsigned long pfn); 66void *kmap_atomic_pfn(unsigned long pfn);
67void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); 67void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 247904945d3f..257d9cca214f 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -13,476 +13,18 @@
13#ifndef __ASSEMBLY__ 13#ifndef __ASSEMBLY__
14 14
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/kernel_stat.h>
17#include <linux/regset.h>
18#include <linux/hardirq.h> 16#include <linux/hardirq.h>
19#include <linux/slab.h>
20#include <asm/asm.h>
21#include <asm/cpufeature.h>
22#include <asm/processor.h>
23#include <asm/sigcontext.h>
24#include <asm/user.h>
25#include <asm/uaccess.h>
26#include <asm/xsave.h>
27 17
28extern unsigned int sig_xstate_size; 18struct pt_regs;
29extern void fpu_init(void); 19struct user_i387_struct;
30extern void mxcsr_feature_mask_init(void); 20
31extern int init_fpu(struct task_struct *child); 21extern int init_fpu(struct task_struct *child);
32extern void math_state_restore(void);
33extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); 22extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
23extern void math_state_restore(void);
34 24
35DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); 25extern bool irq_fpu_usable(void);
36 26extern void kernel_fpu_begin(void);
37extern user_regset_active_fn fpregs_active, xfpregs_active; 27extern void kernel_fpu_end(void);
38extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
39 xstateregs_get;
40extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
41 xstateregs_set;
42
43/*
44 * xstateregs_active == fpregs_active. Please refer to the comment
45 * at the definition of fpregs_active.
46 */
47#define xstateregs_active fpregs_active
48
49extern struct _fpx_sw_bytes fx_sw_reserved;
50#ifdef CONFIG_IA32_EMULATION
51extern unsigned int sig_xstate_ia32_size;
52extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
53struct _fpstate_ia32;
54struct _xstate_ia32;
55extern int save_i387_xstate_ia32(void __user *buf);
56extern int restore_i387_xstate_ia32(void __user *buf);
57#endif
58
59#ifdef CONFIG_MATH_EMULATION
60extern void finit_soft_fpu(struct i387_soft_struct *soft);
61#else
62static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
63#endif
64
65#define X87_FSW_ES (1 << 7) /* Exception Summary */
66
67static __always_inline __pure bool use_xsaveopt(void)
68{
69 return static_cpu_has(X86_FEATURE_XSAVEOPT);
70}
71
72static __always_inline __pure bool use_xsave(void)
73{
74 return static_cpu_has(X86_FEATURE_XSAVE);
75}
76
77static __always_inline __pure bool use_fxsr(void)
78{
79 return static_cpu_has(X86_FEATURE_FXSR);
80}
81
82extern void __sanitize_i387_state(struct task_struct *);
83
84static inline void sanitize_i387_state(struct task_struct *tsk)
85{
86 if (!use_xsaveopt())
87 return;
88 __sanitize_i387_state(tsk);
89}
90
91#ifdef CONFIG_X86_64
92static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
93{
94 int err;
95
96 /* See comment in fxsave() below. */
97#ifdef CONFIG_AS_FXSAVEQ
98 asm volatile("1: fxrstorq %[fx]\n\t"
99 "2:\n"
100 ".section .fixup,\"ax\"\n"
101 "3: movl $-1,%[err]\n"
102 " jmp 2b\n"
103 ".previous\n"
104 _ASM_EXTABLE(1b, 3b)
105 : [err] "=r" (err)
106 : [fx] "m" (*fx), "0" (0));
107#else
108 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
109 "2:\n"
110 ".section .fixup,\"ax\"\n"
111 "3: movl $-1,%[err]\n"
112 " jmp 2b\n"
113 ".previous\n"
114 _ASM_EXTABLE(1b, 3b)
115 : [err] "=r" (err)
116 : [fx] "R" (fx), "m" (*fx), "0" (0));
117#endif
118 return err;
119}
120
121static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
122{
123 int err;
124
125 /*
126 * Clear the bytes not touched by the fxsave and reserved
127 * for the SW usage.
128 */
129 err = __clear_user(&fx->sw_reserved,
130 sizeof(struct _fpx_sw_bytes));
131 if (unlikely(err))
132 return -EFAULT;
133
134 /* See comment in fxsave() below. */
135#ifdef CONFIG_AS_FXSAVEQ
136 asm volatile("1: fxsaveq %[fx]\n\t"
137 "2:\n"
138 ".section .fixup,\"ax\"\n"
139 "3: movl $-1,%[err]\n"
140 " jmp 2b\n"
141 ".previous\n"
142 _ASM_EXTABLE(1b, 3b)
143 : [err] "=r" (err), [fx] "=m" (*fx)
144 : "0" (0));
145#else
146 asm volatile("1: rex64/fxsave (%[fx])\n\t"
147 "2:\n"
148 ".section .fixup,\"ax\"\n"
149 "3: movl $-1,%[err]\n"
150 " jmp 2b\n"
151 ".previous\n"
152 _ASM_EXTABLE(1b, 3b)
153 : [err] "=r" (err), "=m" (*fx)
154 : [fx] "R" (fx), "0" (0));
155#endif
156 if (unlikely(err) &&
157 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
158 err = -EFAULT;
159 /* No need to clear here because the caller clears USED_MATH */
160 return err;
161}
162
163static inline void fpu_fxsave(struct fpu *fpu)
164{
165 /* Using "rex64; fxsave %0" is broken because, if the memory operand
166 uses any extended registers for addressing, a second REX prefix
167 will be generated (to the assembler, rex64 followed by semicolon
168 is a separate instruction), and hence the 64-bitness is lost. */
169
170#ifdef CONFIG_AS_FXSAVEQ
171 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
172 starting with gas 2.16. */
173 __asm__ __volatile__("fxsaveq %0"
174 : "=m" (fpu->state->fxsave));
175#else
176 /* Using, as a workaround, the properly prefixed form below isn't
177 accepted by any binutils version so far released, complaining that
178 the same type of prefix is used twice if an extended register is
179 needed for addressing (fix submitted to mainline 2005-11-21).
180 asm volatile("rex64/fxsave %0"
181 : "=m" (fpu->state->fxsave));
182 This, however, we can work around by forcing the compiler to select
183 an addressing mode that doesn't require extended registers. */
184 asm volatile("rex64/fxsave (%[fx])"
185 : "=m" (fpu->state->fxsave)
186 : [fx] "R" (&fpu->state->fxsave));
187#endif
188}
189
190#else /* CONFIG_X86_32 */
191
192/* perform fxrstor iff the processor has extended states, otherwise frstor */
193static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
194{
195 /*
196 * The "nop" is needed to make the instructions the same
197 * length.
198 */
199 alternative_input(
200 "nop ; frstor %1",
201 "fxrstor %1",
202 X86_FEATURE_FXSR,
203 "m" (*fx));
204
205 return 0;
206}
207
208static inline void fpu_fxsave(struct fpu *fpu)
209{
210 asm volatile("fxsave %[fx]"
211 : [fx] "=m" (fpu->state->fxsave));
212}
213
214#endif /* CONFIG_X86_64 */
215
216/*
217 * These must be called with preempt disabled. Returns
218 * 'true' if the FPU state is still intact.
219 */
220static inline int fpu_save_init(struct fpu *fpu)
221{
222 if (use_xsave()) {
223 fpu_xsave(fpu);
224
225 /*
226 * xsave header may indicate the init state of the FP.
227 */
228 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
229 return 1;
230 } else if (use_fxsr()) {
231 fpu_fxsave(fpu);
232 } else {
233 asm volatile("fnsave %[fx]; fwait"
234 : [fx] "=m" (fpu->state->fsave));
235 return 0;
236 }
237
238 /*
239 * If exceptions are pending, we need to clear them so
240 * that we don't randomly get exceptions later.
241 *
242 * FIXME! Is this perhaps only true for the old-style
243 * irq13 case? Maybe we could leave the x87 state
244 * intact otherwise?
245 */
246 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
247 asm volatile("fnclex");
248 return 0;
249 }
250 return 1;
251}
252
253static inline int __save_init_fpu(struct task_struct *tsk)
254{
255 return fpu_save_init(&tsk->thread.fpu);
256}
257
258static inline int fpu_fxrstor_checking(struct fpu *fpu)
259{
260 return fxrstor_checking(&fpu->state->fxsave);
261}
262
263static inline int fpu_restore_checking(struct fpu *fpu)
264{
265 if (use_xsave())
266 return fpu_xrstor_checking(fpu);
267 else
268 return fpu_fxrstor_checking(fpu);
269}
270
271static inline int restore_fpu_checking(struct task_struct *tsk)
272{
273 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
274 is pending. Clear the x87 state here by setting it to fixed
275 values. "m" is a random variable that should be in L1 */
276 alternative_input(
277 ASM_NOP8 ASM_NOP2,
278 "emms\n\t" /* clear stack tags */
279 "fildl %P[addr]", /* set F?P to defined value */
280 X86_FEATURE_FXSAVE_LEAK,
281 [addr] "m" (tsk->thread.fpu.has_fpu));
282
283 return fpu_restore_checking(&tsk->thread.fpu);
284}
285
286/*
287 * Software FPU state helpers. Careful: these need to
288 * be preemption protection *and* they need to be
289 * properly paired with the CR0.TS changes!
290 */
291static inline int __thread_has_fpu(struct task_struct *tsk)
292{
293 return tsk->thread.fpu.has_fpu;
294}
295
296/* Must be paired with an 'stts' after! */
297static inline void __thread_clear_has_fpu(struct task_struct *tsk)
298{
299 tsk->thread.fpu.has_fpu = 0;
300 percpu_write(fpu_owner_task, NULL);
301}
302
303/* Must be paired with a 'clts' before! */
304static inline void __thread_set_has_fpu(struct task_struct *tsk)
305{
306 tsk->thread.fpu.has_fpu = 1;
307 percpu_write(fpu_owner_task, tsk);
308}
309
310/*
311 * Encapsulate the CR0.TS handling together with the
312 * software flag.
313 *
314 * These generally need preemption protection to work,
315 * do try to avoid using these on their own.
316 */
317static inline void __thread_fpu_end(struct task_struct *tsk)
318{
319 __thread_clear_has_fpu(tsk);
320 stts();
321}
322
323static inline void __thread_fpu_begin(struct task_struct *tsk)
324{
325 clts();
326 __thread_set_has_fpu(tsk);
327}
328
329/*
330 * FPU state switching for scheduling.
331 *
332 * This is a two-stage process:
333 *
334 * - switch_fpu_prepare() saves the old state and
335 * sets the new state of the CR0.TS bit. This is
336 * done within the context of the old process.
337 *
338 * - switch_fpu_finish() restores the new state as
339 * necessary.
340 */
341typedef struct { int preload; } fpu_switch_t;
342
343/*
344 * FIXME! We could do a totally lazy restore, but we need to
345 * add a per-cpu "this was the task that last touched the FPU
346 * on this CPU" variable, and the task needs to have a "I last
347 * touched the FPU on this CPU" and check them.
348 *
349 * We don't do that yet, so "fpu_lazy_restore()" always returns
350 * false, but some day..
351 */
352static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
353{
354 return new == percpu_read_stable(fpu_owner_task) &&
355 cpu == new->thread.fpu.last_cpu;
356}
357
358static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
359{
360 fpu_switch_t fpu;
361
362 fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
363 if (__thread_has_fpu(old)) {
364 if (!__save_init_fpu(old))
365 cpu = ~0;
366 old->thread.fpu.last_cpu = cpu;
367 old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
368
369 /* Don't change CR0.TS if we just switch! */
370 if (fpu.preload) {
371 new->fpu_counter++;
372 __thread_set_has_fpu(new);
373 prefetch(new->thread.fpu.state);
374 } else
375 stts();
376 } else {
377 old->fpu_counter = 0;
378 old->thread.fpu.last_cpu = ~0;
379 if (fpu.preload) {
380 new->fpu_counter++;
381 if (fpu_lazy_restore(new, cpu))
382 fpu.preload = 0;
383 else
384 prefetch(new->thread.fpu.state);
385 __thread_fpu_begin(new);
386 }
387 }
388 return fpu;
389}
390
391/*
392 * By the time this gets called, we've already cleared CR0.TS and
393 * given the process the FPU if we are going to preload the FPU
394 * state - all we need to do is to conditionally restore the register
395 * state itself.
396 */
397static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
398{
399 if (fpu.preload) {
400 if (unlikely(restore_fpu_checking(new)))
401 __thread_fpu_end(new);
402 }
403}
404
405/*
406 * Signal frame handlers...
407 */
408extern int save_i387_xstate(void __user *buf);
409extern int restore_i387_xstate(void __user *buf);
410
411static inline void __clear_fpu(struct task_struct *tsk)
412{
413 if (__thread_has_fpu(tsk)) {
414 /* Ignore delayed exceptions from user space */
415 asm volatile("1: fwait\n"
416 "2:\n"
417 _ASM_EXTABLE(1b, 2b));
418 __thread_fpu_end(tsk);
419 }
420}
421
422/*
423 * Were we in an interrupt that interrupted kernel mode?
424 *
425 * We can do a kernel_fpu_begin/end() pair *ONLY* if that
426 * pair does nothing at all: the thread must not have fpu (so
427 * that we don't try to save the FPU state), and TS must
428 * be set (so that the clts/stts pair does nothing that is
429 * visible in the interrupted kernel thread).
430 */
431static inline bool interrupted_kernel_fpu_idle(void)
432{
433 return !__thread_has_fpu(current) &&
434 (read_cr0() & X86_CR0_TS);
435}
436
437/*
438 * Were we in user mode (or vm86 mode) when we were
439 * interrupted?
440 *
441 * Doing kernel_fpu_begin/end() is ok if we are running
442 * in an interrupt context from user mode - we'll just
443 * save the FPU state as required.
444 */
445static inline bool interrupted_user_mode(void)
446{
447 struct pt_regs *regs = get_irq_regs();
448 return regs && user_mode_vm(regs);
449}
450
451/*
452 * Can we use the FPU in kernel mode with the
453 * whole "kernel_fpu_begin/end()" sequence?
454 *
455 * It's always ok in process context (ie "not interrupt")
456 * but it is sometimes ok even from an irq.
457 */
458static inline bool irq_fpu_usable(void)
459{
460 return !in_interrupt() ||
461 interrupted_user_mode() ||
462 interrupted_kernel_fpu_idle();
463}
464
465static inline void kernel_fpu_begin(void)
466{
467 struct task_struct *me = current;
468
469 WARN_ON_ONCE(!irq_fpu_usable());
470 preempt_disable();
471 if (__thread_has_fpu(me)) {
472 __save_init_fpu(me);
473 __thread_clear_has_fpu(me);
474 /* We do 'stts()' in kernel_fpu_end() */
475 } else {
476 percpu_write(fpu_owner_task, NULL);
477 clts();
478 }
479}
480
481static inline void kernel_fpu_end(void)
482{
483 stts();
484 preempt_enable();
485}
486 28
487/* 29/*
488 * Some instructions like VIA's padlock instructions generate a spurious 30 * Some instructions like VIA's padlock instructions generate a spurious
@@ -524,126 +66,13 @@ static inline void irq_ts_restore(int TS_state)
524 * we can just assume we have FPU access - typically 66 * we can just assume we have FPU access - typically
525 * to save the FP state - we'll just take a #NM 67 * to save the FP state - we'll just take a #NM
526 * fault and get the FPU access back. 68 * fault and get the FPU access back.
527 *
528 * The actual user_fpu_begin/end() functions
529 * need to be preemption-safe, though.
530 *
531 * NOTE! user_fpu_end() must be used only after you
532 * have saved the FP state, and user_fpu_begin() must
533 * be used only immediately before restoring it.
534 * These functions do not do any save/restore on
535 * their own.
536 */ 69 */
537static inline int user_has_fpu(void) 70static inline int user_has_fpu(void)
538{ 71{
539 return __thread_has_fpu(current); 72 return current->thread.fpu.has_fpu;
540}
541
542static inline void user_fpu_end(void)
543{
544 preempt_disable();
545 __thread_fpu_end(current);
546 preempt_enable();
547}
548
549static inline void user_fpu_begin(void)
550{
551 preempt_disable();
552 if (!user_has_fpu())
553 __thread_fpu_begin(current);
554 preempt_enable();
555}
556
557/*
558 * These disable preemption on their own and are safe
559 */
560static inline void save_init_fpu(struct task_struct *tsk)
561{
562 WARN_ON_ONCE(!__thread_has_fpu(tsk));
563 preempt_disable();
564 __save_init_fpu(tsk);
565 __thread_fpu_end(tsk);
566 preempt_enable();
567}
568
569static inline void unlazy_fpu(struct task_struct *tsk)
570{
571 preempt_disable();
572 if (__thread_has_fpu(tsk)) {
573 __save_init_fpu(tsk);
574 __thread_fpu_end(tsk);
575 } else
576 tsk->fpu_counter = 0;
577 preempt_enable();
578}
579
580static inline void clear_fpu(struct task_struct *tsk)
581{
582 preempt_disable();
583 __clear_fpu(tsk);
584 preempt_enable();
585}
586
587/*
588 * i387 state interaction
589 */
590static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
591{
592 if (cpu_has_fxsr) {
593 return tsk->thread.fpu.state->fxsave.cwd;
594 } else {
595 return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
596 }
597}
598
599static inline unsigned short get_fpu_swd(struct task_struct *tsk)
600{
601 if (cpu_has_fxsr) {
602 return tsk->thread.fpu.state->fxsave.swd;
603 } else {
604 return (unsigned short)tsk->thread.fpu.state->fsave.swd;
605 }
606}
607
608static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
609{
610 if (cpu_has_xmm) {
611 return tsk->thread.fpu.state->fxsave.mxcsr;
612 } else {
613 return MXCSR_DEFAULT;
614 }
615}
616
617static bool fpu_allocated(struct fpu *fpu)
618{
619 return fpu->state != NULL;
620}
621
622static inline int fpu_alloc(struct fpu *fpu)
623{
624 if (fpu_allocated(fpu))
625 return 0;
626 fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
627 if (!fpu->state)
628 return -ENOMEM;
629 WARN_ON((unsigned long)fpu->state & 15);
630 return 0;
631}
632
633static inline void fpu_free(struct fpu *fpu)
634{
635 if (fpu->state) {
636 kmem_cache_free(task_xstate_cachep, fpu->state);
637 fpu->state = NULL;
638 }
639}
640
641static inline void fpu_copy(struct fpu *dst, struct fpu *src)
642{
643 memcpy(dst->state, src->state, xstate_size);
644} 73}
645 74
646extern void fpu_finit(struct fpu *fpu); 75extern void unlazy_fpu(struct task_struct *tsk);
647 76
648#endif /* __ASSEMBLY__ */ 77#endif /* __ASSEMBLY__ */
649 78
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 7d0c18587709..ee52760549f0 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -130,8 +130,8 @@ typedef struct compat_siginfo {
130 unsigned int _pid; /* which child */ 130 unsigned int _pid; /* which child */
131 unsigned int _uid; /* sender's uid */ 131 unsigned int _uid; /* sender's uid */
132 int _status; /* exit code */ 132 int _status; /* exit code */
133 s64 _utime; 133 compat_s64 _utime;
134 s64 _stime; 134 compat_s64 _stime;
135 } _sigchld_x32; 135 } _sigchld_x32;
136 136
137 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ 137 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index f49253d75710..c5d1785373ed 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -14,6 +14,7 @@ void exit_idle(void);
14#else /* !CONFIG_X86_64 */ 14#else /* !CONFIG_X86_64 */
15static inline void enter_idle(void) { } 15static inline void enter_idle(void) { }
16static inline void exit_idle(void) { } 16static inline void exit_idle(void) { }
17static inline void __exit_idle(void) { }
17#endif /* CONFIG_X86_64 */ 18#endif /* CONFIG_X86_64 */
18 19
19void amd_e400_remove_cpu(int cpu); 20void amd_e400_remove_cpu(int cpu);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 690d1cc9a877..2c4943de5150 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -21,6 +21,15 @@
21#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) 21#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
22#define IO_APIC_REDIR_MASKED (1 << 16) 22#define IO_APIC_REDIR_MASKED (1 << 16)
23 23
24struct io_apic_ops {
25 void (*init) (void);
26 unsigned int (*read) (unsigned int apic, unsigned int reg);
27 void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
28 void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
29};
30
31void __init set_io_apic_ops(const struct io_apic_ops *);
32
24/* 33/*
25 * The structure of the IO-APIC: 34 * The structure of the IO-APIC:
26 */ 35 */
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
deleted file mode 100644
index 423bbbddf36d..000000000000
--- a/arch/x86/include/asm/irq_controller.h
+++ /dev/null
@@ -1,12 +0,0 @@
1#ifndef __IRQ_CONTROLLER__
2#define __IRQ_CONTROLLER__
3
4struct irq_domain {
5 int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize,
6 u32 *out_hwirq, u32 *out_type);
7 void *priv;
8 struct device_node *controller;
9 struct list_head l;
10};
11
12#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18ce6ead..3a16c1483b45 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -9,12 +9,12 @@
9 9
10#define JUMP_LABEL_NOP_SIZE 5 10#define JUMP_LABEL_NOP_SIZE 5
11 11
12#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" 12#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
13 13
14static __always_inline bool arch_static_branch(struct jump_label_key *key) 14static __always_inline bool arch_static_branch(struct static_key *key)
15{ 15{
16 asm goto("1:" 16 asm goto("1:"
17 JUMP_LABEL_INITIAL_NOP 17 STATIC_KEY_INITIAL_NOP
18 ".pushsection __jump_table, \"aw\" \n\t" 18 ".pushsection __jump_table, \"aw\" \n\t"
19 _ASM_ALIGN "\n\t" 19 _ASM_ALIGN "\n\t"
20 _ASM_PTR "1b, %l[l_yes], %c0 \n\t" 20 _ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 77e95f54570a..332f98c9111f 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -64,11 +64,15 @@ enum regnames {
64 GDB_PS, /* 17 */ 64 GDB_PS, /* 17 */
65 GDB_CS, /* 18 */ 65 GDB_CS, /* 18 */
66 GDB_SS, /* 19 */ 66 GDB_SS, /* 19 */
67 GDB_DS, /* 20 */
68 GDB_ES, /* 21 */
69 GDB_FS, /* 22 */
70 GDB_GS, /* 23 */
67}; 71};
68#define GDB_ORIG_AX 57 72#define GDB_ORIG_AX 57
69#define DBG_MAX_REG_NUM 20 73#define DBG_MAX_REG_NUM 24
70/* 17 64 bit regs and 3 32 bit regs */ 74/* 17 64 bit regs and 5 32 bit regs */
71#define NUMREGBYTES ((17 * 8) + (3 * 4)) 75#define NUMREGBYTES ((17 * 8) + (5 * 4))
72#endif /* ! CONFIG_X86_32 */ 76#endif /* ! CONFIG_X86_32 */
73 77
74static inline void arch_kgdb_breakpoint(void) 78static inline void arch_kgdb_breakpoint(void)
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4d8dcbdfc120..e7d1c194d272 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -321,4 +321,8 @@ struct kvm_xcrs {
321 __u64 padding[16]; 321 __u64 padding[16];
322}; 322};
323 323
324/* definition of registers in kvm_run */
325struct kvm_sync_regs {
326};
327
324#endif /* _ASM_X86_KVM_H */ 328#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7b9cfc4878af..c222e1a1b12a 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -176,6 +176,7 @@ struct x86_emulate_ops {
176 void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); 176 void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
177 ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); 177 ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
178 int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); 178 int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
179 void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val);
179 int (*cpl)(struct x86_emulate_ctxt *ctxt); 180 int (*cpl)(struct x86_emulate_ctxt *ctxt);
180 int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); 181 int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
181 int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); 182 int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
@@ -388,7 +389,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
388#define EMULATION_INTERCEPTED 2 389#define EMULATION_INTERCEPTED 2
389int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); 390int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
390int emulator_task_switch(struct x86_emulate_ctxt *ctxt, 391int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
391 u16 tss_selector, int reason, 392 u16 tss_selector, int idt_index, int reason,
392 bool has_error_code, u32 error_code); 393 bool has_error_code, u32 error_code);
393int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); 394int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
394#endif /* _ASM_X86_KVM_X86_EMULATE_H */ 395#endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 52d6640a5ca1..e216ba066e79 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -29,7 +29,7 @@
29#include <asm/msr-index.h> 29#include <asm/msr-index.h>
30 30
31#define KVM_MAX_VCPUS 254 31#define KVM_MAX_VCPUS 254
32#define KVM_SOFT_MAX_VCPUS 64 32#define KVM_SOFT_MAX_VCPUS 160
33#define KVM_MEMORY_SLOTS 32 33#define KVM_MEMORY_SLOTS 32
34/* memory slots that does not exposed to userspace */ 34/* memory slots that does not exposed to userspace */
35#define KVM_PRIVATE_MEM_SLOTS 4 35#define KVM_PRIVATE_MEM_SLOTS 4
@@ -181,13 +181,6 @@ struct kvm_mmu_memory_cache {
181 void *objects[KVM_NR_MEM_OBJS]; 181 void *objects[KVM_NR_MEM_OBJS];
182}; 182};
183 183
184#define NR_PTE_CHAIN_ENTRIES 5
185
186struct kvm_pte_chain {
187 u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
188 struct hlist_node link;
189};
190
191/* 184/*
192 * kvm_mmu_page_role, below, is defined as: 185 * kvm_mmu_page_role, below, is defined as:
193 * 186 *
@@ -427,12 +420,16 @@ struct kvm_vcpu_arch {
427 420
428 u64 last_guest_tsc; 421 u64 last_guest_tsc;
429 u64 last_kernel_ns; 422 u64 last_kernel_ns;
430 u64 last_tsc_nsec; 423 u64 last_host_tsc;
431 u64 last_tsc_write; 424 u64 tsc_offset_adjustment;
432 u32 virtual_tsc_khz; 425 u64 this_tsc_nsec;
426 u64 this_tsc_write;
427 u8 this_tsc_generation;
433 bool tsc_catchup; 428 bool tsc_catchup;
434 u32 tsc_catchup_mult; 429 bool tsc_always_catchup;
435 s8 tsc_catchup_shift; 430 s8 virtual_tsc_shift;
431 u32 virtual_tsc_mult;
432 u32 virtual_tsc_khz;
436 433
437 atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ 434 atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
438 unsigned nmi_pending; /* NMI queued after currently running handler */ 435 unsigned nmi_pending; /* NMI queued after currently running handler */
@@ -478,6 +475,21 @@ struct kvm_vcpu_arch {
478 u32 id; 475 u32 id;
479 bool send_user_only; 476 bool send_user_only;
480 } apf; 477 } apf;
478
479 /* OSVW MSRs (AMD only) */
480 struct {
481 u64 length;
482 u64 status;
483 } osvw;
484};
485
486struct kvm_lpage_info {
487 unsigned long rmap_pde;
488 int write_count;
489};
490
491struct kvm_arch_memory_slot {
492 struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
481}; 493};
482 494
483struct kvm_arch { 495struct kvm_arch {
@@ -511,8 +523,12 @@ struct kvm_arch {
511 s64 kvmclock_offset; 523 s64 kvmclock_offset;
512 raw_spinlock_t tsc_write_lock; 524 raw_spinlock_t tsc_write_lock;
513 u64 last_tsc_nsec; 525 u64 last_tsc_nsec;
514 u64 last_tsc_offset;
515 u64 last_tsc_write; 526 u64 last_tsc_write;
527 u32 last_tsc_khz;
528 u64 cur_tsc_nsec;
529 u64 cur_tsc_write;
530 u64 cur_tsc_offset;
531 u8 cur_tsc_generation;
516 532
517 struct kvm_xen_hvm_config xen_hvm_config; 533 struct kvm_xen_hvm_config xen_hvm_config;
518 534
@@ -644,7 +660,7 @@ struct kvm_x86_ops {
644 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 660 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
645 int (*get_lpage_level)(void); 661 int (*get_lpage_level)(void);
646 bool (*rdtscp_supported)(void); 662 bool (*rdtscp_supported)(void);
647 void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment); 663 void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
648 664
649 void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); 665 void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
650 666
@@ -652,7 +668,7 @@ struct kvm_x86_ops {
652 668
653 bool (*has_wbinvd_exit)(void); 669 bool (*has_wbinvd_exit)(void);
654 670
655 void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); 671 void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
656 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); 672 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
657 673
658 u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); 674 u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
@@ -674,6 +690,17 @@ struct kvm_arch_async_pf {
674 690
675extern struct kvm_x86_ops *kvm_x86_ops; 691extern struct kvm_x86_ops *kvm_x86_ops;
676 692
693static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
694 s64 adjustment)
695{
696 kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false);
697}
698
699static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
700{
701 kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true);
702}
703
677int kvm_mmu_module_init(void); 704int kvm_mmu_module_init(void);
678void kvm_mmu_module_exit(void); 705void kvm_mmu_module_exit(void);
679 706
@@ -741,8 +768,8 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
741void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 768void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
742int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); 769int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
743 770
744int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 771int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
745 bool has_error_code, u32 error_code); 772 int reason, bool has_error_code, u32 error_code);
746 773
747int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 774int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
748int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); 775int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 9cdae5d47e8f..c8bed0da434a 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -3,7 +3,6 @@
3 3
4#include <linux/percpu.h> 4#include <linux/percpu.h>
5 5
6#include <asm/system.h>
7#include <linux/atomic.h> 6#include <linux/atomic.h>
8#include <asm/asm.h> 7#include <asm/asm.h>
9 8
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 0e8e85bb7c51..d354fb781c57 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -5,7 +5,6 @@
5#define _ASM_X86_MC146818RTC_H 5#define _ASM_X86_MC146818RTC_H
6 6
7#include <asm/io.h> 7#include <asm/io.h>
8#include <asm/system.h>
9#include <asm/processor.h> 8#include <asm/processor.h>
10#include <linux/mc146818rtc.h> 9#include <linux/mc146818rtc.h>
11 10
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6aefb14cbbc5..441520e4174f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
151 151
152void mce_setup(struct mce *m); 152void mce_setup(struct mce *m);
153void mce_log(struct mce *m); 153void mce_log(struct mce *m);
154extern struct device *mce_device[CONFIG_NR_CPUS]; 154DECLARE_PER_CPU(struct device *, mce_device);
155 155
156/* 156/*
157 * Maximum banks number. 157 * Maximum banks number.
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 0a0a95460434..fc18bf3ce7c8 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -26,8 +26,8 @@ extern struct sfi_rtc_table_entry sfi_mrtc_array[];
26 * identified via MSRs. 26 * identified via MSRs.
27 */ 27 */
28enum mrst_cpu_type { 28enum mrst_cpu_type {
29 MRST_CPU_CHIP_LINCROFT = 1, 29 /* 1 was Moorestown */
30 MRST_CPU_CHIP_PENWELL, 30 MRST_CPU_CHIP_PENWELL = 2,
31}; 31};
32 32
33extern enum mrst_cpu_type __mrst_cpu_chip; 33extern enum mrst_cpu_type __mrst_cpu_chip;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9161a0..ccb805966f68 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
56#define MSR_OFFCORE_RSP_0 0x000001a6 56#define MSR_OFFCORE_RSP_0 0x000001a6
57#define MSR_OFFCORE_RSP_1 0x000001a7 57#define MSR_OFFCORE_RSP_1 0x000001a7
58 58
59#define MSR_LBR_SELECT 0x000001c8
60#define MSR_LBR_TOS 0x000001c9
61#define MSR_LBR_NHM_FROM 0x00000680
62#define MSR_LBR_NHM_TO 0x000006c0
63#define MSR_LBR_CORE_FROM 0x00000040
64#define MSR_LBR_CORE_TO 0x00000060
65
59#define MSR_IA32_PEBS_ENABLE 0x000003f1 66#define MSR_IA32_PEBS_ENABLE 0x000003f1
60#define MSR_IA32_DS_AREA 0x00000600 67#define MSR_IA32_DS_AREA 0x00000600
61#define MSR_IA32_PERF_CAPABILITIES 0x00000345 68#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index bce688d54c12..e21fdd10479f 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -55,7 +55,6 @@ extern unsigned long init_memory_mapping(unsigned long start,
55 unsigned long end); 55 unsigned long end);
56 56
57extern void initmem_init(void); 57extern void initmem_init(void);
58extern void free_initmem(void);
59 58
60#endif /* !__ASSEMBLY__ */ 59#endif /* !__ASSEMBLY__ */
61 60
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9a74fb..aa0f91308367 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -10,6 +10,7 @@
10#include <asm/paravirt_types.h> 10#include <asm/paravirt_types.h>
11 11
12#ifndef __ASSEMBLY__ 12#ifndef __ASSEMBLY__
13#include <linux/bug.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <linux/cpumask.h> 15#include <linux/cpumask.h>
15 16
@@ -230,9 +231,9 @@ static inline unsigned long long paravirt_sched_clock(void)
230 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); 231 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
231} 232}
232 233
233struct jump_label_key; 234struct static_key;
234extern struct jump_label_key paravirt_steal_enabled; 235extern struct static_key paravirt_steal_enabled;
235extern struct jump_label_key paravirt_steal_rq_enabled; 236extern struct static_key paravirt_steal_rq_enabled;
236 237
237static inline u64 paravirt_steal_clock(int cpu) 238static inline u64 paravirt_steal_clock(int cpu)
238{ 239{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index e8fb2c7a5f4f..2291895b1836 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -23,6 +23,7 @@
23#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) 23#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16)
24#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) 24#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
25#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) 25#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18)
26#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19)
26#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) 27#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20)
27#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) 28#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21)
28#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) 29#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f6d0d2eb0832..4fa7dcceb6c0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -14,13 +14,13 @@ struct mm_struct;
14#include <asm/sigcontext.h> 14#include <asm/sigcontext.h>
15#include <asm/current.h> 15#include <asm/current.h>
16#include <asm/cpufeature.h> 16#include <asm/cpufeature.h>
17#include <asm/system.h>
18#include <asm/page.h> 17#include <asm/page.h>
19#include <asm/pgtable_types.h> 18#include <asm/pgtable_types.h>
20#include <asm/percpu.h> 19#include <asm/percpu.h>
21#include <asm/msr.h> 20#include <asm/msr.h>
22#include <asm/desc_defs.h> 21#include <asm/desc_defs.h>
23#include <asm/nops.h> 22#include <asm/nops.h>
23#include <asm/special_insns.h>
24 24
25#include <linux/personality.h> 25#include <linux/personality.h>
26#include <linux/cpumask.h> 26#include <linux/cpumask.h>
@@ -29,6 +29,15 @@ struct mm_struct;
29#include <linux/math64.h> 29#include <linux/math64.h>
30#include <linux/init.h> 30#include <linux/init.h>
31#include <linux/err.h> 31#include <linux/err.h>
32#include <linux/irqflags.h>
33
34/*
35 * We handle most unaligned accesses in hardware. On the other hand
36 * unaligned DMA can be quite expensive on some Nehalem processors.
37 *
38 * Based on this we disable the IP header alignment in network drivers.
39 */
40#define NET_IP_ALIGN 0
32 41
33#define HBP_NUM 4 42#define HBP_NUM 4
34/* 43/*
@@ -162,6 +171,7 @@ extern void early_cpu_init(void);
162extern void identify_boot_cpu(void); 171extern void identify_boot_cpu(void);
163extern void identify_secondary_cpu(struct cpuinfo_x86 *); 172extern void identify_secondary_cpu(struct cpuinfo_x86 *);
164extern void print_cpu_info(struct cpuinfo_x86 *); 173extern void print_cpu_info(struct cpuinfo_x86 *);
174void print_cpu_msr(struct cpuinfo_x86 *);
165extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); 175extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
166extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 176extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
167extern unsigned short num_cache_leaves; 177extern unsigned short num_cache_leaves;
@@ -474,61 +484,6 @@ struct thread_struct {
474 unsigned io_bitmap_max; 484 unsigned io_bitmap_max;
475}; 485};
476 486
477static inline unsigned long native_get_debugreg(int regno)
478{
479 unsigned long val = 0; /* Damn you, gcc! */
480
481 switch (regno) {
482 case 0:
483 asm("mov %%db0, %0" :"=r" (val));
484 break;
485 case 1:
486 asm("mov %%db1, %0" :"=r" (val));
487 break;
488 case 2:
489 asm("mov %%db2, %0" :"=r" (val));
490 break;
491 case 3:
492 asm("mov %%db3, %0" :"=r" (val));
493 break;
494 case 6:
495 asm("mov %%db6, %0" :"=r" (val));
496 break;
497 case 7:
498 asm("mov %%db7, %0" :"=r" (val));
499 break;
500 default:
501 BUG();
502 }
503 return val;
504}
505
506static inline void native_set_debugreg(int regno, unsigned long value)
507{
508 switch (regno) {
509 case 0:
510 asm("mov %0, %%db0" ::"r" (value));
511 break;
512 case 1:
513 asm("mov %0, %%db1" ::"r" (value));
514 break;
515 case 2:
516 asm("mov %0, %%db2" ::"r" (value));
517 break;
518 case 3:
519 asm("mov %0, %%db3" ::"r" (value));
520 break;
521 case 6:
522 asm("mov %0, %%db6" ::"r" (value));
523 break;
524 case 7:
525 asm("mov %0, %%db7" ::"r" (value));
526 break;
527 default:
528 BUG();
529 }
530}
531
532/* 487/*
533 * Set IOPL bits in EFLAGS from given mask 488 * Set IOPL bits in EFLAGS from given mask
534 */ 489 */
@@ -574,14 +529,6 @@ static inline void native_swapgs(void)
574#define __cpuid native_cpuid 529#define __cpuid native_cpuid
575#define paravirt_enabled() 0 530#define paravirt_enabled() 0
576 531
577/*
578 * These special macros can be used to get or set a debugging register
579 */
580#define get_debugreg(var, register) \
581 (var) = native_get_debugreg(register)
582#define set_debugreg(value, register) \
583 native_set_debugreg(register, value)
584
585static inline void load_sp0(struct tss_struct *tss, 532static inline void load_sp0(struct tss_struct *tss,
586 struct thread_struct *thread) 533 struct thread_struct *thread)
587{ 534{
@@ -1027,4 +974,14 @@ extern bool cpu_has_amd_erratum(const int *);
1027#define cpu_has_amd_erratum(x) (false) 974#define cpu_has_amd_erratum(x) (false)
1028#endif /* CONFIG_CPU_SUP_AMD */ 975#endif /* CONFIG_CPU_SUP_AMD */
1029 976
977void cpu_idle_wait(void);
978
979extern unsigned long arch_align_stack(unsigned long sp);
980extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
981
982void default_idle(void);
983bool set_pm_idle_to_default(void);
984
985void stop_this_cpu(void *dummy);
986
1030#endif /* _ASM_X86_PROCESSOR_H */ 987#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 644dd885f05a..60bef663609a 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -21,7 +21,6 @@
21#include <asm/irq.h> 21#include <asm/irq.h>
22#include <linux/atomic.h> 22#include <linux/atomic.h>
23#include <asm/setup.h> 23#include <asm/setup.h>
24#include <asm/irq_controller.h>
25 24
26#ifdef CONFIG_OF 25#ifdef CONFIG_OF
27extern int of_ioapic; 26extern int of_ioapic;
@@ -43,15 +42,6 @@ extern char cmd_line[COMMAND_LINE_SIZE];
43#define pci_address_to_pio pci_address_to_pio 42#define pci_address_to_pio pci_address_to_pio
44unsigned long pci_address_to_pio(phys_addr_t addr); 43unsigned long pci_address_to_pio(phys_addr_t addr);
45 44
46/**
47 * irq_dispose_mapping - Unmap an interrupt
48 * @virq: linux virq number of the interrupt to unmap
49 *
50 * FIXME: We really should implement proper virq handling like power,
51 * but that's going to be major surgery.
52 */
53static inline void irq_dispose_mapping(unsigned int virq) { }
54
55#define HAVE_ARCH_DEVTREE_FIXUPS 45#define HAVE_ARCH_DEVTREE_FIXUPS
56 46
57#endif /* __ASSEMBLY__ */ 47#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 5e641715c3fe..165466233ab0 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -212,7 +212,61 @@
212#ifdef __KERNEL__ 212#ifdef __KERNEL__
213#ifndef __ASSEMBLY__ 213#ifndef __ASSEMBLY__
214extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; 214extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
215#endif 215
216#endif 216/*
217 * Load a segment. Fall back on loading the zero
218 * segment if something goes wrong..
219 */
220#define loadsegment(seg, value) \
221do { \
222 unsigned short __val = (value); \
223 \
224 asm volatile(" \n" \
225 "1: movl %k0,%%" #seg " \n" \
226 \
227 ".section .fixup,\"ax\" \n" \
228 "2: xorl %k0,%k0 \n" \
229 " jmp 1b \n" \
230 ".previous \n" \
231 \
232 _ASM_EXTABLE(1b, 2b) \
233 \
234 : "+r" (__val) : : "memory"); \
235} while (0)
236
237/*
238 * Save a segment register away
239 */
240#define savesegment(seg, value) \
241 asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
242
243/*
244 * x86_32 user gs accessors.
245 */
246#ifdef CONFIG_X86_32
247#ifdef CONFIG_X86_32_LAZY_GS
248#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
249#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
250#define task_user_gs(tsk) ((tsk)->thread.gs)
251#define lazy_save_gs(v) savesegment(gs, (v))
252#define lazy_load_gs(v) loadsegment(gs, (v))
253#else /* X86_32_LAZY_GS */
254#define get_user_gs(regs) (u16)((regs)->gs)
255#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
256#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
257#define lazy_save_gs(v) do { } while (0)
258#define lazy_load_gs(v) do { } while (0)
259#endif /* X86_32_LAZY_GS */
260#endif /* X86_32 */
261
262static inline unsigned long get_limit(unsigned long segment)
263{
264 unsigned long __limit;
265 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
266 return __limit + 1;
267}
268
269#endif /* !__ASSEMBLY__ */
270#endif /* __KERNEL__ */
217 271
218#endif /* _ASM_X86_SEGMENT_H */ 272#endif /* _ASM_X86_SEGMENT_H */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
new file mode 100644
index 000000000000..41fc93a2e225
--- /dev/null
+++ b/arch/x86/include/asm/special_insns.h
@@ -0,0 +1,199 @@
1#ifndef _ASM_X86_SPECIAL_INSNS_H
2#define _ASM_X86_SPECIAL_INSNS_H
3
4
5#ifdef __KERNEL__
6
7static inline void native_clts(void)
8{
9 asm volatile("clts");
10}
11
12/*
13 * Volatile isn't enough to prevent the compiler from reordering the
14 * read/write functions for the control registers and messing everything up.
15 * A memory clobber would solve the problem, but would prevent reordering of
16 * all loads stores around it, which can hurt performance. Solution is to
17 * use a variable and mimic reads and writes to it to enforce serialization
18 */
19static unsigned long __force_order;
20
21static inline unsigned long native_read_cr0(void)
22{
23 unsigned long val;
24 asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
25 return val;
26}
27
28static inline void native_write_cr0(unsigned long val)
29{
30 asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
31}
32
33static inline unsigned long native_read_cr2(void)
34{
35 unsigned long val;
36 asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
37 return val;
38}
39
40static inline void native_write_cr2(unsigned long val)
41{
42 asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
43}
44
45static inline unsigned long native_read_cr3(void)
46{
47 unsigned long val;
48 asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
49 return val;
50}
51
52static inline void native_write_cr3(unsigned long val)
53{
54 asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
55}
56
57static inline unsigned long native_read_cr4(void)
58{
59 unsigned long val;
60 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
61 return val;
62}
63
64static inline unsigned long native_read_cr4_safe(void)
65{
66 unsigned long val;
67 /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
68 * exists, so it will never fail. */
69#ifdef CONFIG_X86_32
70 asm volatile("1: mov %%cr4, %0\n"
71 "2:\n"
72 _ASM_EXTABLE(1b, 2b)
73 : "=r" (val), "=m" (__force_order) : "0" (0));
74#else
75 val = native_read_cr4();
76#endif
77 return val;
78}
79
80static inline void native_write_cr4(unsigned long val)
81{
82 asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
83}
84
85#ifdef CONFIG_X86_64
86static inline unsigned long native_read_cr8(void)
87{
88 unsigned long cr8;
89 asm volatile("movq %%cr8,%0" : "=r" (cr8));
90 return cr8;
91}
92
93static inline void native_write_cr8(unsigned long val)
94{
95 asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
96}
97#endif
98
99static inline void native_wbinvd(void)
100{
101 asm volatile("wbinvd": : :"memory");
102}
103
104extern void native_load_gs_index(unsigned);
105
106#ifdef CONFIG_PARAVIRT
107#include <asm/paravirt.h>
108#else
109
110static inline unsigned long read_cr0(void)
111{
112 return native_read_cr0();
113}
114
115static inline void write_cr0(unsigned long x)
116{
117 native_write_cr0(x);
118}
119
120static inline unsigned long read_cr2(void)
121{
122 return native_read_cr2();
123}
124
125static inline void write_cr2(unsigned long x)
126{
127 native_write_cr2(x);
128}
129
130static inline unsigned long read_cr3(void)
131{
132 return native_read_cr3();
133}
134
135static inline void write_cr3(unsigned long x)
136{
137 native_write_cr3(x);
138}
139
140static inline unsigned long read_cr4(void)
141{
142 return native_read_cr4();
143}
144
145static inline unsigned long read_cr4_safe(void)
146{
147 return native_read_cr4_safe();
148}
149
150static inline void write_cr4(unsigned long x)
151{
152 native_write_cr4(x);
153}
154
155static inline void wbinvd(void)
156{
157 native_wbinvd();
158}
159
160#ifdef CONFIG_X86_64
161
162static inline unsigned long read_cr8(void)
163{
164 return native_read_cr8();
165}
166
167static inline void write_cr8(unsigned long x)
168{
169 native_write_cr8(x);
170}
171
172static inline void load_gs_index(unsigned selector)
173{
174 native_load_gs_index(selector);
175}
176
177#endif
178
179/* Clear the 'TS' bit */
180static inline void clts(void)
181{
182 native_clts();
183}
184
185#endif/* CONFIG_PARAVIRT */
186
187#define stts() write_cr0(read_cr0() | X86_CR0_TS)
188
189static inline void clflush(volatile void *__p)
190{
191 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
192}
193
194#define nop() asm volatile ("nop")
195
196
197#endif /* __KERNEL__ */
198
199#endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a82c2bf504b6..76bfa2cf301d 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
88{ 88{
89 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); 89 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
90 90
91 return !!(tmp.tail ^ tmp.head); 91 return tmp.tail != tmp.head;
92} 92}
93 93
94static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) 94static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
95{ 95{
96 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); 96 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
97 97
98 return ((tmp.tail - tmp.head) & TICKET_MASK) > 1; 98 return (__ticket_t)(tmp.tail - tmp.head) > 1;
99} 99}
100 100
101#ifndef CONFIG_PARAVIRT_SPINLOCKS 101#ifndef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 8ebd5df7451e..ad0ad07fc006 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
16#endif 16#endif
17 17
18#define TICKET_SHIFT (sizeof(__ticket_t) * 8) 18#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
19#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
20 19
21typedef struct arch_spinlock { 20typedef struct arch_spinlock {
22 union { 21 union {
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 157517763565..b5d9533d2c38 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -38,7 +38,6 @@
38#include <asm/tsc.h> 38#include <asm/tsc.h>
39#include <asm/processor.h> 39#include <asm/processor.h>
40#include <asm/percpu.h> 40#include <asm/percpu.h>
41#include <asm/system.h>
42#include <asm/desc.h> 41#include <asm/desc.h>
43#include <linux/random.h> 42#include <linux/random.h>
44 43
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
new file mode 100644
index 000000000000..4ec45b3abba1
--- /dev/null
+++ b/arch/x86/include/asm/switch_to.h
@@ -0,0 +1,129 @@
1#ifndef _ASM_X86_SWITCH_TO_H
2#define _ASM_X86_SWITCH_TO_H
3
4struct task_struct; /* one of the stranger aspects of C forward declarations */
5struct task_struct *__switch_to(struct task_struct *prev,
6 struct task_struct *next);
7struct tss_struct;
8void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
9 struct tss_struct *tss);
10
11#ifdef CONFIG_X86_32
12
13#ifdef CONFIG_CC_STACKPROTECTOR
14#define __switch_canary \
15 "movl %P[task_canary](%[next]), %%ebx\n\t" \
16 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
17#define __switch_canary_oparam \
18 , [stack_canary] "=m" (stack_canary.canary)
19#define __switch_canary_iparam \
20 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
21#else /* CC_STACKPROTECTOR */
22#define __switch_canary
23#define __switch_canary_oparam
24#define __switch_canary_iparam
25#endif /* CC_STACKPROTECTOR */
26
27/*
28 * Saving eflags is important. It switches not only IOPL between tasks,
29 * it also protects other tasks from NT leaking through sysenter etc.
30 */
31#define switch_to(prev, next, last) \
32do { \
33 /* \
34 * Context-switching clobbers all registers, so we clobber \
35 * them explicitly, via unused output variables. \
36 * (EAX and EBP is not listed because EBP is saved/restored \
37 * explicitly for wchan access and EAX is the return value of \
38 * __switch_to()) \
39 */ \
40 unsigned long ebx, ecx, edx, esi, edi; \
41 \
42 asm volatile("pushfl\n\t" /* save flags */ \
43 "pushl %%ebp\n\t" /* save EBP */ \
44 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
45 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
46 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
47 "pushl %[next_ip]\n\t" /* restore EIP */ \
48 __switch_canary \
49 "jmp __switch_to\n" /* regparm call */ \
50 "1:\t" \
51 "popl %%ebp\n\t" /* restore EBP */ \
52 "popfl\n" /* restore flags */ \
53 \
54 /* output parameters */ \
55 : [prev_sp] "=m" (prev->thread.sp), \
56 [prev_ip] "=m" (prev->thread.ip), \
57 "=a" (last), \
58 \
59 /* clobbered output registers: */ \
60 "=b" (ebx), "=c" (ecx), "=d" (edx), \
61 "=S" (esi), "=D" (edi) \
62 \
63 __switch_canary_oparam \
64 \
65 /* input parameters: */ \
66 : [next_sp] "m" (next->thread.sp), \
67 [next_ip] "m" (next->thread.ip), \
68 \
69 /* regparm parameters for __switch_to(): */ \
70 [prev] "a" (prev), \
71 [next] "d" (next) \
72 \
73 __switch_canary_iparam \
74 \
75 : /* reloaded segment registers */ \
76 "memory"); \
77} while (0)
78
79#else /* CONFIG_X86_32 */
80
81/* frame pointer must be last for get_wchan */
82#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
83#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
84
85#define __EXTRA_CLOBBER \
86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
87 "r12", "r13", "r14", "r15"
88
89#ifdef CONFIG_CC_STACKPROTECTOR
90#define __switch_canary \
91 "movq %P[task_canary](%%rsi),%%r8\n\t" \
92 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
93#define __switch_canary_oparam \
94 , [gs_canary] "=m" (irq_stack_union.stack_canary)
95#define __switch_canary_iparam \
96 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
97#else /* CC_STACKPROTECTOR */
98#define __switch_canary
99#define __switch_canary_oparam
100#define __switch_canary_iparam
101#endif /* CC_STACKPROTECTOR */
102
103/* Save restore flags to clear handle leaking NT */
104#define switch_to(prev, next, last) \
105 asm volatile(SAVE_CONTEXT \
106 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
107 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
108 "call __switch_to\n\t" \
109 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
110 __switch_canary \
111 "movq %P[thread_info](%%rsi),%%r8\n\t" \
112 "movq %%rax,%%rdi\n\t" \
113 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
114 "jnz ret_from_fork\n\t" \
115 RESTORE_CONTEXT \
116 : "=a" (last) \
117 __switch_canary_oparam \
118 : [next] "S" (next), [prev] "D" (prev), \
119 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
120 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
121 [_tif_fork] "i" (_TIF_FORK), \
122 [thread_info] "i" (offsetof(struct task_struct, stack)), \
123 [current_task] "m" (current_task) \
124 __switch_canary_iparam \
125 : "memory", "cc" __EXTRA_CLOBBER)
126
127#endif /* CONFIG_X86_32 */
128
129#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
deleted file mode 100644
index 2d2f01ce6dcb..000000000000
--- a/arch/x86/include/asm/system.h
+++ /dev/null
@@ -1,523 +0,0 @@
1#ifndef _ASM_X86_SYSTEM_H
2#define _ASM_X86_SYSTEM_H
3
4#include <asm/asm.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h>
9
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/* entries in ARCH_DLINFO: */
14#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
15# define AT_VECTOR_SIZE_ARCH 2
16#else /* else it's non-compat x86-64 */
17# define AT_VECTOR_SIZE_ARCH 1
18#endif
19
20struct task_struct; /* one of the stranger aspects of C forward declarations */
21struct task_struct *__switch_to(struct task_struct *prev,
22 struct task_struct *next);
23struct tss_struct;
24void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
25 struct tss_struct *tss);
26extern void show_regs_common(void);
27
28#ifdef CONFIG_X86_32
29
30#ifdef CONFIG_CC_STACKPROTECTOR
31#define __switch_canary \
32 "movl %P[task_canary](%[next]), %%ebx\n\t" \
33 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
34#define __switch_canary_oparam \
35 , [stack_canary] "=m" (stack_canary.canary)
36#define __switch_canary_iparam \
37 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
38#else /* CC_STACKPROTECTOR */
39#define __switch_canary
40#define __switch_canary_oparam
41#define __switch_canary_iparam
42#endif /* CC_STACKPROTECTOR */
43
44/*
45 * Saving eflags is important. It switches not only IOPL between tasks,
46 * it also protects other tasks from NT leaking through sysenter etc.
47 */
48#define switch_to(prev, next, last) \
49do { \
50 /* \
51 * Context-switching clobbers all registers, so we clobber \
52 * them explicitly, via unused output variables. \
53 * (EAX and EBP is not listed because EBP is saved/restored \
54 * explicitly for wchan access and EAX is the return value of \
55 * __switch_to()) \
56 */ \
57 unsigned long ebx, ecx, edx, esi, edi; \
58 \
59 asm volatile("pushfl\n\t" /* save flags */ \
60 "pushl %%ebp\n\t" /* save EBP */ \
61 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
62 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
63 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
64 "pushl %[next_ip]\n\t" /* restore EIP */ \
65 __switch_canary \
66 "jmp __switch_to\n" /* regparm call */ \
67 "1:\t" \
68 "popl %%ebp\n\t" /* restore EBP */ \
69 "popfl\n" /* restore flags */ \
70 \
71 /* output parameters */ \
72 : [prev_sp] "=m" (prev->thread.sp), \
73 [prev_ip] "=m" (prev->thread.ip), \
74 "=a" (last), \
75 \
76 /* clobbered output registers: */ \
77 "=b" (ebx), "=c" (ecx), "=d" (edx), \
78 "=S" (esi), "=D" (edi) \
79 \
80 __switch_canary_oparam \
81 \
82 /* input parameters: */ \
83 : [next_sp] "m" (next->thread.sp), \
84 [next_ip] "m" (next->thread.ip), \
85 \
86 /* regparm parameters for __switch_to(): */ \
87 [prev] "a" (prev), \
88 [next] "d" (next) \
89 \
90 __switch_canary_iparam \
91 \
92 : /* reloaded segment registers */ \
93 "memory"); \
94} while (0)
95
96/*
97 * disable hlt during certain critical i/o operations
98 */
99#define HAVE_DISABLE_HLT
100#else
101
102/* frame pointer must be last for get_wchan */
103#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
104#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
105
106#define __EXTRA_CLOBBER \
107 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
108 "r12", "r13", "r14", "r15"
109
110#ifdef CONFIG_CC_STACKPROTECTOR
111#define __switch_canary \
112 "movq %P[task_canary](%%rsi),%%r8\n\t" \
113 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
114#define __switch_canary_oparam \
115 , [gs_canary] "=m" (irq_stack_union.stack_canary)
116#define __switch_canary_iparam \
117 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
118#else /* CC_STACKPROTECTOR */
119#define __switch_canary
120#define __switch_canary_oparam
121#define __switch_canary_iparam
122#endif /* CC_STACKPROTECTOR */
123
124/* Save restore flags to clear handle leaking NT */
125#define switch_to(prev, next, last) \
126 asm volatile(SAVE_CONTEXT \
127 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
128 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
129 "call __switch_to\n\t" \
130 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
131 __switch_canary \
132 "movq %P[thread_info](%%rsi),%%r8\n\t" \
133 "movq %%rax,%%rdi\n\t" \
134 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
135 "jnz ret_from_fork\n\t" \
136 RESTORE_CONTEXT \
137 : "=a" (last) \
138 __switch_canary_oparam \
139 : [next] "S" (next), [prev] "D" (prev), \
140 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
141 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
142 [_tif_fork] "i" (_TIF_FORK), \
143 [thread_info] "i" (offsetof(struct task_struct, stack)), \
144 [current_task] "m" (current_task) \
145 __switch_canary_iparam \
146 : "memory", "cc" __EXTRA_CLOBBER)
147#endif
148
149#ifdef __KERNEL__
150
151extern void native_load_gs_index(unsigned);
152
153/*
154 * Load a segment. Fall back on loading the zero
155 * segment if something goes wrong..
156 */
157#define loadsegment(seg, value) \
158do { \
159 unsigned short __val = (value); \
160 \
161 asm volatile(" \n" \
162 "1: movl %k0,%%" #seg " \n" \
163 \
164 ".section .fixup,\"ax\" \n" \
165 "2: xorl %k0,%k0 \n" \
166 " jmp 1b \n" \
167 ".previous \n" \
168 \
169 _ASM_EXTABLE(1b, 2b) \
170 \
171 : "+r" (__val) : : "memory"); \
172} while (0)
173
174/*
175 * Save a segment register away
176 */
177#define savesegment(seg, value) \
178 asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
179
180/*
181 * x86_32 user gs accessors.
182 */
183#ifdef CONFIG_X86_32
184#ifdef CONFIG_X86_32_LAZY_GS
185#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
186#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
187#define task_user_gs(tsk) ((tsk)->thread.gs)
188#define lazy_save_gs(v) savesegment(gs, (v))
189#define lazy_load_gs(v) loadsegment(gs, (v))
190#else /* X86_32_LAZY_GS */
191#define get_user_gs(regs) (u16)((regs)->gs)
192#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
193#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
194#define lazy_save_gs(v) do { } while (0)
195#define lazy_load_gs(v) do { } while (0)
196#endif /* X86_32_LAZY_GS */
197#endif /* X86_32 */
198
199static inline unsigned long get_limit(unsigned long segment)
200{
201 unsigned long __limit;
202 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
203 return __limit + 1;
204}
205
206static inline void native_clts(void)
207{
208 asm volatile("clts");
209}
210
211/*
212 * Volatile isn't enough to prevent the compiler from reordering the
213 * read/write functions for the control registers and messing everything up.
214 * A memory clobber would solve the problem, but would prevent reordering of
215 * all loads stores around it, which can hurt performance. Solution is to
216 * use a variable and mimic reads and writes to it to enforce serialization
217 */
218static unsigned long __force_order;
219
220static inline unsigned long native_read_cr0(void)
221{
222 unsigned long val;
223 asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
224 return val;
225}
226
227static inline void native_write_cr0(unsigned long val)
228{
229 asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
230}
231
232static inline unsigned long native_read_cr2(void)
233{
234 unsigned long val;
235 asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
236 return val;
237}
238
239static inline void native_write_cr2(unsigned long val)
240{
241 asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
242}
243
244static inline unsigned long native_read_cr3(void)
245{
246 unsigned long val;
247 asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
248 return val;
249}
250
251static inline void native_write_cr3(unsigned long val)
252{
253 asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
254}
255
256static inline unsigned long native_read_cr4(void)
257{
258 unsigned long val;
259 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
260 return val;
261}
262
263static inline unsigned long native_read_cr4_safe(void)
264{
265 unsigned long val;
266 /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
267 * exists, so it will never fail. */
268#ifdef CONFIG_X86_32
269 asm volatile("1: mov %%cr4, %0\n"
270 "2:\n"
271 _ASM_EXTABLE(1b, 2b)
272 : "=r" (val), "=m" (__force_order) : "0" (0));
273#else
274 val = native_read_cr4();
275#endif
276 return val;
277}
278
279static inline void native_write_cr4(unsigned long val)
280{
281 asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
282}
283
284#ifdef CONFIG_X86_64
285static inline unsigned long native_read_cr8(void)
286{
287 unsigned long cr8;
288 asm volatile("movq %%cr8,%0" : "=r" (cr8));
289 return cr8;
290}
291
292static inline void native_write_cr8(unsigned long val)
293{
294 asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
295}
296#endif
297
298static inline void native_wbinvd(void)
299{
300 asm volatile("wbinvd": : :"memory");
301}
302
303#ifdef CONFIG_PARAVIRT
304#include <asm/paravirt.h>
305#else
306
307static inline unsigned long read_cr0(void)
308{
309 return native_read_cr0();
310}
311
312static inline void write_cr0(unsigned long x)
313{
314 native_write_cr0(x);
315}
316
317static inline unsigned long read_cr2(void)
318{
319 return native_read_cr2();
320}
321
322static inline void write_cr2(unsigned long x)
323{
324 native_write_cr2(x);
325}
326
327static inline unsigned long read_cr3(void)
328{
329 return native_read_cr3();
330}
331
332static inline void write_cr3(unsigned long x)
333{
334 native_write_cr3(x);
335}
336
337static inline unsigned long read_cr4(void)
338{
339 return native_read_cr4();
340}
341
342static inline unsigned long read_cr4_safe(void)
343{
344 return native_read_cr4_safe();
345}
346
347static inline void write_cr4(unsigned long x)
348{
349 native_write_cr4(x);
350}
351
352static inline void wbinvd(void)
353{
354 native_wbinvd();
355}
356
357#ifdef CONFIG_X86_64
358
359static inline unsigned long read_cr8(void)
360{
361 return native_read_cr8();
362}
363
364static inline void write_cr8(unsigned long x)
365{
366 native_write_cr8(x);
367}
368
369static inline void load_gs_index(unsigned selector)
370{
371 native_load_gs_index(selector);
372}
373
374#endif
375
376/* Clear the 'TS' bit */
377static inline void clts(void)
378{
379 native_clts();
380}
381
382#endif/* CONFIG_PARAVIRT */
383
384#define stts() write_cr0(read_cr0() | X86_CR0_TS)
385
386#endif /* __KERNEL__ */
387
388static inline void clflush(volatile void *__p)
389{
390 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
391}
392
393#define nop() asm volatile ("nop")
394
395void disable_hlt(void);
396void enable_hlt(void);
397
398void cpu_idle_wait(void);
399
400extern unsigned long arch_align_stack(unsigned long sp);
401extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
402
403void default_idle(void);
404bool set_pm_idle_to_default(void);
405
406void stop_this_cpu(void *dummy);
407
408/*
409 * Force strict CPU ordering.
410 * And yes, this is required on UP too when we're talking
411 * to devices.
412 */
413#ifdef CONFIG_X86_32
414/*
415 * Some non-Intel clones support out of order store. wmb() ceases to be a
416 * nop for these.
417 */
418#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
419#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
420#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
421#else
422#define mb() asm volatile("mfence":::"memory")
423#define rmb() asm volatile("lfence":::"memory")
424#define wmb() asm volatile("sfence" ::: "memory")
425#endif
426
427/**
428 * read_barrier_depends - Flush all pending reads that subsequents reads
429 * depend on.
430 *
431 * No data-dependent reads from memory-like regions are ever reordered
432 * over this barrier. All reads preceding this primitive are guaranteed
433 * to access memory (but not necessarily other CPUs' caches) before any
434 * reads following this primitive that depend on the data return by
435 * any of the preceding reads. This primitive is much lighter weight than
436 * rmb() on most CPUs, and is never heavier weight than is
437 * rmb().
438 *
439 * These ordering constraints are respected by both the local CPU
440 * and the compiler.
441 *
442 * Ordering is not guaranteed by anything other than these primitives,
443 * not even by data dependencies. See the documentation for
444 * memory_barrier() for examples and URLs to more information.
445 *
446 * For example, the following code would force ordering (the initial
447 * value of "a" is zero, "b" is one, and "p" is "&a"):
448 *
449 * <programlisting>
450 * CPU 0 CPU 1
451 *
452 * b = 2;
453 * memory_barrier();
454 * p = &b; q = p;
455 * read_barrier_depends();
456 * d = *q;
457 * </programlisting>
458 *
459 * because the read of "*q" depends on the read of "p" and these
460 * two reads are separated by a read_barrier_depends(). However,
461 * the following code, with the same initial values for "a" and "b":
462 *
463 * <programlisting>
464 * CPU 0 CPU 1
465 *
466 * a = 2;
467 * memory_barrier();
468 * b = 3; y = b;
469 * read_barrier_depends();
470 * x = a;
471 * </programlisting>
472 *
473 * does not enforce ordering, since there is no data dependency between
474 * the read of "a" and the read of "b". Therefore, on some CPUs, such
475 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
476 * in cases like this where there are no data dependencies.
477 **/
478
479#define read_barrier_depends() do { } while (0)
480
481#ifdef CONFIG_SMP
482#define smp_mb() mb()
483#ifdef CONFIG_X86_PPRO_FENCE
484# define smp_rmb() rmb()
485#else
486# define smp_rmb() barrier()
487#endif
488#ifdef CONFIG_X86_OOSTORE
489# define smp_wmb() wmb()
490#else
491# define smp_wmb() barrier()
492#endif
493#define smp_read_barrier_depends() read_barrier_depends()
494#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
495#else
496#define smp_mb() barrier()
497#define smp_rmb() barrier()
498#define smp_wmb() barrier()
499#define smp_read_barrier_depends() do { } while (0)
500#define set_mb(var, value) do { var = value; barrier(); } while (0)
501#endif
502
503/*
504 * Stop RDTSC speculation. This is needed when you need to use RDTSC
505 * (or get_cycles or vread that possibly accesses the TSC) in a defined
506 * code region.
507 *
508 * (Could use an alternative three way for this if there was one.)
509 */
510static __always_inline void rdtsc_barrier(void)
511{
512 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
513 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
514}
515
516/*
517 * We handle most unaligned accesses in hardware. On the other hand
518 * unaligned DMA can be quite expensive on some Nehalem processors.
519 *
520 * Based on this we disable the IP header alignment in network drivers.
521 */
522#define NET_IP_ALIGN 0
523#endif /* _ASM_X86_SYSTEM_H */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 431793e5d484..34baa0eb5d0c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
57 57
58static inline unsigned long long __cycles_2_ns(unsigned long long cyc) 58static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
59{ 59{
60 unsigned long long quot;
61 unsigned long long rem;
62 int cpu = smp_processor_id(); 60 int cpu = smp_processor_id();
63 unsigned long long ns = per_cpu(cyc2ns_offset, cpu); 61 unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
64 quot = (cyc >> CYC2NS_SCALE_FACTOR); 62 ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
65 rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1); 63 (1UL << CYC2NS_SCALE_FACTOR));
66 ns += quot * per_cpu(cyc2ns, cpu) +
67 ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
68 return ns; 64 return ns;
69} 65}
70 66
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 169be8938b96..c0e108e08079 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,7 +5,7 @@
5#include <linux/sched.h> 5#include <linux/sched.h>
6 6
7#include <asm/processor.h> 7#include <asm/processor.h>
8#include <asm/system.h> 8#include <asm/special_insns.h>
9 9
10#ifdef CONFIG_PARAVIRT 10#ifdef CONFIG_PARAVIRT
11#include <asm/paravirt.h> 11#include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 15d99153a96d..c91e8b9d588b 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu);
61extern void check_tsc_sync_target(void); 61extern void check_tsc_sync_target(void);
62 62
63extern int notsc_setup(char *); 63extern int notsc_setup(char *);
64extern void save_sched_clock_state(void); 64extern void tsc_save_sched_clock_state(void);
65extern void restore_sched_clock_state(void); 65extern void tsc_restore_sched_clock_state(void);
66 66
67#endif /* _ASM_X86_TSC_H */ 67#endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 815285bcaceb..8b38be2de9e1 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -5,13 +5,8 @@
5#include <linux/clocksource.h> 5#include <linux/clocksource.h>
6 6
7struct vsyscall_gtod_data { 7struct vsyscall_gtod_data {
8 seqlock_t lock; 8 seqcount_t seq;
9 9
10 /* open coded 'struct timespec' */
11 time_t wall_time_sec;
12 u32 wall_time_nsec;
13
14 struct timezone sys_tz;
15 struct { /* extract of a clocksource struct */ 10 struct { /* extract of a clocksource struct */
16 int vclock_mode; 11 int vclock_mode;
17 cycle_t cycle_last; 12 cycle_t cycle_last;
@@ -19,8 +14,16 @@ struct vsyscall_gtod_data {
19 u32 mult; 14 u32 mult;
20 u32 shift; 15 u32 shift;
21 } clock; 16 } clock;
22 struct timespec wall_to_monotonic; 17
18 /* open coded 'struct timespec' */
19 time_t wall_time_sec;
20 u32 wall_time_nsec;
21 u32 monotonic_time_nsec;
22 time_t monotonic_time_sec;
23
24 struct timezone sys_tz;
23 struct timespec wall_time_coarse; 25 struct timespec wall_time_coarse;
26 struct timespec monotonic_time_coarse;
24}; 27};
25extern struct vsyscall_gtod_data vsyscall_gtod_data; 28extern struct vsyscall_gtod_data vsyscall_gtod_data;
26 29
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index e0f9aa16358b..5da71c27cc59 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -16,7 +16,6 @@
16#define _ASM_X86_VIRTEX_H 16#define _ASM_X86_VIRTEX_H
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/system.h>
20 19
21#include <asm/vmx.h> 20#include <asm/vmx.h>
22#include <asm/svm.h> 21#include <asm/svm.h>
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..6fe6767b7124
--- /dev/null
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -0,0 +1,46 @@
1#ifndef _ASM_WORD_AT_A_TIME_H
2#define _ASM_WORD_AT_A_TIME_H
3
4/*
5 * This is largely generic for little-endian machines, but the
6 * optimal byte mask counting is probably going to be something
7 * that is architecture-specific. If you have a reliably fast
8 * bit count instruction, that might be better than the multiply
9 * and shift, for example.
10 */
11
12#ifdef CONFIG_64BIT
13
14/*
15 * Jan Achrenius on G+: microoptimized version of
16 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
17 * that works for the bytemasks without having to
18 * mask them first.
19 */
20static inline long count_masked_bytes(unsigned long mask)
21{
22 return mask*0x0001020304050608ul >> 56;
23}
24
25#else /* 32-bit case */
26
27/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
28static inline long count_masked_bytes(long mask)
29{
30 /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
31 long a = (0x0ff0001+mask) >> 23;
32 /* Fix the 1 for 00 case */
33 return a & mask;
34}
35
36#endif
37
38#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
39
40/* Return the high bit set in the first byte that is a zero */
41static inline unsigned long has_zero(unsigned long a)
42{
43 return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
44}
45
46#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
index 6bf5b8e478c0..92e54abf89e0 100644
--- a/arch/x86/include/asm/x2apic.h
+++ b/arch/x86/include/asm/x2apic.h
@@ -18,6 +18,11 @@ static const struct cpumask *x2apic_target_cpus(void)
18 return cpu_online_mask; 18 return cpu_online_mask;
19} 19}
20 20
21static int x2apic_apic_id_valid(int apicid)
22{
23 return 1;
24}
25
21static int x2apic_apic_id_registered(void) 26static int x2apic_apic_id_registered(void)
22{ 27{
23 return 1; 28 return 1;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 517d4767ffdd..baaca8defec8 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -145,9 +145,11 @@ struct x86_init_ops {
145/** 145/**
146 * struct x86_cpuinit_ops - platform specific cpu hotplug setups 146 * struct x86_cpuinit_ops - platform specific cpu hotplug setups
147 * @setup_percpu_clockev: set up the per cpu clock event device 147 * @setup_percpu_clockev: set up the per cpu clock event device
148 * @early_percpu_clock_init: early init of the per cpu clock event device
148 */ 149 */
149struct x86_cpuinit_ops { 150struct x86_cpuinit_ops {
150 void (*setup_percpu_clockev)(void); 151 void (*setup_percpu_clockev)(void);
152 void (*early_percpu_clock_init)(void);
151 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); 153 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
152}; 154};
153 155
@@ -160,6 +162,8 @@ struct x86_cpuinit_ops {
160 * @is_untracked_pat_range exclude from PAT logic 162 * @is_untracked_pat_range exclude from PAT logic
161 * @nmi_init enable NMI on cpus 163 * @nmi_init enable NMI on cpus
162 * @i8042_detect pre-detect if i8042 controller exists 164 * @i8042_detect pre-detect if i8042 controller exists
165 * @save_sched_clock_state: save state for sched_clock() on suspend
166 * @restore_sched_clock_state: restore state for sched_clock() on resume
163 */ 167 */
164struct x86_platform_ops { 168struct x86_platform_ops {
165 unsigned long (*calibrate_tsc)(void); 169 unsigned long (*calibrate_tsc)(void);
@@ -171,6 +175,8 @@ struct x86_platform_ops {
171 void (*nmi_init)(void); 175 void (*nmi_init)(void);
172 unsigned char (*get_nmi_reason)(void); 176 unsigned char (*get_nmi_reason)(void);
173 int (*i8042_detect)(void); 177 int (*i8042_detect)(void);
178 void (*save_sched_clock_state)(void);
179 void (*restore_sched_clock_state)(void);
174}; 180};
175 181
176struct pci_dev; 182struct pci_dev;
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index a1f2db5f1170..cbf0c9d50b92 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -56,6 +56,7 @@ DEFINE_GUEST_HANDLE(int);
56DEFINE_GUEST_HANDLE(long); 56DEFINE_GUEST_HANDLE(long);
57DEFINE_GUEST_HANDLE(void); 57DEFINE_GUEST_HANDLE(void);
58DEFINE_GUEST_HANDLE(uint64_t); 58DEFINE_GUEST_HANDLE(uint64_t);
59DEFINE_GUEST_HANDLE(uint32_t);
59#endif 60#endif
60 61
61#ifndef HYPERVISOR_VIRT_START 62#ifndef HYPERVISOR_VIRT_START
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8c8c365a3bc3..d23d83577d6b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
71obj-$(CONFIG_KPROBES) += kprobes.o 71obj-$(CONFIG_KPROBES) += kprobes.o
72obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
72obj-$(CONFIG_MODULES) += module.o 73obj-$(CONFIG_MODULES) += module.o
73obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o 74obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
74obj-$(CONFIG_KGDB) += kgdb.o 75obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ce664f33ea8e..a415b1f44365 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -239,7 +239,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
239 * to not preallocating memory for all NR_CPUS 239 * to not preallocating memory for all NR_CPUS
240 * when we use CPU hotplug. 240 * when we use CPU hotplug.
241 */ 241 */
242 if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled) 242 if (!apic->apic_id_valid(apic_id) && enabled)
243 printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); 243 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
244 else 244 else
245 acpi_register_lapic(apic_id, enabled); 245 acpi_register_lapic(apic_id, enabled);
@@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void)
593#ifdef CONFIG_ACPI_HOTPLUG_CPU 593#ifdef CONFIG_ACPI_HOTPLUG_CPU
594#include <acpi/processor.h> 594#include <acpi/processor.h>
595 595
596static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) 596static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
597{ 597{
598#ifdef CONFIG_ACPI_NUMA 598#ifdef CONFIG_ACPI_NUMA
599 int nid; 599 int nid;
@@ -642,6 +642,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
642 kfree(buffer.pointer); 642 kfree(buffer.pointer);
643 buffer.length = ACPI_ALLOCATE_BUFFER; 643 buffer.length = ACPI_ALLOCATE_BUFFER;
644 buffer.pointer = NULL; 644 buffer.pointer = NULL;
645 lapic = NULL;
645 646
646 if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) 647 if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
647 goto out; 648 goto out;
@@ -650,7 +651,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
650 goto free_tmp_map; 651 goto free_tmp_map;
651 652
652 cpumask_copy(tmp_map, cpu_present_mask); 653 cpumask_copy(tmp_map, cpu_present_mask);
653 acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); 654 acpi_register_lapic(physid, ACPI_MADT_ENABLED);
654 655
655 /* 656 /*
656 * If mp_register_lapic successfully generates a new logical cpu 657 * If mp_register_lapic successfully generates a new logical cpu
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index f50e7fb2a201..d2b7f27781bc 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -14,6 +14,7 @@
14#include <acpi/processor.h> 14#include <acpi/processor.h>
15#include <asm/acpi.h> 15#include <asm/acpi.h>
16#include <asm/mwait.h> 16#include <asm/mwait.h>
17#include <asm/special_insns.h>
17 18
18/* 19/*
19 * Initialize bm_flags based on the CPU cache properties 20 * Initialize bm_flags based on the CPU cache properties
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b1e7c7f7a0af..e66311200cbd 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -477,7 +477,7 @@ error:
477/* allocate and map a coherent mapping */ 477/* allocate and map a coherent mapping */
478static void * 478static void *
479gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, 479gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
480 gfp_t flag) 480 gfp_t flag, struct dma_attrs *attrs)
481{ 481{
482 dma_addr_t paddr; 482 dma_addr_t paddr;
483 unsigned long align_mask; 483 unsigned long align_mask;
@@ -500,7 +500,8 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
500 } 500 }
501 __free_pages(page, get_order(size)); 501 __free_pages(page, get_order(size));
502 } else 502 } else
503 return dma_generic_alloc_coherent(dev, size, dma_addr, flag); 503 return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
504 attrs);
504 505
505 return NULL; 506 return NULL;
506} 507}
@@ -508,7 +509,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
508/* free a coherent mapping */ 509/* free a coherent mapping */
509static void 510static void
510gart_free_coherent(struct device *dev, size_t size, void *vaddr, 511gart_free_coherent(struct device *dev, size_t size, void *vaddr,
511 dma_addr_t dma_addr) 512 dma_addr_t dma_addr, struct dma_attrs *attrs)
512{ 513{
513 gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); 514 gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
514 free_pages((unsigned long)vaddr, get_order(size)); 515 free_pages((unsigned long)vaddr, get_order(size));
@@ -700,8 +701,8 @@ static struct dma_map_ops gart_dma_ops = {
700 .unmap_sg = gart_unmap_sg, 701 .unmap_sg = gart_unmap_sg,
701 .map_page = gart_map_page, 702 .map_page = gart_map_page,
702 .unmap_page = gart_unmap_page, 703 .unmap_page = gart_unmap_page,
703 .alloc_coherent = gart_alloc_coherent, 704 .alloc = gart_alloc_coherent,
704 .free_coherent = gart_free_coherent, 705 .free = gart_free_coherent,
705 .mapping_error = gart_mapping_error, 706 .mapping_error = gart_mapping_error,
706}; 707};
707 708
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2eec05b6d1b8..11544d8f1e97 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -383,20 +383,25 @@ static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
383 383
384static unsigned int reserve_eilvt_offset(int offset, unsigned int new) 384static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
385{ 385{
386 unsigned int rsvd; /* 0: uninitialized */ 386 unsigned int rsvd, vector;
387 387
388 if (offset >= APIC_EILVT_NR_MAX) 388 if (offset >= APIC_EILVT_NR_MAX)
389 return ~0; 389 return ~0;
390 390
391 rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED; 391 rsvd = atomic_read(&eilvt_offsets[offset]);
392 do { 392 do {
393 if (rsvd && 393 vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */
394 !eilvt_entry_is_changeable(rsvd, new)) 394 if (vector && !eilvt_entry_is_changeable(vector, new))
395 /* may not change if vectors are different */ 395 /* may not change if vectors are different */
396 return rsvd; 396 return rsvd;
397 rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); 397 rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
398 } while (rsvd != new); 398 } while (rsvd != new);
399 399
400 rsvd &= ~APIC_EILVT_MASKED;
401 if (rsvd && rsvd != vector)
402 pr_info("LVT offset %d assigned for vector 0x%02x\n",
403 offset, rsvd);
404
400 return new; 405 return new;
401} 406}
402 407
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 8c3cdded6f2b..359b6899a36c 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -180,6 +180,7 @@ static struct apic apic_flat = {
180 .name = "flat", 180 .name = "flat",
181 .probe = flat_probe, 181 .probe = flat_probe,
182 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 182 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
183 .apic_id_valid = default_apic_id_valid,
183 .apic_id_registered = flat_apic_id_registered, 184 .apic_id_registered = flat_apic_id_registered,
184 185
185 .irq_delivery_mode = dest_LowestPrio, 186 .irq_delivery_mode = dest_LowestPrio,
@@ -337,6 +338,7 @@ static struct apic apic_physflat = {
337 .name = "physical flat", 338 .name = "physical flat",
338 .probe = physflat_probe, 339 .probe = physflat_probe,
339 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 340 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
341 .apic_id_valid = default_apic_id_valid,
340 .apic_id_registered = flat_apic_id_registered, 342 .apic_id_registered = flat_apic_id_registered,
341 343
342 .irq_delivery_mode = dest_Fixed, 344 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 775b82bc655c..634ae6cdd5c9 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -124,6 +124,7 @@ struct apic apic_noop = {
124 .probe = noop_probe, 124 .probe = noop_probe,
125 .acpi_madt_oem_check = NULL, 125 .acpi_madt_oem_check = NULL,
126 126
127 .apic_id_valid = default_apic_id_valid,
127 .apic_id_registered = noop_apic_id_registered, 128 .apic_id_registered = noop_apic_id_registered,
128 129
129 .irq_delivery_mode = dest_LowestPrio, 130 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 09d3d8c1cd99..899803e03214 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -56,6 +56,12 @@ static unsigned int read_xapic_id(void)
56 return get_apic_id(apic_read(APIC_ID)); 56 return get_apic_id(apic_read(APIC_ID));
57} 57}
58 58
59static int numachip_apic_id_valid(int apicid)
60{
61 /* Trust what bootloader passes in MADT */
62 return 1;
63}
64
59static int numachip_apic_id_registered(void) 65static int numachip_apic_id_registered(void)
60{ 66{
61 return physid_isset(read_xapic_id(), phys_cpu_present_map); 67 return physid_isset(read_xapic_id(), phys_cpu_present_map);
@@ -238,6 +244,7 @@ static struct apic apic_numachip __refconst = {
238 .name = "NumaConnect system", 244 .name = "NumaConnect system",
239 .probe = numachip_probe, 245 .probe = numachip_probe,
240 .acpi_madt_oem_check = numachip_acpi_madt_oem_check, 246 .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
247 .apic_id_valid = numachip_apic_id_valid,
241 .apic_id_registered = numachip_apic_id_registered, 248 .apic_id_registered = numachip_apic_id_registered,
242 249
243 .irq_delivery_mode = dest_Fixed, 250 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 521bead01137..0cdec7065aff 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -198,6 +198,7 @@ static struct apic apic_bigsmp = {
198 .name = "bigsmp", 198 .name = "bigsmp",
199 .probe = probe_bigsmp, 199 .probe = probe_bigsmp,
200 .acpi_madt_oem_check = NULL, 200 .acpi_madt_oem_check = NULL,
201 .apic_id_valid = default_apic_id_valid,
201 .apic_id_registered = bigsmp_apic_id_registered, 202 .apic_id_registered = bigsmp_apic_id_registered,
202 203
203 .irq_delivery_mode = dest_Fixed, 204 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 5d513bc47b6b..e42d1d3b9134 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -625,6 +625,7 @@ static struct apic __refdata apic_es7000_cluster = {
625 .name = "es7000", 625 .name = "es7000",
626 .probe = probe_es7000, 626 .probe = probe_es7000,
627 .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, 627 .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
628 .apic_id_valid = default_apic_id_valid,
628 .apic_id_registered = es7000_apic_id_registered, 629 .apic_id_registered = es7000_apic_id_registered,
629 630
630 .irq_delivery_mode = dest_LowestPrio, 631 .irq_delivery_mode = dest_LowestPrio,
@@ -690,6 +691,7 @@ static struct apic __refdata apic_es7000 = {
690 .name = "es7000", 691 .name = "es7000",
691 .probe = probe_es7000, 692 .probe = probe_es7000,
692 .acpi_madt_oem_check = es7000_acpi_madt_oem_check, 693 .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
694 .apic_id_valid = default_apic_id_valid,
693 .apic_id_registered = es7000_apic_id_registered, 695 .apic_id_registered = es7000_apic_id_registered,
694 696
695 .irq_delivery_mode = dest_Fixed, 697 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fb072754bc1d..e88300d8e80a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -64,9 +64,28 @@
64#include <asm/apic.h> 64#include <asm/apic.h>
65 65
66#define __apicdebuginit(type) static type __init 66#define __apicdebuginit(type) static type __init
67
67#define for_each_irq_pin(entry, head) \ 68#define for_each_irq_pin(entry, head) \
68 for (entry = head; entry; entry = entry->next) 69 for (entry = head; entry; entry = entry->next)
69 70
71static void __init __ioapic_init_mappings(void);
72
73static unsigned int __io_apic_read (unsigned int apic, unsigned int reg);
74static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val);
75static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
76
77static struct io_apic_ops io_apic_ops = {
78 .init = __ioapic_init_mappings,
79 .read = __io_apic_read,
80 .write = __io_apic_write,
81 .modify = __io_apic_modify,
82};
83
84void __init set_io_apic_ops(const struct io_apic_ops *ops)
85{
86 io_apic_ops = *ops;
87}
88
70/* 89/*
71 * Is the SiS APIC rmw bug present ? 90 * Is the SiS APIC rmw bug present ?
72 * -1 = don't know, 0 = no, 1 = yes 91 * -1 = don't know, 0 = no, 1 = yes
@@ -294,6 +313,22 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
294 irq_free_desc(at); 313 irq_free_desc(at);
295} 314}
296 315
316static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
317{
318 return io_apic_ops.read(apic, reg);
319}
320
321static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
322{
323 io_apic_ops.write(apic, reg, value);
324}
325
326static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
327{
328 io_apic_ops.modify(apic, reg, value);
329}
330
331
297struct io_apic { 332struct io_apic {
298 unsigned int index; 333 unsigned int index;
299 unsigned int unused[3]; 334 unsigned int unused[3];
@@ -314,16 +349,17 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
314 writel(vector, &io_apic->eoi); 349 writel(vector, &io_apic->eoi);
315} 350}
316 351
317static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 352static unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
318{ 353{
319 struct io_apic __iomem *io_apic = io_apic_base(apic); 354 struct io_apic __iomem *io_apic = io_apic_base(apic);
320 writel(reg, &io_apic->index); 355 writel(reg, &io_apic->index);
321 return readl(&io_apic->data); 356 return readl(&io_apic->data);
322} 357}
323 358
324static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) 359static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
325{ 360{
326 struct io_apic __iomem *io_apic = io_apic_base(apic); 361 struct io_apic __iomem *io_apic = io_apic_base(apic);
362
327 writel(reg, &io_apic->index); 363 writel(reg, &io_apic->index);
328 writel(value, &io_apic->data); 364 writel(value, &io_apic->data);
329} 365}
@@ -334,7 +370,7 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
334 * 370 *
335 * Older SiS APIC requires we rewrite the index register 371 * Older SiS APIC requires we rewrite the index register
336 */ 372 */
337static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) 373static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
338{ 374{
339 struct io_apic __iomem *io_apic = io_apic_base(apic); 375 struct io_apic __iomem *io_apic = io_apic_base(apic);
340 376
@@ -377,6 +413,7 @@ static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
377 413
378 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); 414 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
379 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); 415 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
416
380 return eu.entry; 417 return eu.entry;
381} 418}
382 419
@@ -384,9 +421,11 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
384{ 421{
385 union entry_union eu; 422 union entry_union eu;
386 unsigned long flags; 423 unsigned long flags;
424
387 raw_spin_lock_irqsave(&ioapic_lock, flags); 425 raw_spin_lock_irqsave(&ioapic_lock, flags);
388 eu.entry = __ioapic_read_entry(apic, pin); 426 eu.entry = __ioapic_read_entry(apic, pin);
389 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 427 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
428
390 return eu.entry; 429 return eu.entry;
391} 430}
392 431
@@ -396,8 +435,7 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
396 * the interrupt, and we need to make sure the entry is fully populated 435 * the interrupt, and we need to make sure the entry is fully populated
397 * before that happens. 436 * before that happens.
398 */ 437 */
399static void 438static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
400__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
401{ 439{
402 union entry_union eu = {{0, 0}}; 440 union entry_union eu = {{0, 0}};
403 441
@@ -409,6 +447,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
409static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 447static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
410{ 448{
411 unsigned long flags; 449 unsigned long flags;
450
412 raw_spin_lock_irqsave(&ioapic_lock, flags); 451 raw_spin_lock_irqsave(&ioapic_lock, flags);
413 __ioapic_write_entry(apic, pin, e); 452 __ioapic_write_entry(apic, pin, e);
414 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 453 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -435,8 +474,7 @@ static void ioapic_mask_entry(int apic, int pin)
435 * shared ISA-space IRQs, so we have to support them. We are super 474 * shared ISA-space IRQs, so we have to support them. We are super
436 * fast in the common case, and fast for shared ISA-space IRQs. 475 * fast in the common case, and fast for shared ISA-space IRQs.
437 */ 476 */
438static int 477static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
439__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
440{ 478{
441 struct irq_pin_list **last, *entry; 479 struct irq_pin_list **last, *entry;
442 480
@@ -521,6 +559,7 @@ static void io_apic_sync(struct irq_pin_list *entry)
521 * a dummy read from the IO-APIC 559 * a dummy read from the IO-APIC
522 */ 560 */
523 struct io_apic __iomem *io_apic; 561 struct io_apic __iomem *io_apic;
562
524 io_apic = io_apic_base(entry->apic); 563 io_apic = io_apic_base(entry->apic);
525 readl(&io_apic->data); 564 readl(&io_apic->data);
526} 565}
@@ -2512,21 +2551,73 @@ static void ack_apic_edge(struct irq_data *data)
2512 2551
2513atomic_t irq_mis_count; 2552atomic_t irq_mis_count;
2514 2553
2515static void ack_apic_level(struct irq_data *data)
2516{
2517 struct irq_cfg *cfg = data->chip_data;
2518 int i, do_unmask_irq = 0, irq = data->irq;
2519 unsigned long v;
2520
2521 irq_complete_move(cfg);
2522#ifdef CONFIG_GENERIC_PENDING_IRQ 2554#ifdef CONFIG_GENERIC_PENDING_IRQ
2555static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
2556{
2523 /* If we are moving the irq we need to mask it */ 2557 /* If we are moving the irq we need to mask it */
2524 if (unlikely(irqd_is_setaffinity_pending(data))) { 2558 if (unlikely(irqd_is_setaffinity_pending(data))) {
2525 do_unmask_irq = 1;
2526 mask_ioapic(cfg); 2559 mask_ioapic(cfg);
2560 return true;
2527 } 2561 }
2562 return false;
2563}
2564
2565static inline void ioapic_irqd_unmask(struct irq_data *data,
2566 struct irq_cfg *cfg, bool masked)
2567{
2568 if (unlikely(masked)) {
2569 /* Only migrate the irq if the ack has been received.
2570 *
2571 * On rare occasions the broadcast level triggered ack gets
2572 * delayed going to ioapics, and if we reprogram the
2573 * vector while Remote IRR is still set the irq will never
2574 * fire again.
2575 *
2576 * To prevent this scenario we read the Remote IRR bit
2577 * of the ioapic. This has two effects.
2578 * - On any sane system the read of the ioapic will
2579 * flush writes (and acks) going to the ioapic from
2580 * this cpu.
2581 * - We get to see if the ACK has actually been delivered.
2582 *
2583 * Based on failed experiments of reprogramming the
2584 * ioapic entry from outside of irq context starting
2585 * with masking the ioapic entry and then polling until
2586 * Remote IRR was clear before reprogramming the
2587 * ioapic I don't trust the Remote IRR bit to be
2588 * completey accurate.
2589 *
2590 * However there appears to be no other way to plug
2591 * this race, so if the Remote IRR bit is not
2592 * accurate and is causing problems then it is a hardware bug
2593 * and you can go talk to the chipset vendor about it.
2594 */
2595 if (!io_apic_level_ack_pending(cfg))
2596 irq_move_masked_irq(data);
2597 unmask_ioapic(cfg);
2598 }
2599}
2600#else
2601static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
2602{
2603 return false;
2604}
2605static inline void ioapic_irqd_unmask(struct irq_data *data,
2606 struct irq_cfg *cfg, bool masked)
2607{
2608}
2528#endif 2609#endif
2529 2610
2611static void ack_apic_level(struct irq_data *data)
2612{
2613 struct irq_cfg *cfg = data->chip_data;
2614 int i, irq = data->irq;
2615 unsigned long v;
2616 bool masked;
2617
2618 irq_complete_move(cfg);
2619 masked = ioapic_irqd_mask(data, cfg);
2620
2530 /* 2621 /*
2531 * It appears there is an erratum which affects at least version 0x11 2622 * It appears there is an erratum which affects at least version 0x11
2532 * of I/O APIC (that's the 82093AA and cores integrated into various 2623 * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -2581,38 +2672,7 @@ static void ack_apic_level(struct irq_data *data)
2581 eoi_ioapic_irq(irq, cfg); 2672 eoi_ioapic_irq(irq, cfg);
2582 } 2673 }
2583 2674
2584 /* Now we can move and renable the irq */ 2675 ioapic_irqd_unmask(data, cfg, masked);
2585 if (unlikely(do_unmask_irq)) {
2586 /* Only migrate the irq if the ack has been received.
2587 *
2588 * On rare occasions the broadcast level triggered ack gets
2589 * delayed going to ioapics, and if we reprogram the
2590 * vector while Remote IRR is still set the irq will never
2591 * fire again.
2592 *
2593 * To prevent this scenario we read the Remote IRR bit
2594 * of the ioapic. This has two effects.
2595 * - On any sane system the read of the ioapic will
2596 * flush writes (and acks) going to the ioapic from
2597 * this cpu.
2598 * - We get to see if the ACK has actually been delivered.
2599 *
2600 * Based on failed experiments of reprogramming the
2601 * ioapic entry from outside of irq context starting
2602 * with masking the ioapic entry and then polling until
2603 * Remote IRR was clear before reprogramming the
2604 * ioapic I don't trust the Remote IRR bit to be
2605 * completey accurate.
2606 *
2607 * However there appears to be no other way to plug
2608 * this race, so if the Remote IRR bit is not
2609 * accurate and is causing problems then it is a hardware bug
2610 * and you can go talk to the chipset vendor about it.
2611 */
2612 if (!io_apic_level_ack_pending(cfg))
2613 irq_move_masked_irq(data);
2614 unmask_ioapic(cfg);
2615 }
2616} 2676}
2617 2677
2618#ifdef CONFIG_IRQ_REMAP 2678#ifdef CONFIG_IRQ_REMAP
@@ -3873,6 +3933,11 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3873 3933
3874void __init ioapic_and_gsi_init(void) 3934void __init ioapic_and_gsi_init(void)
3875{ 3935{
3936 io_apic_ops.init();
3937}
3938
3939static void __init __ioapic_init_mappings(void)
3940{
3876 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 3941 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3877 struct resource *ioapic_res; 3942 struct resource *ioapic_res;
3878 int i; 3943 int i;
@@ -3967,18 +4032,36 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
3967static __init int bad_ioapic(unsigned long address) 4032static __init int bad_ioapic(unsigned long address)
3968{ 4033{
3969 if (nr_ioapics >= MAX_IO_APICS) { 4034 if (nr_ioapics >= MAX_IO_APICS) {
3970 printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded " 4035 pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
3971 "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); 4036 MAX_IO_APICS, nr_ioapics);
3972 return 1; 4037 return 1;
3973 } 4038 }
3974 if (!address) { 4039 if (!address) {
3975 printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" 4040 pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
3976 " found in table, skipping!\n");
3977 return 1; 4041 return 1;
3978 } 4042 }
3979 return 0; 4043 return 0;
3980} 4044}
3981 4045
4046static __init int bad_ioapic_register(int idx)
4047{
4048 union IO_APIC_reg_00 reg_00;
4049 union IO_APIC_reg_01 reg_01;
4050 union IO_APIC_reg_02 reg_02;
4051
4052 reg_00.raw = io_apic_read(idx, 0);
4053 reg_01.raw = io_apic_read(idx, 1);
4054 reg_02.raw = io_apic_read(idx, 2);
4055
4056 if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
4057 pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
4058 mpc_ioapic_addr(idx));
4059 return 1;
4060 }
4061
4062 return 0;
4063}
4064
3982void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) 4065void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3983{ 4066{
3984 int idx = 0; 4067 int idx = 0;
@@ -3995,6 +4078,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3995 ioapics[idx].mp_config.apicaddr = address; 4078 ioapics[idx].mp_config.apicaddr = address;
3996 4079
3997 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 4080 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
4081
4082 if (bad_ioapic_register(idx)) {
4083 clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
4084 return;
4085 }
4086
3998 ioapics[idx].mp_config.apicid = io_apic_unique_id(id); 4087 ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
3999 ioapics[idx].mp_config.apicver = io_apic_get_version(idx); 4088 ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
4000 4089
@@ -4015,10 +4104,10 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4015 if (gsi_cfg->gsi_end >= gsi_top) 4104 if (gsi_cfg->gsi_end >= gsi_top)
4016 gsi_top = gsi_cfg->gsi_end + 1; 4105 gsi_top = gsi_cfg->gsi_end + 1;
4017 4106
4018 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " 4107 pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
4019 "GSI %d-%d\n", idx, mpc_ioapic_id(idx), 4108 idx, mpc_ioapic_id(idx),
4020 mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), 4109 mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
4021 gsi_cfg->gsi_base, gsi_cfg->gsi_end); 4110 gsi_cfg->gsi_base, gsi_cfg->gsi_end);
4022 4111
4023 nr_ioapics++; 4112 nr_ioapics++;
4024} 4113}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index c4a61ca1349a..00d2422ca7c9 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -478,6 +478,7 @@ static struct apic __refdata apic_numaq = {
478 .name = "NUMAQ", 478 .name = "NUMAQ",
479 .probe = probe_numaq, 479 .probe = probe_numaq,
480 .acpi_madt_oem_check = NULL, 480 .acpi_madt_oem_check = NULL,
481 .apic_id_valid = default_apic_id_valid,
481 .apic_id_registered = numaq_apic_id_registered, 482 .apic_id_registered = numaq_apic_id_registered,
482 483
483 .irq_delivery_mode = dest_LowestPrio, 484 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0787bb3412f4..ff2c1b9aac4d 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -92,6 +92,7 @@ static struct apic apic_default = {
92 .name = "default", 92 .name = "default",
93 .probe = probe_default, 93 .probe = probe_default,
94 .acpi_madt_oem_check = NULL, 94 .acpi_madt_oem_check = NULL,
95 .apic_id_valid = default_apic_id_valid,
95 .apic_id_registered = default_apic_id_registered, 96 .apic_id_registered = default_apic_id_registered,
96 97
97 .irq_delivery_mode = dest_LowestPrio, 98 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 19114423c58c..fea000b27f07 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -496,6 +496,7 @@ static struct apic apic_summit = {
496 .name = "summit", 496 .name = "summit",
497 .probe = probe_summit, 497 .probe = probe_summit,
498 .acpi_madt_oem_check = summit_acpi_madt_oem_check, 498 .acpi_madt_oem_check = summit_acpi_madt_oem_check,
499 .apic_id_valid = default_apic_id_valid,
499 .apic_id_registered = summit_apic_id_registered, 500 .apic_id_registered = summit_apic_id_registered,
500 501
501 .irq_delivery_mode = dest_LowestPrio, 502 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 500795875827..48f3103b3c93 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -213,6 +213,7 @@ static struct apic apic_x2apic_cluster = {
213 .name = "cluster x2apic", 213 .name = "cluster x2apic",
214 .probe = x2apic_cluster_probe, 214 .probe = x2apic_cluster_probe,
215 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 215 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
216 .apic_id_valid = x2apic_apic_id_valid,
216 .apic_id_registered = x2apic_apic_id_registered, 217 .apic_id_registered = x2apic_apic_id_registered,
217 218
218 .irq_delivery_mode = dest_LowestPrio, 219 .irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f5373dfde21e..8a778db45e3a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -119,6 +119,7 @@ static struct apic apic_x2apic_phys = {
119 .name = "physical x2apic", 119 .name = "physical x2apic",
120 .probe = x2apic_phys_probe, 120 .probe = x2apic_phys_probe,
121 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 121 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
122 .apic_id_valid = x2apic_apic_id_valid,
122 .apic_id_registered = x2apic_apic_id_registered, 123 .apic_id_registered = x2apic_apic_id_registered,
123 124
124 .irq_delivery_mode = dest_Fixed, 125 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 79b05b88aa19..87bfa69e216e 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -266,6 +266,11 @@ static void uv_send_IPI_all(int vector)
266 uv_send_IPI_mask(cpu_online_mask, vector); 266 uv_send_IPI_mask(cpu_online_mask, vector);
267} 267}
268 268
269static int uv_apic_id_valid(int apicid)
270{
271 return 1;
272}
273
269static int uv_apic_id_registered(void) 274static int uv_apic_id_registered(void)
270{ 275{
271 return 1; 276 return 1;
@@ -351,6 +356,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
351 .name = "UV large system", 356 .name = "UV large system",
352 .probe = uv_probe, 357 .probe = uv_probe,
353 .acpi_madt_oem_check = uv_acpi_madt_oem_check, 358 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
359 .apic_id_valid = uv_apic_id_valid,
354 .apic_id_registered = uv_apic_id_registered, 360 .apic_id_registered = uv_apic_id_registered,
355 361
356 .irq_delivery_mode = dest_Fixed, 362 .irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f76623cbe263..459e78cbf61e 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -231,7 +231,6 @@
231#include <linux/syscore_ops.h> 231#include <linux/syscore_ops.h>
232#include <linux/i8253.h> 232#include <linux/i8253.h>
233 233
234#include <asm/system.h>
235#include <asm/uaccess.h> 234#include <asm/uaccess.h>
236#include <asm/desc.h> 235#include <asm/desc.h>
237#include <asm/olpc.h> 236#include <asm/olpc.h>
@@ -1234,8 +1233,7 @@ static int suspend(int vetoable)
1234 struct apm_user *as; 1233 struct apm_user *as;
1235 1234
1236 dpm_suspend_start(PMSG_SUSPEND); 1235 dpm_suspend_start(PMSG_SUSPEND);
1237 1236 dpm_suspend_end(PMSG_SUSPEND);
1238 dpm_suspend_noirq(PMSG_SUSPEND);
1239 1237
1240 local_irq_disable(); 1238 local_irq_disable();
1241 syscore_suspend(); 1239 syscore_suspend();
@@ -1259,9 +1257,9 @@ static int suspend(int vetoable)
1259 syscore_resume(); 1257 syscore_resume();
1260 local_irq_enable(); 1258 local_irq_enable();
1261 1259
1262 dpm_resume_noirq(PMSG_RESUME); 1260 dpm_resume_start(PMSG_RESUME);
1263
1264 dpm_resume_end(PMSG_RESUME); 1261 dpm_resume_end(PMSG_RESUME);
1262
1265 queue_event(APM_NORMAL_RESUME, NULL); 1263 queue_event(APM_NORMAL_RESUME, NULL);
1266 spin_lock(&user_list_lock); 1264 spin_lock(&user_list_lock);
1267 for (as = user_list; as != NULL; as = as->next) { 1265 for (as = user_list; as != NULL; as = as->next) {
@@ -1277,7 +1275,7 @@ static void standby(void)
1277{ 1275{
1278 int err; 1276 int err;
1279 1277
1280 dpm_suspend_noirq(PMSG_SUSPEND); 1278 dpm_suspend_end(PMSG_SUSPEND);
1281 1279
1282 local_irq_disable(); 1280 local_irq_disable();
1283 syscore_suspend(); 1281 syscore_suspend();
@@ -1291,7 +1289,7 @@ static void standby(void)
1291 syscore_resume(); 1289 syscore_resume();
1292 local_irq_enable(); 1290 local_irq_enable();
1293 1291
1294 dpm_resume_noirq(PMSG_RESUME); 1292 dpm_resume_start(PMSG_RESUME);
1295} 1293}
1296 1294
1297static apm_event_t get_event(void) 1295static apm_event_t get_event(void)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 25f24dccdcfa..6ab6aa2fdfdd 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
16obj-y += proc.o capflags.o powerflags.o common.o 16obj-y += proc.o capflags.o powerflags.o common.o
17obj-y += vmware.o hypervisor.o sched.o mshyperv.o 17obj-y += vmware.o hypervisor.o sched.o mshyperv.o
18obj-y += rdrand.o 18obj-y += rdrand.o
19obj-y += match.o
19 20
20obj-$(CONFIG_X86_32) += bugs.o 21obj-$(CONFIG_X86_32) += bugs.o
21obj-$(CONFIG_X86_64) += bugs_64.o 22obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c0f7d68d318f..67e258362a3d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -18,6 +18,7 @@
18#include <asm/archrandom.h> 18#include <asm/archrandom.h>
19#include <asm/hypervisor.h> 19#include <asm/hypervisor.h>
20#include <asm/processor.h> 20#include <asm/processor.h>
21#include <asm/debugreg.h>
21#include <asm/sections.h> 22#include <asm/sections.h>
22#include <linux/topology.h> 23#include <linux/topology.h>
23#include <linux/cpumask.h> 24#include <linux/cpumask.h>
@@ -28,6 +29,7 @@
28#include <asm/apic.h> 29#include <asm/apic.h>
29#include <asm/desc.h> 30#include <asm/desc.h>
30#include <asm/i387.h> 31#include <asm/i387.h>
32#include <asm/fpu-internal.h>
31#include <asm/mtrr.h> 33#include <asm/mtrr.h>
32#include <linux/numa.h> 34#include <linux/numa.h>
33#include <asm/asm.h> 35#include <asm/asm.h>
@@ -933,7 +935,7 @@ static const struct msr_range msr_range_array[] __cpuinitconst = {
933 { 0xc0011000, 0xc001103b}, 935 { 0xc0011000, 0xc001103b},
934}; 936};
935 937
936static void __cpuinit print_cpu_msr(void) 938static void __cpuinit __print_cpu_msr(void)
937{ 939{
938 unsigned index_min, index_max; 940 unsigned index_min, index_max;
939 unsigned index; 941 unsigned index;
@@ -997,13 +999,13 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
997 else 999 else
998 printk(KERN_CONT "\n"); 1000 printk(KERN_CONT "\n");
999 1001
1000#ifdef CONFIG_SMP 1002 print_cpu_msr(c);
1003}
1004
1005void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c)
1006{
1001 if (c->cpu_index < show_msr) 1007 if (c->cpu_index < show_msr)
1002 print_cpu_msr(); 1008 __print_cpu_msr();
1003#else
1004 if (show_msr)
1005 print_cpu_msr();
1006#endif
1007} 1009}
1008 1010
1009static __init int setup_disablecpuid(char *arg) 1011static __init int setup_disablecpuid(char *arg)
@@ -1045,7 +1047,6 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
1045DEFINE_PER_CPU(unsigned int, irq_count) = -1; 1047DEFINE_PER_CPU(unsigned int, irq_count) = -1;
1046 1048
1047DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); 1049DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1048EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
1049 1050
1050/* 1051/*
1051 * Special IST stacks which the CPU switches to when it calls 1052 * Special IST stacks which the CPU switches to when it calls
@@ -1115,7 +1116,6 @@ void debug_stack_reset(void)
1115DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 1116DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
1116EXPORT_PER_CPU_SYMBOL(current_task); 1117EXPORT_PER_CPU_SYMBOL(current_task);
1117DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); 1118DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1118EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
1119 1119
1120#ifdef CONFIG_CC_STACKPROTECTOR 1120#ifdef CONFIG_CC_STACKPROTECTOR
1121DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); 1121DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
new file mode 100644
index 000000000000..5502b289341b
--- /dev/null
+++ b/arch/x86/kernel/cpu/match.c
@@ -0,0 +1,91 @@
1#include <asm/cpu_device_id.h>
2#include <asm/processor.h>
3#include <linux/cpu.h>
4#include <linux/module.h>
5#include <linux/slab.h>
6
7/**
8 * x86_match_cpu - match current CPU again an array of x86_cpu_ids
9 * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
10 * {}.
11 *
12 * Return the entry if the current CPU matches the entries in the
13 * passed x86_cpu_id match table. Otherwise NULL. The match table
14 * contains vendor (X86_VENDOR_*), family, model and feature bits or
15 * respective wildcard entries.
16 *
17 * A typical table entry would be to match a specific CPU
18 * { X86_VENDOR_INTEL, 6, 0x12 }
19 * or to match a specific CPU feature
20 * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
21 *
22 * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
23 * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
24 *
25 * Arrays used to match for this should also be declared using
26 * MODULE_DEVICE_TABLE(x86_cpu, ...)
27 *
28 * This always matches against the boot cpu, assuming models and features are
29 * consistent over all CPUs.
30 */
31const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
32{
33 const struct x86_cpu_id *m;
34 struct cpuinfo_x86 *c = &boot_cpu_data;
35
36 for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
37 if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
38 continue;
39 if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
40 continue;
41 if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
42 continue;
43 if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
44 continue;
45 return m;
46 }
47 return NULL;
48}
49EXPORT_SYMBOL(x86_match_cpu);
50
51ssize_t arch_print_cpu_modalias(struct device *dev,
52 struct device_attribute *attr,
53 char *bufptr)
54{
55 int size = PAGE_SIZE;
56 int i, n;
57 char *buf = bufptr;
58
59 n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:"
60 "model:%04X:feature:",
61 boot_cpu_data.x86_vendor,
62 boot_cpu_data.x86,
63 boot_cpu_data.x86_model);
64 size -= n;
65 buf += n;
66 size -= 1;
67 for (i = 0; i < NCAPINTS*32; i++) {
68 if (boot_cpu_has(i)) {
69 n = snprintf(buf, size, ",%04X", i);
70 if (n >= size) {
71 WARN(1, "x86 features overflow page\n");
72 break;
73 }
74 size -= n;
75 buf += n;
76 }
77 }
78 *buf++ = '\n';
79 return buf - bufptr;
80}
81
82int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
83{
84 char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
85 if (buf) {
86 arch_print_cpu_modalias(NULL, NULL, buf);
87 add_uevent_var(env, "MODALIAS=%s", buf);
88 kfree(buf);
89 }
90 return 0;
91}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 7395d5f4272d..0c82091b1652 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -54,7 +54,14 @@ static struct severity {
54#define MASK(x, y) .mask = x, .result = y 54#define MASK(x, y) .mask = x, .result = y
55#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) 55#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
56#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) 56#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
57#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
57#define MCACOD 0xffff 58#define MCACOD 0xffff
59/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
60#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
61#define MCACOD_SCRUBMSK 0xfff0
62#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
63#define MCACOD_DATA 0x0134 /* Data Load */
64#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
58 65
59 MCESEV( 66 MCESEV(
60 NO, "Invalid", 67 NO, "Invalid",
@@ -102,11 +109,24 @@ static struct severity {
102 SER, BITCLR(MCI_STATUS_S) 109 SER, BITCLR(MCI_STATUS_S)
103 ), 110 ),
104 111
105 /* AR add known MCACODs here */
106 MCESEV( 112 MCESEV(
107 PANIC, "Action required with lost events", 113 PANIC, "Action required with lost events",
108 SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR) 114 SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
109 ), 115 ),
116
117 /* known AR MCACODs: */
118#ifdef CONFIG_MEMORY_FAILURE
119 MCESEV(
120 KEEP, "HT thread notices Action required: data load error",
121 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
122 MCGMASK(MCG_STATUS_EIPV, 0)
123 ),
124 MCESEV(
125 AR, "Action required: data load error",
126 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
127 USER
128 ),
129#endif
110 MCESEV( 130 MCESEV(
111 PANIC, "Action required: unknown MCACOD", 131 PANIC, "Action required: unknown MCACOD",
112 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) 132 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
@@ -115,11 +135,11 @@ static struct severity {
115 /* known AO MCACODs: */ 135 /* known AO MCACODs: */
116 MCESEV( 136 MCESEV(
117 AO, "Action optional: memory scrubbing error", 137 AO, "Action optional: memory scrubbing error",
118 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0) 138 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
119 ), 139 ),
120 MCESEV( 140 MCESEV(
121 AO, "Action optional: last level cache writeback error", 141 AO, "Action optional: last level cache writeback error",
122 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a) 142 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
123 ), 143 ),
124 MCESEV( 144 MCESEV(
125 SOME, "Action optional: unknown MCACOD", 145 SOME, "Action optional: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a11ae2e9e91..d086a09c087d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void)
191{ 191{
192 unsigned int next, i, prev = 0; 192 unsigned int next, i, prev = 0;
193 193
194 next = rcu_dereference_check_mce(mcelog.next); 194 next = ACCESS_ONCE(mcelog.next);
195 195
196 do { 196 do {
197 struct mce *m; 197 struct mce *m;
@@ -540,6 +540,27 @@ static void mce_report_event(struct pt_regs *regs)
540 irq_work_queue(&__get_cpu_var(mce_irq_work)); 540 irq_work_queue(&__get_cpu_var(mce_irq_work));
541} 541}
542 542
543/*
544 * Read ADDR and MISC registers.
545 */
546static void mce_read_aux(struct mce *m, int i)
547{
548 if (m->status & MCI_STATUS_MISCV)
549 m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
550 if (m->status & MCI_STATUS_ADDRV) {
551 m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
552
553 /*
554 * Mask the reported address by the reported granularity.
555 */
556 if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
557 u8 shift = MCI_MISC_ADDR_LSB(m->misc);
558 m->addr >>= shift;
559 m->addr <<= shift;
560 }
561 }
562}
563
543DEFINE_PER_CPU(unsigned, mce_poll_count); 564DEFINE_PER_CPU(unsigned, mce_poll_count);
544 565
545/* 566/*
@@ -590,10 +611,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
590 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) 611 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
591 continue; 612 continue;
592 613
593 if (m.status & MCI_STATUS_MISCV) 614 mce_read_aux(&m, i);
594 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
595 if (m.status & MCI_STATUS_ADDRV)
596 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
597 615
598 if (!(flags & MCP_TIMESTAMP)) 616 if (!(flags & MCP_TIMESTAMP))
599 m.tsc = 0; 617 m.tsc = 0;
@@ -917,6 +935,49 @@ static void mce_clear_state(unsigned long *toclear)
917} 935}
918 936
919/* 937/*
938 * Need to save faulting physical address associated with a process
939 * in the machine check handler some place where we can grab it back
940 * later in mce_notify_process()
941 */
942#define MCE_INFO_MAX 16
943
944struct mce_info {
945 atomic_t inuse;
946 struct task_struct *t;
947 __u64 paddr;
948} mce_info[MCE_INFO_MAX];
949
950static void mce_save_info(__u64 addr)
951{
952 struct mce_info *mi;
953
954 for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
955 if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
956 mi->t = current;
957 mi->paddr = addr;
958 return;
959 }
960 }
961
962 mce_panic("Too many concurrent recoverable errors", NULL, NULL);
963}
964
965static struct mce_info *mce_find_info(void)
966{
967 struct mce_info *mi;
968
969 for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
970 if (atomic_read(&mi->inuse) && mi->t == current)
971 return mi;
972 return NULL;
973}
974
975static void mce_clear_info(struct mce_info *mi)
976{
977 atomic_set(&mi->inuse, 0);
978}
979
980/*
920 * The actual machine check handler. This only handles real 981 * The actual machine check handler. This only handles real
921 * exceptions when something got corrupted coming in through int 18. 982 * exceptions when something got corrupted coming in through int 18.
922 * 983 *
@@ -969,7 +1030,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
969 barrier(); 1030 barrier();
970 1031
971 /* 1032 /*
972 * When no restart IP must always kill or panic. 1033 * When no restart IP might need to kill or panic.
1034 * Assume the worst for now, but if we find the
1035 * severity is MCE_AR_SEVERITY we have other options.
973 */ 1036 */
974 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 1037 if (!(m.mcgstatus & MCG_STATUS_RIPV))
975 kill_it = 1; 1038 kill_it = 1;
@@ -1023,16 +1086,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1023 continue; 1086 continue;
1024 } 1087 }
1025 1088
1026 /* 1089 mce_read_aux(&m, i);
1027 * Kill on action required.
1028 */
1029 if (severity == MCE_AR_SEVERITY)
1030 kill_it = 1;
1031
1032 if (m.status & MCI_STATUS_MISCV)
1033 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
1034 if (m.status & MCI_STATUS_ADDRV)
1035 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
1036 1090
1037 /* 1091 /*
1038 * Action optional error. Queue address for later processing. 1092 * Action optional error. Queue address for later processing.
@@ -1052,6 +1106,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1052 } 1106 }
1053 } 1107 }
1054 1108
1109 /* mce_clear_state will clear *final, save locally for use later */
1110 m = *final;
1111
1055 if (!no_way_out) 1112 if (!no_way_out)
1056 mce_clear_state(toclear); 1113 mce_clear_state(toclear);
1057 1114
@@ -1063,27 +1120,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1063 no_way_out = worst >= MCE_PANIC_SEVERITY; 1120 no_way_out = worst >= MCE_PANIC_SEVERITY;
1064 1121
1065 /* 1122 /*
1066 * If we have decided that we just CAN'T continue, and the user 1123 * At insane "tolerant" levels we take no action. Otherwise
1067 * has not set tolerant to an insane level, give up and die. 1124 * we only die if we have no other choice. For less serious
1068 * 1125 * issues we try to recover, or limit damage to the current
1069 * This is mainly used in the case when the system doesn't 1126 * process.
1070 * support MCE broadcasting or it has been disabled.
1071 */
1072 if (no_way_out && tolerant < 3)
1073 mce_panic("Fatal machine check on current CPU", final, msg);
1074
1075 /*
1076 * If the error seems to be unrecoverable, something should be
1077 * done. Try to kill as little as possible. If we can kill just
1078 * one task, do that. If the user has set the tolerance very
1079 * high, don't try to do anything at all.
1080 */ 1127 */
1081 1128 if (tolerant < 3) {
1082 if (kill_it && tolerant < 3) 1129 if (no_way_out)
1083 force_sig(SIGBUS, current); 1130 mce_panic("Fatal machine check on current CPU", &m, msg);
1084 1131 if (worst == MCE_AR_SEVERITY) {
1085 /* notify userspace ASAP */ 1132 /* schedule action before return to userland */
1086 set_thread_flag(TIF_MCE_NOTIFY); 1133 mce_save_info(m.addr);
1134 set_thread_flag(TIF_MCE_NOTIFY);
1135 } else if (kill_it) {
1136 force_sig(SIGBUS, current);
1137 }
1138 }
1087 1139
1088 if (worst > 0) 1140 if (worst > 0)
1089 mce_report_event(regs); 1141 mce_report_event(regs);
@@ -1094,34 +1146,57 @@ out:
1094} 1146}
1095EXPORT_SYMBOL_GPL(do_machine_check); 1147EXPORT_SYMBOL_GPL(do_machine_check);
1096 1148
1097/* dummy to break dependency. actual code is in mm/memory-failure.c */ 1149#ifndef CONFIG_MEMORY_FAILURE
1098void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) 1150int memory_failure(unsigned long pfn, int vector, int flags)
1099{ 1151{
1100 printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); 1152 /* mce_severity() should not hand us an ACTION_REQUIRED error */
1153 BUG_ON(flags & MF_ACTION_REQUIRED);
1154 printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
1155 "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
1156
1157 return 0;
1101} 1158}
1159#endif
1102 1160
1103/* 1161/*
1104 * Called after mce notification in process context. This code 1162 * Called in process context that interrupted by MCE and marked with
1105 * is allowed to sleep. Call the high level VM handler to process 1163 * TIF_MCE_NOTIFY, just before returning to erroneous userland.
1106 * any corrupted pages. 1164 * This code is allowed to sleep.
1107 * Assume that the work queue code only calls this one at a time 1165 * Attempt possible recovery such as calling the high level VM handler to
1108 * per CPU. 1166 * process any corrupted pages, and kill/signal current process if required.
1109 * Note we don't disable preemption, so this code might run on the wrong 1167 * Action required errors are handled here.
1110 * CPU. In this case the event is picked up by the scheduled work queue.
1111 * This is merely a fast path to expedite processing in some common
1112 * cases.
1113 */ 1168 */
1114void mce_notify_process(void) 1169void mce_notify_process(void)
1115{ 1170{
1116 unsigned long pfn; 1171 unsigned long pfn;
1117 mce_notify_irq(); 1172 struct mce_info *mi = mce_find_info();
1118 while (mce_ring_get(&pfn)) 1173
1119 memory_failure(pfn, MCE_VECTOR); 1174 if (!mi)
1175 mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
1176 pfn = mi->paddr >> PAGE_SHIFT;
1177
1178 clear_thread_flag(TIF_MCE_NOTIFY);
1179
1180 pr_err("Uncorrected hardware memory error in user-access at %llx",
1181 mi->paddr);
1182 if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
1183 pr_err("Memory error not recovered");
1184 force_sig(SIGBUS, current);
1185 }
1186 mce_clear_info(mi);
1120} 1187}
1121 1188
1189/*
1190 * Action optional processing happens here (picking up
1191 * from the list of faulting pages that do_machine_check()
1192 * placed into the "ring").
1193 */
1122static void mce_process_work(struct work_struct *dummy) 1194static void mce_process_work(struct work_struct *dummy)
1123{ 1195{
1124 mce_notify_process(); 1196 unsigned long pfn;
1197
1198 while (mce_ring_get(&pfn))
1199 memory_failure(pfn, MCE_VECTOR, 0);
1125} 1200}
1126 1201
1127#ifdef CONFIG_X86_MCE_INTEL 1202#ifdef CONFIG_X86_MCE_INTEL
@@ -1211,8 +1286,6 @@ int mce_notify_irq(void)
1211 /* Not more than two messages every minute */ 1286 /* Not more than two messages every minute */
1212 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); 1287 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
1213 1288
1214 clear_thread_flag(TIF_MCE_NOTIFY);
1215
1216 if (test_and_clear_bit(0, &mce_need_notify)) { 1289 if (test_and_clear_bit(0, &mce_need_notify)) {
1217 /* wake processes polling /dev/mcelog */ 1290 /* wake processes polling /dev/mcelog */
1218 wake_up_interruptible(&mce_chrdev_wait); 1291 wake_up_interruptible(&mce_chrdev_wait);
@@ -1541,6 +1614,12 @@ static int __mce_read_apei(char __user **ubuf, size_t usize)
1541 /* Error or no more MCE record */ 1614 /* Error or no more MCE record */
1542 if (rc <= 0) { 1615 if (rc <= 0) {
1543 mce_apei_read_done = 1; 1616 mce_apei_read_done = 1;
1617 /*
1618 * When ERST is disabled, mce_chrdev_read() should return
1619 * "no record" instead of "no device."
1620 */
1621 if (rc == -ENODEV)
1622 return 0;
1544 return rc; 1623 return rc;
1545 } 1624 }
1546 rc = -EFAULT; 1625 rc = -EFAULT;
@@ -1859,7 +1938,7 @@ static struct bus_type mce_subsys = {
1859 .dev_name = "machinecheck", 1938 .dev_name = "machinecheck",
1860}; 1939};
1861 1940
1862struct device *mce_device[CONFIG_NR_CPUS]; 1941DEFINE_PER_CPU(struct device *, mce_device);
1863 1942
1864__cpuinitdata 1943__cpuinitdata
1865void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1944void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -2038,7 +2117,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
2038 goto error2; 2117 goto error2;
2039 } 2118 }
2040 cpumask_set_cpu(cpu, mce_device_initialized); 2119 cpumask_set_cpu(cpu, mce_device_initialized);
2041 mce_device[cpu] = dev; 2120 per_cpu(mce_device, cpu) = dev;
2042 2121
2043 return 0; 2122 return 0;
2044error2: 2123error2:
@@ -2055,7 +2134,7 @@ error:
2055 2134
2056static __cpuinit void mce_device_remove(unsigned int cpu) 2135static __cpuinit void mce_device_remove(unsigned int cpu)
2057{ 2136{
2058 struct device *dev = mce_device[cpu]; 2137 struct device *dev = per_cpu(mce_device, cpu);
2059 int i; 2138 int i;
2060 2139
2061 if (!cpumask_test_cpu(cpu, mce_device_initialized)) 2140 if (!cpumask_test_cpu(cpu, mce_device_initialized))
@@ -2069,7 +2148,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
2069 2148
2070 device_unregister(dev); 2149 device_unregister(dev);
2071 cpumask_clear_cpu(cpu, mce_device_initialized); 2150 cpumask_clear_cpu(cpu, mce_device_initialized);
2072 mce_device[cpu] = NULL; 2151 per_cpu(mce_device, cpu) = NULL;
2073} 2152}
2074 2153
2075/* Make sure there are no machine checks on offlined CPUs. */ 2154/* Make sure there are no machine checks on offlined CPUs. */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index e4eeaaf58a47..99b57179f912 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -523,7 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
523{ 523{
524 int i, err = 0; 524 int i, err = 0;
525 struct threshold_bank *b = NULL; 525 struct threshold_bank *b = NULL;
526 struct device *dev = mce_device[cpu]; 526 struct device *dev = per_cpu(mce_device, cpu);
527 char name[32]; 527 char name[32];
528 528
529 sprintf(name, "threshold_bank%i", bank); 529 sprintf(name, "threshold_bank%i", bank);
@@ -587,7 +587,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
587 if (i == cpu) 587 if (i == cpu)
588 continue; 588 continue;
589 589
590 dev = mce_device[i]; 590 dev = per_cpu(mce_device, i);
591 if (dev) 591 if (dev)
592 err = sysfs_create_link(&dev->kobj,b->kobj, name); 592 err = sysfs_create_link(&dev->kobj,b->kobj, name);
593 if (err) 593 if (err)
@@ -667,7 +667,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
667#ifdef CONFIG_SMP 667#ifdef CONFIG_SMP
668 /* sibling symlink */ 668 /* sibling symlink */
669 if (shared_bank[bank] && b->blocks->cpu != cpu) { 669 if (shared_bank[bank] && b->blocks->cpu != cpu) {
670 sysfs_remove_link(&mce_device[cpu]->kobj, name); 670 dev = per_cpu(mce_device, cpu);
671 sysfs_remove_link(&dev->kobj, name);
671 per_cpu(threshold_banks, cpu)[bank] = NULL; 672 per_cpu(threshold_banks, cpu)[bank] = NULL;
672 673
673 return; 674 return;
@@ -679,7 +680,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
679 if (i == cpu) 680 if (i == cpu)
680 continue; 681 continue;
681 682
682 dev = mce_device[i]; 683 dev = per_cpu(mce_device, i);
683 if (dev) 684 if (dev)
684 sysfs_remove_link(&dev->kobj, name); 685 sysfs_remove_link(&dev->kobj, name);
685 per_cpu(threshold_banks, i)[bank] = NULL; 686 per_cpu(threshold_banks, i)[bank] = NULL;
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 5c0e6533d9bc..2d5454cd2c4f 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -9,7 +9,6 @@
9#include <linux/smp.h> 9#include <linux/smp.h>
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/mce.h> 12#include <asm/mce.h>
14#include <asm/msr.h> 13#include <asm/msr.h>
15 14
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 67bb17a37a0a..47a1870279aa 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -25,7 +25,6 @@
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26 26
27#include <asm/processor.h> 27#include <asm/processor.h>
28#include <asm/system.h>
29#include <asm/apic.h> 28#include <asm/apic.h>
30#include <asm/idle.h> 29#include <asm/idle.h>
31#include <asm/mce.h> 30#include <asm/mce.h>
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 54060f565974..2d7998fb628c 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -8,7 +8,6 @@
8#include <linux/init.h> 8#include <linux/init.h>
9 9
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h>
12#include <asm/mce.h> 11#include <asm/mce.h>
13#include <asm/msr.h> 12#include <asm/msr.h>
14 13
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 97b26356e9ee..75772ae6c65f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -12,7 +12,6 @@
12#include <asm/processor-flags.h> 12#include <asm/processor-flags.h>
13#include <asm/cpufeature.h> 13#include <asm/cpufeature.h>
14#include <asm/tlbflush.h> 14#include <asm/tlbflush.h>
15#include <asm/system.h>
16#include <asm/mtrr.h> 15#include <asm/mtrr.h>
17#include <asm/msr.h> 16#include <asm/msr.h>
18#include <asm/pat.h> 17#include <asm/pat.h>
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1c52bdbb9b8b..bb8e03407e18 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -352,6 +352,36 @@ int x86_setup_perfctr(struct perf_event *event)
352 return 0; 352 return 0;
353} 353}
354 354
355/*
356 * check that branch_sample_type is compatible with
357 * settings needed for precise_ip > 1 which implies
358 * using the LBR to capture ALL taken branches at the
359 * priv levels of the measurement
360 */
361static inline int precise_br_compat(struct perf_event *event)
362{
363 u64 m = event->attr.branch_sample_type;
364 u64 b = 0;
365
366 /* must capture all branches */
367 if (!(m & PERF_SAMPLE_BRANCH_ANY))
368 return 0;
369
370 m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
371
372 if (!event->attr.exclude_user)
373 b |= PERF_SAMPLE_BRANCH_USER;
374
375 if (!event->attr.exclude_kernel)
376 b |= PERF_SAMPLE_BRANCH_KERNEL;
377
378 /*
379 * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
380 */
381
382 return m == b;
383}
384
355int x86_pmu_hw_config(struct perf_event *event) 385int x86_pmu_hw_config(struct perf_event *event)
356{ 386{
357 if (event->attr.precise_ip) { 387 if (event->attr.precise_ip) {
@@ -368,6 +398,36 @@ int x86_pmu_hw_config(struct perf_event *event)
368 398
369 if (event->attr.precise_ip > precise) 399 if (event->attr.precise_ip > precise)
370 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
401 /*
402 * check that PEBS LBR correction does not conflict with
403 * whatever the user is asking with attr->branch_sample_type
404 */
405 if (event->attr.precise_ip > 1) {
406 u64 *br_type = &event->attr.branch_sample_type;
407
408 if (has_branch_stack(event)) {
409 if (!precise_br_compat(event))
410 return -EOPNOTSUPP;
411
412 /* branch_sample_type is compatible */
413
414 } else {
415 /*
416 * user did not specify branch_sample_type
417 *
418 * For PEBS fixups, we capture all
419 * the branches at the priv level of the
420 * event.
421 */
422 *br_type = PERF_SAMPLE_BRANCH_ANY;
423
424 if (!event->attr.exclude_user)
425 *br_type |= PERF_SAMPLE_BRANCH_USER;
426
427 if (!event->attr.exclude_kernel)
428 *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
429 }
430 }
371 } 431 }
372 432
373 /* 433 /*
@@ -425,6 +485,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
425 /* mark unused */ 485 /* mark unused */
426 event->hw.extra_reg.idx = EXTRA_REG_NONE; 486 event->hw.extra_reg.idx = EXTRA_REG_NONE;
427 487
488 /* mark not used */
489 event->hw.extra_reg.idx = EXTRA_REG_NONE;
490 event->hw.branch_reg.idx = EXTRA_REG_NONE;
491
428 return x86_pmu.hw_config(event); 492 return x86_pmu.hw_config(event);
429} 493}
430 494
@@ -578,14 +642,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
578 /* Prefer fixed purpose counters */ 642 /* Prefer fixed purpose counters */
579 if (x86_pmu.num_counters_fixed) { 643 if (x86_pmu.num_counters_fixed) {
580 idx = X86_PMC_IDX_FIXED; 644 idx = X86_PMC_IDX_FIXED;
581 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { 645 for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
582 if (!__test_and_set_bit(idx, sched->state.used)) 646 if (!__test_and_set_bit(idx, sched->state.used))
583 goto done; 647 goto done;
584 } 648 }
585 } 649 }
586 /* Grab the first unused counter starting with idx */ 650 /* Grab the first unused counter starting with idx */
587 idx = sched->state.counter; 651 idx = sched->state.counter;
588 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { 652 for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
589 if (!__test_and_set_bit(idx, sched->state.used)) 653 if (!__test_and_set_bit(idx, sched->state.used))
590 goto done; 654 goto done;
591 } 655 }
@@ -1249,6 +1313,11 @@ static void __init pmu_check_apic(void)
1249 pr_info("no hardware sampling interrupt available.\n"); 1313 pr_info("no hardware sampling interrupt available.\n");
1250} 1314}
1251 1315
1316static struct attribute_group x86_pmu_format_group = {
1317 .name = "format",
1318 .attrs = NULL,
1319};
1320
1252static int __init init_hw_perf_events(void) 1321static int __init init_hw_perf_events(void)
1253{ 1322{
1254 struct x86_pmu_quirk *quirk; 1323 struct x86_pmu_quirk *quirk;
@@ -1323,6 +1392,7 @@ static int __init init_hw_perf_events(void)
1323 } 1392 }
1324 1393
1325 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ 1394 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1395 x86_pmu_format_group.attrs = x86_pmu.format_attrs;
1326 1396
1327 pr_info("... version: %d\n", x86_pmu.version); 1397 pr_info("... version: %d\n", x86_pmu.version);
1328 pr_info("... bit width: %d\n", x86_pmu.cntval_bits); 1398 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
@@ -1551,6 +1621,9 @@ static int x86_pmu_event_idx(struct perf_event *event)
1551{ 1621{
1552 int idx = event->hw.idx; 1622 int idx = event->hw.idx;
1553 1623
1624 if (!x86_pmu.attr_rdpmc)
1625 return 0;
1626
1554 if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { 1627 if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
1555 idx -= X86_PMC_IDX_FIXED; 1628 idx -= X86_PMC_IDX_FIXED;
1556 idx |= 1 << 30; 1629 idx |= 1 << 30;
@@ -1603,38 +1676,51 @@ static struct attribute_group x86_pmu_attr_group = {
1603 1676
1604static const struct attribute_group *x86_pmu_attr_groups[] = { 1677static const struct attribute_group *x86_pmu_attr_groups[] = {
1605 &x86_pmu_attr_group, 1678 &x86_pmu_attr_group,
1679 &x86_pmu_format_group,
1606 NULL, 1680 NULL,
1607}; 1681};
1608 1682
1683static void x86_pmu_flush_branch_stack(void)
1684{
1685 if (x86_pmu.flush_branch_stack)
1686 x86_pmu.flush_branch_stack();
1687}
1688
1609static struct pmu pmu = { 1689static struct pmu pmu = {
1610 .pmu_enable = x86_pmu_enable, 1690 .pmu_enable = x86_pmu_enable,
1611 .pmu_disable = x86_pmu_disable, 1691 .pmu_disable = x86_pmu_disable,
1612 1692
1613 .attr_groups = x86_pmu_attr_groups, 1693 .attr_groups = x86_pmu_attr_groups,
1614 1694
1615 .event_init = x86_pmu_event_init, 1695 .event_init = x86_pmu_event_init,
1616 1696
1617 .add = x86_pmu_add, 1697 .add = x86_pmu_add,
1618 .del = x86_pmu_del, 1698 .del = x86_pmu_del,
1619 .start = x86_pmu_start, 1699 .start = x86_pmu_start,
1620 .stop = x86_pmu_stop, 1700 .stop = x86_pmu_stop,
1621 .read = x86_pmu_read, 1701 .read = x86_pmu_read,
1622 1702
1623 .start_txn = x86_pmu_start_txn, 1703 .start_txn = x86_pmu_start_txn,
1624 .cancel_txn = x86_pmu_cancel_txn, 1704 .cancel_txn = x86_pmu_cancel_txn,
1625 .commit_txn = x86_pmu_commit_txn, 1705 .commit_txn = x86_pmu_commit_txn,
1626 1706
1627 .event_idx = x86_pmu_event_idx, 1707 .event_idx = x86_pmu_event_idx,
1708 .flush_branch_stack = x86_pmu_flush_branch_stack,
1628}; 1709};
1629 1710
1630void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) 1711void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
1631{ 1712{
1713 userpg->cap_usr_time = 0;
1714 userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
1715 userpg->pmc_width = x86_pmu.cntval_bits;
1716
1632 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 1717 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1633 return; 1718 return;
1634 1719
1635 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1720 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1636 return; 1721 return;
1637 1722
1723 userpg->cap_usr_time = 1;
1638 userpg->time_mult = this_cpu_read(cyc2ns); 1724 userpg->time_mult = this_cpu_read(cyc2ns);
1639 userpg->time_shift = CYC2NS_SCALE_FACTOR; 1725 userpg->time_shift = CYC2NS_SCALE_FACTOR;
1640 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; 1726 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 82db83b5c3bc..6638aaf54493 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
33 33
34 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ 34 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
35 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ 35 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
36 EXTRA_REG_LBR = 2, /* lbr_select */
36 37
37 EXTRA_REG_MAX /* number of entries needed */ 38 EXTRA_REG_MAX /* number of entries needed */
38}; 39};
@@ -130,6 +131,8 @@ struct cpu_hw_events {
130 void *lbr_context; 131 void *lbr_context;
131 struct perf_branch_stack lbr_stack; 132 struct perf_branch_stack lbr_stack;
132 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 133 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
134 struct er_account *lbr_sel;
135 u64 br_sel;
133 136
134 /* 137 /*
135 * Intel host/guest exclude bits 138 * Intel host/guest exclude bits
@@ -268,6 +271,29 @@ struct x86_pmu_quirk {
268 void (*func)(void); 271 void (*func)(void);
269}; 272};
270 273
274union x86_pmu_config {
275 struct {
276 u64 event:8,
277 umask:8,
278 usr:1,
279 os:1,
280 edge:1,
281 pc:1,
282 interrupt:1,
283 __reserved1:1,
284 en:1,
285 inv:1,
286 cmask:8,
287 event2:4,
288 __reserved2:4,
289 go:1,
290 ho:1;
291 } bits;
292 u64 value;
293};
294
295#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
296
271/* 297/*
272 * struct x86_pmu - generic x86 pmu 298 * struct x86_pmu - generic x86 pmu
273 */ 299 */
@@ -313,6 +339,7 @@ struct x86_pmu {
313 * sysfs attrs 339 * sysfs attrs
314 */ 340 */
315 int attr_rdpmc; 341 int attr_rdpmc;
342 struct attribute **format_attrs;
316 343
317 /* 344 /*
318 * CPU Hotplug hooks 345 * CPU Hotplug hooks
@@ -321,6 +348,7 @@ struct x86_pmu {
321 void (*cpu_starting)(int cpu); 348 void (*cpu_starting)(int cpu);
322 void (*cpu_dying)(int cpu); 349 void (*cpu_dying)(int cpu);
323 void (*cpu_dead)(int cpu); 350 void (*cpu_dead)(int cpu);
351 void (*flush_branch_stack)(void);
324 352
325 /* 353 /*
326 * Intel Arch Perfmon v2+ 354 * Intel Arch Perfmon v2+
@@ -342,6 +370,8 @@ struct x86_pmu {
342 */ 370 */
343 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ 371 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
344 int lbr_nr; /* hardware stack size */ 372 int lbr_nr; /* hardware stack size */
373 u64 lbr_sel_mask; /* LBR_SELECT valid bits */
374 const int *lbr_sel_map; /* lbr_select mappings */
345 375
346 /* 376 /*
347 * Extra registers for events 377 * Extra registers for events
@@ -455,6 +485,15 @@ extern struct event_constraint emptyconstraint;
455 485
456extern struct event_constraint unconstrained; 486extern struct event_constraint unconstrained;
457 487
488static inline bool kernel_ip(unsigned long ip)
489{
490#ifdef CONFIG_X86_32
491 return ip > PAGE_OFFSET;
492#else
493 return (long)ip < 0;
494#endif
495}
496
458#ifdef CONFIG_CPU_SUP_AMD 497#ifdef CONFIG_CPU_SUP_AMD
459 498
460int amd_pmu_init(void); 499int amd_pmu_init(void);
@@ -535,6 +574,10 @@ void intel_pmu_lbr_init_nhm(void);
535 574
536void intel_pmu_lbr_init_atom(void); 575void intel_pmu_lbr_init_atom(void);
537 576
577void intel_pmu_lbr_init_snb(void);
578
579int intel_pmu_setup_lbr_filter(struct perf_event *event);
580
538int p4_pmu_init(void); 581int p4_pmu_init(void);
539 582
540int p6_pmu_init(void); 583int p6_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67250a52430b..95e7fe1c5f0b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
139 if (ret) 139 if (ret)
140 return ret; 140 return ret;
141 141
142 if (has_branch_stack(event))
143 return -EOPNOTSUPP;
144
142 if (event->attr.exclude_host && event->attr.exclude_guest) 145 if (event->attr.exclude_host && event->attr.exclude_guest)
143 /* 146 /*
144 * When HO == GO == 1 the hardware treats that as GO == HO == 0 147 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -401,6 +404,21 @@ static void amd_pmu_cpu_dead(int cpu)
401 } 404 }
402} 405}
403 406
407PMU_FORMAT_ATTR(event, "config:0-7,32-35");
408PMU_FORMAT_ATTR(umask, "config:8-15" );
409PMU_FORMAT_ATTR(edge, "config:18" );
410PMU_FORMAT_ATTR(inv, "config:23" );
411PMU_FORMAT_ATTR(cmask, "config:24-31" );
412
413static struct attribute *amd_format_attr[] = {
414 &format_attr_event.attr,
415 &format_attr_umask.attr,
416 &format_attr_edge.attr,
417 &format_attr_inv.attr,
418 &format_attr_cmask.attr,
419 NULL,
420};
421
404static __initconst const struct x86_pmu amd_pmu = { 422static __initconst const struct x86_pmu amd_pmu = {
405 .name = "AMD", 423 .name = "AMD",
406 .handle_irq = x86_pmu_handle_irq, 424 .handle_irq = x86_pmu_handle_irq,
@@ -423,6 +441,8 @@ static __initconst const struct x86_pmu amd_pmu = {
423 .get_event_constraints = amd_get_event_constraints, 441 .get_event_constraints = amd_get_event_constraints,
424 .put_event_constraints = amd_put_event_constraints, 442 .put_event_constraints = amd_put_event_constraints,
425 443
444 .format_attrs = amd_format_attr,
445
426 .cpu_prepare = amd_pmu_cpu_prepare, 446 .cpu_prepare = amd_pmu_cpu_prepare,
427 .cpu_starting = amd_pmu_cpu_starting, 447 .cpu_starting = amd_pmu_cpu_starting,
428 .cpu_dead = amd_pmu_cpu_dead, 448 .cpu_dead = amd_pmu_cpu_dead,
@@ -593,6 +613,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
593 .cpu_dead = amd_pmu_cpu_dead, 613 .cpu_dead = amd_pmu_cpu_dead,
594#endif 614#endif
595 .cpu_starting = amd_pmu_cpu_starting, 615 .cpu_starting = amd_pmu_cpu_starting,
616 .format_attrs = amd_format_attr,
596}; 617};
597 618
598__init int amd_pmu_init(void) 619__init int amd_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3bd37bdf1b8e..26b3e2fef104 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -385,14 +385,15 @@ static __initconst const u64 westmere_hw_cache_event_ids
385#define NHM_LOCAL_DRAM (1 << 14) 385#define NHM_LOCAL_DRAM (1 << 14)
386#define NHM_NON_DRAM (1 << 15) 386#define NHM_NON_DRAM (1 << 15)
387 387
388#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) 388#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
389#define NHM_REMOTE (NHM_REMOTE_DRAM)
389 390
390#define NHM_DMND_READ (NHM_DMND_DATA_RD) 391#define NHM_DMND_READ (NHM_DMND_DATA_RD)
391#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) 392#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
392#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) 393#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
393 394
394#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) 395#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
395#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) 396#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
396#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) 397#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
397 398
398static __initconst const u64 nehalem_hw_cache_extra_regs 399static __initconst const u64 nehalem_hw_cache_extra_regs
@@ -416,16 +417,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
416 }, 417 },
417 [ C(NODE) ] = { 418 [ C(NODE) ] = {
418 [ C(OP_READ) ] = { 419 [ C(OP_READ) ] = {
419 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM, 420 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
420 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM, 421 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
421 }, 422 },
422 [ C(OP_WRITE) ] = { 423 [ C(OP_WRITE) ] = {
423 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM, 424 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
424 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM, 425 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
425 }, 426 },
426 [ C(OP_PREFETCH) ] = { 427 [ C(OP_PREFETCH) ] = {
427 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM, 428 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
428 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM, 429 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
429 }, 430 },
430 }, 431 },
431}; 432};
@@ -727,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
727 }, 728 },
728}; 729};
729 730
731static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
732{
733 /* user explicitly requested branch sampling */
734 if (has_branch_stack(event))
735 return true;
736
737 /* implicit branch sampling to correct PEBS skid */
738 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
739 return true;
740
741 return false;
742}
743
730static void intel_pmu_disable_all(void) 744static void intel_pmu_disable_all(void)
731{ 745{
732 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 746 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -881,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
881 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); 895 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
882 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); 896 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
883 897
898 /*
899 * must disable before any actual event
900 * because any event may be combined with LBR
901 */
902 if (intel_pmu_needs_lbr_smpl(event))
903 intel_pmu_lbr_disable(event);
904
884 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 905 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
885 intel_pmu_disable_fixed(hwc); 906 intel_pmu_disable_fixed(hwc);
886 return; 907 return;
@@ -935,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
935 intel_pmu_enable_bts(hwc->config); 956 intel_pmu_enable_bts(hwc->config);
936 return; 957 return;
937 } 958 }
959 /*
960 * must enabled before any actual event
961 * because any event may be combined with LBR
962 */
963 if (intel_pmu_needs_lbr_smpl(event))
964 intel_pmu_lbr_enable(event);
938 965
939 if (event->attr.exclude_host) 966 if (event->attr.exclude_host)
940 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); 967 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1057,6 +1084,9 @@ again:
1057 1084
1058 data.period = event->hw.last_period; 1085 data.period = event->hw.last_period;
1059 1086
1087 if (has_branch_stack(event))
1088 data.br_stack = &cpuc->lbr_stack;
1089
1060 if (perf_event_overflow(event, &data, regs)) 1090 if (perf_event_overflow(event, &data, regs))
1061 x86_pmu_stop(event, 0); 1091 x86_pmu_stop(event, 0);
1062 } 1092 }
@@ -1123,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
1123 */ 1153 */
1124static struct event_constraint * 1154static struct event_constraint *
1125__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, 1155__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1126 struct perf_event *event) 1156 struct perf_event *event,
1157 struct hw_perf_event_extra *reg)
1127{ 1158{
1128 struct event_constraint *c = &emptyconstraint; 1159 struct event_constraint *c = &emptyconstraint;
1129 struct hw_perf_event_extra *reg = &event->hw.extra_reg;
1130 struct er_account *era; 1160 struct er_account *era;
1131 unsigned long flags; 1161 unsigned long flags;
1132 int orig_idx = reg->idx; 1162 int orig_idx = reg->idx;
1133 1163
1134 /* already allocated shared msr */ 1164 /* already allocated shared msr */
1135 if (reg->alloc) 1165 if (reg->alloc)
1136 return &unconstrained; 1166 return NULL; /* call x86_get_event_constraint() */
1137 1167
1138again: 1168again:
1139 era = &cpuc->shared_regs->regs[reg->idx]; 1169 era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1186,10 @@ again:
1156 reg->alloc = 1; 1186 reg->alloc = 1;
1157 1187
1158 /* 1188 /*
1159 * All events using extra_reg are unconstrained. 1189 * need to call x86_get_event_constraint()
1160 * Avoids calling x86_get_event_constraints() 1190 * to check if associated event has constraints
1161 *
1162 * Must revisit if extra_reg controlling events
1163 * ever have constraints. Worst case we go through
1164 * the regular event constraint table.
1165 */ 1191 */
1166 c = &unconstrained; 1192 c = NULL;
1167 } else if (intel_try_alt_er(event, orig_idx)) { 1193 } else if (intel_try_alt_er(event, orig_idx)) {
1168 raw_spin_unlock_irqrestore(&era->lock, flags); 1194 raw_spin_unlock_irqrestore(&era->lock, flags);
1169 goto again; 1195 goto again;
@@ -1200,11 +1226,23 @@ static struct event_constraint *
1200intel_shared_regs_constraints(struct cpu_hw_events *cpuc, 1226intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
1201 struct perf_event *event) 1227 struct perf_event *event)
1202{ 1228{
1203 struct event_constraint *c = NULL; 1229 struct event_constraint *c = NULL, *d;
1204 1230 struct hw_perf_event_extra *xreg, *breg;
1205 if (event->hw.extra_reg.idx != EXTRA_REG_NONE) 1231
1206 c = __intel_shared_reg_get_constraints(cpuc, event); 1232 xreg = &event->hw.extra_reg;
1207 1233 if (xreg->idx != EXTRA_REG_NONE) {
1234 c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
1235 if (c == &emptyconstraint)
1236 return c;
1237 }
1238 breg = &event->hw.branch_reg;
1239 if (breg->idx != EXTRA_REG_NONE) {
1240 d = __intel_shared_reg_get_constraints(cpuc, event, breg);
1241 if (d == &emptyconstraint) {
1242 __intel_shared_reg_put_constraints(cpuc, xreg);
1243 c = d;
1244 }
1245 }
1208 return c; 1246 return c;
1209} 1247}
1210 1248
@@ -1252,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1252 reg = &event->hw.extra_reg; 1290 reg = &event->hw.extra_reg;
1253 if (reg->idx != EXTRA_REG_NONE) 1291 if (reg->idx != EXTRA_REG_NONE)
1254 __intel_shared_reg_put_constraints(cpuc, reg); 1292 __intel_shared_reg_put_constraints(cpuc, reg);
1293
1294 reg = &event->hw.branch_reg;
1295 if (reg->idx != EXTRA_REG_NONE)
1296 __intel_shared_reg_put_constraints(cpuc, reg);
1255} 1297}
1256 1298
1257static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 1299static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1287,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
1287 * 1329 *
1288 * Thereby we gain a PEBS capable cycle counter. 1330 * Thereby we gain a PEBS capable cycle counter.
1289 */ 1331 */
1290 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ 1332 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
1333
1291 1334
1292 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 1335 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1293 event->hw.config = alt_config; 1336 event->hw.config = alt_config;
1294 } 1337 }
1295 1338
1339 if (intel_pmu_needs_lbr_smpl(event)) {
1340 ret = intel_pmu_setup_lbr_filter(event);
1341 if (ret)
1342 return ret;
1343 }
1344
1296 if (event->attr.type != PERF_TYPE_RAW) 1345 if (event->attr.type != PERF_TYPE_RAW)
1297 return 0; 1346 return 0;
1298 1347
@@ -1382,6 +1431,24 @@ static void core_pmu_enable_all(int added)
1382 } 1431 }
1383} 1432}
1384 1433
1434PMU_FORMAT_ATTR(event, "config:0-7" );
1435PMU_FORMAT_ATTR(umask, "config:8-15" );
1436PMU_FORMAT_ATTR(edge, "config:18" );
1437PMU_FORMAT_ATTR(pc, "config:19" );
1438PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
1439PMU_FORMAT_ATTR(inv, "config:23" );
1440PMU_FORMAT_ATTR(cmask, "config:24-31" );
1441
1442static struct attribute *intel_arch_formats_attr[] = {
1443 &format_attr_event.attr,
1444 &format_attr_umask.attr,
1445 &format_attr_edge.attr,
1446 &format_attr_pc.attr,
1447 &format_attr_inv.attr,
1448 &format_attr_cmask.attr,
1449 NULL,
1450};
1451
1385static __initconst const struct x86_pmu core_pmu = { 1452static __initconst const struct x86_pmu core_pmu = {
1386 .name = "core", 1453 .name = "core",
1387 .handle_irq = x86_pmu_handle_irq, 1454 .handle_irq = x86_pmu_handle_irq,
@@ -1406,6 +1473,7 @@ static __initconst const struct x86_pmu core_pmu = {
1406 .put_event_constraints = intel_put_event_constraints, 1473 .put_event_constraints = intel_put_event_constraints,
1407 .event_constraints = intel_core_event_constraints, 1474 .event_constraints = intel_core_event_constraints,
1408 .guest_get_msrs = core_guest_get_msrs, 1475 .guest_get_msrs = core_guest_get_msrs,
1476 .format_attrs = intel_arch_formats_attr,
1409}; 1477};
1410 1478
1411struct intel_shared_regs *allocate_shared_regs(int cpu) 1479struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1431,7 +1499,7 @@ static int intel_pmu_cpu_prepare(int cpu)
1431{ 1499{
1432 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 1500 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1433 1501
1434 if (!x86_pmu.extra_regs) 1502 if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
1435 return NOTIFY_OK; 1503 return NOTIFY_OK;
1436 1504
1437 cpuc->shared_regs = allocate_shared_regs(cpu); 1505 cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1453,22 +1521,28 @@ static void intel_pmu_cpu_starting(int cpu)
1453 */ 1521 */
1454 intel_pmu_lbr_reset(); 1522 intel_pmu_lbr_reset();
1455 1523
1456 if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) 1524 cpuc->lbr_sel = NULL;
1525
1526 if (!cpuc->shared_regs)
1457 return; 1527 return;
1458 1528
1459 for_each_cpu(i, topology_thread_cpumask(cpu)) { 1529 if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
1460 struct intel_shared_regs *pc; 1530 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1531 struct intel_shared_regs *pc;
1461 1532
1462 pc = per_cpu(cpu_hw_events, i).shared_regs; 1533 pc = per_cpu(cpu_hw_events, i).shared_regs;
1463 if (pc && pc->core_id == core_id) { 1534 if (pc && pc->core_id == core_id) {
1464 cpuc->kfree_on_online = cpuc->shared_regs; 1535 cpuc->kfree_on_online = cpuc->shared_regs;
1465 cpuc->shared_regs = pc; 1536 cpuc->shared_regs = pc;
1466 break; 1537 break;
1538 }
1467 } 1539 }
1540 cpuc->shared_regs->core_id = core_id;
1541 cpuc->shared_regs->refcnt++;
1468 } 1542 }
1469 1543
1470 cpuc->shared_regs->core_id = core_id; 1544 if (x86_pmu.lbr_sel_map)
1471 cpuc->shared_regs->refcnt++; 1545 cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
1472} 1546}
1473 1547
1474static void intel_pmu_cpu_dying(int cpu) 1548static void intel_pmu_cpu_dying(int cpu)
@@ -1486,6 +1560,33 @@ static void intel_pmu_cpu_dying(int cpu)
1486 fini_debug_store_on_cpu(cpu); 1560 fini_debug_store_on_cpu(cpu);
1487} 1561}
1488 1562
1563static void intel_pmu_flush_branch_stack(void)
1564{
1565 /*
1566 * Intel LBR does not tag entries with the
1567 * PID of the current task, then we need to
1568 * flush it on ctxsw
1569 * For now, we simply reset it
1570 */
1571 if (x86_pmu.lbr_nr)
1572 intel_pmu_lbr_reset();
1573}
1574
1575PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
1576
1577static struct attribute *intel_arch3_formats_attr[] = {
1578 &format_attr_event.attr,
1579 &format_attr_umask.attr,
1580 &format_attr_edge.attr,
1581 &format_attr_pc.attr,
1582 &format_attr_any.attr,
1583 &format_attr_inv.attr,
1584 &format_attr_cmask.attr,
1585
1586 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
1587 NULL,
1588};
1589
1489static __initconst const struct x86_pmu intel_pmu = { 1590static __initconst const struct x86_pmu intel_pmu = {
1490 .name = "Intel", 1591 .name = "Intel",
1491 .handle_irq = intel_pmu_handle_irq, 1592 .handle_irq = intel_pmu_handle_irq,
@@ -1509,10 +1610,13 @@ static __initconst const struct x86_pmu intel_pmu = {
1509 .get_event_constraints = intel_get_event_constraints, 1610 .get_event_constraints = intel_get_event_constraints,
1510 .put_event_constraints = intel_put_event_constraints, 1611 .put_event_constraints = intel_put_event_constraints,
1511 1612
1613 .format_attrs = intel_arch3_formats_attr,
1614
1512 .cpu_prepare = intel_pmu_cpu_prepare, 1615 .cpu_prepare = intel_pmu_cpu_prepare,
1513 .cpu_starting = intel_pmu_cpu_starting, 1616 .cpu_starting = intel_pmu_cpu_starting,
1514 .cpu_dying = intel_pmu_cpu_dying, 1617 .cpu_dying = intel_pmu_cpu_dying,
1515 .guest_get_msrs = intel_guest_get_msrs, 1618 .guest_get_msrs = intel_guest_get_msrs,
1619 .flush_branch_stack = intel_pmu_flush_branch_stack,
1516}; 1620};
1517 1621
1518static __init void intel_clovertown_quirk(void) 1622static __init void intel_clovertown_quirk(void)
@@ -1689,9 +1793,11 @@ __init int intel_pmu_init(void)
1689 x86_pmu.extra_regs = intel_nehalem_extra_regs; 1793 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1690 1794
1691 /* UOPS_ISSUED.STALLED_CYCLES */ 1795 /* UOPS_ISSUED.STALLED_CYCLES */
1692 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1796 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
1797 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
1693 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1798 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1694 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1799 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
1800 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
1695 1801
1696 x86_add_quirk(intel_nehalem_quirk); 1802 x86_add_quirk(intel_nehalem_quirk);
1697 1803
@@ -1726,9 +1832,11 @@ __init int intel_pmu_init(void)
1726 x86_pmu.er_flags |= ERF_HAS_RSP_1; 1832 x86_pmu.er_flags |= ERF_HAS_RSP_1;
1727 1833
1728 /* UOPS_ISSUED.STALLED_CYCLES */ 1834 /* UOPS_ISSUED.STALLED_CYCLES */
1729 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1835 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
1836 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
1730 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1837 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1731 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1838 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
1839 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
1732 1840
1733 pr_cont("Westmere events, "); 1841 pr_cont("Westmere events, ");
1734 break; 1842 break;
@@ -1739,7 +1847,7 @@ __init int intel_pmu_init(void)
1739 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1847 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1740 sizeof(hw_cache_event_ids)); 1848 sizeof(hw_cache_event_ids));
1741 1849
1742 intel_pmu_lbr_init_nhm(); 1850 intel_pmu_lbr_init_snb();
1743 1851
1744 x86_pmu.event_constraints = intel_snb_event_constraints; 1852 x86_pmu.event_constraints = intel_snb_event_constraints;
1745 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 1853 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
@@ -1749,9 +1857,11 @@ __init int intel_pmu_init(void)
1749 x86_pmu.er_flags |= ERF_NO_HT_SHARING; 1857 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
1750 1858
1751 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 1859 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
1752 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1860 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
1861 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
1753 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ 1862 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
1754 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; 1863 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
1864 X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
1755 1865
1756 pr_cont("SandyBridge events, "); 1866 pr_cont("SandyBridge events, ");
1757 break; 1867 break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d6bd49faa40c..7f64df19e7dd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@
3#include <linux/slab.h> 3#include <linux/slab.h>
4 4
5#include <asm/perf_event.h> 5#include <asm/perf_event.h>
6#include <asm/insn.h>
6 7
7#include "perf_event.h" 8#include "perf_event.h"
8 9
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
439 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; 440 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
440 441
441 cpuc->pebs_enabled |= 1ULL << hwc->idx; 442 cpuc->pebs_enabled |= 1ULL << hwc->idx;
442
443 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
444 intel_pmu_lbr_enable(event);
445} 443}
446 444
447void intel_pmu_pebs_disable(struct perf_event *event) 445void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
454 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 452 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
455 453
456 hwc->config |= ARCH_PERFMON_EVENTSEL_INT; 454 hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
457
458 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
459 intel_pmu_lbr_disable(event);
460} 455}
461 456
462void intel_pmu_pebs_enable_all(void) 457void intel_pmu_pebs_enable_all(void)
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
475 wrmsrl(MSR_IA32_PEBS_ENABLE, 0); 470 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
476} 471}
477 472
478#include <asm/insn.h>
479
480static inline bool kernel_ip(unsigned long ip)
481{
482#ifdef CONFIG_X86_32
483 return ip > PAGE_OFFSET;
484#else
485 return (long)ip < 0;
486#endif
487}
488
489static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) 473static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
490{ 474{
491 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 475 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
572 * both formats and we don't use the other fields in this 556 * both formats and we don't use the other fields in this
573 * routine. 557 * routine.
574 */ 558 */
559 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
575 struct pebs_record_core *pebs = __pebs; 560 struct pebs_record_core *pebs = __pebs;
576 struct perf_sample_data data; 561 struct perf_sample_data data;
577 struct pt_regs regs; 562 struct pt_regs regs;
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
602 else 587 else
603 regs.flags &= ~PERF_EFLAGS_EXACT; 588 regs.flags &= ~PERF_EFLAGS_EXACT;
604 589
590 if (has_branch_stack(event))
591 data.br_stack = &cpuc->lbr_stack;
592
605 if (perf_event_overflow(event, &data, &regs)) 593 if (perf_event_overflow(event, &data, &regs))
606 x86_pmu_stop(event, 0); 594 x86_pmu_stop(event, 0);
607} 595}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 47a7e63bfe54..520b4265fcd2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@
3 3
4#include <asm/perf_event.h> 4#include <asm/perf_event.h>
5#include <asm/msr.h> 5#include <asm/msr.h>
6#include <asm/insn.h>
6 7
7#include "perf_event.h" 8#include "perf_event.h"
8 9
@@ -14,6 +15,100 @@ enum {
14}; 15};
15 16
16/* 17/*
18 * Intel LBR_SELECT bits
19 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
20 *
21 * Hardware branch filter (not available on all CPUs)
22 */
23#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
24#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
25#define LBR_JCC_BIT 2 /* do not capture conditional branches */
26#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
27#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
28#define LBR_RETURN_BIT 5 /* do not capture near returns */
29#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
30#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
31#define LBR_FAR_BIT 8 /* do not capture far branches */
32
33#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
34#define LBR_USER (1 << LBR_USER_BIT)
35#define LBR_JCC (1 << LBR_JCC_BIT)
36#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
37#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
38#define LBR_RETURN (1 << LBR_RETURN_BIT)
39#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
40#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
41#define LBR_FAR (1 << LBR_FAR_BIT)
42
43#define LBR_PLM (LBR_KERNEL | LBR_USER)
44
45#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
46#define LBR_NOT_SUPP -1 /* LBR filter not supported */
47#define LBR_IGN 0 /* ignored */
48
49#define LBR_ANY \
50 (LBR_JCC |\
51 LBR_REL_CALL |\
52 LBR_IND_CALL |\
53 LBR_RETURN |\
54 LBR_REL_JMP |\
55 LBR_IND_JMP |\
56 LBR_FAR)
57
58#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
59
60#define for_each_branch_sample_type(x) \
61 for ((x) = PERF_SAMPLE_BRANCH_USER; \
62 (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
63
64/*
65 * x86control flow change classification
66 * x86control flow changes include branches, interrupts, traps, faults
67 */
68enum {
69 X86_BR_NONE = 0, /* unknown */
70
71 X86_BR_USER = 1 << 0, /* branch target is user */
72 X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
73
74 X86_BR_CALL = 1 << 2, /* call */
75 X86_BR_RET = 1 << 3, /* return */
76 X86_BR_SYSCALL = 1 << 4, /* syscall */
77 X86_BR_SYSRET = 1 << 5, /* syscall return */
78 X86_BR_INT = 1 << 6, /* sw interrupt */
79 X86_BR_IRET = 1 << 7, /* return from interrupt */
80 X86_BR_JCC = 1 << 8, /* conditional */
81 X86_BR_JMP = 1 << 9, /* jump */
82 X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
83 X86_BR_IND_CALL = 1 << 11,/* indirect calls */
84};
85
86#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
87
88#define X86_BR_ANY \
89 (X86_BR_CALL |\
90 X86_BR_RET |\
91 X86_BR_SYSCALL |\
92 X86_BR_SYSRET |\
93 X86_BR_INT |\
94 X86_BR_IRET |\
95 X86_BR_JCC |\
96 X86_BR_JMP |\
97 X86_BR_IRQ |\
98 X86_BR_IND_CALL)
99
100#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
101
102#define X86_BR_ANY_CALL \
103 (X86_BR_CALL |\
104 X86_BR_IND_CALL |\
105 X86_BR_SYSCALL |\
106 X86_BR_IRQ |\
107 X86_BR_INT)
108
109static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
110
111/*
17 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI 112 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
18 * otherwise it becomes near impossible to get a reliable stack. 113 * otherwise it becomes near impossible to get a reliable stack.
19 */ 114 */
@@ -21,6 +116,10 @@ enum {
21static void __intel_pmu_lbr_enable(void) 116static void __intel_pmu_lbr_enable(void)
22{ 117{
23 u64 debugctl; 118 u64 debugctl;
119 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
120
121 if (cpuc->lbr_sel)
122 wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
24 123
25 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); 124 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
26 debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); 125 debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
76 * Reset the LBR stack if we changed task context to 175 * Reset the LBR stack if we changed task context to
77 * avoid data leaks. 176 * avoid data leaks.
78 */ 177 */
79
80 if (event->ctx->task && cpuc->lbr_context != event->ctx) { 178 if (event->ctx->task && cpuc->lbr_context != event->ctx) {
81 intel_pmu_lbr_reset(); 179 intel_pmu_lbr_reset();
82 cpuc->lbr_context = event->ctx; 180 cpuc->lbr_context = event->ctx;
83 } 181 }
182 cpuc->br_sel = event->hw.branch_reg.reg;
84 183
85 cpuc->lbr_users++; 184 cpuc->lbr_users++;
86} 185}
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
95 cpuc->lbr_users--; 194 cpuc->lbr_users--;
96 WARN_ON_ONCE(cpuc->lbr_users < 0); 195 WARN_ON_ONCE(cpuc->lbr_users < 0);
97 196
98 if (cpuc->enabled && !cpuc->lbr_users) 197 if (cpuc->enabled && !cpuc->lbr_users) {
99 __intel_pmu_lbr_disable(); 198 __intel_pmu_lbr_disable();
199 /* avoid stale pointer */
200 cpuc->lbr_context = NULL;
201 }
100} 202}
101 203
102void intel_pmu_lbr_enable_all(void) 204void intel_pmu_lbr_enable_all(void)
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
115 __intel_pmu_lbr_disable(); 217 __intel_pmu_lbr_disable();
116} 218}
117 219
220/*
221 * TOS = most recently recorded branch
222 */
118static inline u64 intel_pmu_lbr_tos(void) 223static inline u64 intel_pmu_lbr_tos(void)
119{ 224{
120 u64 tos; 225 u64 tos;
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
142 247
143 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); 248 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
144 249
145 cpuc->lbr_entries[i].from = msr_lastbranch.from; 250 cpuc->lbr_entries[i].from = msr_lastbranch.from;
146 cpuc->lbr_entries[i].to = msr_lastbranch.to; 251 cpuc->lbr_entries[i].to = msr_lastbranch.to;
147 cpuc->lbr_entries[i].flags = 0; 252 cpuc->lbr_entries[i].mispred = 0;
253 cpuc->lbr_entries[i].predicted = 0;
254 cpuc->lbr_entries[i].reserved = 0;
148 } 255 }
149 cpuc->lbr_stack.nr = i; 256 cpuc->lbr_stack.nr = i;
150} 257}
151 258
152#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
153
154/* 259/*
155 * Due to lack of segmentation in Linux the effective address (offset) 260 * Due to lack of segmentation in Linux the effective address (offset)
156 * is the same as the linear address, allowing us to merge the LIP and EIP 261 * is the same as the linear address, allowing us to merge the LIP and EIP
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
165 270
166 for (i = 0; i < x86_pmu.lbr_nr; i++) { 271 for (i = 0; i < x86_pmu.lbr_nr; i++) {
167 unsigned long lbr_idx = (tos - i) & mask; 272 unsigned long lbr_idx = (tos - i) & mask;
168 u64 from, to, flags = 0; 273 u64 from, to, mis = 0, pred = 0;
169 274
170 rdmsrl(x86_pmu.lbr_from + lbr_idx, from); 275 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
171 rdmsrl(x86_pmu.lbr_to + lbr_idx, to); 276 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
172 277
173 if (lbr_format == LBR_FORMAT_EIP_FLAGS) { 278 if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
174 flags = !!(from & LBR_FROM_FLAG_MISPRED); 279 mis = !!(from & LBR_FROM_FLAG_MISPRED);
280 pred = !mis;
175 from = (u64)((((s64)from) << 1) >> 1); 281 from = (u64)((((s64)from) << 1) >> 1);
176 } 282 }
177 283
178 cpuc->lbr_entries[i].from = from; 284 cpuc->lbr_entries[i].from = from;
179 cpuc->lbr_entries[i].to = to; 285 cpuc->lbr_entries[i].to = to;
180 cpuc->lbr_entries[i].flags = flags; 286 cpuc->lbr_entries[i].mispred = mis;
287 cpuc->lbr_entries[i].predicted = pred;
288 cpuc->lbr_entries[i].reserved = 0;
181 } 289 }
182 cpuc->lbr_stack.nr = i; 290 cpuc->lbr_stack.nr = i;
183} 291}
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
193 intel_pmu_lbr_read_32(cpuc); 301 intel_pmu_lbr_read_32(cpuc);
194 else 302 else
195 intel_pmu_lbr_read_64(cpuc); 303 intel_pmu_lbr_read_64(cpuc);
304
305 intel_pmu_lbr_filter(cpuc);
306}
307
308/*
309 * SW filter is used:
310 * - in case there is no HW filter
311 * - in case the HW filter has errata or limitations
312 */
313static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
314{
315 u64 br_type = event->attr.branch_sample_type;
316 int mask = 0;
317
318 if (br_type & PERF_SAMPLE_BRANCH_USER)
319 mask |= X86_BR_USER;
320
321 if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
322 mask |= X86_BR_KERNEL;
323
324 /* we ignore BRANCH_HV here */
325
326 if (br_type & PERF_SAMPLE_BRANCH_ANY)
327 mask |= X86_BR_ANY;
328
329 if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
330 mask |= X86_BR_ANY_CALL;
331
332 if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
333 mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
334
335 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
336 mask |= X86_BR_IND_CALL;
337 /*
338 * stash actual user request into reg, it may
339 * be used by fixup code for some CPU
340 */
341 event->hw.branch_reg.reg = mask;
342}
343
344/*
345 * setup the HW LBR filter
346 * Used only when available, may not be enough to disambiguate
347 * all branches, may need the help of the SW filter
348 */
349static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
350{
351 struct hw_perf_event_extra *reg;
352 u64 br_type = event->attr.branch_sample_type;
353 u64 mask = 0, m;
354 u64 v;
355
356 for_each_branch_sample_type(m) {
357 if (!(br_type & m))
358 continue;
359
360 v = x86_pmu.lbr_sel_map[m];
361 if (v == LBR_NOT_SUPP)
362 return -EOPNOTSUPP;
363
364 if (v != LBR_IGN)
365 mask |= v;
366 }
367 reg = &event->hw.branch_reg;
368 reg->idx = EXTRA_REG_LBR;
369
370 /* LBR_SELECT operates in suppress mode so invert mask */
371 reg->config = ~mask & x86_pmu.lbr_sel_mask;
372
373 return 0;
374}
375
376int intel_pmu_setup_lbr_filter(struct perf_event *event)
377{
378 int ret = 0;
379
380 /*
381 * no LBR on this PMU
382 */
383 if (!x86_pmu.lbr_nr)
384 return -EOPNOTSUPP;
385
386 /*
387 * setup SW LBR filter
388 */
389 intel_pmu_setup_sw_lbr_filter(event);
390
391 /*
392 * setup HW LBR filter, if any
393 */
394 if (x86_pmu.lbr_sel_map)
395 ret = intel_pmu_setup_hw_lbr_filter(event);
396
397 return ret;
196} 398}
197 399
400/*
401 * return the type of control flow change at address "from"
402 * intruction is not necessarily a branch (in case of interrupt).
403 *
404 * The branch type returned also includes the priv level of the
405 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
406 *
407 * If a branch type is unknown OR the instruction cannot be
408 * decoded (e.g., text page not present), then X86_BR_NONE is
409 * returned.
410 */
411static int branch_type(unsigned long from, unsigned long to)
412{
413 struct insn insn;
414 void *addr;
415 int bytes, size = MAX_INSN_SIZE;
416 int ret = X86_BR_NONE;
417 int ext, to_plm, from_plm;
418 u8 buf[MAX_INSN_SIZE];
419 int is64 = 0;
420
421 to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
422 from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
423
424 /*
425 * maybe zero if lbr did not fill up after a reset by the time
426 * we get a PMU interrupt
427 */
428 if (from == 0 || to == 0)
429 return X86_BR_NONE;
430
431 if (from_plm == X86_BR_USER) {
432 /*
433 * can happen if measuring at the user level only
434 * and we interrupt in a kernel thread, e.g., idle.
435 */
436 if (!current->mm)
437 return X86_BR_NONE;
438
439 /* may fail if text not present */
440 bytes = copy_from_user_nmi(buf, (void __user *)from, size);
441 if (bytes != size)
442 return X86_BR_NONE;
443
444 addr = buf;
445 } else
446 addr = (void *)from;
447
448 /*
449 * decoder needs to know the ABI especially
450 * on 64-bit systems running 32-bit apps
451 */
452#ifdef CONFIG_X86_64
453 is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
454#endif
455 insn_init(&insn, addr, is64);
456 insn_get_opcode(&insn);
457
458 switch (insn.opcode.bytes[0]) {
459 case 0xf:
460 switch (insn.opcode.bytes[1]) {
461 case 0x05: /* syscall */
462 case 0x34: /* sysenter */
463 ret = X86_BR_SYSCALL;
464 break;
465 case 0x07: /* sysret */
466 case 0x35: /* sysexit */
467 ret = X86_BR_SYSRET;
468 break;
469 case 0x80 ... 0x8f: /* conditional */
470 ret = X86_BR_JCC;
471 break;
472 default:
473 ret = X86_BR_NONE;
474 }
475 break;
476 case 0x70 ... 0x7f: /* conditional */
477 ret = X86_BR_JCC;
478 break;
479 case 0xc2: /* near ret */
480 case 0xc3: /* near ret */
481 case 0xca: /* far ret */
482 case 0xcb: /* far ret */
483 ret = X86_BR_RET;
484 break;
485 case 0xcf: /* iret */
486 ret = X86_BR_IRET;
487 break;
488 case 0xcc ... 0xce: /* int */
489 ret = X86_BR_INT;
490 break;
491 case 0xe8: /* call near rel */
492 case 0x9a: /* call far absolute */
493 ret = X86_BR_CALL;
494 break;
495 case 0xe0 ... 0xe3: /* loop jmp */
496 ret = X86_BR_JCC;
497 break;
498 case 0xe9 ... 0xeb: /* jmp */
499 ret = X86_BR_JMP;
500 break;
501 case 0xff: /* call near absolute, call far absolute ind */
502 insn_get_modrm(&insn);
503 ext = (insn.modrm.bytes[0] >> 3) & 0x7;
504 switch (ext) {
505 case 2: /* near ind call */
506 case 3: /* far ind call */
507 ret = X86_BR_IND_CALL;
508 break;
509 case 4:
510 case 5:
511 ret = X86_BR_JMP;
512 break;
513 }
514 break;
515 default:
516 ret = X86_BR_NONE;
517 }
518 /*
519 * interrupts, traps, faults (and thus ring transition) may
520 * occur on any instructions. Thus, to classify them correctly,
521 * we need to first look at the from and to priv levels. If they
522 * are different and to is in the kernel, then it indicates
523 * a ring transition. If the from instruction is not a ring
524 * transition instr (syscall, systenter, int), then it means
525 * it was a irq, trap or fault.
526 *
527 * we have no way of detecting kernel to kernel faults.
528 */
529 if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
530 && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
531 ret = X86_BR_IRQ;
532
533 /*
534 * branch priv level determined by target as
535 * is done by HW when LBR_SELECT is implemented
536 */
537 if (ret != X86_BR_NONE)
538 ret |= to_plm;
539
540 return ret;
541}
542
543/*
544 * implement actual branch filter based on user demand.
545 * Hardware may not exactly satisfy that request, thus
546 * we need to inspect opcodes. Mismatched branches are
547 * discarded. Therefore, the number of branches returned
548 * in PERF_SAMPLE_BRANCH_STACK sample may vary.
549 */
550static void
551intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
552{
553 u64 from, to;
554 int br_sel = cpuc->br_sel;
555 int i, j, type;
556 bool compress = false;
557
558 /* if sampling all branches, then nothing to filter */
559 if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
560 return;
561
562 for (i = 0; i < cpuc->lbr_stack.nr; i++) {
563
564 from = cpuc->lbr_entries[i].from;
565 to = cpuc->lbr_entries[i].to;
566
567 type = branch_type(from, to);
568
569 /* if type does not correspond, then discard */
570 if (type == X86_BR_NONE || (br_sel & type) != type) {
571 cpuc->lbr_entries[i].from = 0;
572 compress = true;
573 }
574 }
575
576 if (!compress)
577 return;
578
579 /* remove all entries with from=0 */
580 for (i = 0; i < cpuc->lbr_stack.nr; ) {
581 if (!cpuc->lbr_entries[i].from) {
582 j = i;
583 while (++j < cpuc->lbr_stack.nr)
584 cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
585 cpuc->lbr_stack.nr--;
586 if (!cpuc->lbr_entries[i].from)
587 continue;
588 }
589 i++;
590 }
591}
592
593/*
594 * Map interface branch filters onto LBR filters
595 */
596static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
597 [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
598 [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
599 [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
600 [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
601 [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
602 | LBR_IND_JMP | LBR_FAR,
603 /*
604 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
605 */
606 [PERF_SAMPLE_BRANCH_ANY_CALL] =
607 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
608 /*
609 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
610 */
611 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
612};
613
614static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
615 [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
616 [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
617 [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
618 [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
619 [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
620 [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
621 | LBR_FAR,
622 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
623};
624
625/* core */
198void intel_pmu_lbr_init_core(void) 626void intel_pmu_lbr_init_core(void)
199{ 627{
200 x86_pmu.lbr_nr = 4; 628 x86_pmu.lbr_nr = 4;
201 x86_pmu.lbr_tos = 0x01c9; 629 x86_pmu.lbr_tos = MSR_LBR_TOS;
202 x86_pmu.lbr_from = 0x40; 630 x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
203 x86_pmu.lbr_to = 0x60; 631 x86_pmu.lbr_to = MSR_LBR_CORE_TO;
632
633 /*
634 * SW branch filter usage:
635 * - compensate for lack of HW filter
636 */
637 pr_cont("4-deep LBR, ");
204} 638}
205 639
640/* nehalem/westmere */
206void intel_pmu_lbr_init_nhm(void) 641void intel_pmu_lbr_init_nhm(void)
207{ 642{
208 x86_pmu.lbr_nr = 16; 643 x86_pmu.lbr_nr = 16;
209 x86_pmu.lbr_tos = 0x01c9; 644 x86_pmu.lbr_tos = MSR_LBR_TOS;
210 x86_pmu.lbr_from = 0x680; 645 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
211 x86_pmu.lbr_to = 0x6c0; 646 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
647
648 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
649 x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
650
651 /*
652 * SW branch filter usage:
653 * - workaround LBR_SEL errata (see above)
654 * - support syscall, sysret capture.
655 * That requires LBR_FAR but that means far
656 * jmp need to be filtered out
657 */
658 pr_cont("16-deep LBR, ");
659}
660
661/* sandy bridge */
662void intel_pmu_lbr_init_snb(void)
663{
664 x86_pmu.lbr_nr = 16;
665 x86_pmu.lbr_tos = MSR_LBR_TOS;
666 x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
667 x86_pmu.lbr_to = MSR_LBR_NHM_TO;
668
669 x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
670 x86_pmu.lbr_sel_map = snb_lbr_sel_map;
671
672 /*
673 * SW branch filter usage:
674 * - support syscall, sysret capture.
675 * That requires LBR_FAR but that means far
676 * jmp need to be filtered out
677 */
678 pr_cont("16-deep LBR, ");
212} 679}
213 680
681/* atom */
214void intel_pmu_lbr_init_atom(void) 682void intel_pmu_lbr_init_atom(void)
215{ 683{
684 /*
685 * only models starting at stepping 10 seems
686 * to have an operational LBR which can freeze
687 * on PMU interrupt
688 */
689 if (boot_cpu_data.x86_mask < 10) {
690 pr_cont("LBR disabled due to erratum");
691 return;
692 }
693
216 x86_pmu.lbr_nr = 8; 694 x86_pmu.lbr_nr = 8;
217 x86_pmu.lbr_tos = 0x01c9; 695 x86_pmu.lbr_tos = MSR_LBR_TOS;
218 x86_pmu.lbr_from = 0x40; 696 x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
219 x86_pmu.lbr_to = 0x60; 697 x86_pmu.lbr_to = MSR_LBR_CORE_TO;
698
699 /*
700 * SW branch filter usage:
701 * - compensate for lack of HW filter
702 */
703 pr_cont("8-deep LBR, ");
220} 704}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ef484d9d0a25..a2dfacfd7103 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1271,6 +1271,17 @@ done:
1271 return num ? -EINVAL : 0; 1271 return num ? -EINVAL : 0;
1272} 1272}
1273 1273
1274PMU_FORMAT_ATTR(cccr, "config:0-31" );
1275PMU_FORMAT_ATTR(escr, "config:32-62");
1276PMU_FORMAT_ATTR(ht, "config:63" );
1277
1278static struct attribute *intel_p4_formats_attr[] = {
1279 &format_attr_cccr.attr,
1280 &format_attr_escr.attr,
1281 &format_attr_ht.attr,
1282 NULL,
1283};
1284
1274static __initconst const struct x86_pmu p4_pmu = { 1285static __initconst const struct x86_pmu p4_pmu = {
1275 .name = "Netburst P4/Xeon", 1286 .name = "Netburst P4/Xeon",
1276 .handle_irq = p4_pmu_handle_irq, 1287 .handle_irq = p4_pmu_handle_irq,
@@ -1305,6 +1316,8 @@ static __initconst const struct x86_pmu p4_pmu = {
1305 * the former idea is taken from OProfile code 1316 * the former idea is taken from OProfile code
1306 */ 1317 */
1307 .perfctr_second_write = 1, 1318 .perfctr_second_write = 1,
1319
1320 .format_attrs = intel_p4_formats_attr,
1308}; 1321};
1309 1322
1310__init int p4_pmu_init(void) 1323__init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index c7181befecde..32bcfc7dd230 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -87,6 +87,23 @@ static void p6_pmu_enable_event(struct perf_event *event)
87 (void)checking_wrmsrl(hwc->config_base, val); 87 (void)checking_wrmsrl(hwc->config_base, val);
88} 88}
89 89
90PMU_FORMAT_ATTR(event, "config:0-7" );
91PMU_FORMAT_ATTR(umask, "config:8-15" );
92PMU_FORMAT_ATTR(edge, "config:18" );
93PMU_FORMAT_ATTR(pc, "config:19" );
94PMU_FORMAT_ATTR(inv, "config:23" );
95PMU_FORMAT_ATTR(cmask, "config:24-31" );
96
97static struct attribute *intel_p6_formats_attr[] = {
98 &format_attr_event.attr,
99 &format_attr_umask.attr,
100 &format_attr_edge.attr,
101 &format_attr_pc.attr,
102 &format_attr_inv.attr,
103 &format_attr_cmask.attr,
104 NULL,
105};
106
90static __initconst const struct x86_pmu p6_pmu = { 107static __initconst const struct x86_pmu p6_pmu = {
91 .name = "p6", 108 .name = "p6",
92 .handle_irq = x86_pmu_handle_irq, 109 .handle_irq = x86_pmu_handle_irq,
@@ -115,6 +132,8 @@ static __initconst const struct x86_pmu p6_pmu = {
115 .cntval_mask = (1ULL << 32) - 1, 132 .cntval_mask = (1ULL << 32) - 1,
116 .get_event_constraints = x86_get_event_constraints, 133 .get_event_constraints = x86_get_event_constraints,
117 .event_constraints = p6_event_constraints, 134 .event_constraints = p6_event_constraints,
135
136 .format_attrs = intel_p6_formats_attr,
118}; 137};
119 138
120__init int p6_pmu_init(void) 139__init int p6_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c7f64e6f537a..addf9e82a7f2 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
40 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, 40 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
41 { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, 41 { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
42 { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, 42 { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
43 { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
43 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, 44 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
44 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, 45 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
45 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, 46 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a524353d93f2..39472dd2323f 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -43,7 +43,6 @@
43 43
44#include <asm/processor.h> 44#include <asm/processor.h>
45#include <asm/msr.h> 45#include <asm/msr.h>
46#include <asm/system.h>
47 46
48static struct class *cpuid_class; 47static struct class *cpuid_class;
49 48
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 642f75a68cd5..11891ca7b716 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -62,16 +62,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
62 62
63 if (!userbuf) { 63 if (!userbuf) {
64 memcpy(buf, (vaddr + offset), csize); 64 memcpy(buf, (vaddr + offset), csize);
65 kunmap_atomic(vaddr, KM_PTE0); 65 kunmap_atomic(vaddr);
66 } else { 66 } else {
67 if (!kdump_buf_page) { 67 if (!kdump_buf_page) {
68 printk(KERN_WARNING "Kdump: Kdump buffer page not" 68 printk(KERN_WARNING "Kdump: Kdump buffer page not"
69 " allocated\n"); 69 " allocated\n");
70 kunmap_atomic(vaddr, KM_PTE0); 70 kunmap_atomic(vaddr);
71 return -EFAULT; 71 return -EFAULT;
72 } 72 }
73 copy_page(kdump_buf_page, vaddr); 73 copy_page(kdump_buf_page, vaddr);
74 kunmap_atomic(vaddr, KM_PTE0); 74 kunmap_atomic(vaddr);
75 if (copy_to_user(buf, (kdump_buf_page + offset), csize)) 75 if (copy_to_user(buf, (kdump_buf_page + offset), csize))
76 return -EFAULT; 76 return -EFAULT;
77 } 77 }
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 52821799a702..3ae2ced4a874 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -4,6 +4,7 @@
4#include <linux/bootmem.h> 4#include <linux/bootmem.h>
5#include <linux/export.h> 5#include <linux/export.h>
6#include <linux/io.h> 6#include <linux/io.h>
7#include <linux/irqdomain.h>
7#include <linux/interrupt.h> 8#include <linux/interrupt.h>
8#include <linux/list.h> 9#include <linux/list.h>
9#include <linux/of.h> 10#include <linux/of.h>
@@ -17,64 +18,14 @@
17#include <linux/initrd.h> 18#include <linux/initrd.h>
18 19
19#include <asm/hpet.h> 20#include <asm/hpet.h>
20#include <asm/irq_controller.h>
21#include <asm/apic.h> 21#include <asm/apic.h>
22#include <asm/pci_x86.h> 22#include <asm/pci_x86.h>
23 23
24__initdata u64 initial_dtb; 24__initdata u64 initial_dtb;
25char __initdata cmd_line[COMMAND_LINE_SIZE]; 25char __initdata cmd_line[COMMAND_LINE_SIZE];
26static LIST_HEAD(irq_domains);
27static DEFINE_RAW_SPINLOCK(big_irq_lock);
28 26
29int __initdata of_ioapic; 27int __initdata of_ioapic;
30 28
31#ifdef CONFIG_X86_IO_APIC
32static void add_interrupt_host(struct irq_domain *ih)
33{
34 unsigned long flags;
35
36 raw_spin_lock_irqsave(&big_irq_lock, flags);
37 list_add(&ih->l, &irq_domains);
38 raw_spin_unlock_irqrestore(&big_irq_lock, flags);
39}
40#endif
41
42static struct irq_domain *get_ih_from_node(struct device_node *controller)
43{
44 struct irq_domain *ih, *found = NULL;
45 unsigned long flags;
46
47 raw_spin_lock_irqsave(&big_irq_lock, flags);
48 list_for_each_entry(ih, &irq_domains, l) {
49 if (ih->controller == controller) {
50 found = ih;
51 break;
52 }
53 }
54 raw_spin_unlock_irqrestore(&big_irq_lock, flags);
55 return found;
56}
57
58unsigned int irq_create_of_mapping(struct device_node *controller,
59 const u32 *intspec, unsigned int intsize)
60{
61 struct irq_domain *ih;
62 u32 virq, type;
63 int ret;
64
65 ih = get_ih_from_node(controller);
66 if (!ih)
67 return 0;
68 ret = ih->xlate(ih, intspec, intsize, &virq, &type);
69 if (ret)
70 return 0;
71 if (type == IRQ_TYPE_NONE)
72 return virq;
73 irq_set_irq_type(virq, type);
74 return virq;
75}
76EXPORT_SYMBOL_GPL(irq_create_of_mapping);
77
78unsigned long pci_address_to_pio(phys_addr_t address) 29unsigned long pci_address_to_pio(phys_addr_t address)
79{ 30{
80 /* 31 /*
@@ -354,36 +305,43 @@ static struct of_ioapic_type of_ioapic_type[] =
354 }, 305 },
355}; 306};
356 307
357static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, 308static int ioapic_xlate(struct irq_domain *domain,
358 u32 *out_hwirq, u32 *out_type) 309 struct device_node *controller,
310 const u32 *intspec, u32 intsize,
311 irq_hw_number_t *out_hwirq, u32 *out_type)
359{ 312{
360 struct mp_ioapic_gsi *gsi_cfg;
361 struct io_apic_irq_attr attr; 313 struct io_apic_irq_attr attr;
362 struct of_ioapic_type *it; 314 struct of_ioapic_type *it;
363 u32 line, idx, type; 315 u32 line, idx;
316 int rc;
364 317
365 if (intsize < 2) 318 if (WARN_ON(intsize < 2))
366 return -EINVAL; 319 return -EINVAL;
367 320
368 line = *intspec; 321 line = intspec[0];
369 idx = (u32) id->priv;
370 gsi_cfg = mp_ioapic_gsi_routing(idx);
371 *out_hwirq = line + gsi_cfg->gsi_base;
372
373 intspec++;
374 type = *intspec;
375 322
376 if (type >= ARRAY_SIZE(of_ioapic_type)) 323 if (intspec[1] >= ARRAY_SIZE(of_ioapic_type))
377 return -EINVAL; 324 return -EINVAL;
378 325
379 it = of_ioapic_type + type; 326 it = &of_ioapic_type[intspec[1]];
380 *out_type = it->out_type;
381 327
328 idx = (u32) domain->host_data;
382 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); 329 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
383 330
384 return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr); 331 rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line),
332 cpu_to_node(0), &attr);
333 if (rc)
334 return rc;
335
336 *out_hwirq = line;
337 *out_type = it->out_type;
338 return 0;
385} 339}
386 340
341const struct irq_domain_ops ioapic_irq_domain_ops = {
342 .xlate = ioapic_xlate,
343};
344
387static void __init ioapic_add_ofnode(struct device_node *np) 345static void __init ioapic_add_ofnode(struct device_node *np)
388{ 346{
389 struct resource r; 347 struct resource r;
@@ -399,13 +357,14 @@ static void __init ioapic_add_ofnode(struct device_node *np)
399 for (i = 0; i < nr_ioapics; i++) { 357 for (i = 0; i < nr_ioapics; i++) {
400 if (r.start == mpc_ioapic_addr(i)) { 358 if (r.start == mpc_ioapic_addr(i)) {
401 struct irq_domain *id; 359 struct irq_domain *id;
360 struct mp_ioapic_gsi *gsi_cfg;
361
362 gsi_cfg = mp_ioapic_gsi_routing(i);
402 363
403 id = kzalloc(sizeof(*id), GFP_KERNEL); 364 id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
365 &ioapic_irq_domain_ops,
366 (void*)i);
404 BUG_ON(!id); 367 BUG_ON(!id);
405 id->controller = np;
406 id->xlate = ioapic_xlate;
407 id->priv = (void *)i;
408 add_interrupt_host(id);
409 return; 368 return;
410 } 369 }
411 } 370 }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 28f98706b08b..1b81839b6c88 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -37,13 +37,16 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
37 const struct stacktrace_ops *ops, 37 const struct stacktrace_ops *ops,
38 struct thread_info *tinfo, int *graph) 38 struct thread_info *tinfo, int *graph)
39{ 39{
40 struct task_struct *task = tinfo->task; 40 struct task_struct *task;
41 unsigned long ret_addr; 41 unsigned long ret_addr;
42 int index = task->curr_ret_stack; 42 int index;
43 43
44 if (addr != (unsigned long)return_to_handler) 44 if (addr != (unsigned long)return_to_handler)
45 return; 45 return;
46 46
47 task = tinfo->task;
48 index = task->curr_ret_stack;
49
47 if (!task->ret_stack || index < *graph) 50 if (!task->ret_stack || index < *graph)
48 return; 51 return;
49 52
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index c99f9ed013d5..88ec9129271d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
87 int i; 87 int i;
88 88
89 print_modules(); 89 print_modules();
90 __show_regs(regs, 0); 90 __show_regs(regs, !user_mode_vm(regs));
91 91
92 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", 92 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
93 TASK_COMM_LEN, current->comm, task_pid_nr(current), 93 TASK_COMM_LEN, current->comm, task_pid_nr(current),
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 79d97e68f042..7b784f4ef1e4 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -98,12 +98,6 @@
98#endif 98#endif
99.endm 99.endm
100 100
101#ifdef CONFIG_VM86
102#define resume_userspace_sig check_userspace
103#else
104#define resume_userspace_sig resume_userspace
105#endif
106
107/* 101/*
108 * User gs save/restore 102 * User gs save/restore
109 * 103 *
@@ -327,10 +321,19 @@ ret_from_exception:
327 preempt_stop(CLBR_ANY) 321 preempt_stop(CLBR_ANY)
328ret_from_intr: 322ret_from_intr:
329 GET_THREAD_INFO(%ebp) 323 GET_THREAD_INFO(%ebp)
330check_userspace: 324resume_userspace_sig:
325#ifdef CONFIG_VM86
331 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS 326 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
332 movb PT_CS(%esp), %al 327 movb PT_CS(%esp), %al
333 andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax 328 andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
329#else
330 /*
331 * We can be coming here from a syscall done in the kernel space,
332 * e.g. a failed kernel_execve().
333 */
334 movl PT_CS(%esp), %eax
335 andl $SEGMENT_RPL_MASK, %eax
336#endif
334 cmpl $USER_RPL, %eax 337 cmpl $USER_RPL, %eax
335 jb resume_kernel # not returning to v8086 or userspace 338 jb resume_kernel # not returning to v8086 or userspace
336 339
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2925e14fb1d9..cdc79b5cfcd9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
320 movq %rsp, %rsi 320 movq %rsp, %rsi
321 321
322 leaq -RBP(%rsp),%rdi /* arg1 for handler */ 322 leaq -RBP(%rsp),%rdi /* arg1 for handler */
323 testl $3, CS(%rdi) 323 testl $3, CS-RBP(%rsi)
324 je 1f 324 je 1f
325 SWAPGS 325 SWAPGS
326 /* 326 /*
@@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
330 * moving irq_enter into assembly, which would be too much work) 330 * moving irq_enter into assembly, which would be too much work)
331 */ 331 */
3321: incl PER_CPU_VAR(irq_count) 3321: incl PER_CPU_VAR(irq_count)
333 jne 2f 333 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
334 mov PER_CPU_VAR(irq_stack_ptr),%rsp
335 CFI_DEF_CFA_REGISTER rsi 334 CFI_DEF_CFA_REGISTER rsi
336 335
3372: /* Store previous stack value */ 336 /* Store previous stack value */
338 pushq %rsi 337 pushq %rsi
339 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ 338 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
340 0x77 /* DW_OP_breg7 */, 0, \ 339 0x77 /* DW_OP_breg7 */, 0, \
@@ -857,7 +856,7 @@ ret_from_intr:
857 856
858 /* Restore saved previous stack */ 857 /* Restore saved previous stack */
859 popq %rsi 858 popq %rsi
860 CFI_DEF_CFA_REGISTER rsi 859 CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
861 leaq ARGOFFSET-RBP(%rsi), %rsp 860 leaq ARGOFFSET-RBP(%rsi), %rsp
862 CFI_DEF_CFA_REGISTER rsp 861 CFI_DEF_CFA_REGISTER rsp
863 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET 862 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
@@ -1574,6 +1573,7 @@ ENTRY(nmi)
1574 1573
1575 /* Use %rdx as out temp variable throughout */ 1574 /* Use %rdx as out temp variable throughout */
1576 pushq_cfi %rdx 1575 pushq_cfi %rdx
1576 CFI_REL_OFFSET rdx, 0
1577 1577
1578 /* 1578 /*
1579 * If %cs was not the kernel segment, then the NMI triggered in user 1579 * If %cs was not the kernel segment, then the NMI triggered in user
@@ -1598,6 +1598,7 @@ ENTRY(nmi)
1598 */ 1598 */
1599 lea 6*8(%rsp), %rdx 1599 lea 6*8(%rsp), %rdx
1600 test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi 1600 test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
1601 CFI_REMEMBER_STATE
1601 1602
1602nested_nmi: 1603nested_nmi:
1603 /* 1604 /*
@@ -1629,10 +1630,12 @@ nested_nmi:
1629 1630
1630nested_nmi_out: 1631nested_nmi_out:
1631 popq_cfi %rdx 1632 popq_cfi %rdx
1633 CFI_RESTORE rdx
1632 1634
1633 /* No need to check faults here */ 1635 /* No need to check faults here */
1634 INTERRUPT_RETURN 1636 INTERRUPT_RETURN
1635 1637
1638 CFI_RESTORE_STATE
1636first_nmi: 1639first_nmi:
1637 /* 1640 /*
1638 * Because nested NMIs will use the pushed location that we 1641 * Because nested NMIs will use the pushed location that we
@@ -1664,10 +1667,15 @@ first_nmi:
1664 * | pt_regs | 1667 * | pt_regs |
1665 * +-------------------------+ 1668 * +-------------------------+
1666 * 1669 *
1667 * The saved RIP is used to fix up the copied RIP that a nested 1670 * The saved stack frame is used to fix up the copied stack frame
1668 * NMI may zero out. The original stack frame and the temp storage 1671 * that a nested NMI may change to make the interrupted NMI iret jump
1672 * to the repeat_nmi. The original stack frame and the temp storage
1669 * is also used by nested NMIs and can not be trusted on exit. 1673 * is also used by nested NMIs and can not be trusted on exit.
1670 */ 1674 */
1675 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
1676 movq (%rsp), %rdx
1677 CFI_RESTORE rdx
1678
1671 /* Set the NMI executing variable on the stack. */ 1679 /* Set the NMI executing variable on the stack. */
1672 pushq_cfi $1 1680 pushq_cfi $1
1673 1681
@@ -1675,22 +1683,39 @@ first_nmi:
1675 .rept 5 1683 .rept 5
1676 pushq_cfi 6*8(%rsp) 1684 pushq_cfi 6*8(%rsp)
1677 .endr 1685 .endr
1686 CFI_DEF_CFA_OFFSET SS+8-RIP
1687
1688 /* Everything up to here is safe from nested NMIs */
1689
1690 /*
1691 * If there was a nested NMI, the first NMI's iret will return
1692 * here. But NMIs are still enabled and we can take another
1693 * nested NMI. The nested NMI checks the interrupted RIP to see
1694 * if it is between repeat_nmi and end_repeat_nmi, and if so
1695 * it will just return, as we are about to repeat an NMI anyway.
1696 * This makes it safe to copy to the stack frame that a nested
1697 * NMI will update.
1698 */
1699repeat_nmi:
1700 /*
1701 * Update the stack variable to say we are still in NMI (the update
1702 * is benign for the non-repeat case, where 1 was pushed just above
1703 * to this very stack slot).
1704 */
1705 movq $1, 5*8(%rsp)
1678 1706
1679 /* Make another copy, this one may be modified by nested NMIs */ 1707 /* Make another copy, this one may be modified by nested NMIs */
1680 .rept 5 1708 .rept 5
1681 pushq_cfi 4*8(%rsp) 1709 pushq_cfi 4*8(%rsp)
1682 .endr 1710 .endr
1683 1711 CFI_DEF_CFA_OFFSET SS+8-RIP
1684 /* Do not pop rdx, nested NMIs will corrupt it */ 1712end_repeat_nmi:
1685 movq 11*8(%rsp), %rdx
1686 1713
1687 /* 1714 /*
1688 * Everything below this point can be preempted by a nested 1715 * Everything below this point can be preempted by a nested
1689 * NMI if the first NMI took an exception. Repeated NMIs 1716 * NMI if the first NMI took an exception and reset our iret stack
1690 * caused by an exception and nested NMI will start here, and 1717 * so that we repeat another NMI.
1691 * can still be preempted by another NMI.
1692 */ 1718 */
1693restart_nmi:
1694 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1719 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1695 subq $ORIG_RAX-R15, %rsp 1720 subq $ORIG_RAX-R15, %rsp
1696 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1721 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1719,26 +1744,6 @@ nmi_restore:
1719 CFI_ENDPROC 1744 CFI_ENDPROC
1720END(nmi) 1745END(nmi)
1721 1746
1722 /*
1723 * If an NMI hit an iret because of an exception or breakpoint,
1724 * it can lose its NMI context, and a nested NMI may come in.
1725 * In that case, the nested NMI will change the preempted NMI's
1726 * stack to jump to here when it does the final iret.
1727 */
1728repeat_nmi:
1729 INTR_FRAME
1730 /* Update the stack variable to say we are still in NMI */
1731 movq $1, 5*8(%rsp)
1732
1733 /* copy the saved stack back to copy stack */
1734 .rept 5
1735 pushq_cfi 4*8(%rsp)
1736 .endr
1737
1738 jmp restart_nmi
1739 CFI_ENDPROC
1740end_repeat_nmi:
1741
1742ENTRY(ignore_sysret) 1747ENTRY(ignore_sysret)
1743 CFI_STARTPROC 1748 CFI_STARTPROC
1744 mov $-ENOSYS,%eax 1749 mov $-ENOSYS,%eax
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 739d8598f789..7734bcbb5a3a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -16,6 +16,7 @@
16#include <asm/uaccess.h> 16#include <asm/uaccess.h>
17#include <asm/ptrace.h> 17#include <asm/ptrace.h>
18#include <asm/i387.h> 18#include <asm/i387.h>
19#include <asm/fpu-internal.h>
19#include <asm/user.h> 20#include <asm/user.h>
20 21
21#ifdef CONFIG_X86_64 22#ifdef CONFIG_X86_64
@@ -32,6 +33,86 @@
32# define user32_fxsr_struct user_fxsr_struct 33# define user32_fxsr_struct user_fxsr_struct
33#endif 34#endif
34 35
36/*
37 * Were we in an interrupt that interrupted kernel mode?
38 *
39 * We can do a kernel_fpu_begin/end() pair *ONLY* if that
40 * pair does nothing at all: the thread must not have fpu (so
41 * that we don't try to save the FPU state), and TS must
42 * be set (so that the clts/stts pair does nothing that is
43 * visible in the interrupted kernel thread).
44 */
45static inline bool interrupted_kernel_fpu_idle(void)
46{
47 return !__thread_has_fpu(current) &&
48 (read_cr0() & X86_CR0_TS);
49}
50
51/*
52 * Were we in user mode (or vm86 mode) when we were
53 * interrupted?
54 *
55 * Doing kernel_fpu_begin/end() is ok if we are running
56 * in an interrupt context from user mode - we'll just
57 * save the FPU state as required.
58 */
59static inline bool interrupted_user_mode(void)
60{
61 struct pt_regs *regs = get_irq_regs();
62 return regs && user_mode_vm(regs);
63}
64
65/*
66 * Can we use the FPU in kernel mode with the
67 * whole "kernel_fpu_begin/end()" sequence?
68 *
69 * It's always ok in process context (ie "not interrupt")
70 * but it is sometimes ok even from an irq.
71 */
72bool irq_fpu_usable(void)
73{
74 return !in_interrupt() ||
75 interrupted_user_mode() ||
76 interrupted_kernel_fpu_idle();
77}
78EXPORT_SYMBOL(irq_fpu_usable);
79
80void kernel_fpu_begin(void)
81{
82 struct task_struct *me = current;
83
84 WARN_ON_ONCE(!irq_fpu_usable());
85 preempt_disable();
86 if (__thread_has_fpu(me)) {
87 __save_init_fpu(me);
88 __thread_clear_has_fpu(me);
89 /* We do 'stts()' in kernel_fpu_end() */
90 } else {
91 percpu_write(fpu_owner_task, NULL);
92 clts();
93 }
94}
95EXPORT_SYMBOL(kernel_fpu_begin);
96
97void kernel_fpu_end(void)
98{
99 stts();
100 preempt_enable();
101}
102EXPORT_SYMBOL(kernel_fpu_end);
103
104void unlazy_fpu(struct task_struct *tsk)
105{
106 preempt_disable();
107 if (__thread_has_fpu(tsk)) {
108 __save_init_fpu(tsk);
109 __thread_fpu_end(tsk);
110 } else
111 tsk->fpu_counter = 0;
112 preempt_enable();
113}
114EXPORT_SYMBOL(unlazy_fpu);
115
35#ifdef CONFIG_MATH_EMULATION 116#ifdef CONFIG_MATH_EMULATION
36# define HAVE_HWFP (boot_cpu_data.hard_math) 117# define HAVE_HWFP (boot_cpu_data.hard_math)
37#else 118#else
@@ -44,7 +125,7 @@ EXPORT_SYMBOL_GPL(xstate_size);
44unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); 125unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
45static struct i387_fxsave_struct fx_scratch __cpuinitdata; 126static struct i387_fxsave_struct fx_scratch __cpuinitdata;
46 127
47void __cpuinit mxcsr_feature_mask_init(void) 128static void __cpuinit mxcsr_feature_mask_init(void)
48{ 129{
49 unsigned long mask = 0; 130 unsigned long mask = 0;
50 131
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 610485223bdb..36d1853e91af 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -15,7 +15,6 @@
15#include <linux/delay.h> 15#include <linux/delay.h>
16 16
17#include <linux/atomic.h> 17#include <linux/atomic.h>
18#include <asm/system.h>
19#include <asm/timer.h> 18#include <asm/timer.h>
20#include <asm/hw_irq.h> 19#include <asm/hw_irq.h>
21#include <asm/pgtable.h> 20#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7943e0c21bde..3dafc6003b7c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -282,8 +282,13 @@ void fixup_irqs(void)
282 else if (!(warned++)) 282 else if (!(warned++))
283 set_affinity = 0; 283 set_affinity = 0;
284 284
285 /*
286 * We unmask if the irq was not marked masked by the
287 * core code. That respects the lazy irq disable
288 * behaviour.
289 */
285 if (!irqd_can_move_in_process_context(data) && 290 if (!irqd_can_move_in_process_context(data) &&
286 !irqd_irq_disabled(data) && chip->irq_unmask) 291 !irqd_irq_masked(data) && chip->irq_unmask)
287 chip->irq_unmask(data); 292 chip->irq_unmask(data);
288 293
289 raw_spin_unlock(&desc->lock); 294 raw_spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 40fc86161d92..58b7f27cb3e9 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -100,13 +100,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
100 irqctx->tinfo.task = curctx->tinfo.task; 100 irqctx->tinfo.task = curctx->tinfo.task;
101 irqctx->tinfo.previous_esp = current_stack_pointer; 101 irqctx->tinfo.previous_esp = current_stack_pointer;
102 102
103 /* 103 /* Copy the preempt_count so that the [soft]irq checks work. */
104 * Copy the softirq bits in preempt_count so that the 104 irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
105 * softirq checks work in the hardirq context.
106 */
107 irqctx->tinfo.preempt_count =
108 (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
109 (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
110 105
111 if (unlikely(overflow)) 106 if (unlikely(overflow))
112 call_on_stack(print_stack_overflow, isp); 107 call_on_stack(print_stack_overflow, isp);
@@ -196,7 +191,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
196 if (unlikely(!desc)) 191 if (unlikely(!desc))
197 return false; 192 return false;
198 193
199 if (!execute_on_irq_stack(overflow, desc, irq)) { 194 if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
200 if (unlikely(overflow)) 195 if (unlikely(overflow))
201 print_stack_overflow(); 196 print_stack_overflow();
202 desc->handle_irq(irq, desc); 197 desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7b77062dea11..252981afd6c4 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -16,7 +16,6 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17 17
18#include <linux/atomic.h> 18#include <linux/atomic.h>
19#include <asm/system.h>
20#include <asm/timer.h> 19#include <asm/timer.h>
21#include <asm/hw_irq.h> 20#include <asm/hw_irq.h>
22#include <asm/pgtable.h> 21#include <asm/pgtable.h>
@@ -306,10 +305,10 @@ void __init native_init_IRQ(void)
306 * us. (some of these will be overridden and become 305 * us. (some of these will be overridden and become
307 * 'special' SMP interrupts) 306 * 'special' SMP interrupts)
308 */ 307 */
309 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { 308 i = FIRST_EXTERNAL_VECTOR;
309 for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
310 /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ 310 /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
311 if (!test_bit(i, used_vectors)) 311 set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
312 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
313 } 312 }
314 313
315 if (!acpi_ioapic && !of_ioapic) 314 if (!acpi_ioapic && !of_ioapic)
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 90fcf62854bb..1d5d31ea686b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -68,16 +68,9 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
68 return count; 68 return count;
69} 69}
70 70
71static int setup_data_open(struct inode *inode, struct file *file)
72{
73 file->private_data = inode->i_private;
74
75 return 0;
76}
77
78static const struct file_operations fops_setup_data = { 71static const struct file_operations fops_setup_data = {
79 .read = setup_data_read, 72 .read = setup_data_read,
80 .open = setup_data_open, 73 .open = simple_open,
81 .llseek = default_llseek, 74 .llseek = default_llseek,
82}; 75};
83 76
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index faba5771acad..8bfb6146f753 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,10 +43,11 @@
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/nmi.h> 44#include <linux/nmi.h>
45#include <linux/hw_breakpoint.h> 45#include <linux/hw_breakpoint.h>
46#include <linux/uaccess.h>
47#include <linux/memory.h>
46 48
47#include <asm/debugreg.h> 49#include <asm/debugreg.h>
48#include <asm/apicdef.h> 50#include <asm/apicdef.h>
49#include <asm/system.h>
50#include <asm/apic.h> 51#include <asm/apic.h>
51#include <asm/nmi.h> 52#include <asm/nmi.h>
52 53
@@ -67,8 +68,6 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
67 { "ss", 4, offsetof(struct pt_regs, ss) }, 68 { "ss", 4, offsetof(struct pt_regs, ss) },
68 { "ds", 4, offsetof(struct pt_regs, ds) }, 69 { "ds", 4, offsetof(struct pt_regs, ds) },
69 { "es", 4, offsetof(struct pt_regs, es) }, 70 { "es", 4, offsetof(struct pt_regs, es) },
70 { "fs", 4, -1 },
71 { "gs", 4, -1 },
72#else 71#else
73 { "ax", 8, offsetof(struct pt_regs, ax) }, 72 { "ax", 8, offsetof(struct pt_regs, ax) },
74 { "bx", 8, offsetof(struct pt_regs, bx) }, 73 { "bx", 8, offsetof(struct pt_regs, bx) },
@@ -90,7 +89,11 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
90 { "flags", 4, offsetof(struct pt_regs, flags) }, 89 { "flags", 4, offsetof(struct pt_regs, flags) },
91 { "cs", 4, offsetof(struct pt_regs, cs) }, 90 { "cs", 4, offsetof(struct pt_regs, cs) },
92 { "ss", 4, offsetof(struct pt_regs, ss) }, 91 { "ss", 4, offsetof(struct pt_regs, ss) },
92 { "ds", 4, -1 },
93 { "es", 4, -1 },
93#endif 94#endif
95 { "fs", 4, -1 },
96 { "gs", 4, -1 },
94}; 97};
95 98
96int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) 99int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
@@ -740,6 +743,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
740 regs->ip = ip; 743 regs->ip = ip;
741} 744}
742 745
746int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
747{
748 int err;
749 char opc[BREAK_INSTR_SIZE];
750
751 bpt->type = BP_BREAKPOINT;
752 err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
753 BREAK_INSTR_SIZE);
754 if (err)
755 return err;
756 err = probe_kernel_write((char *)bpt->bpt_addr,
757 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
758#ifdef CONFIG_DEBUG_RODATA
759 if (!err)
760 return err;
761 /*
762 * It is safe to call text_poke() because normal kernel execution
763 * is stopped on all cores, so long as the text_mutex is not locked.
764 */
765 if (mutex_is_locked(&text_mutex))
766 return -EBUSY;
767 text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
768 BREAK_INSTR_SIZE);
769 err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
770 if (err)
771 return err;
772 if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
773 return -EINVAL;
774 bpt->type = BP_POKE_BREAKPOINT;
775#endif /* CONFIG_DEBUG_RODATA */
776 return err;
777}
778
779int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
780{
781#ifdef CONFIG_DEBUG_RODATA
782 int err;
783 char opc[BREAK_INSTR_SIZE];
784
785 if (bpt->type != BP_POKE_BREAKPOINT)
786 goto knl_write;
787 /*
788 * It is safe to call text_poke() because normal kernel execution
789 * is stopped on all cores, so long as the text_mutex is not locked.
790 */
791 if (mutex_is_locked(&text_mutex))
792 goto knl_write;
793 text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
794 err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
795 if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
796 goto knl_write;
797 return err;
798knl_write:
799#endif /* CONFIG_DEBUG_RODATA */
800 return probe_kernel_write((char *)bpt->bpt_addr,
801 (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
802}
803
743struct kgdb_arch arch_kgdb_ops = { 804struct kgdb_arch arch_kgdb_ops = {
744 /* Breakpoint instruction: */ 805 /* Breakpoint instruction: */
745 .gdb_bpt_instr = { 0xcc }, 806 .gdb_bpt_instr = { 0xcc },
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
new file mode 100644
index 000000000000..3230b68ef29a
--- /dev/null
+++ b/arch/x86/kernel/kprobes-common.h
@@ -0,0 +1,102 @@
1#ifndef __X86_KERNEL_KPROBES_COMMON_H
2#define __X86_KERNEL_KPROBES_COMMON_H
3
4/* Kprobes and Optprobes common header */
5
6#ifdef CONFIG_X86_64
7#define SAVE_REGS_STRING \
8 /* Skip cs, ip, orig_ax. */ \
9 " subq $24, %rsp\n" \
10 " pushq %rdi\n" \
11 " pushq %rsi\n" \
12 " pushq %rdx\n" \
13 " pushq %rcx\n" \
14 " pushq %rax\n" \
15 " pushq %r8\n" \
16 " pushq %r9\n" \
17 " pushq %r10\n" \
18 " pushq %r11\n" \
19 " pushq %rbx\n" \
20 " pushq %rbp\n" \
21 " pushq %r12\n" \
22 " pushq %r13\n" \
23 " pushq %r14\n" \
24 " pushq %r15\n"
25#define RESTORE_REGS_STRING \
26 " popq %r15\n" \
27 " popq %r14\n" \
28 " popq %r13\n" \
29 " popq %r12\n" \
30 " popq %rbp\n" \
31 " popq %rbx\n" \
32 " popq %r11\n" \
33 " popq %r10\n" \
34 " popq %r9\n" \
35 " popq %r8\n" \
36 " popq %rax\n" \
37 " popq %rcx\n" \
38 " popq %rdx\n" \
39 " popq %rsi\n" \
40 " popq %rdi\n" \
41 /* Skip orig_ax, ip, cs */ \
42 " addq $24, %rsp\n"
43#else
44#define SAVE_REGS_STRING \
45 /* Skip cs, ip, orig_ax and gs. */ \
46 " subl $16, %esp\n" \
47 " pushl %fs\n" \
48 " pushl %es\n" \
49 " pushl %ds\n" \
50 " pushl %eax\n" \
51 " pushl %ebp\n" \
52 " pushl %edi\n" \
53 " pushl %esi\n" \
54 " pushl %edx\n" \
55 " pushl %ecx\n" \
56 " pushl %ebx\n"
57#define RESTORE_REGS_STRING \
58 " popl %ebx\n" \
59 " popl %ecx\n" \
60 " popl %edx\n" \
61 " popl %esi\n" \
62 " popl %edi\n" \
63 " popl %ebp\n" \
64 " popl %eax\n" \
65 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
66 " addl $24, %esp\n"
67#endif
68
69/* Ensure if the instruction can be boostable */
70extern int can_boost(kprobe_opcode_t *instruction);
71/* Recover instruction if given address is probed */
72extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
73 unsigned long addr);
74/*
75 * Copy an instruction and adjust the displacement if the instruction
76 * uses the %rip-relative addressing mode.
77 */
78extern int __copy_instruction(u8 *dest, u8 *src);
79
80/* Generate a relative-jump/call instruction */
81extern void synthesize_reljump(void *from, void *to);
82extern void synthesize_relcall(void *from, void *to);
83
84#ifdef CONFIG_OPTPROBES
85extern int arch_init_optprobes(void);
86extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
87extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
88#else /* !CONFIG_OPTPROBES */
89static inline int arch_init_optprobes(void)
90{
91 return 0;
92}
93static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
94{
95 return 0;
96}
97static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
98{
99 return addr;
100}
101#endif
102#endif
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c
new file mode 100644
index 000000000000..c5e410eed403
--- /dev/null
+++ b/arch/x86/kernel/kprobes-opt.c
@@ -0,0 +1,512 @@
1/*
2 * Kernel Probes Jump Optimization (Optprobes)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004
19 * Copyright (C) Hitachi Ltd., 2012
20 */
21#include <linux/kprobes.h>
22#include <linux/ptrace.h>
23#include <linux/string.h>
24#include <linux/slab.h>
25#include <linux/hardirq.h>
26#include <linux/preempt.h>
27#include <linux/module.h>
28#include <linux/kdebug.h>
29#include <linux/kallsyms.h>
30#include <linux/ftrace.h>
31
32#include <asm/cacheflush.h>
33#include <asm/desc.h>
34#include <asm/pgtable.h>
35#include <asm/uaccess.h>
36#include <asm/alternative.h>
37#include <asm/insn.h>
38#include <asm/debugreg.h>
39
40#include "kprobes-common.h"
41
42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
43{
44 struct optimized_kprobe *op;
45 struct kprobe *kp;
46 long offs;
47 int i;
48
49 for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
50 kp = get_kprobe((void *)addr - i);
51 /* This function only handles jump-optimized kprobe */
52 if (kp && kprobe_optimized(kp)) {
53 op = container_of(kp, struct optimized_kprobe, kp);
54 /* If op->list is not empty, op is under optimizing */
55 if (list_empty(&op->list))
56 goto found;
57 }
58 }
59
60 return addr;
61found:
62 /*
63 * If the kprobe can be optimized, original bytes which can be
64 * overwritten by jump destination address. In this case, original
65 * bytes must be recovered from op->optinsn.copied_insn buffer.
66 */
67 memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
68 if (addr == (unsigned long)kp->addr) {
69 buf[0] = kp->opcode;
70 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
71 } else {
72 offs = addr - (unsigned long)kp->addr - 1;
73 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
74 }
75
76 return (unsigned long)buf;
77}
78
79/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
80static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
81{
82#ifdef CONFIG_X86_64
83 *addr++ = 0x48;
84 *addr++ = 0xbf;
85#else
86 *addr++ = 0xb8;
87#endif
88 *(unsigned long *)addr = val;
89}
90
91static void __used __kprobes kprobes_optinsn_template_holder(void)
92{
93 asm volatile (
94 ".global optprobe_template_entry\n"
95 "optprobe_template_entry:\n"
96#ifdef CONFIG_X86_64
97 /* We don't bother saving the ss register */
98 " pushq %rsp\n"
99 " pushfq\n"
100 SAVE_REGS_STRING
101 " movq %rsp, %rsi\n"
102 ".global optprobe_template_val\n"
103 "optprobe_template_val:\n"
104 ASM_NOP5
105 ASM_NOP5
106 ".global optprobe_template_call\n"
107 "optprobe_template_call:\n"
108 ASM_NOP5
109 /* Move flags to rsp */
110 " movq 144(%rsp), %rdx\n"
111 " movq %rdx, 152(%rsp)\n"
112 RESTORE_REGS_STRING
113 /* Skip flags entry */
114 " addq $8, %rsp\n"
115 " popfq\n"
116#else /* CONFIG_X86_32 */
117 " pushf\n"
118 SAVE_REGS_STRING
119 " movl %esp, %edx\n"
120 ".global optprobe_template_val\n"
121 "optprobe_template_val:\n"
122 ASM_NOP5
123 ".global optprobe_template_call\n"
124 "optprobe_template_call:\n"
125 ASM_NOP5
126 RESTORE_REGS_STRING
127 " addl $4, %esp\n" /* skip cs */
128 " popf\n"
129#endif
130 ".global optprobe_template_end\n"
131 "optprobe_template_end:\n");
132}
133
134#define TMPL_MOVE_IDX \
135 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
136#define TMPL_CALL_IDX \
137 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
138#define TMPL_END_IDX \
139 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
140
141#define INT3_SIZE sizeof(kprobe_opcode_t)
142
143/* Optimized kprobe call back function: called from optinsn */
144static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
145{
146 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
147 unsigned long flags;
148
149 /* This is possible if op is under delayed unoptimizing */
150 if (kprobe_disabled(&op->kp))
151 return;
152
153 local_irq_save(flags);
154 if (kprobe_running()) {
155 kprobes_inc_nmissed_count(&op->kp);
156 } else {
157 /* Save skipped registers */
158#ifdef CONFIG_X86_64
159 regs->cs = __KERNEL_CS;
160#else
161 regs->cs = __KERNEL_CS | get_kernel_rpl();
162 regs->gs = 0;
163#endif
164 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
165 regs->orig_ax = ~0UL;
166
167 __this_cpu_write(current_kprobe, &op->kp);
168 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
169 opt_pre_handler(&op->kp, regs);
170 __this_cpu_write(current_kprobe, NULL);
171 }
172 local_irq_restore(flags);
173}
174
175static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
176{
177 int len = 0, ret;
178
179 while (len < RELATIVEJUMP_SIZE) {
180 ret = __copy_instruction(dest + len, src + len);
181 if (!ret || !can_boost(dest + len))
182 return -EINVAL;
183 len += ret;
184 }
185 /* Check whether the address range is reserved */
186 if (ftrace_text_reserved(src, src + len - 1) ||
187 alternatives_text_reserved(src, src + len - 1) ||
188 jump_label_text_reserved(src, src + len - 1))
189 return -EBUSY;
190
191 return len;
192}
193
194/* Check whether insn is indirect jump */
195static int __kprobes insn_is_indirect_jump(struct insn *insn)
196{
197 return ((insn->opcode.bytes[0] == 0xff &&
198 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
199 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
200}
201
202/* Check whether insn jumps into specified address range */
203static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
204{
205 unsigned long target = 0;
206
207 switch (insn->opcode.bytes[0]) {
208 case 0xe0: /* loopne */
209 case 0xe1: /* loope */
210 case 0xe2: /* loop */
211 case 0xe3: /* jcxz */
212 case 0xe9: /* near relative jump */
213 case 0xeb: /* short relative jump */
214 break;
215 case 0x0f:
216 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
217 break;
218 return 0;
219 default:
220 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
221 break;
222 return 0;
223 }
224 target = (unsigned long)insn->next_byte + insn->immediate.value;
225
226 return (start <= target && target <= start + len);
227}
228
229/* Decode whole function to ensure any instructions don't jump into target */
230static int __kprobes can_optimize(unsigned long paddr)
231{
232 unsigned long addr, size = 0, offset = 0;
233 struct insn insn;
234 kprobe_opcode_t buf[MAX_INSN_SIZE];
235
236 /* Lookup symbol including addr */
237 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
238 return 0;
239
240 /*
241 * Do not optimize in the entry code due to the unstable
242 * stack handling.
243 */
244 if ((paddr >= (unsigned long)__entry_text_start) &&
245 (paddr < (unsigned long)__entry_text_end))
246 return 0;
247
248 /* Check there is enough space for a relative jump. */
249 if (size - offset < RELATIVEJUMP_SIZE)
250 return 0;
251
252 /* Decode instructions */
253 addr = paddr - offset;
254 while (addr < paddr - offset + size) { /* Decode until function end */
255 if (search_exception_tables(addr))
256 /*
257 * Since some fixup code will jumps into this function,
258 * we can't optimize kprobe in this function.
259 */
260 return 0;
261 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
262 insn_get_length(&insn);
263 /* Another subsystem puts a breakpoint */
264 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
265 return 0;
266 /* Recover address */
267 insn.kaddr = (void *)addr;
268 insn.next_byte = (void *)(addr + insn.length);
269 /* Check any instructions don't jump into target */
270 if (insn_is_indirect_jump(&insn) ||
271 insn_jump_into_range(&insn, paddr + INT3_SIZE,
272 RELATIVE_ADDR_SIZE))
273 return 0;
274 addr += insn.length;
275 }
276
277 return 1;
278}
279
280/* Check optimized_kprobe can actually be optimized. */
281int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
282{
283 int i;
284 struct kprobe *p;
285
286 for (i = 1; i < op->optinsn.size; i++) {
287 p = get_kprobe(op->kp.addr + i);
288 if (p && !kprobe_disabled(p))
289 return -EEXIST;
290 }
291
292 return 0;
293}
294
295/* Check the addr is within the optimized instructions. */
296int __kprobes
297arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
298{
299 return ((unsigned long)op->kp.addr <= addr &&
300 (unsigned long)op->kp.addr + op->optinsn.size > addr);
301}
302
303/* Free optimized instruction slot */
304static __kprobes
305void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
306{
307 if (op->optinsn.insn) {
308 free_optinsn_slot(op->optinsn.insn, dirty);
309 op->optinsn.insn = NULL;
310 op->optinsn.size = 0;
311 }
312}
313
314void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
315{
316 __arch_remove_optimized_kprobe(op, 1);
317}
318
319/*
320 * Copy replacing target instructions
321 * Target instructions MUST be relocatable (checked inside)
322 * This is called when new aggr(opt)probe is allocated or reused.
323 */
324int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
325{
326 u8 *buf;
327 int ret;
328 long rel;
329
330 if (!can_optimize((unsigned long)op->kp.addr))
331 return -EILSEQ;
332
333 op->optinsn.insn = get_optinsn_slot();
334 if (!op->optinsn.insn)
335 return -ENOMEM;
336
337 /*
338 * Verify if the address gap is in 2GB range, because this uses
339 * a relative jump.
340 */
341 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
342 if (abs(rel) > 0x7fffffff)
343 return -ERANGE;
344
345 buf = (u8 *)op->optinsn.insn;
346
347 /* Copy instructions into the out-of-line buffer */
348 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
349 if (ret < 0) {
350 __arch_remove_optimized_kprobe(op, 0);
351 return ret;
352 }
353 op->optinsn.size = ret;
354
355 /* Copy arch-dep-instance from template */
356 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
357
358 /* Set probe information */
359 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
360
361 /* Set probe function call */
362 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
363
364 /* Set returning jmp instruction at the tail of out-of-line buffer */
365 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
366 (u8 *)op->kp.addr + op->optinsn.size);
367
368 flush_icache_range((unsigned long) buf,
369 (unsigned long) buf + TMPL_END_IDX +
370 op->optinsn.size + RELATIVEJUMP_SIZE);
371 return 0;
372}
373
374#define MAX_OPTIMIZE_PROBES 256
375static struct text_poke_param *jump_poke_params;
376static struct jump_poke_buffer {
377 u8 buf[RELATIVEJUMP_SIZE];
378} *jump_poke_bufs;
379
380static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
381 u8 *insn_buf,
382 struct optimized_kprobe *op)
383{
384 s32 rel = (s32)((long)op->optinsn.insn -
385 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
386
387 /* Backup instructions which will be replaced by jump address */
388 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
389 RELATIVE_ADDR_SIZE);
390
391 insn_buf[0] = RELATIVEJUMP_OPCODE;
392 *(s32 *)(&insn_buf[1]) = rel;
393
394 tprm->addr = op->kp.addr;
395 tprm->opcode = insn_buf;
396 tprm->len = RELATIVEJUMP_SIZE;
397}
398
399/*
400 * Replace breakpoints (int3) with relative jumps.
401 * Caller must call with locking kprobe_mutex and text_mutex.
402 */
403void __kprobes arch_optimize_kprobes(struct list_head *oplist)
404{
405 struct optimized_kprobe *op, *tmp;
406 int c = 0;
407
408 list_for_each_entry_safe(op, tmp, oplist, list) {
409 WARN_ON(kprobe_disabled(&op->kp));
410 /* Setup param */
411 setup_optimize_kprobe(&jump_poke_params[c],
412 jump_poke_bufs[c].buf, op);
413 list_del_init(&op->list);
414 if (++c >= MAX_OPTIMIZE_PROBES)
415 break;
416 }
417
418 /*
419 * text_poke_smp doesn't support NMI/MCE code modifying.
420 * However, since kprobes itself also doesn't support NMI/MCE
421 * code probing, it's not a problem.
422 */
423 text_poke_smp_batch(jump_poke_params, c);
424}
425
426static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
427 u8 *insn_buf,
428 struct optimized_kprobe *op)
429{
430 /* Set int3 to first byte for kprobes */
431 insn_buf[0] = BREAKPOINT_INSTRUCTION;
432 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
433
434 tprm->addr = op->kp.addr;
435 tprm->opcode = insn_buf;
436 tprm->len = RELATIVEJUMP_SIZE;
437}
438
439/*
440 * Recover original instructions and breakpoints from relative jumps.
441 * Caller must call with locking kprobe_mutex.
442 */
443extern void arch_unoptimize_kprobes(struct list_head *oplist,
444 struct list_head *done_list)
445{
446 struct optimized_kprobe *op, *tmp;
447 int c = 0;
448
449 list_for_each_entry_safe(op, tmp, oplist, list) {
450 /* Setup param */
451 setup_unoptimize_kprobe(&jump_poke_params[c],
452 jump_poke_bufs[c].buf, op);
453 list_move(&op->list, done_list);
454 if (++c >= MAX_OPTIMIZE_PROBES)
455 break;
456 }
457
458 /*
459 * text_poke_smp doesn't support NMI/MCE code modifying.
460 * However, since kprobes itself also doesn't support NMI/MCE
461 * code probing, it's not a problem.
462 */
463 text_poke_smp_batch(jump_poke_params, c);
464}
465
466/* Replace a relative jump with a breakpoint (int3). */
467void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
468{
469 u8 buf[RELATIVEJUMP_SIZE];
470
471 /* Set int3 to first byte for kprobes */
472 buf[0] = BREAKPOINT_INSTRUCTION;
473 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
474 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
475}
476
477int __kprobes
478setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
479{
480 struct optimized_kprobe *op;
481
482 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
483 /* This kprobe is really able to run optimized path. */
484 op = container_of(p, struct optimized_kprobe, kp);
485 /* Detour through copied instructions */
486 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
487 if (!reenter)
488 reset_current_kprobe();
489 preempt_enable_no_resched();
490 return 1;
491 }
492 return 0;
493}
494
495int __kprobes arch_init_optprobes(void)
496{
497 /* Allocate code buffer and parameter array */
498 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
499 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
500 if (!jump_poke_bufs)
501 return -ENOMEM;
502
503 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
504 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
505 if (!jump_poke_params) {
506 kfree(jump_poke_bufs);
507 jump_poke_bufs = NULL;
508 return -ENOMEM;
509 }
510
511 return 0;
512}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d8b64c..e213fc8408d2 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -30,16 +30,15 @@
30 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi 30 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
31 * <prasanna@in.ibm.com> added function-return probes. 31 * <prasanna@in.ibm.com> added function-return probes.
32 * 2005-May Rusty Lynch <rusty.lynch@intel.com> 32 * 2005-May Rusty Lynch <rusty.lynch@intel.com>
33 * Added function return probes functionality 33 * Added function return probes functionality
34 * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added 34 * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
35 * kprobe-booster and kretprobe-booster for i386. 35 * kprobe-booster and kretprobe-booster for i386.
36 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster 36 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
37 * and kretprobe-booster for x86-64 37 * and kretprobe-booster for x86-64
38 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven 38 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
39 * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> 39 * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
40 * unified x86 kprobes code. 40 * unified x86 kprobes code.
41 */ 41 */
42
43#include <linux/kprobes.h> 42#include <linux/kprobes.h>
44#include <linux/ptrace.h> 43#include <linux/ptrace.h>
45#include <linux/string.h> 44#include <linux/string.h>
@@ -59,6 +58,8 @@
59#include <asm/insn.h> 58#include <asm/insn.h>
60#include <asm/debugreg.h> 59#include <asm/debugreg.h>
61 60
61#include "kprobes-common.h"
62
62void jprobe_return_end(void); 63void jprobe_return_end(void);
63 64
64DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 65DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
108 doesn't switch kernel stack.*/ 109 doesn't switch kernel stack.*/
109 {NULL, NULL} /* Terminator */ 110 {NULL, NULL} /* Terminator */
110}; 111};
112
111const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 113const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
112 114
113static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) 115static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
123} 125}
124 126
125/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 127/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
126static void __kprobes synthesize_reljump(void *from, void *to) 128void __kprobes synthesize_reljump(void *from, void *to)
127{ 129{
128 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); 130 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
129} 131}
130 132
133/* Insert a call instruction at address 'from', which calls address 'to'.*/
134void __kprobes synthesize_relcall(void *from, void *to)
135{
136 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
137}
138
131/* 139/*
132 * Skip the prefixes of the instruction. 140 * Skip the prefixes of the instruction.
133 */ 141 */
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
151 * Returns non-zero if opcode is boostable. 159 * Returns non-zero if opcode is boostable.
152 * RIP relative instructions are adjusted at copying time in 64 bits mode 160 * RIP relative instructions are adjusted at copying time in 64 bits mode
153 */ 161 */
154static int __kprobes can_boost(kprobe_opcode_t *opcodes) 162int __kprobes can_boost(kprobe_opcode_t *opcodes)
155{ 163{
156 kprobe_opcode_t opcode; 164 kprobe_opcode_t opcode;
157 kprobe_opcode_t *orig_opcodes = opcodes; 165 kprobe_opcode_t *orig_opcodes = opcodes;
@@ -207,13 +215,15 @@ retry:
207 } 215 }
208} 216}
209 217
210/* Recover the probed instruction at addr for further analysis. */ 218static unsigned long
211static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) 219__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
212{ 220{
213 struct kprobe *kp; 221 struct kprobe *kp;
222
214 kp = get_kprobe((void *)addr); 223 kp = get_kprobe((void *)addr);
224 /* There is no probe, return original address */
215 if (!kp) 225 if (!kp)
216 return -EINVAL; 226 return addr;
217 227
218 /* 228 /*
219 * Basically, kp->ainsn.insn has an original instruction. 229 * Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 */ 240 */
231 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 241 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
232 buf[0] = kp->opcode; 242 buf[0] = kp->opcode;
233 return 0; 243 return (unsigned long)buf;
244}
245
246/*
247 * Recover the probed instruction at addr for further analysis.
248 * Caller must lock kprobes by kprobe_mutex, or disable preemption
249 * for preventing to release referencing kprobes.
250 */
251unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
252{
253 unsigned long __addr;
254
255 __addr = __recover_optprobed_insn(buf, addr);
256 if (__addr != addr)
257 return __addr;
258
259 return __recover_probed_insn(buf, addr);
234} 260}
235 261
236/* Check if paddr is at an instruction boundary */ 262/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 263static int __kprobes can_probe(unsigned long paddr)
238{ 264{
239 int ret; 265 unsigned long addr, __addr, offset = 0;
240 unsigned long addr, offset = 0;
241 struct insn insn; 266 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 267 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 268
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr)
247 /* Decode instructions */ 272 /* Decode instructions */
248 addr = paddr - offset; 273 addr = paddr - offset;
249 while (addr < paddr) { 274 while (addr < paddr) {
250 kernel_insn_init(&insn, (void *)addr);
251 insn_get_opcode(&insn);
252
253 /* 275 /*
254 * Check if the instruction has been modified by another 276 * Check if the instruction has been modified by another
255 * kprobe, in which case we replace the breakpoint by the 277 * kprobe, in which case we replace the breakpoint by the
256 * original instruction in our buffer. 278 * original instruction in our buffer.
279 * Also, jump optimization will change the breakpoint to
280 * relative-jump. Since the relative-jump itself is
281 * normally used, we just go through if there is no kprobe.
257 */ 282 */
258 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { 283 __addr = recover_probed_instruction(buf, addr);
259 ret = recover_probed_instruction(buf, addr); 284 kernel_insn_init(&insn, (void *)__addr);
260 if (ret)
261 /*
262 * Another debugging subsystem might insert
263 * this breakpoint. In that case, we can't
264 * recover it.
265 */
266 return 0;
267 kernel_insn_init(&insn, buf);
268 }
269 insn_get_length(&insn); 285 insn_get_length(&insn);
286
287 /*
288 * Another debugging subsystem might insert this breakpoint.
289 * In that case, we can't recover it.
290 */
291 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
292 return 0;
270 addr += insn.length; 293 addr += insn.length;
271 } 294 }
272 295
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
299 * If not, return null. 322 * If not, return null.
300 * Only applicable to 64-bit x86. 323 * Only applicable to 64-bit x86.
301 */ 324 */
302static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) 325int __kprobes __copy_instruction(u8 *dest, u8 *src)
303{ 326{
304 struct insn insn; 327 struct insn insn;
305 int ret;
306 kprobe_opcode_t buf[MAX_INSN_SIZE]; 328 kprobe_opcode_t buf[MAX_INSN_SIZE];
307 329
308 kernel_insn_init(&insn, src); 330 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
309 if (recover) {
310 insn_get_opcode(&insn);
311 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
312 ret = recover_probed_instruction(buf,
313 (unsigned long)src);
314 if (ret)
315 return 0;
316 kernel_insn_init(&insn, buf);
317 }
318 }
319 insn_get_length(&insn); 331 insn_get_length(&insn);
332 /* Another subsystem puts a breakpoint, failed to recover */
333 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
334 return 0;
320 memcpy(dest, insn.kaddr, insn.length); 335 memcpy(dest, insn.kaddr, insn.length);
321 336
322#ifdef CONFIG_X86_64 337#ifdef CONFIG_X86_64
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
337 * extension of the original signed 32-bit displacement would 352 * extension of the original signed 32-bit displacement would
338 * have given. 353 * have given.
339 */ 354 */
340 newdisp = (u8 *) src + (s64) insn.displacement.value - 355 newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
341 (u8 *) dest;
342 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ 356 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
343 disp = (u8 *) dest + insn_offset_displacement(&insn); 357 disp = (u8 *) dest + insn_offset_displacement(&insn);
344 *(s32 *) disp = (s32) newdisp; 358 *(s32 *) disp = (s32) newdisp;
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
349 363
350static void __kprobes arch_copy_kprobe(struct kprobe *p) 364static void __kprobes arch_copy_kprobe(struct kprobe *p)
351{ 365{
366 /* Copy an instruction with recovering if other optprobe modifies it.*/
367 __copy_instruction(p->ainsn.insn, p->addr);
368
352 /* 369 /*
353 * Copy an instruction without recovering int3, because it will be 370 * __copy_instruction can modify the displacement of the instruction,
354 * put by another subsystem. 371 * but it doesn't affect boostable check.
355 */ 372 */
356 __copy_instruction(p->ainsn.insn, p->addr, 0); 373 if (can_boost(p->ainsn.insn))
357
358 if (can_boost(p->addr))
359 p->ainsn.boostable = 0; 374 p->ainsn.boostable = 0;
360 else 375 else
361 p->ainsn.boostable = -1; 376 p->ainsn.boostable = -1;
362 377
363 p->opcode = *p->addr; 378 /* Also, displacement change doesn't affect the first byte */
379 p->opcode = p->ainsn.insn[0];
364} 380}
365 381
366int __kprobes arch_prepare_kprobe(struct kprobe *p) 382int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void)
442 } 458 }
443} 459}
444 460
445void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 461void __kprobes
446 struct pt_regs *regs) 462arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
447{ 463{
448 unsigned long *sara = stack_addr(regs); 464 unsigned long *sara = stack_addr(regs);
449 465
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
453 *sara = (unsigned long) &kretprobe_trampoline; 469 *sara = (unsigned long) &kretprobe_trampoline;
454} 470}
455 471
456#ifdef CONFIG_OPTPROBES 472static void __kprobes
457static int __kprobes setup_detour_execution(struct kprobe *p, 473setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
458 struct pt_regs *regs,
459 int reenter);
460#else
461#define setup_detour_execution(p, regs, reenter) (0)
462#endif
463
464static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
465 struct kprobe_ctlblk *kcb, int reenter)
466{ 474{
467 if (setup_detour_execution(p, regs, reenter)) 475 if (setup_detour_execution(p, regs, reenter))
468 return; 476 return;
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
504 * within the handler. We save the original kprobes variables and just single 512 * within the handler. We save the original kprobes variables and just single
505 * step on the instruction of the new probe without calling any user handlers. 513 * step on the instruction of the new probe without calling any user handlers.
506 */ 514 */
507static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, 515static int __kprobes
508 struct kprobe_ctlblk *kcb) 516reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
509{ 517{
510 switch (kcb->kprobe_status) { 518 switch (kcb->kprobe_status) {
511 case KPROBE_HIT_SSDONE: 519 case KPROBE_HIT_SSDONE:
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
600 return 0; 608 return 0;
601} 609}
602 610
603#ifdef CONFIG_X86_64
604#define SAVE_REGS_STRING \
605 /* Skip cs, ip, orig_ax. */ \
606 " subq $24, %rsp\n" \
607 " pushq %rdi\n" \
608 " pushq %rsi\n" \
609 " pushq %rdx\n" \
610 " pushq %rcx\n" \
611 " pushq %rax\n" \
612 " pushq %r8\n" \
613 " pushq %r9\n" \
614 " pushq %r10\n" \
615 " pushq %r11\n" \
616 " pushq %rbx\n" \
617 " pushq %rbp\n" \
618 " pushq %r12\n" \
619 " pushq %r13\n" \
620 " pushq %r14\n" \
621 " pushq %r15\n"
622#define RESTORE_REGS_STRING \
623 " popq %r15\n" \
624 " popq %r14\n" \
625 " popq %r13\n" \
626 " popq %r12\n" \
627 " popq %rbp\n" \
628 " popq %rbx\n" \
629 " popq %r11\n" \
630 " popq %r10\n" \
631 " popq %r9\n" \
632 " popq %r8\n" \
633 " popq %rax\n" \
634 " popq %rcx\n" \
635 " popq %rdx\n" \
636 " popq %rsi\n" \
637 " popq %rdi\n" \
638 /* Skip orig_ax, ip, cs */ \
639 " addq $24, %rsp\n"
640#else
641#define SAVE_REGS_STRING \
642 /* Skip cs, ip, orig_ax and gs. */ \
643 " subl $16, %esp\n" \
644 " pushl %fs\n" \
645 " pushl %es\n" \
646 " pushl %ds\n" \
647 " pushl %eax\n" \
648 " pushl %ebp\n" \
649 " pushl %edi\n" \
650 " pushl %esi\n" \
651 " pushl %edx\n" \
652 " pushl %ecx\n" \
653 " pushl %ebx\n"
654#define RESTORE_REGS_STRING \
655 " popl %ebx\n" \
656 " popl %ecx\n" \
657 " popl %edx\n" \
658 " popl %esi\n" \
659 " popl %edi\n" \
660 " popl %ebp\n" \
661 " popl %eax\n" \
662 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
663 " addl $24, %esp\n"
664#endif
665
666/* 611/*
667 * When a retprobed function returns, this code saves registers and 612 * When a retprobed function returns, this code saves registers and
668 * calls trampoline_handler() runs, which calls the kretprobe's handler. 613 * calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
816 * jump instruction after the copied instruction, that jumps to the next 761 * jump instruction after the copied instruction, that jumps to the next
817 * instruction after the probepoint. 762 * instruction after the probepoint.
818 */ 763 */
819static void __kprobes resume_execution(struct kprobe *p, 764static void __kprobes
820 struct pt_regs *regs, struct kprobe_ctlblk *kcb) 765resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
821{ 766{
822 unsigned long *tos = stack_addr(regs); 767 unsigned long *tos = stack_addr(regs);
823 unsigned long copy_ip = (unsigned long)p->ainsn.insn; 768 unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
996/* 941/*
997 * Wrapper routine for handling exceptions. 942 * Wrapper routine for handling exceptions.
998 */ 943 */
999int __kprobes kprobe_exceptions_notify(struct notifier_block *self, 944int __kprobes
1000 unsigned long val, void *data) 945kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
1001{ 946{
1002 struct die_args *args = data; 947 struct die_args *args = data;
1003 int ret = NOTIFY_DONE; 948 int ret = NOTIFY_DONE;
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1107 return 0; 1052 return 0;
1108} 1053}
1109 1054
1110
1111#ifdef CONFIG_OPTPROBES
1112
1113/* Insert a call instruction at address 'from', which calls address 'to'.*/
1114static void __kprobes synthesize_relcall(void *from, void *to)
1115{
1116 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
1117}
1118
1119/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
1120static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1121 unsigned long val)
1122{
1123#ifdef CONFIG_X86_64
1124 *addr++ = 0x48;
1125 *addr++ = 0xbf;
1126#else
1127 *addr++ = 0xb8;
1128#endif
1129 *(unsigned long *)addr = val;
1130}
1131
1132static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{
1134 asm volatile (
1135 ".global optprobe_template_entry\n"
1136 "optprobe_template_entry: \n"
1137#ifdef CONFIG_X86_64
1138 /* We don't bother saving the ss register */
1139 " pushq %rsp\n"
1140 " pushfq\n"
1141 SAVE_REGS_STRING
1142 " movq %rsp, %rsi\n"
1143 ".global optprobe_template_val\n"
1144 "optprobe_template_val: \n"
1145 ASM_NOP5
1146 ASM_NOP5
1147 ".global optprobe_template_call\n"
1148 "optprobe_template_call: \n"
1149 ASM_NOP5
1150 /* Move flags to rsp */
1151 " movq 144(%rsp), %rdx\n"
1152 " movq %rdx, 152(%rsp)\n"
1153 RESTORE_REGS_STRING
1154 /* Skip flags entry */
1155 " addq $8, %rsp\n"
1156 " popfq\n"
1157#else /* CONFIG_X86_32 */
1158 " pushf\n"
1159 SAVE_REGS_STRING
1160 " movl %esp, %edx\n"
1161 ".global optprobe_template_val\n"
1162 "optprobe_template_val: \n"
1163 ASM_NOP5
1164 ".global optprobe_template_call\n"
1165 "optprobe_template_call: \n"
1166 ASM_NOP5
1167 RESTORE_REGS_STRING
1168 " addl $4, %esp\n" /* skip cs */
1169 " popf\n"
1170#endif
1171 ".global optprobe_template_end\n"
1172 "optprobe_template_end: \n");
1173}
1174
1175#define TMPL_MOVE_IDX \
1176 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
1177#define TMPL_CALL_IDX \
1178 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
1179#define TMPL_END_IDX \
1180 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
1181
1182#define INT3_SIZE sizeof(kprobe_opcode_t)
1183
1184/* Optimized kprobe call back function: called from optinsn */
1185static void __kprobes optimized_callback(struct optimized_kprobe *op,
1186 struct pt_regs *regs)
1187{
1188 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1189 unsigned long flags;
1190
1191 /* This is possible if op is under delayed unoptimizing */
1192 if (kprobe_disabled(&op->kp))
1193 return;
1194
1195 local_irq_save(flags);
1196 if (kprobe_running()) {
1197 kprobes_inc_nmissed_count(&op->kp);
1198 } else {
1199 /* Save skipped registers */
1200#ifdef CONFIG_X86_64
1201 regs->cs = __KERNEL_CS;
1202#else
1203 regs->cs = __KERNEL_CS | get_kernel_rpl();
1204 regs->gs = 0;
1205#endif
1206 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1207 regs->orig_ax = ~0UL;
1208
1209 __this_cpu_write(current_kprobe, &op->kp);
1210 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1211 opt_pre_handler(&op->kp, regs);
1212 __this_cpu_write(current_kprobe, NULL);
1213 }
1214 local_irq_restore(flags);
1215}
1216
1217static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1218{
1219 int len = 0, ret;
1220
1221 while (len < RELATIVEJUMP_SIZE) {
1222 ret = __copy_instruction(dest + len, src + len, 1);
1223 if (!ret || !can_boost(dest + len))
1224 return -EINVAL;
1225 len += ret;
1226 }
1227 /* Check whether the address range is reserved */
1228 if (ftrace_text_reserved(src, src + len - 1) ||
1229 alternatives_text_reserved(src, src + len - 1) ||
1230 jump_label_text_reserved(src, src + len - 1))
1231 return -EBUSY;
1232
1233 return len;
1234}
1235
1236/* Check whether insn is indirect jump */
1237static int __kprobes insn_is_indirect_jump(struct insn *insn)
1238{
1239 return ((insn->opcode.bytes[0] == 0xff &&
1240 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
1241 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
1242}
1243
1244/* Check whether insn jumps into specified address range */
1245static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
1246{
1247 unsigned long target = 0;
1248
1249 switch (insn->opcode.bytes[0]) {
1250 case 0xe0: /* loopne */
1251 case 0xe1: /* loope */
1252 case 0xe2: /* loop */
1253 case 0xe3: /* jcxz */
1254 case 0xe9: /* near relative jump */
1255 case 0xeb: /* short relative jump */
1256 break;
1257 case 0x0f:
1258 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
1259 break;
1260 return 0;
1261 default:
1262 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
1263 break;
1264 return 0;
1265 }
1266 target = (unsigned long)insn->next_byte + insn->immediate.value;
1267
1268 return (start <= target && target <= start + len);
1269}
1270
1271/* Decode whole function to ensure any instructions don't jump into target */
1272static int __kprobes can_optimize(unsigned long paddr)
1273{
1274 int ret;
1275 unsigned long addr, size = 0, offset = 0;
1276 struct insn insn;
1277 kprobe_opcode_t buf[MAX_INSN_SIZE];
1278
1279 /* Lookup symbol including addr */
1280 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1281 return 0;
1282
1283 /*
1284 * Do not optimize in the entry code due to the unstable
1285 * stack handling.
1286 */
1287 if ((paddr >= (unsigned long )__entry_text_start) &&
1288 (paddr < (unsigned long )__entry_text_end))
1289 return 0;
1290
1291 /* Check there is enough space for a relative jump. */
1292 if (size - offset < RELATIVEJUMP_SIZE)
1293 return 0;
1294
1295 /* Decode instructions */
1296 addr = paddr - offset;
1297 while (addr < paddr - offset + size) { /* Decode until function end */
1298 if (search_exception_tables(addr))
1299 /*
1300 * Since some fixup code will jumps into this function,
1301 * we can't optimize kprobe in this function.
1302 */
1303 return 0;
1304 kernel_insn_init(&insn, (void *)addr);
1305 insn_get_opcode(&insn);
1306 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
1307 ret = recover_probed_instruction(buf, addr);
1308 if (ret)
1309 return 0;
1310 kernel_insn_init(&insn, buf);
1311 }
1312 insn_get_length(&insn);
1313 /* Recover address */
1314 insn.kaddr = (void *)addr;
1315 insn.next_byte = (void *)(addr + insn.length);
1316 /* Check any instructions don't jump into target */
1317 if (insn_is_indirect_jump(&insn) ||
1318 insn_jump_into_range(&insn, paddr + INT3_SIZE,
1319 RELATIVE_ADDR_SIZE))
1320 return 0;
1321 addr += insn.length;
1322 }
1323
1324 return 1;
1325}
1326
1327/* Check optimized_kprobe can actually be optimized. */
1328int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
1329{
1330 int i;
1331 struct kprobe *p;
1332
1333 for (i = 1; i < op->optinsn.size; i++) {
1334 p = get_kprobe(op->kp.addr + i);
1335 if (p && !kprobe_disabled(p))
1336 return -EEXIST;
1337 }
1338
1339 return 0;
1340}
1341
1342/* Check the addr is within the optimized instructions. */
1343int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
1344 unsigned long addr)
1345{
1346 return ((unsigned long)op->kp.addr <= addr &&
1347 (unsigned long)op->kp.addr + op->optinsn.size > addr);
1348}
1349
1350/* Free optimized instruction slot */
1351static __kprobes
1352void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
1353{
1354 if (op->optinsn.insn) {
1355 free_optinsn_slot(op->optinsn.insn, dirty);
1356 op->optinsn.insn = NULL;
1357 op->optinsn.size = 0;
1358 }
1359}
1360
1361void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
1362{
1363 __arch_remove_optimized_kprobe(op, 1);
1364}
1365
1366/*
1367 * Copy replacing target instructions
1368 * Target instructions MUST be relocatable (checked inside)
1369 */
1370int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1371{
1372 u8 *buf;
1373 int ret;
1374 long rel;
1375
1376 if (!can_optimize((unsigned long)op->kp.addr))
1377 return -EILSEQ;
1378
1379 op->optinsn.insn = get_optinsn_slot();
1380 if (!op->optinsn.insn)
1381 return -ENOMEM;
1382
1383 /*
1384 * Verify if the address gap is in 2GB range, because this uses
1385 * a relative jump.
1386 */
1387 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
1388 if (abs(rel) > 0x7fffffff)
1389 return -ERANGE;
1390
1391 buf = (u8 *)op->optinsn.insn;
1392
1393 /* Copy instructions into the out-of-line buffer */
1394 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
1395 if (ret < 0) {
1396 __arch_remove_optimized_kprobe(op, 0);
1397 return ret;
1398 }
1399 op->optinsn.size = ret;
1400
1401 /* Copy arch-dep-instance from template */
1402 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
1403
1404 /* Set probe information */
1405 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
1406
1407 /* Set probe function call */
1408 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
1409
1410 /* Set returning jmp instruction at the tail of out-of-line buffer */
1411 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
1412 (u8 *)op->kp.addr + op->optinsn.size);
1413
1414 flush_icache_range((unsigned long) buf,
1415 (unsigned long) buf + TMPL_END_IDX +
1416 op->optinsn.size + RELATIVEJUMP_SIZE);
1417 return 0;
1418}
1419
1420#define MAX_OPTIMIZE_PROBES 256
1421static struct text_poke_param *jump_poke_params;
1422static struct jump_poke_buffer {
1423 u8 buf[RELATIVEJUMP_SIZE];
1424} *jump_poke_bufs;
1425
1426static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1427 u8 *insn_buf,
1428 struct optimized_kprobe *op)
1429{
1430 s32 rel = (s32)((long)op->optinsn.insn -
1431 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1432
1433 /* Backup instructions which will be replaced by jump address */
1434 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1435 RELATIVE_ADDR_SIZE);
1436
1437 insn_buf[0] = RELATIVEJUMP_OPCODE;
1438 *(s32 *)(&insn_buf[1]) = rel;
1439
1440 tprm->addr = op->kp.addr;
1441 tprm->opcode = insn_buf;
1442 tprm->len = RELATIVEJUMP_SIZE;
1443}
1444
1445/*
1446 * Replace breakpoints (int3) with relative jumps.
1447 * Caller must call with locking kprobe_mutex and text_mutex.
1448 */
1449void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1450{
1451 struct optimized_kprobe *op, *tmp;
1452 int c = 0;
1453
1454 list_for_each_entry_safe(op, tmp, oplist, list) {
1455 WARN_ON(kprobe_disabled(&op->kp));
1456 /* Setup param */
1457 setup_optimize_kprobe(&jump_poke_params[c],
1458 jump_poke_bufs[c].buf, op);
1459 list_del_init(&op->list);
1460 if (++c >= MAX_OPTIMIZE_PROBES)
1461 break;
1462 }
1463
1464 /*
1465 * text_poke_smp doesn't support NMI/MCE code modifying.
1466 * However, since kprobes itself also doesn't support NMI/MCE
1467 * code probing, it's not a problem.
1468 */
1469 text_poke_smp_batch(jump_poke_params, c);
1470}
1471
1472static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1473 u8 *insn_buf,
1474 struct optimized_kprobe *op)
1475{
1476 /* Set int3 to first byte for kprobes */
1477 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1478 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1479
1480 tprm->addr = op->kp.addr;
1481 tprm->opcode = insn_buf;
1482 tprm->len = RELATIVEJUMP_SIZE;
1483}
1484
1485/*
1486 * Recover original instructions and breakpoints from relative jumps.
1487 * Caller must call with locking kprobe_mutex.
1488 */
1489extern void arch_unoptimize_kprobes(struct list_head *oplist,
1490 struct list_head *done_list)
1491{
1492 struct optimized_kprobe *op, *tmp;
1493 int c = 0;
1494
1495 list_for_each_entry_safe(op, tmp, oplist, list) {
1496 /* Setup param */
1497 setup_unoptimize_kprobe(&jump_poke_params[c],
1498 jump_poke_bufs[c].buf, op);
1499 list_move(&op->list, done_list);
1500 if (++c >= MAX_OPTIMIZE_PROBES)
1501 break;
1502 }
1503
1504 /*
1505 * text_poke_smp doesn't support NMI/MCE code modifying.
1506 * However, since kprobes itself also doesn't support NMI/MCE
1507 * code probing, it's not a problem.
1508 */
1509 text_poke_smp_batch(jump_poke_params, c);
1510}
1511
1512/* Replace a relative jump with a breakpoint (int3). */
1513void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
1514{
1515 u8 buf[RELATIVEJUMP_SIZE];
1516
1517 /* Set int3 to first byte for kprobes */
1518 buf[0] = BREAKPOINT_INSTRUCTION;
1519 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1520 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
1521}
1522
1523static int __kprobes setup_detour_execution(struct kprobe *p,
1524 struct pt_regs *regs,
1525 int reenter)
1526{
1527 struct optimized_kprobe *op;
1528
1529 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
1530 /* This kprobe is really able to run optimized path. */
1531 op = container_of(p, struct optimized_kprobe, kp);
1532 /* Detour through copied instructions */
1533 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
1534 if (!reenter)
1535 reset_current_kprobe();
1536 preempt_enable_no_resched();
1537 return 1;
1538 }
1539 return 0;
1540}
1541
1542static int __kprobes init_poke_params(void)
1543{
1544 /* Allocate code buffer and parameter array */
1545 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1546 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1547 if (!jump_poke_bufs)
1548 return -ENOMEM;
1549
1550 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1551 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1552 if (!jump_poke_params) {
1553 kfree(jump_poke_bufs);
1554 jump_poke_bufs = NULL;
1555 return -ENOMEM;
1556 }
1557
1558 return 0;
1559}
1560#else /* !CONFIG_OPTPROBES */
1561static int __kprobes init_poke_params(void)
1562{
1563 return 0;
1564}
1565#endif
1566
1567int __init arch_init_kprobes(void) 1055int __init arch_init_kprobes(void)
1568{ 1056{
1569 return init_poke_params(); 1057 return arch_init_optprobes();
1570} 1058}
1571 1059
1572int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1060int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f0c6fd6f176b..b8ba6e4a27e4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -38,6 +38,7 @@
38#include <asm/traps.h> 38#include <asm/traps.h>
39#include <asm/desc.h> 39#include <asm/desc.h>
40#include <asm/tlbflush.h> 40#include <asm/tlbflush.h>
41#include <asm/idle.h>
41 42
42static int kvmapf = 1; 43static int kvmapf = 1;
43 44
@@ -253,7 +254,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
253 kvm_async_pf_task_wait((u32)read_cr2()); 254 kvm_async_pf_task_wait((u32)read_cr2());
254 break; 255 break;
255 case KVM_PV_REASON_PAGE_READY: 256 case KVM_PV_REASON_PAGE_READY:
257 rcu_irq_enter();
258 exit_idle();
256 kvm_async_pf_task_wake((u32)read_cr2()); 259 kvm_async_pf_task_wake((u32)read_cr2());
260 rcu_irq_exit();
257 break; 261 break;
258 } 262 }
259} 263}
@@ -438,9 +442,9 @@ void __init kvm_guest_init(void)
438static __init int activate_jump_labels(void) 442static __init int activate_jump_labels(void)
439{ 443{
440 if (has_steal_clock) { 444 if (has_steal_clock) {
441 jump_label_inc(&paravirt_steal_enabled); 445 static_key_slow_inc(&paravirt_steal_enabled);
442 if (steal_acc) 446 if (steal_acc)
443 jump_label_inc(&paravirt_steal_rq_enabled); 447 static_key_slow_inc(&paravirt_steal_rq_enabled);
444 } 448 }
445 449
446 return 0; 450 return 0;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 44842d756b29..f8492da65bfc 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -136,6 +136,15 @@ int kvm_register_clock(char *txt)
136 return ret; 136 return ret;
137} 137}
138 138
139static void kvm_save_sched_clock_state(void)
140{
141}
142
143static void kvm_restore_sched_clock_state(void)
144{
145 kvm_register_clock("primary cpu clock, resume");
146}
147
139#ifdef CONFIG_X86_LOCAL_APIC 148#ifdef CONFIG_X86_LOCAL_APIC
140static void __cpuinit kvm_setup_secondary_clock(void) 149static void __cpuinit kvm_setup_secondary_clock(void)
141{ 150{
@@ -144,8 +153,6 @@ static void __cpuinit kvm_setup_secondary_clock(void)
144 * we shouldn't fail. 153 * we shouldn't fail.
145 */ 154 */
146 WARN_ON(kvm_register_clock("secondary cpu clock")); 155 WARN_ON(kvm_register_clock("secondary cpu clock"));
147 /* ok, done with our trickery, call native */
148 setup_secondary_APIC_clock();
149} 156}
150#endif 157#endif
151 158
@@ -194,9 +201,11 @@ void __init kvmclock_init(void)
194 x86_platform.get_wallclock = kvm_get_wallclock; 201 x86_platform.get_wallclock = kvm_get_wallclock;
195 x86_platform.set_wallclock = kvm_set_wallclock; 202 x86_platform.set_wallclock = kvm_set_wallclock;
196#ifdef CONFIG_X86_LOCAL_APIC 203#ifdef CONFIG_X86_LOCAL_APIC
197 x86_cpuinit.setup_percpu_clockev = 204 x86_cpuinit.early_percpu_clock_init =
198 kvm_setup_secondary_clock; 205 kvm_setup_secondary_clock;
199#endif 206#endif
207 x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
208 x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
200 machine_ops.shutdown = kvm_shutdown; 209 machine_ops.shutdown = kvm_shutdown;
201#ifdef CONFIG_KEXEC 210#ifdef CONFIG_KEXEC
202 machine_ops.crash_shutdown = kvm_crash_shutdown; 211 machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ea697263b373..ebc987398923 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -15,7 +15,6 @@
15#include <linux/vmalloc.h> 15#include <linux/vmalloc.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17 17
18#include <asm/system.h>
19#include <asm/ldt.h> 18#include <asm/ldt.h>
20#include <asm/desc.h> 19#include <asm/desc.h>
21#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index a3fa43ba5d3b..5b19e4d78b00 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -23,7 +23,6 @@
23#include <asm/apic.h> 23#include <asm/apic.h>
24#include <asm/cpufeature.h> 24#include <asm/cpufeature.h>
25#include <asm/desc.h> 25#include <asm/desc.h>
26#include <asm/system.h>
27#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
28#include <asm/debugreg.h> 27#include <asm/debugreg.h>
29 28
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 177183cbb6ae..7eb1e2b97827 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -43,7 +43,6 @@
43#include <linux/mca.h> 43#include <linux/mca.h>
44#include <linux/kprobes.h> 44#include <linux/kprobes.h>
45#include <linux/slab.h> 45#include <linux/slab.h>
46#include <asm/system.h>
47#include <asm/io.h> 46#include <asm/io.h>
48#include <linux/proc_fs.h> 47#include <linux/proc_fs.h>
49#include <linux/mman.h> 48#include <linux/mman.h>
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fda91c307104..87a0f8688301 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -86,6 +86,7 @@
86 86
87#include <asm/microcode.h> 87#include <asm/microcode.h>
88#include <asm/processor.h> 88#include <asm/processor.h>
89#include <asm/cpu_device_id.h>
89 90
90MODULE_DESCRIPTION("Microcode Update Driver"); 91MODULE_DESCRIPTION("Microcode Update Driver");
91MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); 92MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -504,6 +505,20 @@ static struct notifier_block __refdata mc_cpu_notifier = {
504 .notifier_call = mc_cpu_callback, 505 .notifier_call = mc_cpu_callback,
505}; 506};
506 507
508#ifdef MODULE
509/* Autoload on Intel and AMD systems */
510static const struct x86_cpu_id microcode_id[] = {
511#ifdef CONFIG_MICROCODE_INTEL
512 { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
513#endif
514#ifdef CONFIG_MICROCODE_AMD
515 { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
516#endif
517 {}
518};
519MODULE_DEVICE_TABLE(x86cpu, microcode_id);
520#endif
521
507static int __init microcode_init(void) 522static int __init microcode_init(void)
508{ 523{
509 struct cpuinfo_x86 *c = &cpu_data(0); 524 struct cpuinfo_x86 *c = &cpu_data(0);
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 925179f871de..f21fd94ac897 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -26,7 +26,6 @@
26#include <linux/gfp.h> 26#include <linux/gfp.h>
27#include <linux/jump_label.h> 27#include <linux/jump_label.h>
28 28
29#include <asm/system.h>
30#include <asm/page.h> 29#include <asm/page.h>
31#include <asm/pgtable.h> 30#include <asm/pgtable.h>
32 31
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 96356762a51d..eb113693f043 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,6 @@
40 40
41#include <asm/processor.h> 41#include <asm/processor.h>
42#include <asm/msr.h> 42#include <asm/msr.h>
43#include <asm/system.h>
44 43
45static struct class *msr_class; 44static struct class *msr_class;
46 45
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 0d01a8ea4e11..2c39dcd510fa 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -12,6 +12,7 @@
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/delay.h> 14#include <linux/delay.h>
15#include <linux/init.h>
15 16
16#include <asm/apic.h> 17#include <asm/apic.h>
17#include <asm/nmi.h> 18#include <asm/nmi.h>
@@ -20,35 +21,35 @@
20#define FAILURE 1 21#define FAILURE 1
21#define TIMEOUT 2 22#define TIMEOUT 2
22 23
23static int nmi_fail; 24static int __initdata nmi_fail;
24 25
25/* check to see if NMI IPIs work on this machine */ 26/* check to see if NMI IPIs work on this machine */
26static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly; 27static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __initdata;
27 28
28static int testcase_total; 29static int __initdata testcase_total;
29static int testcase_successes; 30static int __initdata testcase_successes;
30static int expected_testcase_failures; 31static int __initdata expected_testcase_failures;
31static int unexpected_testcase_failures; 32static int __initdata unexpected_testcase_failures;
32static int unexpected_testcase_unknowns; 33static int __initdata unexpected_testcase_unknowns;
33 34
34static int nmi_unk_cb(unsigned int val, struct pt_regs *regs) 35static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
35{ 36{
36 unexpected_testcase_unknowns++; 37 unexpected_testcase_unknowns++;
37 return NMI_HANDLED; 38 return NMI_HANDLED;
38} 39}
39 40
40static void init_nmi_testsuite(void) 41static void __init init_nmi_testsuite(void)
41{ 42{
42 /* trap all the unknown NMIs we may generate */ 43 /* trap all the unknown NMIs we may generate */
43 register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); 44 register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
44} 45}
45 46
46static void cleanup_nmi_testsuite(void) 47static void __init cleanup_nmi_testsuite(void)
47{ 48{
48 unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk"); 49 unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk");
49} 50}
50 51
51static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) 52static int __init test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
52{ 53{
53 int cpu = raw_smp_processor_id(); 54 int cpu = raw_smp_processor_id();
54 55
@@ -58,7 +59,7 @@ static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
58 return NMI_DONE; 59 return NMI_DONE;
59} 60}
60 61
61static void test_nmi_ipi(struct cpumask *mask) 62static void __init test_nmi_ipi(struct cpumask *mask)
62{ 63{
63 unsigned long timeout; 64 unsigned long timeout;
64 65
@@ -86,7 +87,7 @@ static void test_nmi_ipi(struct cpumask *mask)
86 return; 87 return;
87} 88}
88 89
89static void remote_ipi(void) 90static void __init remote_ipi(void)
90{ 91{
91 cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); 92 cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask);
92 cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); 93 cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
@@ -94,19 +95,19 @@ static void remote_ipi(void)
94 test_nmi_ipi(to_cpumask(nmi_ipi_mask)); 95 test_nmi_ipi(to_cpumask(nmi_ipi_mask));
95} 96}
96 97
97static void local_ipi(void) 98static void __init local_ipi(void)
98{ 99{
99 cpumask_clear(to_cpumask(nmi_ipi_mask)); 100 cpumask_clear(to_cpumask(nmi_ipi_mask));
100 cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); 101 cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
101 test_nmi_ipi(to_cpumask(nmi_ipi_mask)); 102 test_nmi_ipi(to_cpumask(nmi_ipi_mask));
102} 103}
103 104
104static void reset_nmi(void) 105static void __init reset_nmi(void)
105{ 106{
106 nmi_fail = 0; 107 nmi_fail = 0;
107} 108}
108 109
109static void dotest(void (*testcase_fn)(void), int expected) 110static void __init dotest(void (*testcase_fn)(void), int expected)
110{ 111{
111 testcase_fn(); 112 testcase_fn();
112 /* 113 /*
@@ -131,12 +132,12 @@ static void dotest(void (*testcase_fn)(void), int expected)
131 reset_nmi(); 132 reset_nmi();
132} 133}
133 134
134static inline void print_testname(const char *testname) 135static inline void __init print_testname(const char *testname)
135{ 136{
136 printk("%12s:", testname); 137 printk("%12s:", testname);
137} 138}
138 139
139void nmi_selftest(void) 140void __init nmi_selftest(void)
140{ 141{
141 init_nmi_testsuite(); 142 init_nmi_testsuite();
142 143
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d90272e6bc40..ab137605e694 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -26,6 +26,7 @@
26 26
27#include <asm/bug.h> 27#include <asm/bug.h>
28#include <asm/paravirt.h> 28#include <asm/paravirt.h>
29#include <asm/debugreg.h>
29#include <asm/desc.h> 30#include <asm/desc.h>
30#include <asm/setup.h> 31#include <asm/setup.h>
31#include <asm/pgtable.h> 32#include <asm/pgtable.h>
@@ -37,6 +38,7 @@
37#include <asm/apic.h> 38#include <asm/apic.h>
38#include <asm/tlbflush.h> 39#include <asm/tlbflush.h>
39#include <asm/timer.h> 40#include <asm/timer.h>
41#include <asm/special_insns.h>
40 42
41/* nop stub */ 43/* nop stub */
42void _paravirt_nop(void) 44void _paravirt_nop(void)
@@ -202,8 +204,8 @@ static void native_flush_tlb_single(unsigned long addr)
202 __native_flush_tlb_single(addr); 204 __native_flush_tlb_single(addr);
203} 205}
204 206
205struct jump_label_key paravirt_steal_enabled; 207struct static_key paravirt_steal_enabled;
206struct jump_label_key paravirt_steal_rq_enabled; 208struct static_key paravirt_steal_rq_enabled;
207 209
208static u64 native_steal_clock(int cpu) 210static u64 native_steal_clock(int cpu)
209{ 211{
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 726494b58345..d0b2fb9ccbb1 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -42,7 +42,6 @@
42#include <asm/calgary.h> 42#include <asm/calgary.h>
43#include <asm/tce.h> 43#include <asm/tce.h>
44#include <asm/pci-direct.h> 44#include <asm/pci-direct.h>
45#include <asm/system.h>
46#include <asm/dma.h> 45#include <asm/dma.h>
47#include <asm/rio.h> 46#include <asm/rio.h>
48#include <asm/bios_ebda.h> 47#include <asm/bios_ebda.h>
@@ -431,7 +430,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
431} 430}
432 431
433static void* calgary_alloc_coherent(struct device *dev, size_t size, 432static void* calgary_alloc_coherent(struct device *dev, size_t size,
434 dma_addr_t *dma_handle, gfp_t flag) 433 dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
435{ 434{
436 void *ret = NULL; 435 void *ret = NULL;
437 dma_addr_t mapping; 436 dma_addr_t mapping;
@@ -464,7 +463,8 @@ error:
464} 463}
465 464
466static void calgary_free_coherent(struct device *dev, size_t size, 465static void calgary_free_coherent(struct device *dev, size_t size,
467 void *vaddr, dma_addr_t dma_handle) 466 void *vaddr, dma_addr_t dma_handle,
467 struct dma_attrs *attrs)
468{ 468{
469 unsigned int npages; 469 unsigned int npages;
470 struct iommu_table *tbl = find_iommu_table(dev); 470 struct iommu_table *tbl = find_iommu_table(dev);
@@ -477,8 +477,8 @@ static void calgary_free_coherent(struct device *dev, size_t size,
477} 477}
478 478
479static struct dma_map_ops calgary_dma_ops = { 479static struct dma_map_ops calgary_dma_ops = {
480 .alloc_coherent = calgary_alloc_coherent, 480 .alloc = calgary_alloc_coherent,
481 .free_coherent = calgary_free_coherent, 481 .free = calgary_free_coherent,
482 .map_sg = calgary_map_sg, 482 .map_sg = calgary_map_sg,
483 .unmap_sg = calgary_unmap_sg, 483 .unmap_sg = calgary_unmap_sg,
484 .map_page = calgary_map_page, 484 .map_page = calgary_map_page,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1c4d769e21ea..3003250ac51d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -96,7 +96,8 @@ void __init pci_iommu_alloc(void)
96 } 96 }
97} 97}
98void *dma_generic_alloc_coherent(struct device *dev, size_t size, 98void *dma_generic_alloc_coherent(struct device *dev, size_t size,
99 dma_addr_t *dma_addr, gfp_t flag) 99 dma_addr_t *dma_addr, gfp_t flag,
100 struct dma_attrs *attrs)
100{ 101{
101 unsigned long dma_mask; 102 unsigned long dma_mask;
102 struct page *page; 103 struct page *page;
@@ -262,10 +263,11 @@ rootfs_initcall(pci_iommu_init);
262 263
263static __devinit void via_no_dac(struct pci_dev *dev) 264static __devinit void via_no_dac(struct pci_dev *dev)
264{ 265{
265 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { 266 if (forbid_dac == 0) {
266 dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); 267 dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
267 forbid_dac = 1; 268 forbid_dac = 1;
268 } 269 }
269} 270}
270DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); 271DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
272 PCI_CLASS_BRIDGE_PCI, 8, via_no_dac);
271#endif 273#endif
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 3af4af810c07..f96050685b46 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -75,7 +75,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
75} 75}
76 76
77static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, 77static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
78 dma_addr_t dma_addr) 78 dma_addr_t dma_addr, struct dma_attrs *attrs)
79{ 79{
80 free_pages((unsigned long)vaddr, get_order(size)); 80 free_pages((unsigned long)vaddr, get_order(size));
81} 81}
@@ -96,8 +96,8 @@ static void nommu_sync_sg_for_device(struct device *dev,
96} 96}
97 97
98struct dma_map_ops nommu_dma_ops = { 98struct dma_map_ops nommu_dma_ops = {
99 .alloc_coherent = dma_generic_alloc_coherent, 99 .alloc = dma_generic_alloc_coherent,
100 .free_coherent = nommu_free_coherent, 100 .free = nommu_free_coherent,
101 .map_sg = nommu_map_sg, 101 .map_sg = nommu_map_sg,
102 .map_page = nommu_map_page, 102 .map_page = nommu_map_page,
103 .sync_single_for_device = nommu_sync_single_for_device, 103 .sync_single_for_device = nommu_sync_single_for_device,
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 8f972cbddef0..6c483ba98b9c 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -15,21 +15,30 @@
15int swiotlb __read_mostly; 15int swiotlb __read_mostly;
16 16
17static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, 17static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
18 dma_addr_t *dma_handle, gfp_t flags) 18 dma_addr_t *dma_handle, gfp_t flags,
19 struct dma_attrs *attrs)
19{ 20{
20 void *vaddr; 21 void *vaddr;
21 22
22 vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); 23 vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
24 attrs);
23 if (vaddr) 25 if (vaddr)
24 return vaddr; 26 return vaddr;
25 27
26 return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); 28 return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
27} 29}
28 30
31static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
32 void *vaddr, dma_addr_t dma_addr,
33 struct dma_attrs *attrs)
34{
35 swiotlb_free_coherent(dev, size, vaddr, dma_addr);
36}
37
29static struct dma_map_ops swiotlb_dma_ops = { 38static struct dma_map_ops swiotlb_dma_ops = {
30 .mapping_error = swiotlb_dma_mapping_error, 39 .mapping_error = swiotlb_dma_mapping_error,
31 .alloc_coherent = x86_swiotlb_alloc_coherent, 40 .alloc = x86_swiotlb_alloc_coherent,
32 .free_coherent = swiotlb_free_coherent, 41 .free = x86_swiotlb_free_coherent,
33 .sync_single_for_cpu = swiotlb_sync_single_for_cpu, 42 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
34 .sync_single_for_device = swiotlb_sync_single_for_device, 43 .sync_single_for_device = swiotlb_sync_single_for_device,
35 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, 44 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 34e06e84ce31..0bc72e2069e3 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -12,6 +12,7 @@
12#include <linux/pci.h> 12#include <linux/pci.h>
13#include <linux/export.h> 13#include <linux/export.h>
14 14
15#include <asm/probe_roms.h>
15#include <asm/pci-direct.h> 16#include <asm/pci-direct.h>
16#include <asm/e820.h> 17#include <asm/e820.h>
17#include <asm/mmzone.h> 18#include <asm/mmzone.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 15763af7bfe3..1d92a5ab6e8b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -12,16 +12,37 @@
12#include <linux/user-return-notifier.h> 12#include <linux/user-return-notifier.h>
13#include <linux/dmi.h> 13#include <linux/dmi.h>
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/stackprotector.h>
16#include <linux/tick.h>
17#include <linux/cpuidle.h>
15#include <trace/events/power.h> 18#include <trace/events/power.h>
16#include <linux/hw_breakpoint.h> 19#include <linux/hw_breakpoint.h>
17#include <asm/cpu.h> 20#include <asm/cpu.h>
18#include <asm/system.h>
19#include <asm/apic.h> 21#include <asm/apic.h>
20#include <asm/syscalls.h> 22#include <asm/syscalls.h>
21#include <asm/idle.h> 23#include <asm/idle.h>
22#include <asm/uaccess.h> 24#include <asm/uaccess.h>
23#include <asm/i387.h> 25#include <asm/i387.h>
26#include <asm/fpu-internal.h>
24#include <asm/debugreg.h> 27#include <asm/debugreg.h>
28#include <asm/nmi.h>
29
30#ifdef CONFIG_X86_64
31static DEFINE_PER_CPU(unsigned char, is_idle);
32static ATOMIC_NOTIFIER_HEAD(idle_notifier);
33
34void idle_notifier_register(struct notifier_block *n)
35{
36 atomic_notifier_chain_register(&idle_notifier, n);
37}
38EXPORT_SYMBOL_GPL(idle_notifier_register);
39
40void idle_notifier_unregister(struct notifier_block *n)
41{
42 atomic_notifier_chain_unregister(&idle_notifier, n);
43}
44EXPORT_SYMBOL_GPL(idle_notifier_unregister);
45#endif
25 46
26struct kmem_cache *task_xstate_cachep; 47struct kmem_cache *task_xstate_cachep;
27EXPORT_SYMBOL_GPL(task_xstate_cachep); 48EXPORT_SYMBOL_GPL(task_xstate_cachep);
@@ -341,44 +362,113 @@ void (*pm_idle)(void);
341EXPORT_SYMBOL(pm_idle); 362EXPORT_SYMBOL(pm_idle);
342#endif 363#endif
343 364
344#ifdef CONFIG_X86_32 365static inline int hlt_use_halt(void)
345/*
346 * This halt magic was a workaround for ancient floppy DMA
347 * wreckage. It should be safe to remove.
348 */
349static int hlt_counter;
350void disable_hlt(void)
351{ 366{
352 hlt_counter++; 367 return 1;
353} 368}
354EXPORT_SYMBOL(disable_hlt);
355 369
356void enable_hlt(void) 370#ifndef CONFIG_SMP
371static inline void play_dead(void)
357{ 372{
358 hlt_counter--; 373 BUG();
359} 374}
360EXPORT_SYMBOL(enable_hlt); 375#endif
361 376
362static inline int hlt_use_halt(void) 377#ifdef CONFIG_X86_64
378void enter_idle(void)
363{ 379{
364 return (!hlt_counter && boot_cpu_data.hlt_works_ok); 380 percpu_write(is_idle, 1);
381 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
365} 382}
366#else 383
367static inline int hlt_use_halt(void) 384static void __exit_idle(void)
368{ 385{
369 return 1; 386 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
387 return;
388 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
389}
390
391/* Called from interrupts to signify idle end */
392void exit_idle(void)
393{
394 /* idle loop has pid 0 */
395 if (current->pid)
396 return;
397 __exit_idle();
370} 398}
371#endif 399#endif
372 400
373/* 401/*
402 * The idle thread. There's no useful work to be
403 * done, so just try to conserve power and have a
404 * low exit latency (ie sit in a loop waiting for
405 * somebody to say that they'd like to reschedule)
406 */
407void cpu_idle(void)
408{
409 /*
410 * If we're the non-boot CPU, nothing set the stack canary up
411 * for us. CPU0 already has it initialized but no harm in
412 * doing it again. This is a good place for updating it, as
413 * we wont ever return from this function (so the invalid
414 * canaries already on the stack wont ever trigger).
415 */
416 boot_init_stack_canary();
417 current_thread_info()->status |= TS_POLLING;
418
419 while (1) {
420 tick_nohz_idle_enter();
421
422 while (!need_resched()) {
423 rmb();
424
425 if (cpu_is_offline(smp_processor_id()))
426 play_dead();
427
428 /*
429 * Idle routines should keep interrupts disabled
430 * from here on, until they go to idle.
431 * Otherwise, idle callbacks can misfire.
432 */
433 local_touch_nmi();
434 local_irq_disable();
435
436 enter_idle();
437
438 /* Don't trace irqs off for idle */
439 stop_critical_timings();
440
441 /* enter_idle() needs rcu for notifiers */
442 rcu_idle_enter();
443
444 if (cpuidle_idle_call())
445 pm_idle();
446
447 rcu_idle_exit();
448 start_critical_timings();
449
450 /* In many cases the interrupt that ended idle
451 has already called exit_idle. But some idle
452 loops can be woken up without interrupt. */
453 __exit_idle();
454 }
455
456 tick_nohz_idle_exit();
457 preempt_enable_no_resched();
458 schedule();
459 preempt_disable();
460 }
461}
462
463/*
374 * We use this if we don't have any better 464 * We use this if we don't have any better
375 * idle routine.. 465 * idle routine..
376 */ 466 */
377void default_idle(void) 467void default_idle(void)
378{ 468{
379 if (hlt_use_halt()) { 469 if (hlt_use_halt()) {
380 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 470 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
381 trace_cpu_idle(1, smp_processor_id()); 471 trace_cpu_idle_rcuidle(1, smp_processor_id());
382 current_thread_info()->status &= ~TS_POLLING; 472 current_thread_info()->status &= ~TS_POLLING;
383 /* 473 /*
384 * TS_POLLING-cleared state must be visible before we 474 * TS_POLLING-cleared state must be visible before we
@@ -391,8 +481,8 @@ void default_idle(void)
391 else 481 else
392 local_irq_enable(); 482 local_irq_enable();
393 current_thread_info()->status |= TS_POLLING; 483 current_thread_info()->status |= TS_POLLING;
394 trace_power_end(smp_processor_id()); 484 trace_power_end_rcuidle(smp_processor_id());
395 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); 485 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
396 } else { 486 } else {
397 local_irq_enable(); 487 local_irq_enable();
398 /* loop is done by the caller */ 488 /* loop is done by the caller */
@@ -450,8 +540,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
450static void mwait_idle(void) 540static void mwait_idle(void)
451{ 541{
452 if (!need_resched()) { 542 if (!need_resched()) {
453 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 543 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
454 trace_cpu_idle(1, smp_processor_id()); 544 trace_cpu_idle_rcuidle(1, smp_processor_id());
455 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) 545 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
456 clflush((void *)&current_thread_info()->flags); 546 clflush((void *)&current_thread_info()->flags);
457 547
@@ -461,8 +551,8 @@ static void mwait_idle(void)
461 __sti_mwait(0, 0); 551 __sti_mwait(0, 0);
462 else 552 else
463 local_irq_enable(); 553 local_irq_enable();
464 trace_power_end(smp_processor_id()); 554 trace_power_end_rcuidle(smp_processor_id());
465 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); 555 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
466 } else 556 } else
467 local_irq_enable(); 557 local_irq_enable();
468} 558}
@@ -474,13 +564,13 @@ static void mwait_idle(void)
474 */ 564 */
475static void poll_idle(void) 565static void poll_idle(void)
476{ 566{
477 trace_power_start(POWER_CSTATE, 0, smp_processor_id()); 567 trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
478 trace_cpu_idle(0, smp_processor_id()); 568 trace_cpu_idle_rcuidle(0, smp_processor_id());
479 local_irq_enable(); 569 local_irq_enable();
480 while (!need_resched()) 570 while (!need_resched())
481 cpu_relax(); 571 cpu_relax();
482 trace_power_end(smp_processor_id()); 572 trace_power_end_rcuidle(smp_processor_id());
483 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); 573 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
484} 574}
485 575
486/* 576/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c08d1ff12b7c..ae6847303e26 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -9,7 +9,6 @@
9 * This file handles the architecture-dependent parts of process handling.. 9 * This file handles the architecture-dependent parts of process handling..
10 */ 10 */
11 11
12#include <linux/stackprotector.h>
13#include <linux/cpu.h> 12#include <linux/cpu.h>
14#include <linux/errno.h> 13#include <linux/errno.h>
15#include <linux/sched.h> 14#include <linux/sched.h>
@@ -31,20 +30,18 @@
31#include <linux/kallsyms.h> 30#include <linux/kallsyms.h>
32#include <linux/ptrace.h> 31#include <linux/ptrace.h>
33#include <linux/personality.h> 32#include <linux/personality.h>
34#include <linux/tick.h>
35#include <linux/percpu.h> 33#include <linux/percpu.h>
36#include <linux/prctl.h> 34#include <linux/prctl.h>
37#include <linux/ftrace.h> 35#include <linux/ftrace.h>
38#include <linux/uaccess.h> 36#include <linux/uaccess.h>
39#include <linux/io.h> 37#include <linux/io.h>
40#include <linux/kdebug.h> 38#include <linux/kdebug.h>
41#include <linux/cpuidle.h>
42 39
43#include <asm/pgtable.h> 40#include <asm/pgtable.h>
44#include <asm/system.h>
45#include <asm/ldt.h> 41#include <asm/ldt.h>
46#include <asm/processor.h> 42#include <asm/processor.h>
47#include <asm/i387.h> 43#include <asm/i387.h>
44#include <asm/fpu-internal.h>
48#include <asm/desc.h> 45#include <asm/desc.h>
49#ifdef CONFIG_MATH_EMULATION 46#ifdef CONFIG_MATH_EMULATION
50#include <asm/math_emu.h> 47#include <asm/math_emu.h>
@@ -57,7 +54,7 @@
57#include <asm/idle.h> 54#include <asm/idle.h>
58#include <asm/syscalls.h> 55#include <asm/syscalls.h>
59#include <asm/debugreg.h> 56#include <asm/debugreg.h>
60#include <asm/nmi.h> 57#include <asm/switch_to.h>
61 58
62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 59asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
63 60
@@ -69,62 +66,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
69 return ((unsigned long *)tsk->thread.sp)[3]; 66 return ((unsigned long *)tsk->thread.sp)[3];
70} 67}
71 68
72#ifndef CONFIG_SMP
73static inline void play_dead(void)
74{
75 BUG();
76}
77#endif
78
79/*
80 * The idle thread. There's no useful work to be
81 * done, so just try to conserve power and have a
82 * low exit latency (ie sit in a loop waiting for
83 * somebody to say that they'd like to reschedule)
84 */
85void cpu_idle(void)
86{
87 int cpu = smp_processor_id();
88
89 /*
90 * If we're the non-boot CPU, nothing set the stack canary up
91 * for us. CPU0 already has it initialized but no harm in
92 * doing it again. This is a good place for updating it, as
93 * we wont ever return from this function (so the invalid
94 * canaries already on the stack wont ever trigger).
95 */
96 boot_init_stack_canary();
97
98 current_thread_info()->status |= TS_POLLING;
99
100 /* endless idle loop with no priority at all */
101 while (1) {
102 tick_nohz_idle_enter();
103 rcu_idle_enter();
104 while (!need_resched()) {
105
106 check_pgt_cache();
107 rmb();
108
109 if (cpu_is_offline(cpu))
110 play_dead();
111
112 local_touch_nmi();
113 local_irq_disable();
114 /* Don't trace irqs off for idle */
115 stop_critical_timings();
116 if (cpuidle_idle_call())
117 pm_idle();
118 start_critical_timings();
119 }
120 rcu_idle_exit();
121 tick_nohz_idle_exit();
122 preempt_enable_no_resched();
123 schedule();
124 preempt_disable();
125 }
126}
127
128void __show_regs(struct pt_regs *regs, int all) 69void __show_regs(struct pt_regs *regs, int all)
129{ 70{
130 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; 71 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 550e77b1b948..733ca39f367e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -14,7 +14,6 @@
14 * This file handles the architecture-dependent parts of process handling.. 14 * This file handles the architecture-dependent parts of process handling..
15 */ 15 */
16 16
17#include <linux/stackprotector.h>
18#include <linux/cpu.h> 17#include <linux/cpu.h>
19#include <linux/errno.h> 18#include <linux/errno.h>
20#include <linux/sched.h> 19#include <linux/sched.h>
@@ -32,17 +31,15 @@
32#include <linux/notifier.h> 31#include <linux/notifier.h>
33#include <linux/kprobes.h> 32#include <linux/kprobes.h>
34#include <linux/kdebug.h> 33#include <linux/kdebug.h>
35#include <linux/tick.h>
36#include <linux/prctl.h> 34#include <linux/prctl.h>
37#include <linux/uaccess.h> 35#include <linux/uaccess.h>
38#include <linux/io.h> 36#include <linux/io.h>
39#include <linux/ftrace.h> 37#include <linux/ftrace.h>
40#include <linux/cpuidle.h>
41 38
42#include <asm/pgtable.h> 39#include <asm/pgtable.h>
43#include <asm/system.h>
44#include <asm/processor.h> 40#include <asm/processor.h>
45#include <asm/i387.h> 41#include <asm/i387.h>
42#include <asm/fpu-internal.h>
46#include <asm/mmu_context.h> 43#include <asm/mmu_context.h>
47#include <asm/prctl.h> 44#include <asm/prctl.h>
48#include <asm/desc.h> 45#include <asm/desc.h>
@@ -51,116 +48,11 @@
51#include <asm/idle.h> 48#include <asm/idle.h>
52#include <asm/syscalls.h> 49#include <asm/syscalls.h>
53#include <asm/debugreg.h> 50#include <asm/debugreg.h>
54#include <asm/nmi.h> 51#include <asm/switch_to.h>
55 52
56asmlinkage extern void ret_from_fork(void); 53asmlinkage extern void ret_from_fork(void);
57 54
58DEFINE_PER_CPU(unsigned long, old_rsp); 55DEFINE_PER_CPU(unsigned long, old_rsp);
59static DEFINE_PER_CPU(unsigned char, is_idle);
60
61static ATOMIC_NOTIFIER_HEAD(idle_notifier);
62
63void idle_notifier_register(struct notifier_block *n)
64{
65 atomic_notifier_chain_register(&idle_notifier, n);
66}
67EXPORT_SYMBOL_GPL(idle_notifier_register);
68
69void idle_notifier_unregister(struct notifier_block *n)
70{
71 atomic_notifier_chain_unregister(&idle_notifier, n);
72}
73EXPORT_SYMBOL_GPL(idle_notifier_unregister);
74
75void enter_idle(void)
76{
77 percpu_write(is_idle, 1);
78 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
79}
80
81static void __exit_idle(void)
82{
83 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
84 return;
85 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
86}
87
88/* Called from interrupts to signify idle end */
89void exit_idle(void)
90{
91 /* idle loop has pid 0 */
92 if (current->pid)
93 return;
94 __exit_idle();
95}
96
97#ifndef CONFIG_SMP
98static inline void play_dead(void)
99{
100 BUG();
101}
102#endif
103
104/*
105 * The idle thread. There's no useful work to be
106 * done, so just try to conserve power and have a
107 * low exit latency (ie sit in a loop waiting for
108 * somebody to say that they'd like to reschedule)
109 */
110void cpu_idle(void)
111{
112 current_thread_info()->status |= TS_POLLING;
113
114 /*
115 * If we're the non-boot CPU, nothing set the stack canary up
116 * for us. CPU0 already has it initialized but no harm in
117 * doing it again. This is a good place for updating it, as
118 * we wont ever return from this function (so the invalid
119 * canaries already on the stack wont ever trigger).
120 */
121 boot_init_stack_canary();
122
123 /* endless idle loop with no priority at all */
124 while (1) {
125 tick_nohz_idle_enter();
126 while (!need_resched()) {
127
128 rmb();
129
130 if (cpu_is_offline(smp_processor_id()))
131 play_dead();
132 /*
133 * Idle routines should keep interrupts disabled
134 * from here on, until they go to idle.
135 * Otherwise, idle callbacks can misfire.
136 */
137 local_touch_nmi();
138 local_irq_disable();
139 enter_idle();
140 /* Don't trace irqs off for idle */
141 stop_critical_timings();
142
143 /* enter_idle() needs rcu for notifiers */
144 rcu_idle_enter();
145
146 if (cpuidle_idle_call())
147 pm_idle();
148
149 rcu_idle_exit();
150 start_critical_timings();
151
152 /* In many cases the interrupt that ended idle
153 has already called exit_idle. But some idle
154 loops can be woken up without interrupt. */
155 __exit_idle();
156 }
157
158 tick_nohz_idle_exit();
159 preempt_enable_no_resched();
160 schedule();
161 preempt_disable();
162 }
163}
164 56
165/* Prints also some state that isn't saved in the pt_regs */ 57/* Prints also some state that isn't saved in the pt_regs */
166void __show_regs(struct pt_regs *regs, int all) 58void __show_regs(struct pt_regs *regs, int all)
@@ -342,6 +234,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
342 loadsegment(es, _ds); 234 loadsegment(es, _ds);
343 loadsegment(ds, _ds); 235 loadsegment(ds, _ds);
344 load_gs_index(0); 236 load_gs_index(0);
237 current->thread.usersp = new_sp;
345 regs->ip = new_ip; 238 regs->ip = new_ip;
346 regs->sp = new_sp; 239 regs->sp = new_sp;
347 percpu_write(old_rsp, new_sp); 240 percpu_write(old_rsp, new_sp);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 6fb330adc7c7..685845cf16e0 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -24,9 +24,9 @@
24 24
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/pgtable.h> 26#include <asm/pgtable.h>
27#include <asm/system.h>
28#include <asm/processor.h> 27#include <asm/processor.h>
29#include <asm/i387.h> 28#include <asm/i387.h>
29#include <asm/fpu-internal.h>
30#include <asm/debugreg.h> 30#include <asm/debugreg.h>
31#include <asm/ldt.h> 31#include <asm/ldt.h>
32#include <asm/desc.h> 32#include <asm/desc.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d7d5099fe874..1a2901562059 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -90,7 +90,6 @@
90#include <asm/processor.h> 90#include <asm/processor.h>
91#include <asm/bugs.h> 91#include <asm/bugs.h>
92 92
93#include <asm/system.h>
94#include <asm/vsyscall.h> 93#include <asm/vsyscall.h>
95#include <asm/cpu.h> 94#include <asm/cpu.h>
96#include <asm/desc.h> 95#include <asm/desc.h>
@@ -509,15 +508,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
509 508
510#ifdef CONFIG_KEXEC 509#ifdef CONFIG_KEXEC
511 510
512static inline unsigned long long get_total_mem(void)
513{
514 unsigned long long total;
515
516 total = max_pfn - min_low_pfn;
517
518 return total << PAGE_SHIFT;
519}
520
521/* 511/*
522 * Keep the crash kernel below this limit. On 32 bits earlier kernels 512 * Keep the crash kernel below this limit. On 32 bits earlier kernels
523 * would limit the kernel to the low 512 MiB due to mapping restrictions. 513 * would limit the kernel to the low 512 MiB due to mapping restrictions.
@@ -536,7 +526,7 @@ static void __init reserve_crashkernel(void)
536 unsigned long long crash_size, crash_base; 526 unsigned long long crash_size, crash_base;
537 int ret; 527 int ret;
538 528
539 total_mem = get_total_mem(); 529 total_mem = memblock_phys_mem_size();
540 530
541 ret = parse_crashkernel(boot_command_line, total_mem, 531 ret = parse_crashkernel(boot_command_line, total_mem,
542 &crash_size, &crash_base); 532 &crash_size, &crash_base);
@@ -749,10 +739,16 @@ void __init setup_arch(char **cmdline_p)
749#endif 739#endif
750#ifdef CONFIG_EFI 740#ifdef CONFIG_EFI
751 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 741 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
752 EFI_LOADER_SIGNATURE, 4)) { 742 "EL32", 4)) {
743 efi_enabled = 1;
744 efi_64bit = false;
745 } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
746 "EL64", 4)) {
753 efi_enabled = 1; 747 efi_enabled = 1;
754 efi_memblock_x86_reserve_range(); 748 efi_64bit = true;
755 } 749 }
750 if (efi_enabled && efi_memblock_x86_reserve_range())
751 efi_enabled = 0;
756#endif 752#endif
757 753
758 x86_init.oem.arch_setup(); 754 x86_init.oem.arch_setup();
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b3cd6913ceea..041af2fd088d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -23,6 +23,7 @@
23#include <asm/processor.h> 23#include <asm/processor.h>
24#include <asm/ucontext.h> 24#include <asm/ucontext.h>
25#include <asm/i387.h> 25#include <asm/i387.h>
26#include <asm/fpu-internal.h>
26#include <asm/vdso.h> 27#include <asm/vdso.h>
27#include <asm/mce.h> 28#include <asm/mce.h>
28#include <asm/sighandling.h> 29#include <asm/sighandling.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 66d250c00d11..6e1e406038c2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -50,6 +50,7 @@
50#include <linux/tboot.h> 50#include <linux/tboot.h>
51#include <linux/stackprotector.h> 51#include <linux/stackprotector.h>
52#include <linux/gfp.h> 52#include <linux/gfp.h>
53#include <linux/cpuidle.h>
53 54
54#include <asm/acpi.h> 55#include <asm/acpi.h>
55#include <asm/desc.h> 56#include <asm/desc.h>
@@ -219,14 +220,9 @@ static void __cpuinit smp_callin(void)
219 * Update loops_per_jiffy in cpu_data. Previous call to 220 * Update loops_per_jiffy in cpu_data. Previous call to
220 * smp_store_cpu_info() stored a value that is close but not as 221 * smp_store_cpu_info() stored a value that is close but not as
221 * accurate as the value just calculated. 222 * accurate as the value just calculated.
222 *
223 * Need to enable IRQs because it can take longer and then
224 * the NMI watchdog might kill us.
225 */ 223 */
226 local_irq_enable();
227 calibrate_delay(); 224 calibrate_delay();
228 cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; 225 cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
229 local_irq_disable();
230 pr_debug("Stack at about %p\n", &cpuid); 226 pr_debug("Stack at about %p\n", &cpuid);
231 227
232 /* 228 /*
@@ -255,6 +251,7 @@ notrace static void __cpuinit start_secondary(void *unused)
255 * most necessary things. 251 * most necessary things.
256 */ 252 */
257 cpu_init(); 253 cpu_init();
254 x86_cpuinit.early_percpu_clock_init();
258 preempt_disable(); 255 preempt_disable();
259 smp_callin(); 256 smp_callin();
260 257
@@ -291,19 +288,6 @@ notrace static void __cpuinit start_secondary(void *unused)
291 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 288 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
292 x86_platform.nmi_init(); 289 x86_platform.nmi_init();
293 290
294 /*
295 * Wait until the cpu which brought this one up marked it
296 * online before enabling interrupts. If we don't do that then
297 * we can end up waking up the softirq thread before this cpu
298 * reached the active state, which makes the scheduler unhappy
299 * and schedule the softirq thread on the wrong cpu. This is
300 * only observable with forced threaded interrupts, but in
301 * theory it could also happen w/o them. It's just way harder
302 * to achieve.
303 */
304 while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
305 cpu_relax();
306
307 /* enable local interrupts */ 291 /* enable local interrupts */
308 local_irq_enable(); 292 local_irq_enable();
309 293
@@ -740,8 +724,6 @@ do_rest:
740 * the targeted processor. 724 * the targeted processor.
741 */ 725 */
742 726
743 printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip);
744
745 atomic_set(&init_deasserted, 0); 727 atomic_set(&init_deasserted, 0);
746 728
747 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { 729 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
@@ -791,9 +773,10 @@ do_rest:
791 schedule(); 773 schedule();
792 } 774 }
793 775
794 if (cpumask_test_cpu(cpu, cpu_callin_mask)) 776 if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
777 print_cpu_msr(&cpu_data(cpu));
795 pr_debug("CPU%d: has booted.\n", cpu); 778 pr_debug("CPU%d: has booted.\n", cpu);
796 else { 779 } else {
797 boot_error = 1; 780 boot_error = 1;
798 if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) 781 if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
799 == 0xA5A5A5A5) 782 == 0xA5A5A5A5)
@@ -847,7 +830,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
847 830
848 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || 831 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
849 !physid_isset(apicid, phys_cpu_present_map) || 832 !physid_isset(apicid, phys_cpu_present_map) ||
850 (!x2apic_mode && apicid >= 255)) { 833 !apic->apic_id_valid(apicid)) {
851 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); 834 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
852 return -EINVAL; 835 return -EINVAL;
853 } 836 }
@@ -1422,7 +1405,8 @@ void native_play_dead(void)
1422 tboot_shutdown(TB_SHUTDOWN_WFS); 1405 tboot_shutdown(TB_SHUTDOWN_WFS);
1423 1406
1424 mwait_play_dead(); /* Only returns on failure */ 1407 mwait_play_dead(); /* Only returns on failure */
1425 hlt_play_dead(); 1408 if (cpuidle_play_dead())
1409 hlt_play_dead();
1426} 1410}
1427 1411
1428#else /* ... !CONFIG_HOTPLUG_CPU */ 1412#else /* ... !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index f921df8c2099..b4d3c3927dd8 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -195,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
195{ 195{
196 struct vm_area_struct *vma; 196 struct vm_area_struct *vma;
197 struct mm_struct *mm = current->mm; 197 struct mm_struct *mm = current->mm;
198 unsigned long addr = addr0; 198 unsigned long addr = addr0, start_addr;
199 199
200 /* requested length too big for entire address space */ 200 /* requested length too big for entire address space */
201 if (len > TASK_SIZE) 201 if (len > TASK_SIZE)
@@ -223,25 +223,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
223 mm->free_area_cache = mm->mmap_base; 223 mm->free_area_cache = mm->mmap_base;
224 } 224 }
225 225
226try_again:
226 /* either no address requested or can't fit in requested address hole */ 227 /* either no address requested or can't fit in requested address hole */
227 addr = mm->free_area_cache; 228 start_addr = addr = mm->free_area_cache;
228
229 /* make sure it can fit in the remaining address space */
230 if (addr > len) {
231 unsigned long tmp_addr = align_addr(addr - len, filp,
232 ALIGN_TOPDOWN);
233
234 vma = find_vma(mm, tmp_addr);
235 if (!vma || tmp_addr + len <= vma->vm_start)
236 /* remember the address as a hint for next time */
237 return mm->free_area_cache = tmp_addr;
238 }
239
240 if (mm->mmap_base < len)
241 goto bottomup;
242 229
243 addr = mm->mmap_base-len; 230 if (addr < len)
231 goto fail;
244 232
233 addr -= len;
245 do { 234 do {
246 addr = align_addr(addr, filp, ALIGN_TOPDOWN); 235 addr = align_addr(addr, filp, ALIGN_TOPDOWN);
247 236
@@ -263,6 +252,17 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
263 addr = vma->vm_start-len; 252 addr = vma->vm_start-len;
264 } while (len < vma->vm_start); 253 } while (len < vma->vm_start);
265 254
255fail:
256 /*
257 * if hint left us with no space for the requested
258 * mapping then try again:
259 */
260 if (start_addr != mm->mmap_base) {
261 mm->free_area_cache = mm->mmap_base;
262 mm->cached_hole_size = 0;
263 goto try_again;
264 }
265
266bottomup: 266bottomup:
267 /* 267 /*
268 * A failed mmap() very likely causes application failure, 268 * A failed mmap() very likely causes application failure,
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e2410e27f97e..6410744ac5cb 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -272,7 +272,7 @@ static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
272 offsetof(struct acpi_table_facs, firmware_waking_vector); 272 offsetof(struct acpi_table_facs, firmware_waking_vector);
273} 273}
274 274
275void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) 275static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
276{ 276{
277 static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = { 277 static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
278 /* S0,1,2: */ -1, -1, -1, 278 /* S0,1,2: */ -1, -1, -1,
@@ -281,7 +281,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
281 /* S5: */ TB_SHUTDOWN_S5 }; 281 /* S5: */ TB_SHUTDOWN_S5 };
282 282
283 if (!tboot_enabled()) 283 if (!tboot_enabled())
284 return; 284 return 0;
285 285
286 tboot_copy_fadt(&acpi_gbl_FADT); 286 tboot_copy_fadt(&acpi_gbl_FADT);
287 tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control; 287 tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
@@ -292,10 +292,11 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
292 if (sleep_state >= ACPI_S_STATE_COUNT || 292 if (sleep_state >= ACPI_S_STATE_COUNT ||
293 acpi_shutdown_map[sleep_state] == -1) { 293 acpi_shutdown_map[sleep_state] == -1) {
294 pr_warning("unsupported sleep state 0x%x\n", sleep_state); 294 pr_warning("unsupported sleep state 0x%x\n", sleep_state);
295 return; 295 return -1;
296 } 296 }
297 297
298 tboot_shutdown(acpi_shutdown_map[sleep_state]); 298 tboot_shutdown(acpi_shutdown_map[sleep_state]);
299 return 0;
299} 300}
300 301
301static atomic_t ap_wfs_count; 302static atomic_t ap_wfs_count;
@@ -345,6 +346,8 @@ static __init int tboot_late_init(void)
345 346
346 atomic_set(&ap_wfs_count, 0); 347 atomic_set(&ap_wfs_count, 0);
347 register_hotcpu_notifier(&tboot_cpu_notifier); 348 register_hotcpu_notifier(&tboot_cpu_notifier);
349
350 acpi_os_set_prepare_sleep(&tboot_sleep);
348 return 0; 351 return 0;
349} 352}
350 353
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index 9e540fee7009..ab40954e113e 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -34,6 +34,7 @@
34#include <asm/tce.h> 34#include <asm/tce.h>
35#include <asm/calgary.h> 35#include <asm/calgary.h>
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm/cacheflush.h>
37 38
38/* flush a tce at 'tceaddr' to main memory */ 39/* flush a tce at 'tceaddr' to main memory */
39static inline void flush_tce(void* tceaddr) 40static inline void flush_tce(void* tceaddr)
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dd5fbf4101fc..c6eba2b42673 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -57,9 +57,6 @@ EXPORT_SYMBOL(profile_pc);
57 */ 57 */
58static irqreturn_t timer_interrupt(int irq, void *dev_id) 58static irqreturn_t timer_interrupt(int irq, void *dev_id)
59{ 59{
60 /* Keep nmi watchdog up to date */
61 inc_irq_stat(irq0_irqs);
62
63 global_clock_event->event_handler(global_clock_event); 60 global_clock_event->event_handler(global_clock_event);
64 61
65 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 62 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 6bb7b8579e70..9d9d2f9e77a5 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -6,7 +6,6 @@
6 6
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8#include <asm/desc.h> 8#include <asm/desc.h>
9#include <asm/system.h>
10#include <asm/ldt.h> 9#include <asm/ldt.h>
11#include <asm/processor.h> 10#include <asm/processor.h>
12#include <asm/proto.h> 11#include <asm/proto.h>
@@ -163,7 +162,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
163{ 162{
164 const struct desc_struct *tls; 163 const struct desc_struct *tls;
165 164
166 if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || 165 if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
167 (pos % sizeof(struct user_desc)) != 0 || 166 (pos % sizeof(struct user_desc)) != 0 ||
168 (count % sizeof(struct user_desc)) != 0) 167 (count % sizeof(struct user_desc)) != 0)
169 return -EINVAL; 168 return -EINVAL;
@@ -198,7 +197,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
198 struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; 197 struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
199 const struct user_desc *info; 198 const struct user_desc *info;
200 199
201 if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || 200 if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
202 (pos % sizeof(struct user_desc)) != 0 || 201 (pos % sizeof(struct user_desc)) != 0 ||
203 (count % sizeof(struct user_desc)) != 0) 202 (count % sizeof(struct user_desc)) != 0)
204 return -EINVAL; 203 return -EINVAL;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c6d17ad59b8a..ff9281f16029 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,10 +50,10 @@
50#include <asm/processor.h> 50#include <asm/processor.h>
51#include <asm/debugreg.h> 51#include <asm/debugreg.h>
52#include <linux/atomic.h> 52#include <linux/atomic.h>
53#include <asm/system.h>
54#include <asm/traps.h> 53#include <asm/traps.h>
55#include <asm/desc.h> 54#include <asm/desc.h>
56#include <asm/i387.h> 55#include <asm/i387.h>
56#include <asm/fpu-internal.h>
57#include <asm/mce.h> 57#include <asm/mce.h>
58 58
59#include <asm/mach_traps.h> 59#include <asm/mach_traps.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201c97ec..fc0a147e3727 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -620,7 +620,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
620 620
621 if (cpu_khz) { 621 if (cpu_khz) {
622 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; 622 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
623 *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR); 623 *offset = ns_now - mult_frac(tsc_now, *scale,
624 (1UL << CYC2NS_SCALE_FACTOR));
624 } 625 }
625 626
626 sched_clock_idle_wakeup_event(0); 627 sched_clock_idle_wakeup_event(0);
@@ -629,7 +630,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
629 630
630static unsigned long long cyc2ns_suspend; 631static unsigned long long cyc2ns_suspend;
631 632
632void save_sched_clock_state(void) 633void tsc_save_sched_clock_state(void)
633{ 634{
634 if (!sched_clock_stable) 635 if (!sched_clock_stable)
635 return; 636 return;
@@ -645,7 +646,7 @@ void save_sched_clock_state(void)
645 * that sched_clock() continues from the point where it was left off during 646 * that sched_clock() continues from the point where it was left off during
646 * suspend. 647 * suspend.
647 */ 648 */
648void restore_sched_clock_state(void) 649void tsc_restore_sched_clock_state(void)
649{ 650{
650 unsigned long long offset; 651 unsigned long long offset;
651 unsigned long flags; 652 unsigned long flags;
@@ -932,6 +933,16 @@ static int __init init_tsc_clocksource(void)
932 clocksource_tsc.rating = 0; 933 clocksource_tsc.rating = 0;
933 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 934 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
934 } 935 }
936
937 /*
938 * Trust the results of the earlier calibration on systems
939 * exporting a reliable TSC.
940 */
941 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
942 clocksource_register_khz(&clocksource_tsc, tsc_khz);
943 return 0;
944 }
945
935 schedule_delayed_work(&tsc_irqwork, 0); 946 schedule_delayed_work(&tsc_irqwork, 0);
936 return 0; 947 return 0;
937} 948}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9eba29b46cb7..fc25e60a5884 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -42,7 +42,7 @@ static __cpuinitdata int nr_warps;
42/* 42/*
43 * TSC-warp measurement loop running on both CPUs: 43 * TSC-warp measurement loop running on both CPUs:
44 */ 44 */
45static __cpuinit void check_tsc_warp(void) 45static __cpuinit void check_tsc_warp(unsigned int timeout)
46{ 46{
47 cycles_t start, now, prev, end; 47 cycles_t start, now, prev, end;
48 int i; 48 int i;
@@ -51,9 +51,9 @@ static __cpuinit void check_tsc_warp(void)
51 start = get_cycles(); 51 start = get_cycles();
52 rdtsc_barrier(); 52 rdtsc_barrier();
53 /* 53 /*
54 * The measurement runs for 20 msecs: 54 * The measurement runs for 'timeout' msecs:
55 */ 55 */
56 end = start + tsc_khz * 20ULL; 56 end = start + (cycles_t) tsc_khz * timeout;
57 now = start; 57 now = start;
58 58
59 for (i = 0; ; i++) { 59 for (i = 0; ; i++) {
@@ -99,6 +99,25 @@ static __cpuinit void check_tsc_warp(void)
99} 99}
100 100
101/* 101/*
102 * If the target CPU coming online doesn't have any of its core-siblings
103 * online, a timeout of 20msec will be used for the TSC-warp measurement
104 * loop. Otherwise a smaller timeout of 2msec will be used, as we have some
105 * information about this socket already (and this information grows as we
106 * have more and more logical-siblings in that socket).
107 *
108 * Ideally we should be able to skip the TSC sync check on the other
109 * core-siblings, if the first logical CPU in a socket passed the sync test.
110 * But as the TSC is per-logical CPU and can potentially be modified wrongly
111 * by the bios, TSC sync test for smaller duration should be able
112 * to catch such errors. Also this will catch the condition where all the
113 * cores in the socket doesn't get reset at the same time.
114 */
115static inline unsigned int loop_timeout(int cpu)
116{
117 return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
118}
119
120/*
102 * Source CPU calls into this - it waits for the freshly booted 121 * Source CPU calls into this - it waits for the freshly booted
103 * target CPU to arrive and then starts the measurement: 122 * target CPU to arrive and then starts the measurement:
104 */ 123 */
@@ -135,7 +154,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
135 */ 154 */
136 atomic_inc(&start_count); 155 atomic_inc(&start_count);
137 156
138 check_tsc_warp(); 157 check_tsc_warp(loop_timeout(cpu));
139 158
140 while (atomic_read(&stop_count) != cpus-1) 159 while (atomic_read(&stop_count) != cpus-1)
141 cpu_relax(); 160 cpu_relax();
@@ -183,7 +202,7 @@ void __cpuinit check_tsc_sync_target(void)
183 while (atomic_read(&start_count) != cpus) 202 while (atomic_read(&start_count) != cpus)
184 cpu_relax(); 203 cpu_relax();
185 204
186 check_tsc_warp(); 205 check_tsc_warp(loop_timeout(smp_processor_id()));
187 206
188 /* 207 /*
189 * Ok, we are done: 208 * Ok, we are done:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index a1315ab2d6b9..255f58ae71e8 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
172 spinlock_t *ptl; 172 spinlock_t *ptl;
173 int i; 173 int i;
174 174
175 down_write(&mm->mmap_sem);
175 pgd = pgd_offset(mm, 0xA0000); 176 pgd = pgd_offset(mm, 0xA0000);
176 if (pgd_none_or_clear_bad(pgd)) 177 if (pgd_none_or_clear_bad(pgd))
177 goto out; 178 goto out;
@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
190 } 191 }
191 pte_unmap_unlock(pte, ptl); 192 pte_unmap_unlock(pte, ptl);
192out: 193out:
194 up_write(&mm->mmap_sem);
193 flush_tlb(); 195 flush_tlb();
194} 196}
195 197
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 327509b95e0e..f386dc49f988 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -52,10 +52,7 @@
52#include "vsyscall_trace.h" 52#include "vsyscall_trace.h"
53 53
54DEFINE_VVAR(int, vgetcpu_mode); 54DEFINE_VVAR(int, vgetcpu_mode);
55DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = 55DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
56{
57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
58};
59 56
60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; 57static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
61 58
@@ -80,20 +77,15 @@ early_param("vsyscall", vsyscall_setup);
80 77
81void update_vsyscall_tz(void) 78void update_vsyscall_tz(void)
82{ 79{
83 unsigned long flags;
84
85 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
86 /* sys_tz has changed */
87 vsyscall_gtod_data.sys_tz = sys_tz; 80 vsyscall_gtod_data.sys_tz = sys_tz;
88 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
89} 81}
90 82
91void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, 83void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
92 struct clocksource *clock, u32 mult) 84 struct clocksource *clock, u32 mult)
93{ 85{
94 unsigned long flags; 86 struct timespec monotonic;
95 87
96 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 88 write_seqcount_begin(&vsyscall_gtod_data.seq);
97 89
98 /* copy vsyscall data */ 90 /* copy vsyscall data */
99 vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; 91 vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
@@ -101,12 +93,19 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
101 vsyscall_gtod_data.clock.mask = clock->mask; 93 vsyscall_gtod_data.clock.mask = clock->mask;
102 vsyscall_gtod_data.clock.mult = mult; 94 vsyscall_gtod_data.clock.mult = mult;
103 vsyscall_gtod_data.clock.shift = clock->shift; 95 vsyscall_gtod_data.clock.shift = clock->shift;
96
104 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; 97 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
105 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; 98 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
106 vsyscall_gtod_data.wall_to_monotonic = *wtm; 99
100 monotonic = timespec_add(*wall_time, *wtm);
101 vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec;
102 vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec;
103
107 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); 104 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
105 vsyscall_gtod_data.monotonic_time_coarse =
106 timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
108 107
109 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 108 write_seqcount_end(&vsyscall_gtod_data.seq);
110} 109}
111 110
112static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, 111static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 947a06ccc673..e9f265fd79ae 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,6 +91,7 @@ struct x86_init_ops x86_init __initdata = {
91}; 91};
92 92
93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
94 .early_percpu_clock_init = x86_init_noop,
94 .setup_percpu_clockev = setup_secondary_APIC_clock, 95 .setup_percpu_clockev = setup_secondary_APIC_clock,
95 .fixup_cpu_id = x86_default_fixup_cpu_id, 96 .fixup_cpu_id = x86_default_fixup_cpu_id,
96}; 97};
@@ -107,7 +108,9 @@ struct x86_platform_ops x86_platform = {
107 .is_untracked_pat_range = is_ISA_range, 108 .is_untracked_pat_range = is_ISA_range,
108 .nmi_init = default_nmi_init, 109 .nmi_init = default_nmi_init,
109 .get_nmi_reason = default_get_nmi_reason, 110 .get_nmi_reason = default_get_nmi_reason,
110 .i8042_detect = default_i8042_detect 111 .i8042_detect = default_i8042_detect,
112 .save_sched_clock_state = tsc_save_sched_clock_state,
113 .restore_sched_clock_state = tsc_restore_sched_clock_state,
111}; 114};
112 115
113EXPORT_SYMBOL_GPL(x86_platform); 116EXPORT_SYMBOL_GPL(x86_platform);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 711091114119..e62728e30b01 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -6,6 +6,7 @@
6#include <linux/bootmem.h> 6#include <linux/bootmem.h>
7#include <linux/compat.h> 7#include <linux/compat.h>
8#include <asm/i387.h> 8#include <asm/i387.h>
9#include <asm/fpu-internal.h>
9#ifdef CONFIG_IA32_EMULATION 10#ifdef CONFIG_IA32_EMULATION
10#include <asm/sigcontext32.h> 11#include <asm/sigcontext32.h>
11#endif 12#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 89b02bfaaca5..9fed5bedaad6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -236,7 +236,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
236 const u32 kvm_supported_word6_x86_features = 236 const u32 kvm_supported_word6_x86_features =
237 F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | 237 F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
238 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | 238 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
239 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 239 F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
240 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); 240 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
241 241
242 /* cpuid 0xC0000001.edx */ 242 /* cpuid 0xC0000001.edx */
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 5b97e1797a6d..26d1fb437eb5 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -43,4 +43,12 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
43 return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); 43 return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
44} 44}
45 45
46static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
47{
48 struct kvm_cpuid_entry2 *best;
49
50 best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
51 return best && (best->ecx & bit(X86_FEATURE_OSVW));
52}
53
46#endif 54#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0982507b962a..83756223f8aa 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -57,6 +57,7 @@
57#define OpDS 23ull /* DS */ 57#define OpDS 23ull /* DS */
58#define OpFS 24ull /* FS */ 58#define OpFS 24ull /* FS */
59#define OpGS 25ull /* GS */ 59#define OpGS 25ull /* GS */
60#define OpMem8 26ull /* 8-bit zero extended memory operand */
60 61
61#define OpBits 5 /* Width of operand field */ 62#define OpBits 5 /* Width of operand field */
62#define OpMask ((1ull << OpBits) - 1) 63#define OpMask ((1ull << OpBits) - 1)
@@ -101,6 +102,7 @@
101#define SrcAcc (OpAcc << SrcShift) 102#define SrcAcc (OpAcc << SrcShift)
102#define SrcImmU16 (OpImmU16 << SrcShift) 103#define SrcImmU16 (OpImmU16 << SrcShift)
103#define SrcDX (OpDX << SrcShift) 104#define SrcDX (OpDX << SrcShift)
105#define SrcMem8 (OpMem8 << SrcShift)
104#define SrcMask (OpMask << SrcShift) 106#define SrcMask (OpMask << SrcShift)
105#define BitOp (1<<11) 107#define BitOp (1<<11)
106#define MemAbs (1<<12) /* Memory operand is absolute displacement */ 108#define MemAbs (1<<12) /* Memory operand is absolute displacement */
@@ -858,8 +860,7 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
858} 860}
859 861
860static void decode_register_operand(struct x86_emulate_ctxt *ctxt, 862static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
861 struct operand *op, 863 struct operand *op)
862 int inhibit_bytereg)
863{ 864{
864 unsigned reg = ctxt->modrm_reg; 865 unsigned reg = ctxt->modrm_reg;
865 int highbyte_regs = ctxt->rex_prefix == 0; 866 int highbyte_regs = ctxt->rex_prefix == 0;
@@ -876,7 +877,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
876 } 877 }
877 878
878 op->type = OP_REG; 879 op->type = OP_REG;
879 if ((ctxt->d & ByteOp) && !inhibit_bytereg) { 880 if (ctxt->d & ByteOp) {
880 op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); 881 op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
881 op->bytes = 1; 882 op->bytes = 1;
882 } else { 883 } else {
@@ -1151,6 +1152,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1151 return 1; 1152 return 1;
1152} 1153}
1153 1154
1155static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1156 u16 index, struct desc_struct *desc)
1157{
1158 struct desc_ptr dt;
1159 ulong addr;
1160
1161 ctxt->ops->get_idt(ctxt, &dt);
1162
1163 if (dt.size < index * 8 + 7)
1164 return emulate_gp(ctxt, index << 3 | 0x2);
1165
1166 addr = dt.address + index * 8;
1167 return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
1168 &ctxt->exception);
1169}
1170
1154static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, 1171static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1155 u16 selector, struct desc_ptr *dt) 1172 u16 selector, struct desc_ptr *dt)
1156{ 1173{
@@ -1227,6 +1244,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1227 seg_desc.type = 3; 1244 seg_desc.type = 3;
1228 seg_desc.p = 1; 1245 seg_desc.p = 1;
1229 seg_desc.s = 1; 1246 seg_desc.s = 1;
1247 if (ctxt->mode == X86EMUL_MODE_VM86)
1248 seg_desc.dpl = 3;
1230 goto load; 1249 goto load;
1231 } 1250 }
1232 1251
@@ -1891,6 +1910,17 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1891 ss->p = 1; 1910 ss->p = 1;
1892} 1911}
1893 1912
1913static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
1914{
1915 u32 eax, ebx, ecx, edx;
1916
1917 eax = ecx = 0;
1918 return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)
1919 && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
1920 && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
1921 && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
1922}
1923
1894static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) 1924static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
1895{ 1925{
1896 struct x86_emulate_ops *ops = ctxt->ops; 1926 struct x86_emulate_ops *ops = ctxt->ops;
@@ -2007,6 +2037,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2007 if (ctxt->mode == X86EMUL_MODE_REAL) 2037 if (ctxt->mode == X86EMUL_MODE_REAL)
2008 return emulate_gp(ctxt, 0); 2038 return emulate_gp(ctxt, 0);
2009 2039
2040 /*
2041 * Not recognized on AMD in compat mode (but is recognized in legacy
2042 * mode).
2043 */
2044 if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA)
2045 && !vendor_intel(ctxt))
2046 return emulate_ud(ctxt);
2047
2010 /* XXX sysenter/sysexit have not been tested in 64bit mode. 2048 /* XXX sysenter/sysexit have not been tested in 64bit mode.
2011 * Therefore, we inject an #UD. 2049 * Therefore, we inject an #UD.
2012 */ 2050 */
@@ -2306,6 +2344,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2306 return emulate_gp(ctxt, 0); 2344 return emulate_gp(ctxt, 0);
2307 ctxt->_eip = tss->eip; 2345 ctxt->_eip = tss->eip;
2308 ctxt->eflags = tss->eflags | 2; 2346 ctxt->eflags = tss->eflags | 2;
2347
2348 /* General purpose registers */
2309 ctxt->regs[VCPU_REGS_RAX] = tss->eax; 2349 ctxt->regs[VCPU_REGS_RAX] = tss->eax;
2310 ctxt->regs[VCPU_REGS_RCX] = tss->ecx; 2350 ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
2311 ctxt->regs[VCPU_REGS_RDX] = tss->edx; 2351 ctxt->regs[VCPU_REGS_RDX] = tss->edx;
@@ -2328,6 +2368,24 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2328 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); 2368 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2329 2369
2330 /* 2370 /*
2371 * If we're switching between Protected Mode and VM86, we need to make
2372 * sure to update the mode before loading the segment descriptors so
2373 * that the selectors are interpreted correctly.
2374 *
2375 * Need to get rflags to the vcpu struct immediately because it
2376 * influences the CPL which is checked at least when loading the segment
2377 * descriptors and when pushing an error code to the new kernel stack.
2378 *
2379 * TODO Introduce a separate ctxt->ops->set_cpl callback
2380 */
2381 if (ctxt->eflags & X86_EFLAGS_VM)
2382 ctxt->mode = X86EMUL_MODE_VM86;
2383 else
2384 ctxt->mode = X86EMUL_MODE_PROT32;
2385
2386 ctxt->ops->set_rflags(ctxt, ctxt->eflags);
2387
2388 /*
2331 * Now load segment descriptors. If fault happenes at this stage 2389 * Now load segment descriptors. If fault happenes at this stage
2332 * it is handled in a context of new task 2390 * it is handled in a context of new task
2333 */ 2391 */
@@ -2401,7 +2459,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2401} 2459}
2402 2460
2403static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, 2461static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2404 u16 tss_selector, int reason, 2462 u16 tss_selector, int idt_index, int reason,
2405 bool has_error_code, u32 error_code) 2463 bool has_error_code, u32 error_code)
2406{ 2464{
2407 struct x86_emulate_ops *ops = ctxt->ops; 2465 struct x86_emulate_ops *ops = ctxt->ops;
@@ -2423,12 +2481,35 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2423 2481
2424 /* FIXME: check that next_tss_desc is tss */ 2482 /* FIXME: check that next_tss_desc is tss */
2425 2483
2426 if (reason != TASK_SWITCH_IRET) { 2484 /*
2427 if ((tss_selector & 3) > next_tss_desc.dpl || 2485 * Check privileges. The three cases are task switch caused by...
2428 ops->cpl(ctxt) > next_tss_desc.dpl) 2486 *
2429 return emulate_gp(ctxt, 0); 2487 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2488 * 2. Exception/IRQ/iret: No check is performed
2489 * 3. jmp/call to TSS: Check agains DPL of the TSS
2490 */
2491 if (reason == TASK_SWITCH_GATE) {
2492 if (idt_index != -1) {
2493 /* Software interrupts */
2494 struct desc_struct task_gate_desc;
2495 int dpl;
2496
2497 ret = read_interrupt_descriptor(ctxt, idt_index,
2498 &task_gate_desc);
2499 if (ret != X86EMUL_CONTINUE)
2500 return ret;
2501
2502 dpl = task_gate_desc.dpl;
2503 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2504 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2505 }
2506 } else if (reason != TASK_SWITCH_IRET) {
2507 int dpl = next_tss_desc.dpl;
2508 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2509 return emulate_gp(ctxt, tss_selector);
2430 } 2510 }
2431 2511
2512
2432 desc_limit = desc_limit_scaled(&next_tss_desc); 2513 desc_limit = desc_limit_scaled(&next_tss_desc);
2433 if (!next_tss_desc.p || 2514 if (!next_tss_desc.p ||
2434 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || 2515 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2481,7 +2562,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2481} 2562}
2482 2563
2483int emulator_task_switch(struct x86_emulate_ctxt *ctxt, 2564int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2484 u16 tss_selector, int reason, 2565 u16 tss_selector, int idt_index, int reason,
2485 bool has_error_code, u32 error_code) 2566 bool has_error_code, u32 error_code)
2486{ 2567{
2487 int rc; 2568 int rc;
@@ -2489,7 +2570,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2489 ctxt->_eip = ctxt->eip; 2570 ctxt->_eip = ctxt->eip;
2490 ctxt->dst.type = OP_NONE; 2571 ctxt->dst.type = OP_NONE;
2491 2572
2492 rc = emulator_do_task_switch(ctxt, tss_selector, reason, 2573 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
2493 has_error_code, error_code); 2574 has_error_code, error_code);
2494 2575
2495 if (rc == X86EMUL_CONTINUE) 2576 if (rc == X86EMUL_CONTINUE)
@@ -3514,13 +3595,13 @@ static struct opcode twobyte_table[256] = {
3514 I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), 3595 I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
3515 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), 3596 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
3516 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), 3597 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
3517 D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 3598 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3518 /* 0xB8 - 0xBF */ 3599 /* 0xB8 - 0xBF */
3519 N, N, 3600 N, N,
3520 G(BitOp, group8), 3601 G(BitOp, group8),
3521 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), 3602 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
3522 I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), 3603 I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr),
3523 D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 3604 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3524 /* 0xC0 - 0xCF */ 3605 /* 0xC0 - 0xCF */
3525 D2bv(DstMem | SrcReg | ModRM | Lock), 3606 D2bv(DstMem | SrcReg | ModRM | Lock),
3526 N, D(DstMem | SrcReg | ModRM | Mov), 3607 N, D(DstMem | SrcReg | ModRM | Mov),
@@ -3602,9 +3683,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
3602 3683
3603 switch (d) { 3684 switch (d) {
3604 case OpReg: 3685 case OpReg:
3605 decode_register_operand(ctxt, op, 3686 decode_register_operand(ctxt, op);
3606 op == &ctxt->dst &&
3607 ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7));
3608 break; 3687 break;
3609 case OpImmUByte: 3688 case OpImmUByte:
3610 rc = decode_imm(ctxt, op, 1, false); 3689 rc = decode_imm(ctxt, op, 1, false);
@@ -3656,6 +3735,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
3656 case OpImm: 3735 case OpImm:
3657 rc = decode_imm(ctxt, op, imm_size(ctxt), true); 3736 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
3658 break; 3737 break;
3738 case OpMem8:
3739 ctxt->memop.bytes = 1;
3740 goto mem_common;
3659 case OpMem16: 3741 case OpMem16:
3660 ctxt->memop.bytes = 2; 3742 ctxt->memop.bytes = 2;
3661 goto mem_common; 3743 goto mem_common;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index b6a73537e1ef..81cf4fa4a2be 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -307,6 +307,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
307 if (val & 0x10) { 307 if (val & 0x10) {
308 s->init4 = val & 1; 308 s->init4 = val & 1;
309 s->last_irr = 0; 309 s->last_irr = 0;
310 s->irr &= s->elcr;
310 s->imr = 0; 311 s->imr = 0;
311 s->priority_add = 0; 312 s->priority_add = 0;
312 s->special_mask = 0; 313 s->special_mask = 0;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cfdc6e0ef002..858432287ab6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -433,7 +433,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
433 break; 433 break;
434 434
435 case APIC_DM_INIT: 435 case APIC_DM_INIT:
436 if (level) { 436 if (!trig_mode || level) {
437 result = 1; 437 result = 1;
438 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 438 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
439 kvm_make_request(KVM_REQ_EVENT, vcpu); 439 kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -731,7 +731,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
731 u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; 731 u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
732 u64 ns = 0; 732 u64 ns = 0;
733 struct kvm_vcpu *vcpu = apic->vcpu; 733 struct kvm_vcpu *vcpu = apic->vcpu;
734 unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); 734 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
735 unsigned long flags; 735 unsigned long flags;
736 736
737 if (unlikely(!tscdeadline || !this_tsc_khz)) 737 if (unlikely(!tscdeadline || !this_tsc_khz))
@@ -1283,9 +1283,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
1283 if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) 1283 if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
1284 return; 1284 return;
1285 1285
1286 vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0); 1286 vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
1287 data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)); 1287 data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
1288 kunmap_atomic(vapic, KM_USER0); 1288 kunmap_atomic(vapic);
1289 1289
1290 apic_set_tpr(vcpu->arch.apic, data & 0xff); 1290 apic_set_tpr(vcpu->arch.apic, data & 0xff);
1291} 1291}
@@ -1310,9 +1310,9 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
1310 max_isr = 0; 1310 max_isr = 0;
1311 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 1311 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
1312 1312
1313 vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0); 1313 vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
1314 *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data; 1314 *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
1315 kunmap_atomic(vapic, KM_USER0); 1315 kunmap_atomic(vapic);
1316} 1316}
1317 1317
1318void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 1318void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 224b02c3cda9..4cb164268846 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -688,9 +688,8 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
688{ 688{
689 unsigned long idx; 689 unsigned long idx;
690 690
691 idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - 691 idx = gfn_to_index(gfn, slot->base_gfn, level);
692 (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 692 return &slot->arch.lpage_info[level - 2][idx];
693 return &slot->lpage_info[level - 2][idx];
694} 693}
695 694
696static void account_shadowed(struct kvm *kvm, gfn_t gfn) 695static void account_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -946,7 +945,7 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
946 } 945 }
947} 946}
948 947
949static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level, 948static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
950 struct kvm_memory_slot *slot) 949 struct kvm_memory_slot *slot)
951{ 950{
952 struct kvm_lpage_info *linfo; 951 struct kvm_lpage_info *linfo;
@@ -966,7 +965,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
966 struct kvm_memory_slot *slot; 965 struct kvm_memory_slot *slot;
967 966
968 slot = gfn_to_memslot(kvm, gfn); 967 slot = gfn_to_memslot(kvm, gfn);
969 return __gfn_to_rmap(kvm, gfn, level, slot); 968 return __gfn_to_rmap(gfn, level, slot);
970} 969}
971 970
972static bool rmap_can_add(struct kvm_vcpu *vcpu) 971static bool rmap_can_add(struct kvm_vcpu *vcpu)
@@ -988,7 +987,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
988 return pte_list_add(vcpu, spte, rmapp); 987 return pte_list_add(vcpu, spte, rmapp);
989} 988}
990 989
991static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) 990static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
992{ 991{
993 return pte_list_next(rmapp, spte); 992 return pte_list_next(rmapp, spte);
994} 993}
@@ -1018,8 +1017,8 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
1018 u64 *spte; 1017 u64 *spte;
1019 int i, write_protected = 0; 1018 int i, write_protected = 0;
1020 1019
1021 rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); 1020 rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
1022 spte = rmap_next(kvm, rmapp, NULL); 1021 spte = rmap_next(rmapp, NULL);
1023 while (spte) { 1022 while (spte) {
1024 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1023 BUG_ON(!(*spte & PT_PRESENT_MASK));
1025 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 1024 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
@@ -1027,14 +1026,14 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
1027 mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); 1026 mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
1028 write_protected = 1; 1027 write_protected = 1;
1029 } 1028 }
1030 spte = rmap_next(kvm, rmapp, spte); 1029 spte = rmap_next(rmapp, spte);
1031 } 1030 }
1032 1031
1033 /* check for huge page mappings */ 1032 /* check for huge page mappings */
1034 for (i = PT_DIRECTORY_LEVEL; 1033 for (i = PT_DIRECTORY_LEVEL;
1035 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 1034 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
1036 rmapp = __gfn_to_rmap(kvm, gfn, i, slot); 1035 rmapp = __gfn_to_rmap(gfn, i, slot);
1037 spte = rmap_next(kvm, rmapp, NULL); 1036 spte = rmap_next(rmapp, NULL);
1038 while (spte) { 1037 while (spte) {
1039 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1038 BUG_ON(!(*spte & PT_PRESENT_MASK));
1040 BUG_ON(!is_large_pte(*spte)); 1039 BUG_ON(!is_large_pte(*spte));
@@ -1045,7 +1044,7 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
1045 spte = NULL; 1044 spte = NULL;
1046 write_protected = 1; 1045 write_protected = 1;
1047 } 1046 }
1048 spte = rmap_next(kvm, rmapp, spte); 1047 spte = rmap_next(rmapp, spte);
1049 } 1048 }
1050 } 1049 }
1051 1050
@@ -1066,7 +1065,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
1066 u64 *spte; 1065 u64 *spte;
1067 int need_tlb_flush = 0; 1066 int need_tlb_flush = 0;
1068 1067
1069 while ((spte = rmap_next(kvm, rmapp, NULL))) { 1068 while ((spte = rmap_next(rmapp, NULL))) {
1070 BUG_ON(!(*spte & PT_PRESENT_MASK)); 1069 BUG_ON(!(*spte & PT_PRESENT_MASK));
1071 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); 1070 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
1072 drop_spte(kvm, spte); 1071 drop_spte(kvm, spte);
@@ -1085,14 +1084,14 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
1085 1084
1086 WARN_ON(pte_huge(*ptep)); 1085 WARN_ON(pte_huge(*ptep));
1087 new_pfn = pte_pfn(*ptep); 1086 new_pfn = pte_pfn(*ptep);
1088 spte = rmap_next(kvm, rmapp, NULL); 1087 spte = rmap_next(rmapp, NULL);
1089 while (spte) { 1088 while (spte) {
1090 BUG_ON(!is_shadow_present_pte(*spte)); 1089 BUG_ON(!is_shadow_present_pte(*spte));
1091 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); 1090 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
1092 need_flush = 1; 1091 need_flush = 1;
1093 if (pte_write(*ptep)) { 1092 if (pte_write(*ptep)) {
1094 drop_spte(kvm, spte); 1093 drop_spte(kvm, spte);
1095 spte = rmap_next(kvm, rmapp, NULL); 1094 spte = rmap_next(rmapp, NULL);
1096 } else { 1095 } else {
1097 new_spte = *spte &~ (PT64_BASE_ADDR_MASK); 1096 new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
1098 new_spte |= (u64)new_pfn << PAGE_SHIFT; 1097 new_spte |= (u64)new_pfn << PAGE_SHIFT;
@@ -1102,7 +1101,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
1102 new_spte &= ~shadow_accessed_mask; 1101 new_spte &= ~shadow_accessed_mask;
1103 mmu_spte_clear_track_bits(spte); 1102 mmu_spte_clear_track_bits(spte);
1104 mmu_spte_set(spte, new_spte); 1103 mmu_spte_set(spte, new_spte);
1105 spte = rmap_next(kvm, rmapp, spte); 1104 spte = rmap_next(rmapp, spte);
1106 } 1105 }
1107 } 1106 }
1108 if (need_flush) 1107 if (need_flush)
@@ -1176,7 +1175,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1176 if (!shadow_accessed_mask) 1175 if (!shadow_accessed_mask)
1177 return kvm_unmap_rmapp(kvm, rmapp, data); 1176 return kvm_unmap_rmapp(kvm, rmapp, data);
1178 1177
1179 spte = rmap_next(kvm, rmapp, NULL); 1178 spte = rmap_next(rmapp, NULL);
1180 while (spte) { 1179 while (spte) {
1181 int _young; 1180 int _young;
1182 u64 _spte = *spte; 1181 u64 _spte = *spte;
@@ -1186,7 +1185,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1186 young = 1; 1185 young = 1;
1187 clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); 1186 clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
1188 } 1187 }
1189 spte = rmap_next(kvm, rmapp, spte); 1188 spte = rmap_next(rmapp, spte);
1190 } 1189 }
1191 return young; 1190 return young;
1192} 1191}
@@ -1205,7 +1204,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1205 if (!shadow_accessed_mask) 1204 if (!shadow_accessed_mask)
1206 goto out; 1205 goto out;
1207 1206
1208 spte = rmap_next(kvm, rmapp, NULL); 1207 spte = rmap_next(rmapp, NULL);
1209 while (spte) { 1208 while (spte) {
1210 u64 _spte = *spte; 1209 u64 _spte = *spte;
1211 BUG_ON(!(_spte & PT_PRESENT_MASK)); 1210 BUG_ON(!(_spte & PT_PRESENT_MASK));
@@ -1214,7 +1213,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1214 young = 1; 1213 young = 1;
1215 break; 1214 break;
1216 } 1215 }
1217 spte = rmap_next(kvm, rmapp, spte); 1216 spte = rmap_next(rmapp, spte);
1218 } 1217 }
1219out: 1218out:
1220 return young; 1219 return young;
@@ -1391,11 +1390,6 @@ struct kvm_mmu_pages {
1391 unsigned int nr; 1390 unsigned int nr;
1392}; 1391};
1393 1392
1394#define for_each_unsync_children(bitmap, idx) \
1395 for (idx = find_first_bit(bitmap, 512); \
1396 idx < 512; \
1397 idx = find_next_bit(bitmap, 512, idx+1))
1398
1399static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, 1393static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
1400 int idx) 1394 int idx)
1401{ 1395{
@@ -1417,7 +1411,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
1417{ 1411{
1418 int i, ret, nr_unsync_leaf = 0; 1412 int i, ret, nr_unsync_leaf = 0;
1419 1413
1420 for_each_unsync_children(sp->unsync_child_bitmap, i) { 1414 for_each_set_bit(i, sp->unsync_child_bitmap, 512) {
1421 struct kvm_mmu_page *child; 1415 struct kvm_mmu_page *child;
1422 u64 ent = sp->spt[i]; 1416 u64 ent = sp->spt[i];
1423 1417
@@ -1803,6 +1797,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
1803{ 1797{
1804 if (is_large_pte(*sptep)) { 1798 if (is_large_pte(*sptep)) {
1805 drop_spte(vcpu->kvm, sptep); 1799 drop_spte(vcpu->kvm, sptep);
1800 --vcpu->kvm->stat.lpages;
1806 kvm_flush_remote_tlbs(vcpu->kvm); 1801 kvm_flush_remote_tlbs(vcpu->kvm);
1807 } 1802 }
1808} 1803}
@@ -3190,15 +3185,14 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
3190#undef PTTYPE 3185#undef PTTYPE
3191 3186
3192static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, 3187static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3193 struct kvm_mmu *context, 3188 struct kvm_mmu *context)
3194 int level)
3195{ 3189{
3196 int maxphyaddr = cpuid_maxphyaddr(vcpu); 3190 int maxphyaddr = cpuid_maxphyaddr(vcpu);
3197 u64 exb_bit_rsvd = 0; 3191 u64 exb_bit_rsvd = 0;
3198 3192
3199 if (!context->nx) 3193 if (!context->nx)
3200 exb_bit_rsvd = rsvd_bits(63, 63); 3194 exb_bit_rsvd = rsvd_bits(63, 63);
3201 switch (level) { 3195 switch (context->root_level) {
3202 case PT32_ROOT_LEVEL: 3196 case PT32_ROOT_LEVEL:
3203 /* no rsvd bits for 2 level 4K page table entries */ 3197 /* no rsvd bits for 2 level 4K page table entries */
3204 context->rsvd_bits_mask[0][1] = 0; 3198 context->rsvd_bits_mask[0][1] = 0;
@@ -3256,8 +3250,9 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
3256 int level) 3250 int level)
3257{ 3251{
3258 context->nx = is_nx(vcpu); 3252 context->nx = is_nx(vcpu);
3253 context->root_level = level;
3259 3254
3260 reset_rsvds_bits_mask(vcpu, context, level); 3255 reset_rsvds_bits_mask(vcpu, context);
3261 3256
3262 ASSERT(is_pae(vcpu)); 3257 ASSERT(is_pae(vcpu));
3263 context->new_cr3 = paging_new_cr3; 3258 context->new_cr3 = paging_new_cr3;
@@ -3267,7 +3262,6 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
3267 context->invlpg = paging64_invlpg; 3262 context->invlpg = paging64_invlpg;
3268 context->update_pte = paging64_update_pte; 3263 context->update_pte = paging64_update_pte;
3269 context->free = paging_free; 3264 context->free = paging_free;
3270 context->root_level = level;
3271 context->shadow_root_level = level; 3265 context->shadow_root_level = level;
3272 context->root_hpa = INVALID_PAGE; 3266 context->root_hpa = INVALID_PAGE;
3273 context->direct_map = false; 3267 context->direct_map = false;
@@ -3284,8 +3278,9 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
3284 struct kvm_mmu *context) 3278 struct kvm_mmu *context)
3285{ 3279{
3286 context->nx = false; 3280 context->nx = false;
3281 context->root_level = PT32_ROOT_LEVEL;
3287 3282
3288 reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); 3283 reset_rsvds_bits_mask(vcpu, context);
3289 3284
3290 context->new_cr3 = paging_new_cr3; 3285 context->new_cr3 = paging_new_cr3;
3291 context->page_fault = paging32_page_fault; 3286 context->page_fault = paging32_page_fault;
@@ -3294,7 +3289,6 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
3294 context->sync_page = paging32_sync_page; 3289 context->sync_page = paging32_sync_page;
3295 context->invlpg = paging32_invlpg; 3290 context->invlpg = paging32_invlpg;
3296 context->update_pte = paging32_update_pte; 3291 context->update_pte = paging32_update_pte;
3297 context->root_level = PT32_ROOT_LEVEL;
3298 context->shadow_root_level = PT32E_ROOT_LEVEL; 3292 context->shadow_root_level = PT32E_ROOT_LEVEL;
3299 context->root_hpa = INVALID_PAGE; 3293 context->root_hpa = INVALID_PAGE;
3300 context->direct_map = false; 3294 context->direct_map = false;
@@ -3325,7 +3319,6 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3325 context->get_cr3 = get_cr3; 3319 context->get_cr3 = get_cr3;
3326 context->get_pdptr = kvm_pdptr_read; 3320 context->get_pdptr = kvm_pdptr_read;
3327 context->inject_page_fault = kvm_inject_page_fault; 3321 context->inject_page_fault = kvm_inject_page_fault;
3328 context->nx = is_nx(vcpu);
3329 3322
3330 if (!is_paging(vcpu)) { 3323 if (!is_paging(vcpu)) {
3331 context->nx = false; 3324 context->nx = false;
@@ -3333,19 +3326,19 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3333 context->root_level = 0; 3326 context->root_level = 0;
3334 } else if (is_long_mode(vcpu)) { 3327 } else if (is_long_mode(vcpu)) {
3335 context->nx = is_nx(vcpu); 3328 context->nx = is_nx(vcpu);
3336 reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL);
3337 context->gva_to_gpa = paging64_gva_to_gpa;
3338 context->root_level = PT64_ROOT_LEVEL; 3329 context->root_level = PT64_ROOT_LEVEL;
3330 reset_rsvds_bits_mask(vcpu, context);
3331 context->gva_to_gpa = paging64_gva_to_gpa;
3339 } else if (is_pae(vcpu)) { 3332 } else if (is_pae(vcpu)) {
3340 context->nx = is_nx(vcpu); 3333 context->nx = is_nx(vcpu);
3341 reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL);
3342 context->gva_to_gpa = paging64_gva_to_gpa;
3343 context->root_level = PT32E_ROOT_LEVEL; 3334 context->root_level = PT32E_ROOT_LEVEL;
3335 reset_rsvds_bits_mask(vcpu, context);
3336 context->gva_to_gpa = paging64_gva_to_gpa;
3344 } else { 3337 } else {
3345 context->nx = false; 3338 context->nx = false;
3346 reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
3347 context->gva_to_gpa = paging32_gva_to_gpa;
3348 context->root_level = PT32_ROOT_LEVEL; 3339 context->root_level = PT32_ROOT_LEVEL;
3340 reset_rsvds_bits_mask(vcpu, context);
3341 context->gva_to_gpa = paging32_gva_to_gpa;
3349 } 3342 }
3350 3343
3351 return 0; 3344 return 0;
@@ -3408,18 +3401,18 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
3408 g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; 3401 g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
3409 } else if (is_long_mode(vcpu)) { 3402 } else if (is_long_mode(vcpu)) {
3410 g_context->nx = is_nx(vcpu); 3403 g_context->nx = is_nx(vcpu);
3411 reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL);
3412 g_context->root_level = PT64_ROOT_LEVEL; 3404 g_context->root_level = PT64_ROOT_LEVEL;
3405 reset_rsvds_bits_mask(vcpu, g_context);
3413 g_context->gva_to_gpa = paging64_gva_to_gpa_nested; 3406 g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
3414 } else if (is_pae(vcpu)) { 3407 } else if (is_pae(vcpu)) {
3415 g_context->nx = is_nx(vcpu); 3408 g_context->nx = is_nx(vcpu);
3416 reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL);
3417 g_context->root_level = PT32E_ROOT_LEVEL; 3409 g_context->root_level = PT32E_ROOT_LEVEL;
3410 reset_rsvds_bits_mask(vcpu, g_context);
3418 g_context->gva_to_gpa = paging64_gva_to_gpa_nested; 3411 g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
3419 } else { 3412 } else {
3420 g_context->nx = false; 3413 g_context->nx = false;
3421 reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL);
3422 g_context->root_level = PT32_ROOT_LEVEL; 3414 g_context->root_level = PT32_ROOT_LEVEL;
3415 reset_rsvds_bits_mask(vcpu, g_context);
3423 g_context->gva_to_gpa = paging32_gva_to_gpa_nested; 3416 g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
3424 } 3417 }
3425 3418
@@ -3555,7 +3548,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
3555 * If we're seeing too many writes to a page, it may no longer be a page table, 3548 * If we're seeing too many writes to a page, it may no longer be a page table,
3556 * or we may be forking, in which case it is better to unmap the page. 3549 * or we may be forking, in which case it is better to unmap the page.
3557 */ 3550 */
3558static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte) 3551static bool detect_write_flooding(struct kvm_mmu_page *sp)
3559{ 3552{
3560 /* 3553 /*
3561 * Skip write-flooding detected for the sp whose level is 1, because 3554 * Skip write-flooding detected for the sp whose level is 1, because
@@ -3664,10 +3657,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
3664 3657
3665 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; 3658 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
3666 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { 3659 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
3667 spte = get_written_sptes(sp, gpa, &npte);
3668
3669 if (detect_write_misaligned(sp, gpa, bytes) || 3660 if (detect_write_misaligned(sp, gpa, bytes) ||
3670 detect_write_flooding(sp, spte)) { 3661 detect_write_flooding(sp)) {
3671 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, 3662 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
3672 &invalid_list); 3663 &invalid_list);
3673 ++vcpu->kvm->stat.mmu_flooded; 3664 ++vcpu->kvm->stat.mmu_flooded;
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index fe15dcc07a6b..715da5a19a5b 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -200,13 +200,13 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
200 slot = gfn_to_memslot(kvm, sp->gfn); 200 slot = gfn_to_memslot(kvm, sp->gfn);
201 rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; 201 rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
202 202
203 spte = rmap_next(kvm, rmapp, NULL); 203 spte = rmap_next(rmapp, NULL);
204 while (spte) { 204 while (spte) {
205 if (is_writable_pte(*spte)) 205 if (is_writable_pte(*spte))
206 audit_printk(kvm, "shadow page has writable " 206 audit_printk(kvm, "shadow page has writable "
207 "mappings: gfn %llx role %x\n", 207 "mappings: gfn %llx role %x\n",
208 sp->gfn, sp->role.word); 208 sp->gfn, sp->role.word);
209 spte = rmap_next(kvm, rmapp, spte); 209 spte = rmap_next(rmapp, spte);
210 } 210 }
211} 211}
212 212
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
234} 234}
235 235
236static bool mmu_audit; 236static bool mmu_audit;
237static struct jump_label_key mmu_audit_key; 237static struct static_key mmu_audit_key;
238 238
239static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) 239static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
240{ 240{
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
250 250
251static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) 251static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
252{ 252{
253 if (static_branch((&mmu_audit_key))) 253 if (static_key_false((&mmu_audit_key)))
254 __kvm_mmu_audit(vcpu, point); 254 __kvm_mmu_audit(vcpu, point);
255} 255}
256 256
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
259 if (mmu_audit) 259 if (mmu_audit)
260 return; 260 return;
261 261
262 jump_label_inc(&mmu_audit_key); 262 static_key_slow_inc(&mmu_audit_key);
263 mmu_audit = true; 263 mmu_audit = true;
264} 264}
265 265
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
268 if (!mmu_audit) 268 if (!mmu_audit)
269 return; 269 return;
270 270
271 jump_label_dec(&mmu_audit_key); 271 static_key_slow_dec(&mmu_audit_key);
272 mmu_audit = false; 272 mmu_audit = false;
273} 273}
274 274
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 15610285ebb6..df5a70311be8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -92,9 +92,9 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
92 if (unlikely(npages != 1)) 92 if (unlikely(npages != 1))
93 return -EFAULT; 93 return -EFAULT;
94 94
95 table = kmap_atomic(page, KM_USER0); 95 table = kmap_atomic(page);
96 ret = CMPXCHG(&table[index], orig_pte, new_pte); 96 ret = CMPXCHG(&table[index], orig_pte, new_pte);
97 kunmap_atomic(table, KM_USER0); 97 kunmap_atomic(table);
98 98
99 kvm_release_page_dirty(page); 99 kvm_release_page_dirty(page);
100 100
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7aad5446f393..173df38dbda5 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -33,10 +33,11 @@ static struct kvm_arch_event_perf_mapping {
33 [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, 33 [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
34 [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 34 [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
35 [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, 35 [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
36 [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
36}; 37};
37 38
38/* mapping between fixed pmc index and arch_events array */ 39/* mapping between fixed pmc index and arch_events array */
39int fixed_pmc_events[] = {1, 0, 2}; 40int fixed_pmc_events[] = {1, 0, 7};
40 41
41static bool pmc_is_gp(struct kvm_pmc *pmc) 42static bool pmc_is_gp(struct kvm_pmc *pmc)
42{ 43{
@@ -210,6 +211,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
210 unsigned config, type = PERF_TYPE_RAW; 211 unsigned config, type = PERF_TYPE_RAW;
211 u8 event_select, unit_mask; 212 u8 event_select, unit_mask;
212 213
214 if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
215 printk_once("kvm pmu: pin control bit is ignored\n");
216
213 pmc->eventsel = eventsel; 217 pmc->eventsel = eventsel;
214 218
215 stop_counter(pmc); 219 stop_counter(pmc);
@@ -220,7 +224,7 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
220 event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; 224 event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
221 unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; 225 unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
222 226
223 if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE | 227 if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
224 ARCH_PERFMON_EVENTSEL_INV | 228 ARCH_PERFMON_EVENTSEL_INV |
225 ARCH_PERFMON_EVENTSEL_CMASK))) { 229 ARCH_PERFMON_EVENTSEL_CMASK))) {
226 config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, 230 config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
@@ -365,7 +369,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
365 case MSR_CORE_PERF_FIXED_CTR_CTRL: 369 case MSR_CORE_PERF_FIXED_CTR_CTRL:
366 if (pmu->fixed_ctr_ctrl == data) 370 if (pmu->fixed_ctr_ctrl == data)
367 return 0; 371 return 0;
368 if (!(data & 0xfffffffffffff444)) { 372 if (!(data & 0xfffffffffffff444ull)) {
369 reprogram_fixed_counters(pmu, data); 373 reprogram_fixed_counters(pmu, data);
370 return 0; 374 return 0;
371 } 375 }
@@ -413,7 +417,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
413 struct kvm_pmc *counters; 417 struct kvm_pmc *counters;
414 u64 ctr; 418 u64 ctr;
415 419
416 pmc &= (3u << 30) - 1; 420 pmc &= ~(3u << 30);
417 if (!fixed && pmc >= pmu->nr_arch_gp_counters) 421 if (!fixed && pmc >= pmu->nr_arch_gp_counters)
418 return 1; 422 return 1;
419 if (fixed && pmc >= pmu->nr_arch_fixed_counters) 423 if (fixed && pmc >= pmu->nr_arch_fixed_counters)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e385214711cb..e334389e1c75 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -111,6 +111,12 @@ struct nested_state {
111#define MSRPM_OFFSETS 16 111#define MSRPM_OFFSETS 16
112static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; 112static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
113 113
114/*
115 * Set osvw_len to higher value when updated Revision Guides
116 * are published and we know what the new status bits are
117 */
118static uint64_t osvw_len = 4, osvw_status;
119
114struct vcpu_svm { 120struct vcpu_svm {
115 struct kvm_vcpu vcpu; 121 struct kvm_vcpu vcpu;
116 struct vmcb *vmcb; 122 struct vmcb *vmcb;
@@ -177,11 +183,13 @@ static bool npt_enabled = true;
177#else 183#else
178static bool npt_enabled; 184static bool npt_enabled;
179#endif 185#endif
180static int npt = 1;
181 186
187/* allow nested paging (virtualized MMU) for all guests */
188static int npt = true;
182module_param(npt, int, S_IRUGO); 189module_param(npt, int, S_IRUGO);
183 190
184static int nested = 1; 191/* allow nested virtualization in KVM/SVM */
192static int nested = true;
185module_param(nested, int, S_IRUGO); 193module_param(nested, int, S_IRUGO);
186 194
187static void svm_flush_tlb(struct kvm_vcpu *vcpu); 195static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -557,6 +565,27 @@ static void svm_init_erratum_383(void)
557 erratum_383_found = true; 565 erratum_383_found = true;
558} 566}
559 567
568static void svm_init_osvw(struct kvm_vcpu *vcpu)
569{
570 /*
571 * Guests should see errata 400 and 415 as fixed (assuming that
572 * HLT and IO instructions are intercepted).
573 */
574 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
575 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
576
577 /*
578 * By increasing VCPU's osvw.length to 3 we are telling the guest that
579 * all osvw.status bits inside that length, including bit 0 (which is
580 * reserved for erratum 298), are valid. However, if host processor's
581 * osvw_len is 0 then osvw_status[0] carries no information. We need to
582 * be conservative here and therefore we tell the guest that erratum 298
583 * is present (because we really don't know).
584 */
585 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
586 vcpu->arch.osvw.status |= 1;
587}
588
560static int has_svm(void) 589static int has_svm(void)
561{ 590{
562 const char *msg; 591 const char *msg;
@@ -623,6 +652,36 @@ static int svm_hardware_enable(void *garbage)
623 __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; 652 __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
624 } 653 }
625 654
655
656 /*
657 * Get OSVW bits.
658 *
659 * Note that it is possible to have a system with mixed processor
660 * revisions and therefore different OSVW bits. If bits are not the same
661 * on different processors then choose the worst case (i.e. if erratum
662 * is present on one processor and not on another then assume that the
663 * erratum is present everywhere).
664 */
665 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
666 uint64_t len, status = 0;
667 int err;
668
669 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
670 if (!err)
671 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
672 &err);
673
674 if (err)
675 osvw_status = osvw_len = 0;
676 else {
677 if (len < osvw_len)
678 osvw_len = len;
679 osvw_status |= status;
680 osvw_status &= (1ULL << osvw_len) - 1;
681 }
682 } else
683 osvw_status = osvw_len = 0;
684
626 svm_init_erratum_383(); 685 svm_init_erratum_383();
627 686
628 amd_pmu_enable_virt(); 687 amd_pmu_enable_virt();
@@ -910,20 +969,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
910 return _tsc; 969 return _tsc;
911} 970}
912 971
913static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) 972static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
914{ 973{
915 struct vcpu_svm *svm = to_svm(vcpu); 974 struct vcpu_svm *svm = to_svm(vcpu);
916 u64 ratio; 975 u64 ratio;
917 u64 khz; 976 u64 khz;
918 977
919 /* TSC scaling supported? */ 978 /* Guest TSC same frequency as host TSC? */
920 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) 979 if (!scale) {
980 svm->tsc_ratio = TSC_RATIO_DEFAULT;
921 return; 981 return;
982 }
922 983
923 /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ 984 /* TSC scaling supported? */
924 if (user_tsc_khz == 0) { 985 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
925 vcpu->arch.virtual_tsc_khz = 0; 986 if (user_tsc_khz > tsc_khz) {
926 svm->tsc_ratio = TSC_RATIO_DEFAULT; 987 vcpu->arch.tsc_catchup = 1;
988 vcpu->arch.tsc_always_catchup = 1;
989 } else
990 WARN(1, "user requested TSC rate below hardware speed\n");
927 return; 991 return;
928 } 992 }
929 993
@@ -938,7 +1002,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
938 user_tsc_khz); 1002 user_tsc_khz);
939 return; 1003 return;
940 } 1004 }
941 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
942 svm->tsc_ratio = ratio; 1005 svm->tsc_ratio = ratio;
943} 1006}
944 1007
@@ -958,10 +1021,14 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
958 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 1021 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
959} 1022}
960 1023
961static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) 1024static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
962{ 1025{
963 struct vcpu_svm *svm = to_svm(vcpu); 1026 struct vcpu_svm *svm = to_svm(vcpu);
964 1027
1028 WARN_ON(adjustment < 0);
1029 if (host)
1030 adjustment = svm_scale_tsc(vcpu, adjustment);
1031
965 svm->vmcb->control.tsc_offset += adjustment; 1032 svm->vmcb->control.tsc_offset += adjustment;
966 if (is_guest_mode(vcpu)) 1033 if (is_guest_mode(vcpu))
967 svm->nested.hsave->control.tsc_offset += adjustment; 1034 svm->nested.hsave->control.tsc_offset += adjustment;
@@ -1191,6 +1258,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1191 if (kvm_vcpu_is_bsp(&svm->vcpu)) 1258 if (kvm_vcpu_is_bsp(&svm->vcpu))
1192 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 1259 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1193 1260
1261 svm_init_osvw(&svm->vcpu);
1262
1194 return &svm->vcpu; 1263 return &svm->vcpu;
1195 1264
1196free_page4: 1265free_page4:
@@ -1268,6 +1337,21 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1268 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); 1337 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1269} 1338}
1270 1339
1340static void svm_update_cpl(struct kvm_vcpu *vcpu)
1341{
1342 struct vcpu_svm *svm = to_svm(vcpu);
1343 int cpl;
1344
1345 if (!is_protmode(vcpu))
1346 cpl = 0;
1347 else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
1348 cpl = 3;
1349 else
1350 cpl = svm->vmcb->save.cs.selector & 0x3;
1351
1352 svm->vmcb->save.cpl = cpl;
1353}
1354
1271static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) 1355static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1272{ 1356{
1273 return to_svm(vcpu)->vmcb->save.rflags; 1357 return to_svm(vcpu)->vmcb->save.rflags;
@@ -1275,7 +1359,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1275 1359
1276static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 1360static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1277{ 1361{
1362 unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
1363
1278 to_svm(vcpu)->vmcb->save.rflags = rflags; 1364 to_svm(vcpu)->vmcb->save.rflags = rflags;
1365 if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
1366 svm_update_cpl(vcpu);
1279} 1367}
1280 1368
1281static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) 1369static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
@@ -1543,9 +1631,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
1543 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; 1631 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1544 } 1632 }
1545 if (seg == VCPU_SREG_CS) 1633 if (seg == VCPU_SREG_CS)
1546 svm->vmcb->save.cpl 1634 svm_update_cpl(vcpu);
1547 = (svm->vmcb->save.cs.attrib
1548 >> SVM_SELECTOR_DPL_SHIFT) & 3;
1549 1635
1550 mark_dirty(svm->vmcb, VMCB_SEG); 1636 mark_dirty(svm->vmcb, VMCB_SEG);
1551} 1637}
@@ -2735,7 +2821,10 @@ static int task_switch_interception(struct vcpu_svm *svm)
2735 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) 2821 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2736 skip_emulated_instruction(&svm->vcpu); 2822 skip_emulated_instruction(&svm->vcpu);
2737 2823
2738 if (kvm_task_switch(&svm->vcpu, tss_selector, reason, 2824 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
2825 int_vec = -1;
2826
2827 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
2739 has_error_code, error_code) == EMULATE_FAIL) { 2828 has_error_code, error_code) == EMULATE_FAIL) {
2740 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 2829 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2741 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 2830 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3b4c8d8ad906..ad85adfef843 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -70,9 +70,6 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
70static bool __read_mostly vmm_exclusive = 1; 70static bool __read_mostly vmm_exclusive = 1;
71module_param(vmm_exclusive, bool, S_IRUGO); 71module_param(vmm_exclusive, bool, S_IRUGO);
72 72
73static bool __read_mostly yield_on_hlt = 1;
74module_param(yield_on_hlt, bool, S_IRUGO);
75
76static bool __read_mostly fasteoi = 1; 73static bool __read_mostly fasteoi = 1;
77module_param(fasteoi, bool, S_IRUGO); 74module_param(fasteoi, bool, S_IRUGO);
78 75
@@ -1457,7 +1454,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
1457#ifdef CONFIG_X86_64 1454#ifdef CONFIG_X86_64
1458 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 1455 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
1459#endif 1456#endif
1460 if (__thread_has_fpu(current)) 1457 if (user_has_fpu())
1461 clts(); 1458 clts();
1462 load_gdt(&__get_cpu_var(host_gdt)); 1459 load_gdt(&__get_cpu_var(host_gdt));
1463} 1460}
@@ -1655,17 +1652,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
1655 vmx_set_interrupt_shadow(vcpu, 0); 1652 vmx_set_interrupt_shadow(vcpu, 0);
1656} 1653}
1657 1654
1658static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
1659{
1660 /* Ensure that we clear the HLT state in the VMCS. We don't need to
1661 * explicitly skip the instruction because if the HLT state is set, then
1662 * the instruction is already executing and RIP has already been
1663 * advanced. */
1664 if (!yield_on_hlt &&
1665 vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
1666 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
1667}
1668
1669/* 1655/*
1670 * KVM wants to inject page-faults which it got to the guest. This function 1656 * KVM wants to inject page-faults which it got to the guest. This function
1671 * checks whether in a nested guest, we need to inject them to L1 or L2. 1657 * checks whether in a nested guest, we need to inject them to L1 or L2.
@@ -1678,7 +1664,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu)
1678 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 1664 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1679 1665
1680 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 1666 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
1681 if (!(vmcs12->exception_bitmap & PF_VECTOR)) 1667 if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
1682 return 0; 1668 return 0;
1683 1669
1684 nested_vmx_vmexit(vcpu); 1670 nested_vmx_vmexit(vcpu);
@@ -1718,7 +1704,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
1718 intr_info |= INTR_TYPE_HARD_EXCEPTION; 1704 intr_info |= INTR_TYPE_HARD_EXCEPTION;
1719 1705
1720 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 1706 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
1721 vmx_clear_hlt(vcpu);
1722} 1707}
1723 1708
1724static bool vmx_rdtscp_supported(void) 1709static bool vmx_rdtscp_supported(void)
@@ -1817,13 +1802,19 @@ u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
1817} 1802}
1818 1803
1819/* 1804/*
1820 * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ 1805 * Engage any workarounds for mis-matched TSC rates. Currently limited to
1821 * ioctl. In this case the call-back should update internal vmx state to make 1806 * software catchup for faster rates on slower CPUs.
1822 * the changes effective.
1823 */ 1807 */
1824static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) 1808static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1825{ 1809{
1826 /* Nothing to do here */ 1810 if (!scale)
1811 return;
1812
1813 if (user_tsc_khz > tsc_khz) {
1814 vcpu->arch.tsc_catchup = 1;
1815 vcpu->arch.tsc_always_catchup = 1;
1816 } else
1817 WARN(1, "user requested TSC rate below hardware speed\n");
1827} 1818}
1828 1819
1829/* 1820/*
@@ -1850,7 +1841,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1850 } 1841 }
1851} 1842}
1852 1843
1853static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) 1844static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
1854{ 1845{
1855 u64 offset = vmcs_read64(TSC_OFFSET); 1846 u64 offset = vmcs_read64(TSC_OFFSET);
1856 vmcs_write64(TSC_OFFSET, offset + adjustment); 1847 vmcs_write64(TSC_OFFSET, offset + adjustment);
@@ -2219,6 +2210,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
2219 msr = find_msr_entry(vmx, msr_index); 2210 msr = find_msr_entry(vmx, msr_index);
2220 if (msr) { 2211 if (msr) {
2221 msr->data = data; 2212 msr->data = data;
2213 if (msr - vmx->guest_msrs < vmx->save_nmsrs)
2214 kvm_set_shared_msr(msr->index, msr->data,
2215 msr->mask);
2222 break; 2216 break;
2223 } 2217 }
2224 ret = kvm_set_msr_common(vcpu, msr_index, data); 2218 ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -2399,7 +2393,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2399 &_pin_based_exec_control) < 0) 2393 &_pin_based_exec_control) < 0)
2400 return -EIO; 2394 return -EIO;
2401 2395
2402 min = 2396 min = CPU_BASED_HLT_EXITING |
2403#ifdef CONFIG_X86_64 2397#ifdef CONFIG_X86_64
2404 CPU_BASED_CR8_LOAD_EXITING | 2398 CPU_BASED_CR8_LOAD_EXITING |
2405 CPU_BASED_CR8_STORE_EXITING | 2399 CPU_BASED_CR8_STORE_EXITING |
@@ -2414,9 +2408,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2414 CPU_BASED_INVLPG_EXITING | 2408 CPU_BASED_INVLPG_EXITING |
2415 CPU_BASED_RDPMC_EXITING; 2409 CPU_BASED_RDPMC_EXITING;
2416 2410
2417 if (yield_on_hlt)
2418 min |= CPU_BASED_HLT_EXITING;
2419
2420 opt = CPU_BASED_TPR_SHADOW | 2411 opt = CPU_BASED_TPR_SHADOW |
2421 CPU_BASED_USE_MSR_BITMAPS | 2412 CPU_BASED_USE_MSR_BITMAPS |
2422 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 2413 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -3915,7 +3906,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
3915 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 3906 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
3916 3907
3917 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 3908 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
3909 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3918 vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ 3910 vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
3911 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3919 vmx_set_cr4(&vmx->vcpu, 0); 3912 vmx_set_cr4(&vmx->vcpu, 0);
3920 vmx_set_efer(&vmx->vcpu, 0); 3913 vmx_set_efer(&vmx->vcpu, 0);
3921 vmx_fpu_activate(&vmx->vcpu); 3914 vmx_fpu_activate(&vmx->vcpu);
@@ -4003,7 +3996,6 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
4003 } else 3996 } else
4004 intr |= INTR_TYPE_EXT_INTR; 3997 intr |= INTR_TYPE_EXT_INTR;
4005 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); 3998 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
4006 vmx_clear_hlt(vcpu);
4007} 3999}
4008 4000
4009static void vmx_inject_nmi(struct kvm_vcpu *vcpu) 4001static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -4035,7 +4027,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
4035 } 4027 }
4036 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 4028 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
4037 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 4029 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
4038 vmx_clear_hlt(vcpu);
4039} 4030}
4040 4031
4041static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 4032static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -4672,9 +4663,10 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
4672 bool has_error_code = false; 4663 bool has_error_code = false;
4673 u32 error_code = 0; 4664 u32 error_code = 0;
4674 u16 tss_selector; 4665 u16 tss_selector;
4675 int reason, type, idt_v; 4666 int reason, type, idt_v, idt_index;
4676 4667
4677 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); 4668 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
4669 idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
4678 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); 4670 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
4679 4671
4680 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 4672 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -4712,8 +4704,9 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
4712 type != INTR_TYPE_NMI_INTR)) 4704 type != INTR_TYPE_NMI_INTR))
4713 skip_emulated_instruction(vcpu); 4705 skip_emulated_instruction(vcpu);
4714 4706
4715 if (kvm_task_switch(vcpu, tss_selector, reason, 4707 if (kvm_task_switch(vcpu, tss_selector,
4716 has_error_code, error_code) == EMULATE_FAIL) { 4708 type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
4709 has_error_code, error_code) == EMULATE_FAIL) {
4717 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 4710 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4718 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 4711 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4719 vcpu->run->internal.ndata = 0; 4712 vcpu->run->internal.ndata = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9cbfc0698118..4044ce0bf7c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -57,6 +57,7 @@
57#include <asm/mtrr.h> 57#include <asm/mtrr.h>
58#include <asm/mce.h> 58#include <asm/mce.h>
59#include <asm/i387.h> 59#include <asm/i387.h>
60#include <asm/fpu-internal.h> /* Ugh! */
60#include <asm/xcr.h> 61#include <asm/xcr.h>
61#include <asm/pvclock.h> 62#include <asm/pvclock.h>
62#include <asm/div64.h> 63#include <asm/div64.h>
@@ -96,6 +97,10 @@ EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
96u32 kvm_max_guest_tsc_khz; 97u32 kvm_max_guest_tsc_khz;
97EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); 98EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
98 99
100/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
101static u32 tsc_tolerance_ppm = 250;
102module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
103
99#define KVM_NR_SHARED_MSRS 16 104#define KVM_NR_SHARED_MSRS 16
100 105
101struct kvm_shared_msrs_global { 106struct kvm_shared_msrs_global {
@@ -968,50 +973,51 @@ static inline u64 get_kernel_ns(void)
968static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); 973static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
969unsigned long max_tsc_khz; 974unsigned long max_tsc_khz;
970 975
971static inline int kvm_tsc_changes_freq(void) 976static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
972{ 977{
973 int cpu = get_cpu(); 978 return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
974 int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && 979 vcpu->arch.virtual_tsc_shift);
975 cpufreq_quick_get(cpu) != 0;
976 put_cpu();
977 return ret;
978} 980}
979 981
980u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) 982static u32 adjust_tsc_khz(u32 khz, s32 ppm)
981{ 983{
982 if (vcpu->arch.virtual_tsc_khz) 984 u64 v = (u64)khz * (1000000 + ppm);
983 return vcpu->arch.virtual_tsc_khz; 985 do_div(v, 1000000);
984 else 986 return v;
985 return __this_cpu_read(cpu_tsc_khz);
986} 987}
987 988
988static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) 989static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
989{ 990{
990 u64 ret; 991 u32 thresh_lo, thresh_hi;
991 992 int use_scaling = 0;
992 WARN_ON(preemptible());
993 if (kvm_tsc_changes_freq())
994 printk_once(KERN_WARNING
995 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
996 ret = nsec * vcpu_tsc_khz(vcpu);
997 do_div(ret, USEC_PER_SEC);
998 return ret;
999}
1000 993
1001static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
1002{
1003 /* Compute a scale to convert nanoseconds in TSC cycles */ 994 /* Compute a scale to convert nanoseconds in TSC cycles */
1004 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, 995 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
1005 &vcpu->arch.tsc_catchup_shift, 996 &vcpu->arch.virtual_tsc_shift,
1006 &vcpu->arch.tsc_catchup_mult); 997 &vcpu->arch.virtual_tsc_mult);
998 vcpu->arch.virtual_tsc_khz = this_tsc_khz;
999
1000 /*
1001 * Compute the variation in TSC rate which is acceptable
1002 * within the range of tolerance and decide if the
1003 * rate being applied is within that bounds of the hardware
1004 * rate. If so, no scaling or compensation need be done.
1005 */
1006 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1007 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1008 if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
1009 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
1010 use_scaling = 1;
1011 }
1012 kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
1007} 1013}
1008 1014
1009static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) 1015static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1010{ 1016{
1011 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, 1017 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1012 vcpu->arch.tsc_catchup_mult, 1018 vcpu->arch.virtual_tsc_mult,
1013 vcpu->arch.tsc_catchup_shift); 1019 vcpu->arch.virtual_tsc_shift);
1014 tsc += vcpu->arch.last_tsc_write; 1020 tsc += vcpu->arch.this_tsc_write;
1015 return tsc; 1021 return tsc;
1016} 1022}
1017 1023
@@ -1020,48 +1026,88 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1020 struct kvm *kvm = vcpu->kvm; 1026 struct kvm *kvm = vcpu->kvm;
1021 u64 offset, ns, elapsed; 1027 u64 offset, ns, elapsed;
1022 unsigned long flags; 1028 unsigned long flags;
1023 s64 sdiff; 1029 s64 usdiff;
1024 1030
1025 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1031 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1026 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); 1032 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1027 ns = get_kernel_ns(); 1033 ns = get_kernel_ns();
1028 elapsed = ns - kvm->arch.last_tsc_nsec; 1034 elapsed = ns - kvm->arch.last_tsc_nsec;
1029 sdiff = data - kvm->arch.last_tsc_write; 1035
1030 if (sdiff < 0) 1036 /* n.b - signed multiplication and division required */
1031 sdiff = -sdiff; 1037 usdiff = data - kvm->arch.last_tsc_write;
1038#ifdef CONFIG_X86_64
1039 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
1040#else
1041 /* do_div() only does unsigned */
1042 asm("idivl %2; xor %%edx, %%edx"
1043 : "=A"(usdiff)
1044 : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
1045#endif
1046 do_div(elapsed, 1000);
1047 usdiff -= elapsed;
1048 if (usdiff < 0)
1049 usdiff = -usdiff;
1032 1050
1033 /* 1051 /*
1034 * Special case: close write to TSC within 5 seconds of 1052 * Special case: TSC write with a small delta (1 second) of virtual
1035 * another CPU is interpreted as an attempt to synchronize 1053 * cycle time against real time is interpreted as an attempt to
1036 * The 5 seconds is to accommodate host load / swapping as 1054 * synchronize the CPU.
1037 * well as any reset of TSC during the boot process. 1055 *
1038 * 1056 * For a reliable TSC, we can match TSC offsets, and for an unstable
1039 * In that case, for a reliable TSC, we can match TSC offsets, 1057 * TSC, we add elapsed time in this computation. We could let the
1040 * or make a best guest using elapsed value. 1058 * compensation code attempt to catch up if we fall behind, but
1041 */ 1059 * it's better to try to match offsets from the beginning.
1042 if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && 1060 */
1043 elapsed < 5ULL * NSEC_PER_SEC) { 1061 if (usdiff < USEC_PER_SEC &&
1062 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1044 if (!check_tsc_unstable()) { 1063 if (!check_tsc_unstable()) {
1045 offset = kvm->arch.last_tsc_offset; 1064 offset = kvm->arch.cur_tsc_offset;
1046 pr_debug("kvm: matched tsc offset for %llu\n", data); 1065 pr_debug("kvm: matched tsc offset for %llu\n", data);
1047 } else { 1066 } else {
1048 u64 delta = nsec_to_cycles(vcpu, elapsed); 1067 u64 delta = nsec_to_cycles(vcpu, elapsed);
1049 offset += delta; 1068 data += delta;
1069 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1050 pr_debug("kvm: adjusted tsc offset by %llu\n", delta); 1070 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1051 } 1071 }
1052 ns = kvm->arch.last_tsc_nsec; 1072 } else {
1073 /*
1074 * We split periods of matched TSC writes into generations.
1075 * For each generation, we track the original measured
1076 * nanosecond time, offset, and write, so if TSCs are in
1077 * sync, we can match exact offset, and if not, we can match
1078 * exact software computaion in compute_guest_tsc()
1079 *
1080 * These values are tracked in kvm->arch.cur_xxx variables.
1081 */
1082 kvm->arch.cur_tsc_generation++;
1083 kvm->arch.cur_tsc_nsec = ns;
1084 kvm->arch.cur_tsc_write = data;
1085 kvm->arch.cur_tsc_offset = offset;
1086 pr_debug("kvm: new tsc generation %u, clock %llu\n",
1087 kvm->arch.cur_tsc_generation, data);
1053 } 1088 }
1089
1090 /*
1091 * We also track th most recent recorded KHZ, write and time to
1092 * allow the matching interval to be extended at each write.
1093 */
1054 kvm->arch.last_tsc_nsec = ns; 1094 kvm->arch.last_tsc_nsec = ns;
1055 kvm->arch.last_tsc_write = data; 1095 kvm->arch.last_tsc_write = data;
1056 kvm->arch.last_tsc_offset = offset; 1096 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1057 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1058 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1059 1097
1060 /* Reset of TSC must disable overshoot protection below */ 1098 /* Reset of TSC must disable overshoot protection below */
1061 vcpu->arch.hv_clock.tsc_timestamp = 0; 1099 vcpu->arch.hv_clock.tsc_timestamp = 0;
1062 vcpu->arch.last_tsc_write = data; 1100 vcpu->arch.last_guest_tsc = data;
1063 vcpu->arch.last_tsc_nsec = ns; 1101
1102 /* Keep track of which generation this VCPU has synchronized to */
1103 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
1104 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
1105 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
1106
1107 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1108 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1064} 1109}
1110
1065EXPORT_SYMBOL_GPL(kvm_write_tsc); 1111EXPORT_SYMBOL_GPL(kvm_write_tsc);
1066 1112
1067static int kvm_guest_time_update(struct kvm_vcpu *v) 1113static int kvm_guest_time_update(struct kvm_vcpu *v)
@@ -1077,7 +1123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1077 local_irq_save(flags); 1123 local_irq_save(flags);
1078 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); 1124 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
1079 kernel_ns = get_kernel_ns(); 1125 kernel_ns = get_kernel_ns();
1080 this_tsc_khz = vcpu_tsc_khz(v); 1126 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1081 if (unlikely(this_tsc_khz == 0)) { 1127 if (unlikely(this_tsc_khz == 0)) {
1082 local_irq_restore(flags); 1128 local_irq_restore(flags);
1083 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); 1129 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -1097,7 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1097 if (vcpu->tsc_catchup) { 1143 if (vcpu->tsc_catchup) {
1098 u64 tsc = compute_guest_tsc(v, kernel_ns); 1144 u64 tsc = compute_guest_tsc(v, kernel_ns);
1099 if (tsc > tsc_timestamp) { 1145 if (tsc > tsc_timestamp) {
1100 kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp); 1146 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
1101 tsc_timestamp = tsc; 1147 tsc_timestamp = tsc;
1102 } 1148 }
1103 } 1149 }
@@ -1129,7 +1175,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1129 * observed by the guest and ensure the new system time is greater. 1175 * observed by the guest and ensure the new system time is greater.
1130 */ 1176 */
1131 max_kernel_ns = 0; 1177 max_kernel_ns = 0;
1132 if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) { 1178 if (vcpu->hv_clock.tsc_timestamp) {
1133 max_kernel_ns = vcpu->last_guest_tsc - 1179 max_kernel_ns = vcpu->last_guest_tsc -
1134 vcpu->hv_clock.tsc_timestamp; 1180 vcpu->hv_clock.tsc_timestamp;
1135 max_kernel_ns = pvclock_scale_delta(max_kernel_ns, 1181 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
@@ -1162,12 +1208,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1162 */ 1208 */
1163 vcpu->hv_clock.version += 2; 1209 vcpu->hv_clock.version += 2;
1164 1210
1165 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); 1211 shared_kaddr = kmap_atomic(vcpu->time_page);
1166 1212
1167 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, 1213 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
1168 sizeof(vcpu->hv_clock)); 1214 sizeof(vcpu->hv_clock));
1169 1215
1170 kunmap_atomic(shared_kaddr, KM_USER0); 1216 kunmap_atomic(shared_kaddr);
1171 1217
1172 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 1218 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
1173 return 0; 1219 return 0;
@@ -1503,6 +1549,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1503 case MSR_K7_HWCR: 1549 case MSR_K7_HWCR:
1504 data &= ~(u64)0x40; /* ignore flush filter disable */ 1550 data &= ~(u64)0x40; /* ignore flush filter disable */
1505 data &= ~(u64)0x100; /* ignore ignne emulation enable */ 1551 data &= ~(u64)0x100; /* ignore ignne emulation enable */
1552 data &= ~(u64)0x8; /* ignore TLB cache disable */
1506 if (data != 0) { 1553 if (data != 0) {
1507 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 1554 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1508 data); 1555 data);
@@ -1675,6 +1722,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1675 */ 1722 */
1676 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); 1723 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
1677 break; 1724 break;
1725 case MSR_AMD64_OSVW_ID_LENGTH:
1726 if (!guest_cpuid_has_osvw(vcpu))
1727 return 1;
1728 vcpu->arch.osvw.length = data;
1729 break;
1730 case MSR_AMD64_OSVW_STATUS:
1731 if (!guest_cpuid_has_osvw(vcpu))
1732 return 1;
1733 vcpu->arch.osvw.status = data;
1734 break;
1678 default: 1735 default:
1679 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 1736 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1680 return xen_hvm_config(vcpu, data); 1737 return xen_hvm_config(vcpu, data);
@@ -1959,6 +2016,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1959 */ 2016 */
1960 data = 0xbe702111; 2017 data = 0xbe702111;
1961 break; 2018 break;
2019 case MSR_AMD64_OSVW_ID_LENGTH:
2020 if (!guest_cpuid_has_osvw(vcpu))
2021 return 1;
2022 data = vcpu->arch.osvw.length;
2023 break;
2024 case MSR_AMD64_OSVW_STATUS:
2025 if (!guest_cpuid_has_osvw(vcpu))
2026 return 1;
2027 data = vcpu->arch.osvw.status;
2028 break;
1962 default: 2029 default:
1963 if (kvm_pmu_msr(vcpu, msr)) 2030 if (kvm_pmu_msr(vcpu, msr))
1964 return kvm_pmu_get_msr(vcpu, msr, pdata); 2031 return kvm_pmu_get_msr(vcpu, msr, pdata);
@@ -2079,6 +2146,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2079 case KVM_CAP_XSAVE: 2146 case KVM_CAP_XSAVE:
2080 case KVM_CAP_ASYNC_PF: 2147 case KVM_CAP_ASYNC_PF:
2081 case KVM_CAP_GET_TSC_KHZ: 2148 case KVM_CAP_GET_TSC_KHZ:
2149 case KVM_CAP_PCI_2_3:
2082 r = 1; 2150 r = 1;
2083 break; 2151 break;
2084 case KVM_CAP_COALESCED_MMIO: 2152 case KVM_CAP_COALESCED_MMIO:
@@ -2213,19 +2281,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2213 } 2281 }
2214 2282
2215 kvm_x86_ops->vcpu_load(vcpu, cpu); 2283 kvm_x86_ops->vcpu_load(vcpu, cpu);
2216 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2217 /* Make sure TSC doesn't go backwards */
2218 s64 tsc_delta;
2219 u64 tsc;
2220 2284
2221 tsc = kvm_x86_ops->read_l1_tsc(vcpu); 2285 /* Apply any externally detected TSC adjustments (due to suspend) */
2222 tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : 2286 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2223 tsc - vcpu->arch.last_guest_tsc; 2287 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2288 vcpu->arch.tsc_offset_adjustment = 0;
2289 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
2290 }
2224 2291
2292 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2293 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2294 native_read_tsc() - vcpu->arch.last_host_tsc;
2225 if (tsc_delta < 0) 2295 if (tsc_delta < 0)
2226 mark_tsc_unstable("KVM discovered backwards TSC"); 2296 mark_tsc_unstable("KVM discovered backwards TSC");
2227 if (check_tsc_unstable()) { 2297 if (check_tsc_unstable()) {
2228 kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); 2298 u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
2299 vcpu->arch.last_guest_tsc);
2300 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2229 vcpu->arch.tsc_catchup = 1; 2301 vcpu->arch.tsc_catchup = 1;
2230 } 2302 }
2231 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 2303 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2242,7 +2314,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2242{ 2314{
2243 kvm_x86_ops->vcpu_put(vcpu); 2315 kvm_x86_ops->vcpu_put(vcpu);
2244 kvm_put_guest_fpu(vcpu); 2316 kvm_put_guest_fpu(vcpu);
2245 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); 2317 vcpu->arch.last_host_tsc = native_read_tsc();
2246} 2318}
2247 2319
2248static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2320static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -2784,26 +2856,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2784 u32 user_tsc_khz; 2856 u32 user_tsc_khz;
2785 2857
2786 r = -EINVAL; 2858 r = -EINVAL;
2787 if (!kvm_has_tsc_control)
2788 break;
2789
2790 user_tsc_khz = (u32)arg; 2859 user_tsc_khz = (u32)arg;
2791 2860
2792 if (user_tsc_khz >= kvm_max_guest_tsc_khz) 2861 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
2793 goto out; 2862 goto out;
2794 2863
2795 kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); 2864 if (user_tsc_khz == 0)
2865 user_tsc_khz = tsc_khz;
2866
2867 kvm_set_tsc_khz(vcpu, user_tsc_khz);
2796 2868
2797 r = 0; 2869 r = 0;
2798 goto out; 2870 goto out;
2799 } 2871 }
2800 case KVM_GET_TSC_KHZ: { 2872 case KVM_GET_TSC_KHZ: {
2801 r = -EIO; 2873 r = vcpu->arch.virtual_tsc_khz;
2802 if (check_tsc_unstable())
2803 goto out;
2804
2805 r = vcpu_tsc_khz(vcpu);
2806
2807 goto out; 2874 goto out;
2808 } 2875 }
2809 default: 2876 default:
@@ -2814,6 +2881,11 @@ out:
2814 return r; 2881 return r;
2815} 2882}
2816 2883
2884int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2885{
2886 return VM_FAULT_SIGBUS;
2887}
2888
2817static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) 2889static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
2818{ 2890{
2819 int ret; 2891 int ret;
@@ -2997,6 +3069,8 @@ static void write_protect_slot(struct kvm *kvm,
2997 unsigned long *dirty_bitmap, 3069 unsigned long *dirty_bitmap,
2998 unsigned long nr_dirty_pages) 3070 unsigned long nr_dirty_pages)
2999{ 3071{
3072 spin_lock(&kvm->mmu_lock);
3073
3000 /* Not many dirty pages compared to # of shadow pages. */ 3074 /* Not many dirty pages compared to # of shadow pages. */
3001 if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { 3075 if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
3002 unsigned long gfn_offset; 3076 unsigned long gfn_offset;
@@ -3004,16 +3078,13 @@ static void write_protect_slot(struct kvm *kvm,
3004 for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { 3078 for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
3005 unsigned long gfn = memslot->base_gfn + gfn_offset; 3079 unsigned long gfn = memslot->base_gfn + gfn_offset;
3006 3080
3007 spin_lock(&kvm->mmu_lock);
3008 kvm_mmu_rmap_write_protect(kvm, gfn, memslot); 3081 kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
3009 spin_unlock(&kvm->mmu_lock);
3010 } 3082 }
3011 kvm_flush_remote_tlbs(kvm); 3083 kvm_flush_remote_tlbs(kvm);
3012 } else { 3084 } else
3013 spin_lock(&kvm->mmu_lock);
3014 kvm_mmu_slot_remove_write_access(kvm, memslot->id); 3085 kvm_mmu_slot_remove_write_access(kvm, memslot->id);
3015 spin_unlock(&kvm->mmu_lock); 3086
3016 } 3087 spin_unlock(&kvm->mmu_lock);
3017} 3088}
3018 3089
3019/* 3090/*
@@ -3132,6 +3203,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
3132 r = -EEXIST; 3203 r = -EEXIST;
3133 if (kvm->arch.vpic) 3204 if (kvm->arch.vpic)
3134 goto create_irqchip_unlock; 3205 goto create_irqchip_unlock;
3206 r = -EINVAL;
3207 if (atomic_read(&kvm->online_vcpus))
3208 goto create_irqchip_unlock;
3135 r = -ENOMEM; 3209 r = -ENOMEM;
3136 vpic = kvm_create_pic(kvm); 3210 vpic = kvm_create_pic(kvm);
3137 if (vpic) { 3211 if (vpic) {
@@ -3848,7 +3922,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
3848 goto emul_write; 3922 goto emul_write;
3849 } 3923 }
3850 3924
3851 kaddr = kmap_atomic(page, KM_USER0); 3925 kaddr = kmap_atomic(page);
3852 kaddr += offset_in_page(gpa); 3926 kaddr += offset_in_page(gpa);
3853 switch (bytes) { 3927 switch (bytes) {
3854 case 1: 3928 case 1:
@@ -3866,7 +3940,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
3866 default: 3940 default:
3867 BUG(); 3941 BUG();
3868 } 3942 }
3869 kunmap_atomic(kaddr, KM_USER0); 3943 kunmap_atomic(kaddr);
3870 kvm_release_page_dirty(page); 3944 kvm_release_page_dirty(page);
3871 3945
3872 if (!exchanged) 3946 if (!exchanged)
@@ -4062,6 +4136,11 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4062 return res; 4136 return res;
4063} 4137}
4064 4138
4139static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
4140{
4141 kvm_set_rflags(emul_to_vcpu(ctxt), val);
4142}
4143
4065static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) 4144static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4066{ 4145{
4067 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); 4146 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
@@ -4243,6 +4322,7 @@ static struct x86_emulate_ops emulate_ops = {
4243 .set_idt = emulator_set_idt, 4322 .set_idt = emulator_set_idt,
4244 .get_cr = emulator_get_cr, 4323 .get_cr = emulator_get_cr,
4245 .set_cr = emulator_set_cr, 4324 .set_cr = emulator_set_cr,
4325 .set_rflags = emulator_set_rflags,
4246 .cpl = emulator_get_cpl, 4326 .cpl = emulator_get_cpl,
4247 .get_dr = emulator_get_dr, 4327 .get_dr = emulator_get_dr,
4248 .set_dr = emulator_set_dr, 4328 .set_dr = emulator_set_dr,
@@ -5287,6 +5367,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5287 profile_hit(KVM_PROFILING, (void *)rip); 5367 profile_hit(KVM_PROFILING, (void *)rip);
5288 } 5368 }
5289 5369
5370 if (unlikely(vcpu->arch.tsc_always_catchup))
5371 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5290 5372
5291 kvm_lapic_sync_from_vapic(vcpu); 5373 kvm_lapic_sync_from_vapic(vcpu);
5292 5374
@@ -5586,15 +5668,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
5586 return 0; 5668 return 0;
5587} 5669}
5588 5670
5589int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, 5671int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
5590 bool has_error_code, u32 error_code) 5672 int reason, bool has_error_code, u32 error_code)
5591{ 5673{
5592 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 5674 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5593 int ret; 5675 int ret;
5594 5676
5595 init_emulate_ctxt(vcpu); 5677 init_emulate_ctxt(vcpu);
5596 5678
5597 ret = emulator_task_switch(ctxt, tss_selector, reason, 5679 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
5598 has_error_code, error_code); 5680 has_error_code, error_code);
5599 5681
5600 if (ret) 5682 if (ret)
@@ -5927,13 +6009,88 @@ int kvm_arch_hardware_enable(void *garbage)
5927 struct kvm *kvm; 6009 struct kvm *kvm;
5928 struct kvm_vcpu *vcpu; 6010 struct kvm_vcpu *vcpu;
5929 int i; 6011 int i;
6012 int ret;
6013 u64 local_tsc;
6014 u64 max_tsc = 0;
6015 bool stable, backwards_tsc = false;
5930 6016
5931 kvm_shared_msr_cpu_online(); 6017 kvm_shared_msr_cpu_online();
5932 list_for_each_entry(kvm, &vm_list, vm_list) 6018 ret = kvm_x86_ops->hardware_enable(garbage);
5933 kvm_for_each_vcpu(i, vcpu, kvm) 6019 if (ret != 0)
5934 if (vcpu->cpu == smp_processor_id()) 6020 return ret;
5935 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 6021
5936 return kvm_x86_ops->hardware_enable(garbage); 6022 local_tsc = native_read_tsc();
6023 stable = !check_tsc_unstable();
6024 list_for_each_entry(kvm, &vm_list, vm_list) {
6025 kvm_for_each_vcpu(i, vcpu, kvm) {
6026 if (!stable && vcpu->cpu == smp_processor_id())
6027 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
6028 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6029 backwards_tsc = true;
6030 if (vcpu->arch.last_host_tsc > max_tsc)
6031 max_tsc = vcpu->arch.last_host_tsc;
6032 }
6033 }
6034 }
6035
6036 /*
6037 * Sometimes, even reliable TSCs go backwards. This happens on
6038 * platforms that reset TSC during suspend or hibernate actions, but
6039 * maintain synchronization. We must compensate. Fortunately, we can
6040 * detect that condition here, which happens early in CPU bringup,
6041 * before any KVM threads can be running. Unfortunately, we can't
6042 * bring the TSCs fully up to date with real time, as we aren't yet far
6043 * enough into CPU bringup that we know how much real time has actually
6044 * elapsed; our helper function, get_kernel_ns() will be using boot
6045 * variables that haven't been updated yet.
6046 *
6047 * So we simply find the maximum observed TSC above, then record the
6048 * adjustment to TSC in each VCPU. When the VCPU later gets loaded,
6049 * the adjustment will be applied. Note that we accumulate
6050 * adjustments, in case multiple suspend cycles happen before some VCPU
6051 * gets a chance to run again. In the event that no KVM threads get a
6052 * chance to run, we will miss the entire elapsed period, as we'll have
6053 * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
6054 * loose cycle time. This isn't too big a deal, since the loss will be
6055 * uniform across all VCPUs (not to mention the scenario is extremely
6056 * unlikely). It is possible that a second hibernate recovery happens
6057 * much faster than a first, causing the observed TSC here to be
6058 * smaller; this would require additional padding adjustment, which is
6059 * why we set last_host_tsc to the local tsc observed here.
6060 *
6061 * N.B. - this code below runs only on platforms with reliable TSC,
6062 * as that is the only way backwards_tsc is set above. Also note
6063 * that this runs for ALL vcpus, which is not a bug; all VCPUs should
6064 * have the same delta_cyc adjustment applied if backwards_tsc
6065 * is detected. Note further, this adjustment is only done once,
6066 * as we reset last_host_tsc on all VCPUs to stop this from being
6067 * called multiple times (one for each physical CPU bringup).
6068 *
6069 * Platforms with unnreliable TSCs don't have to deal with this, they
6070 * will be compensated by the logic in vcpu_load, which sets the TSC to
6071 * catchup mode. This will catchup all VCPUs to real time, but cannot
6072 * guarantee that they stay in perfect synchronization.
6073 */
6074 if (backwards_tsc) {
6075 u64 delta_cyc = max_tsc - local_tsc;
6076 list_for_each_entry(kvm, &vm_list, vm_list) {
6077 kvm_for_each_vcpu(i, vcpu, kvm) {
6078 vcpu->arch.tsc_offset_adjustment += delta_cyc;
6079 vcpu->arch.last_host_tsc = local_tsc;
6080 }
6081
6082 /*
6083 * We have to disable TSC offset matching.. if you were
6084 * booting a VM while issuing an S4 host suspend....
6085 * you may have some problem. Solving this issue is
6086 * left as an exercise to the reader.
6087 */
6088 kvm->arch.last_tsc_nsec = 0;
6089 kvm->arch.last_tsc_write = 0;
6090 }
6091
6092 }
6093 return 0;
5937} 6094}
5938 6095
5939void kvm_arch_hardware_disable(void *garbage) 6096void kvm_arch_hardware_disable(void *garbage)
@@ -5957,6 +6114,11 @@ void kvm_arch_check_processor_compat(void *rtn)
5957 kvm_x86_ops->check_processor_compatibility(rtn); 6114 kvm_x86_ops->check_processor_compatibility(rtn);
5958} 6115}
5959 6116
6117bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6118{
6119 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6120}
6121
5960int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 6122int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5961{ 6123{
5962 struct page *page; 6124 struct page *page;
@@ -5979,7 +6141,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5979 } 6141 }
5980 vcpu->arch.pio_data = page_address(page); 6142 vcpu->arch.pio_data = page_address(page);
5981 6143
5982 kvm_init_tsc_catchup(vcpu, max_tsc_khz); 6144 kvm_set_tsc_khz(vcpu, max_tsc_khz);
5983 6145
5984 r = kvm_mmu_create(vcpu); 6146 r = kvm_mmu_create(vcpu);
5985 if (r < 0) 6147 if (r < 0)
@@ -6031,8 +6193,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
6031 free_page((unsigned long)vcpu->arch.pio_data); 6193 free_page((unsigned long)vcpu->arch.pio_data);
6032} 6194}
6033 6195
6034int kvm_arch_init_vm(struct kvm *kvm) 6196int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6035{ 6197{
6198 if (type)
6199 return -EINVAL;
6200
6036 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 6201 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6037 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 6202 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
6038 6203
@@ -6092,6 +6257,65 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
6092 put_page(kvm->arch.ept_identity_pagetable); 6257 put_page(kvm->arch.ept_identity_pagetable);
6093} 6258}
6094 6259
6260void kvm_arch_free_memslot(struct kvm_memory_slot *free,
6261 struct kvm_memory_slot *dont)
6262{
6263 int i;
6264
6265 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6266 if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
6267 vfree(free->arch.lpage_info[i]);
6268 free->arch.lpage_info[i] = NULL;
6269 }
6270 }
6271}
6272
6273int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
6274{
6275 int i;
6276
6277 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6278 unsigned long ugfn;
6279 int lpages;
6280 int level = i + 2;
6281
6282 lpages = gfn_to_index(slot->base_gfn + npages - 1,
6283 slot->base_gfn, level) + 1;
6284
6285 slot->arch.lpage_info[i] =
6286 vzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
6287 if (!slot->arch.lpage_info[i])
6288 goto out_free;
6289
6290 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
6291 slot->arch.lpage_info[i][0].write_count = 1;
6292 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
6293 slot->arch.lpage_info[i][lpages - 1].write_count = 1;
6294 ugfn = slot->userspace_addr >> PAGE_SHIFT;
6295 /*
6296 * If the gfn and userspace address are not aligned wrt each
6297 * other, or if explicitly asked to, disable large page
6298 * support for this slot
6299 */
6300 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
6301 !kvm_largepages_enabled()) {
6302 unsigned long j;
6303
6304 for (j = 0; j < lpages; ++j)
6305 slot->arch.lpage_info[i][j].write_count = 1;
6306 }
6307 }
6308
6309 return 0;
6310
6311out_free:
6312 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6313 vfree(slot->arch.lpage_info[i]);
6314 slot->arch.lpage_info[i] = NULL;
6315 }
6316 return -ENOMEM;
6317}
6318
6095int kvm_arch_prepare_memory_region(struct kvm *kvm, 6319int kvm_arch_prepare_memory_region(struct kvm *kvm,
6096 struct kvm_memory_slot *memslot, 6320 struct kvm_memory_slot *memslot,
6097 struct kvm_memory_slot old, 6321 struct kvm_memory_slot old,
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 042f6826bf57..a0b4a350daa7 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -1,59 +1,4 @@
1#include <linux/compiler.h> 1#define ATOMIC64_EXPORT EXPORT_SYMBOL
2#include <linux/module.h>
3#include <linux/types.h>
4 2
5#include <asm/processor.h> 3#include <linux/export.h>
6#include <asm/cmpxchg.h>
7#include <linux/atomic.h> 4#include <linux/atomic.h>
8
9long long atomic64_read_cx8(long long, const atomic64_t *v);
10EXPORT_SYMBOL(atomic64_read_cx8);
11long long atomic64_set_cx8(long long, const atomic64_t *v);
12EXPORT_SYMBOL(atomic64_set_cx8);
13long long atomic64_xchg_cx8(long long, unsigned high);
14EXPORT_SYMBOL(atomic64_xchg_cx8);
15long long atomic64_add_return_cx8(long long a, atomic64_t *v);
16EXPORT_SYMBOL(atomic64_add_return_cx8);
17long long atomic64_sub_return_cx8(long long a, atomic64_t *v);
18EXPORT_SYMBOL(atomic64_sub_return_cx8);
19long long atomic64_inc_return_cx8(long long a, atomic64_t *v);
20EXPORT_SYMBOL(atomic64_inc_return_cx8);
21long long atomic64_dec_return_cx8(long long a, atomic64_t *v);
22EXPORT_SYMBOL(atomic64_dec_return_cx8);
23long long atomic64_dec_if_positive_cx8(atomic64_t *v);
24EXPORT_SYMBOL(atomic64_dec_if_positive_cx8);
25int atomic64_inc_not_zero_cx8(atomic64_t *v);
26EXPORT_SYMBOL(atomic64_inc_not_zero_cx8);
27int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u);
28EXPORT_SYMBOL(atomic64_add_unless_cx8);
29
30#ifndef CONFIG_X86_CMPXCHG64
31long long atomic64_read_386(long long, const atomic64_t *v);
32EXPORT_SYMBOL(atomic64_read_386);
33long long atomic64_set_386(long long, const atomic64_t *v);
34EXPORT_SYMBOL(atomic64_set_386);
35long long atomic64_xchg_386(long long, unsigned high);
36EXPORT_SYMBOL(atomic64_xchg_386);
37long long atomic64_add_return_386(long long a, atomic64_t *v);
38EXPORT_SYMBOL(atomic64_add_return_386);
39long long atomic64_sub_return_386(long long a, atomic64_t *v);
40EXPORT_SYMBOL(atomic64_sub_return_386);
41long long atomic64_inc_return_386(long long a, atomic64_t *v);
42EXPORT_SYMBOL(atomic64_inc_return_386);
43long long atomic64_dec_return_386(long long a, atomic64_t *v);
44EXPORT_SYMBOL(atomic64_dec_return_386);
45long long atomic64_add_386(long long a, atomic64_t *v);
46EXPORT_SYMBOL(atomic64_add_386);
47long long atomic64_sub_386(long long a, atomic64_t *v);
48EXPORT_SYMBOL(atomic64_sub_386);
49long long atomic64_inc_386(long long a, atomic64_t *v);
50EXPORT_SYMBOL(atomic64_inc_386);
51long long atomic64_dec_386(long long a, atomic64_t *v);
52EXPORT_SYMBOL(atomic64_dec_386);
53long long atomic64_dec_if_positive_386(atomic64_t *v);
54EXPORT_SYMBOL(atomic64_dec_if_positive_386);
55int atomic64_inc_not_zero_386(atomic64_t *v);
56EXPORT_SYMBOL(atomic64_inc_not_zero_386);
57int atomic64_add_unless_386(atomic64_t *v, long long a, long long u);
58EXPORT_SYMBOL(atomic64_add_unless_386);
59#endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e8e7e0d06f42..00933d5e992f 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -137,13 +137,13 @@ BEGIN(dec_return)
137RET_ENDP 137RET_ENDP
138#undef v 138#undef v
139 139
140#define v %ecx 140#define v %esi
141BEGIN(add_unless) 141BEGIN(add_unless)
142 addl %eax, %esi 142 addl %eax, %ecx
143 adcl %edx, %edi 143 adcl %edx, %edi
144 addl (v), %eax 144 addl (v), %eax
145 adcl 4(v), %edx 145 adcl 4(v), %edx
146 cmpl %eax, %esi 146 cmpl %eax, %ecx
147 je 3f 147 je 3f
1481: 1481:
149 movl %eax, (v) 149 movl %eax, (v)
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 391a083674b4..f5cc9eb1d51b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
55ENTRY(atomic64_xchg_cx8) 55ENTRY(atomic64_xchg_cx8)
56 CFI_STARTPROC 56 CFI_STARTPROC
57 57
58 movl %ebx, %eax
59 movl %ecx, %edx
601: 581:
61 LOCK_PREFIX 59 LOCK_PREFIX
62 cmpxchg8b (%esi) 60 cmpxchg8b (%esi)
@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
78 movl %edx, %edi 76 movl %edx, %edi
79 movl %ecx, %ebp 77 movl %ecx, %ebp
80 78
81 read64 %ebp 79 read64 %ecx
821: 801:
83 movl %eax, %ebx 81 movl %eax, %ebx
84 movl %edx, %ecx 82 movl %edx, %ecx
@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
159 SAVE ebx 157 SAVE ebx
160/* these just push these two parameters on the stack */ 158/* these just push these two parameters on the stack */
161 SAVE edi 159 SAVE edi
162 SAVE esi 160 SAVE ecx
163 161
164 movl %ecx, %ebp 162 movl %eax, %ebp
165 movl %eax, %esi
166 movl %edx, %edi 163 movl %edx, %edi
167 164
168 read64 %ebp 165 read64 %esi
1691: 1661:
170 cmpl %eax, 0(%esp) 167 cmpl %eax, 0(%esp)
171 je 4f 168 je 4f
1722: 1692:
173 movl %eax, %ebx 170 movl %eax, %ebx
174 movl %edx, %ecx 171 movl %edx, %ecx
175 addl %esi, %ebx 172 addl %ebp, %ebx
176 adcl %edi, %ecx 173 adcl %edi, %ecx
177 LOCK_PREFIX 174 LOCK_PREFIX
178 cmpxchg8b (%ebp) 175 cmpxchg8b (%esi)
179 jne 1b 176 jne 1b
180 177
181 movl $1, %eax 178 movl $1, %eax
@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
199 196
200 read64 %esi 197 read64 %esi
2011: 1981:
202 testl %eax, %eax 199 movl %eax, %ecx
203 je 4f 200 orl %edx, %ecx
2042: 201 jz 3f
205 movl %eax, %ebx 202 movl %eax, %ebx
206 movl %edx, %ecx 203 xorl %ecx, %ecx
207 addl $1, %ebx 204 addl $1, %ebx
208 adcl $0, %ecx 205 adcl %edx, %ecx
209 LOCK_PREFIX 206 LOCK_PREFIX
210 cmpxchg8b (%esi) 207 cmpxchg8b (%esi)
211 jne 1b 208 jne 1b
@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
2143: 2113:
215 RESTORE ebx 212 RESTORE ebx
216 ret 213 ret
2174:
218 testl %edx, %edx
219 jne 2b
220 jmp 3b
221 CFI_ENDPROC 214 CFI_ENDPROC
222ENDPROC(atomic64_inc_not_zero_cx8) 215ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 01c805ba5359..6b34d04d096a 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
20 20
21ENTRY(copy_page) 21ENTRY(copy_page)
22 CFI_STARTPROC 22 CFI_STARTPROC
23 subq $3*8,%rsp 23 subq $2*8,%rsp
24 CFI_ADJUST_CFA_OFFSET 3*8 24 CFI_ADJUST_CFA_OFFSET 2*8
25 movq %rbx,(%rsp) 25 movq %rbx,(%rsp)
26 CFI_REL_OFFSET rbx, 0 26 CFI_REL_OFFSET rbx, 0
27 movq %r12,1*8(%rsp) 27 movq %r12,1*8(%rsp)
28 CFI_REL_OFFSET r12, 1*8 28 CFI_REL_OFFSET r12, 1*8
29 movq %r13,2*8(%rsp)
30 CFI_REL_OFFSET r13, 2*8
31 29
32 movl $(4096/64)-5,%ecx 30 movl $(4096/64)-5,%ecx
33 .p2align 4 31 .p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
91 CFI_RESTORE rbx 89 CFI_RESTORE rbx
92 movq 1*8(%rsp),%r12 90 movq 1*8(%rsp),%r12
93 CFI_RESTORE r12 91 CFI_RESTORE r12
94 movq 2*8(%rsp),%r13 92 addq $2*8,%rsp
95 CFI_RESTORE r13 93 CFI_ADJUST_CFA_OFFSET -2*8
96 addq $3*8,%rsp
97 CFI_ADJUST_CFA_OFFSET -3*8
98 ret 94 ret
99.Lcopy_page_end: 95.Lcopy_page_end:
100 CFI_ENDPROC 96 CFI_ENDPROC
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fc45ba887d05..e395693abdb1 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops)
48} 48}
49 49
50/* TSC based delay: */ 50/* TSC based delay: */
51static void delay_tsc(unsigned long loops) 51static void delay_tsc(unsigned long __loops)
52{ 52{
53 unsigned long bclock, now; 53 u32 bclock, now, loops = __loops;
54 int cpu; 54 int cpu;
55 55
56 preempt_disable(); 56 preempt_disable();
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0ecdea..1c273be7c97e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
27 .section .altinstr_replacement, "ax", @progbits 27 .section .altinstr_replacement, "ax", @progbits
28.Lmemcpy_c: 28.Lmemcpy_c:
29 movq %rdi, %rax 29 movq %rdi, %rax
30 30 movq %rdx, %rcx
31 movl %edx, %ecx 31 shrq $3, %rcx
32 shrl $3, %ecx
33 andl $7, %edx 32 andl $7, %edx
34 rep movsq 33 rep movsq
35 movl %edx, %ecx 34 movl %edx, %ecx
@@ -48,8 +47,7 @@
48 .section .altinstr_replacement, "ax", @progbits 47 .section .altinstr_replacement, "ax", @progbits
49.Lmemcpy_c_e: 48.Lmemcpy_c_e:
50 movq %rdi, %rax 49 movq %rdi, %rax
51 50 movq %rdx, %rcx
52 movl %edx, %ecx
53 rep movsb 51 rep movsb
54 ret 52 ret
55.Lmemcpy_e_e: 53.Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
60 CFI_STARTPROC 58 CFI_STARTPROC
61 movq %rdi, %rax 59 movq %rdi, %rax
62 60
63 /* 61 cmpq $0x20, %rdx
64 * Use 32bit CMP here to avoid long NOP padding.
65 */
66 cmp $0x20, %edx
67 jb .Lhandle_tail 62 jb .Lhandle_tail
68 63
69 /* 64 /*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
72 */ 67 */
73 cmp %dil, %sil 68 cmp %dil, %sil
74 jl .Lcopy_backward 69 jl .Lcopy_backward
75 subl $0x20, %edx 70 subq $0x20, %rdx
76.Lcopy_forward_loop: 71.Lcopy_forward_loop:
77 subq $0x20, %rdx 72 subq $0x20, %rdx
78 73
@@ -91,7 +86,7 @@ ENTRY(memcpy)
91 movq %r11, 3*8(%rdi) 86 movq %r11, 3*8(%rdi)
92 leaq 4*8(%rdi), %rdi 87 leaq 4*8(%rdi), %rdi
93 jae .Lcopy_forward_loop 88 jae .Lcopy_forward_loop
94 addq $0x20, %rdx 89 addl $0x20, %edx
95 jmp .Lhandle_tail 90 jmp .Lhandle_tail
96 91
97.Lcopy_backward: 92.Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
123 /* 118 /*
124 * Calculate copy position to head. 119 * Calculate copy position to head.
125 */ 120 */
126 addq $0x20, %rdx 121 addl $0x20, %edx
127 subq %rdx, %rsi 122 subq %rdx, %rsi
128 subq %rdx, %rdi 123 subq %rdx, %rdi
129.Lhandle_tail: 124.Lhandle_tail:
130 cmpq $16, %rdx 125 cmpl $16, %edx
131 jb .Lless_16bytes 126 jb .Lless_16bytes
132 127
133 /* 128 /*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
144 retq 139 retq
145 .p2align 4 140 .p2align 4
146.Lless_16bytes: 141.Lless_16bytes:
147 cmpq $8, %rdx 142 cmpl $8, %edx
148 jb .Lless_8bytes 143 jb .Lless_8bytes
149 /* 144 /*
150 * Move data from 8 bytes to 15 bytes. 145 * Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
156 retq 151 retq
157 .p2align 4 152 .p2align 4
158.Lless_8bytes: 153.Lless_8bytes:
159 cmpq $4, %rdx 154 cmpl $4, %edx
160 jb .Lless_3bytes 155 jb .Lless_3bytes
161 156
162 /* 157 /*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
169 retq 164 retq
170 .p2align 4 165 .p2align 4
171.Lless_3bytes: 166.Lless_3bytes:
172 cmpl $0, %edx 167 subl $1, %edx
173 je .Lend 168 jb .Lend
174 /* 169 /*
175 * Move data from 1 bytes to 3 bytes. 170 * Move data from 1 bytes to 3 bytes.
176 */ 171 */
177.Lloop_1: 172 movzbl (%rsi), %ecx
178 movb (%rsi), %r8b 173 jz .Lstore_1byte
179 movb %r8b, (%rdi) 174 movzbq 1(%rsi), %r8
180 incq %rdi 175 movzbq (%rsi, %rdx), %r9
181 incq %rsi 176 movb %r8b, 1(%rdi)
182 decl %edx 177 movb %r9b, (%rdi, %rdx)
183 jnz .Lloop_1 178.Lstore_1byte:
179 movb %cl, (%rdi)
184 180
185.Lend: 181.Lend:
186 retq 182 retq
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454b78a3..2dcb3808cbda 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
19 .section .altinstr_replacement, "ax", @progbits 19 .section .altinstr_replacement, "ax", @progbits
20.Lmemset_c: 20.Lmemset_c:
21 movq %rdi,%r9 21 movq %rdi,%r9
22 movl %edx,%r8d 22 movq %rdx,%rcx
23 andl $7,%r8d 23 andl $7,%edx
24 movl %edx,%ecx 24 shrq $3,%rcx
25 shrl $3,%ecx
26 /* expand byte value */ 25 /* expand byte value */
27 movzbl %sil,%esi 26 movzbl %sil,%esi
28 movabs $0x0101010101010101,%rax 27 movabs $0x0101010101010101,%rax
29 mulq %rsi /* with rax, clobbers rdx */ 28 imulq %rsi,%rax
30 rep stosq 29 rep stosq
31 movl %r8d,%ecx 30 movl %edx,%ecx
32 rep stosb 31 rep stosb
33 movq %r9,%rax 32 movq %r9,%rax
34 ret 33 ret
@@ -50,7 +49,7 @@
50.Lmemset_c_e: 49.Lmemset_c_e:
51 movq %rdi,%r9 50 movq %rdi,%r9
52 movb %sil,%al 51 movb %sil,%al
53 movl %edx,%ecx 52 movq %rdx,%rcx
54 rep stosb 53 rep stosb
55 movq %r9,%rax 54 movq %r9,%rax
56 ret 55 ret
@@ -61,12 +60,11 @@ ENTRY(memset)
61ENTRY(__memset) 60ENTRY(__memset)
62 CFI_STARTPROC 61 CFI_STARTPROC
63 movq %rdi,%r10 62 movq %rdi,%r10
64 movq %rdx,%r11
65 63
66 /* expand byte value */ 64 /* expand byte value */
67 movzbl %sil,%ecx 65 movzbl %sil,%ecx
68 movabs $0x0101010101010101,%rax 66 movabs $0x0101010101010101,%rax
69 mul %rcx /* with rax, clobbers rdx */ 67 imulq %rcx,%rax
70 68
71 /* align dst */ 69 /* align dst */
72 movl %edi,%r9d 70 movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
75 CFI_REMEMBER_STATE 73 CFI_REMEMBER_STATE
76.Lafter_bad_alignment: 74.Lafter_bad_alignment:
77 75
78 movl %r11d,%ecx 76 movq %rdx,%rcx
79 shrl $6,%ecx 77 shrq $6,%rcx
80 jz .Lhandle_tail 78 jz .Lhandle_tail
81 79
82 .p2align 4 80 .p2align 4
83.Lloop_64: 81.Lloop_64:
84 decl %ecx 82 decq %rcx
85 movq %rax,(%rdi) 83 movq %rax,(%rdi)
86 movq %rax,8(%rdi) 84 movq %rax,8(%rdi)
87 movq %rax,16(%rdi) 85 movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
97 to predict jump tables. */ 95 to predict jump tables. */
98 .p2align 4 96 .p2align 4
99.Lhandle_tail: 97.Lhandle_tail:
100 movl %r11d,%ecx 98 movl %edx,%ecx
101 andl $63&(~7),%ecx 99 andl $63&(~7),%ecx
102 jz .Lhandle_7 100 jz .Lhandle_7
103 shrl $3,%ecx 101 shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
109 jnz .Lloop_8 107 jnz .Lloop_8
110 108
111.Lhandle_7: 109.Lhandle_7:
112 movl %r11d,%ecx 110 andl $7,%edx
113 andl $7,%ecx
114 jz .Lende 111 jz .Lende
115 .p2align 4 112 .p2align 4
116.Lloop_1: 113.Lloop_1:
117 decl %ecx 114 decl %edx
118 movb %al,(%rdi) 115 movb %al,(%rdi)
119 leaq 1(%rdi),%rdi 116 leaq 1(%rdi),%rdi
120 jnz .Lloop_1 117 jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
125 122
126 CFI_RESTORE_STATE 123 CFI_RESTORE_STATE
127.Lbad_alignment: 124.Lbad_alignment:
128 cmpq $7,%r11 125 cmpq $7,%rdx
129 jbe .Lhandle_7 126 jbe .Lhandle_7
130 movq %rax,(%rdi) /* unaligned store */ 127 movq %rax,(%rdi) /* unaligned store */
131 movq $8,%r8 128 movq $8,%r8
132 subq %r9,%r8 129 subq %r9,%r8
133 addq %r8,%rdi 130 addq %r8,%rdi
134 subq %r8,%r11 131 subq %r8,%rdx
135 jmp .Lafter_bad_alignment 132 jmp .Lafter_bad_alignment
136.Lfinal: 133.Lfinal:
137 CFI_ENDPROC 134 CFI_ENDPROC
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index e218d5df85ff..d9b094ca7aaa 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -760,9 +760,9 @@ survive:
760 break; 760 break;
761 } 761 }
762 762
763 maddr = kmap_atomic(pg, KM_USER0); 763 maddr = kmap_atomic(pg);
764 memcpy(maddr + offset, from, len); 764 memcpy(maddr + offset, from, len);
765 kunmap_atomic(maddr, KM_USER0); 765 kunmap_atomic(maddr);
766 set_page_dirty_lock(pg); 766 set_page_dirty_lock(pg);
767 put_page(pg); 767 put_page(pg);
768 up_read(&current->mm->mmap_sem); 768 up_read(&current->mm->mmap_sem);
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index f4f29b19fac5..6f31ee56c008 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -51,11 +51,11 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
51} 51}
52EXPORT_SYMBOL(kmap_atomic_prot); 52EXPORT_SYMBOL(kmap_atomic_prot);
53 53
54void *__kmap_atomic(struct page *page) 54void *kmap_atomic(struct page *page)
55{ 55{
56 return kmap_atomic_prot(page, kmap_prot); 56 return kmap_atomic_prot(page, kmap_prot);
57} 57}
58EXPORT_SYMBOL(__kmap_atomic); 58EXPORT_SYMBOL(kmap_atomic);
59 59
60/* 60/*
61 * This is the same as kmap_atomic() but can map memory that doesn't 61 * This is the same as kmap_atomic() but can map memory that doesn't
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f581a18c0d4d..f6679a7fb8ca 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -308,10 +308,11 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
308{ 308{
309 struct hstate *h = hstate_file(file); 309 struct hstate *h = hstate_file(file);
310 struct mm_struct *mm = current->mm; 310 struct mm_struct *mm = current->mm;
311 struct vm_area_struct *vma, *prev_vma; 311 struct vm_area_struct *vma;
312 unsigned long base = mm->mmap_base, addr = addr0; 312 unsigned long base = mm->mmap_base;
313 unsigned long addr = addr0;
313 unsigned long largest_hole = mm->cached_hole_size; 314 unsigned long largest_hole = mm->cached_hole_size;
314 int first_time = 1; 315 unsigned long start_addr;
315 316
316 /* don't allow allocations above current base */ 317 /* don't allow allocations above current base */
317 if (mm->free_area_cache > base) 318 if (mm->free_area_cache > base)
@@ -322,6 +323,8 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
322 mm->free_area_cache = base; 323 mm->free_area_cache = base;
323 } 324 }
324try_again: 325try_again:
326 start_addr = mm->free_area_cache;
327
325 /* make sure it can fit in the remaining address space */ 328 /* make sure it can fit in the remaining address space */
326 if (mm->free_area_cache < len) 329 if (mm->free_area_cache < len)
327 goto fail; 330 goto fail;
@@ -333,24 +336,18 @@ try_again:
333 * Lookup failure means no vma is above this address, 336 * Lookup failure means no vma is above this address,
334 * i.e. return with success: 337 * i.e. return with success:
335 */ 338 */
336 if (!(vma = find_vma_prev(mm, addr, &prev_vma))) 339 vma = find_vma(mm, addr);
340 if (!vma)
337 return addr; 341 return addr;
338 342
339 /* 343 if (addr + len <= vma->vm_start) {
340 * new region fits between prev_vma->vm_end and
341 * vma->vm_start, use it:
342 */
343 if (addr + len <= vma->vm_start &&
344 (!prev_vma || (addr >= prev_vma->vm_end))) {
345 /* remember the address as a hint for next time */ 344 /* remember the address as a hint for next time */
346 mm->cached_hole_size = largest_hole; 345 mm->cached_hole_size = largest_hole;
347 return (mm->free_area_cache = addr); 346 return (mm->free_area_cache = addr);
348 } else { 347 } else if (mm->free_area_cache == vma->vm_end) {
349 /* pull free_area_cache down to the first hole */ 348 /* pull free_area_cache down to the first hole */
350 if (mm->free_area_cache == vma->vm_end) { 349 mm->free_area_cache = vma->vm_start;
351 mm->free_area_cache = vma->vm_start; 350 mm->cached_hole_size = largest_hole;
352 mm->cached_hole_size = largest_hole;
353 }
354 } 351 }
355 352
356 /* remember the largest hole we saw so far */ 353 /* remember the largest hole we saw so far */
@@ -366,10 +363,9 @@ fail:
366 * if hint left us with no space for the requested 363 * if hint left us with no space for the requested
367 * mapping then try again: 364 * mapping then try again:
368 */ 365 */
369 if (first_time) { 366 if (start_addr != base) {
370 mm->free_area_cache = base; 367 mm->free_area_cache = base;
371 largest_hole = 0; 368 largest_hole = 0;
372 first_time = 0;
373 goto try_again; 369 goto try_again;
374 } 370 }
375 /* 371 /*
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6cabf6570d64..4f0cec7e4ffb 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -12,7 +12,6 @@
12#include <asm/page_types.h> 12#include <asm/page_types.h>
13#include <asm/sections.h> 13#include <asm/sections.h>
14#include <asm/setup.h> 14#include <asm/setup.h>
15#include <asm/system.h>
16#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
17#include <asm/tlb.h> 16#include <asm/tlb.h>
18#include <asm/proto.h> 17#include <asm/proto.h>
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8663f6c47ccb..575d86f85ce4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -35,7 +35,6 @@
35#include <asm/asm.h> 35#include <asm/asm.h>
36#include <asm/bios_ebda.h> 36#include <asm/bios_ebda.h>
37#include <asm/processor.h> 37#include <asm/processor.h>
38#include <asm/system.h>
39#include <asm/uaccess.h> 38#include <asm/uaccess.h>
40#include <asm/pgtable.h> 39#include <asm/pgtable.h>
41#include <asm/dma.h> 40#include <asm/dma.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 436a0309db33..fc18be0f6f29 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -35,7 +35,6 @@
35 35
36#include <asm/processor.h> 36#include <asm/processor.h>
37#include <asm/bios_ebda.h> 37#include <asm/bios_ebda.h>
38#include <asm/system.h>
39#include <asm/uaccess.h> 38#include <asm/uaccess.h>
40#include <asm/pgtable.h> 39#include <asm/pgtable.h>
41#include <asm/pgalloc.h> 40#include <asm/pgalloc.h>
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
index 036efbea8b28..aef7140c0063 100644
--- a/arch/x86/mm/kmemcheck/selftest.c
+++ b/arch/x86/mm/kmemcheck/selftest.c
@@ -1,3 +1,4 @@
1#include <linux/bug.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2 3
3#include "opcode.h" 4#include "opcode.h"
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 46db56845f18..53489ff6bf82 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,7 +28,7 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
28 return -ENOENT; 28 return -ENOENT;
29} 29}
30 30
31static u64 mem_hole_size(u64 start, u64 end) 31static u64 __init mem_hole_size(u64 start, u64 end)
32{ 32{
33 unsigned long start_pfn = PFN_UP(start); 33 unsigned long start_pfn = PFN_UP(start);
34 unsigned long end_pfn = PFN_DOWN(end); 34 unsigned long end_pfn = PFN_DOWN(end);
@@ -60,7 +60,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
60 eb->nid = nid; 60 eb->nid = nid;
61 61
62 if (emu_nid_to_phys[nid] == NUMA_NO_NODE) 62 if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
63 emu_nid_to_phys[nid] = pb->nid; 63 emu_nid_to_phys[nid] = nid;
64 64
65 pb->start += size; 65 pb->start += size;
66 if (pb->start >= pb->end) { 66 if (pb->start >= pb->end) {
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cac718499256..a69bcb8c7621 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,7 +10,6 @@
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <linux/module.h> 11#include <linux/module.h>
12 12
13#include <asm/system.h>
14#include <asm/pgtable.h> 13#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 14#include <asm/pgalloc.h>
16#include <asm/fixmap.h> 15#include <asm/fixmap.h>
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 1c1c4f46a7c1..efb5b4b93711 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -70,7 +70,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
70 return; 70 return;
71 pxm = pa->proximity_domain; 71 pxm = pa->proximity_domain;
72 apic_id = pa->apic_id; 72 apic_id = pa->apic_id;
73 if (!cpu_has_x2apic && (apic_id >= 0xff)) { 73 if (!apic->apic_id_valid(apic_id)) {
74 printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n", 74 printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
75 pxm, apic_id); 75 pxm, apic_id);
76 return; 76 return;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 66870223f8c5..877b9a1b2152 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,17 @@
18 * r9d : hlen = skb->len - skb->data_len 18 * r9d : hlen = skb->len - skb->data_len
19 */ 19 */
20#define SKBDATA %r8 20#define SKBDATA %r8
21 21#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
22sk_load_word_ind:
23 .globl sk_load_word_ind
24
25 add %ebx,%esi /* offset += X */
26# test %esi,%esi /* if (offset < 0) goto bpf_error; */
27 js bpf_error
28 22
29sk_load_word: 23sk_load_word:
30 .globl sk_load_word 24 .globl sk_load_word
31 25
26 test %esi,%esi
27 js bpf_slow_path_word_neg
28
29sk_load_word_positive_offset:
30 .globl sk_load_word_positive_offset
31
32 mov %r9d,%eax # hlen 32 mov %r9d,%eax # hlen
33 sub %esi,%eax # hlen - offset 33 sub %esi,%eax # hlen - offset
34 cmp $3,%eax 34 cmp $3,%eax
@@ -37,16 +37,15 @@ sk_load_word:
37 bswap %eax /* ntohl() */ 37 bswap %eax /* ntohl() */
38 ret 38 ret
39 39
40
41sk_load_half_ind:
42 .globl sk_load_half_ind
43
44 add %ebx,%esi /* offset += X */
45 js bpf_error
46
47sk_load_half: 40sk_load_half:
48 .globl sk_load_half 41 .globl sk_load_half
49 42
43 test %esi,%esi
44 js bpf_slow_path_half_neg
45
46sk_load_half_positive_offset:
47 .globl sk_load_half_positive_offset
48
50 mov %r9d,%eax 49 mov %r9d,%eax
51 sub %esi,%eax # hlen - offset 50 sub %esi,%eax # hlen - offset
52 cmp $1,%eax 51 cmp $1,%eax
@@ -55,14 +54,15 @@ sk_load_half:
55 rol $8,%ax # ntohs() 54 rol $8,%ax # ntohs()
56 ret 55 ret
57 56
58sk_load_byte_ind:
59 .globl sk_load_byte_ind
60 add %ebx,%esi /* offset += X */
61 js bpf_error
62
63sk_load_byte: 57sk_load_byte:
64 .globl sk_load_byte 58 .globl sk_load_byte
65 59
60 test %esi,%esi
61 js bpf_slow_path_byte_neg
62
63sk_load_byte_positive_offset:
64 .globl sk_load_byte_positive_offset
65
66 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ 66 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
67 jle bpf_slow_path_byte 67 jle bpf_slow_path_byte
68 movzbl (SKBDATA,%rsi),%eax 68 movzbl (SKBDATA,%rsi),%eax
@@ -73,25 +73,21 @@ sk_load_byte:
73 * 73 *
74 * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) 74 * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
75 * Must preserve A accumulator (%eax) 75 * Must preserve A accumulator (%eax)
76 * Inputs : %esi is the offset value, already known positive 76 * Inputs : %esi is the offset value
77 */ 77 */
78ENTRY(sk_load_byte_msh) 78sk_load_byte_msh:
79 CFI_STARTPROC 79 .globl sk_load_byte_msh
80 test %esi,%esi
81 js bpf_slow_path_byte_msh_neg
82
83sk_load_byte_msh_positive_offset:
84 .globl sk_load_byte_msh_positive_offset
80 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ 85 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
81 jle bpf_slow_path_byte_msh 86 jle bpf_slow_path_byte_msh
82 movzbl (SKBDATA,%rsi),%ebx 87 movzbl (SKBDATA,%rsi),%ebx
83 and $15,%bl 88 and $15,%bl
84 shl $2,%bl 89 shl $2,%bl
85 ret 90 ret
86 CFI_ENDPROC
87ENDPROC(sk_load_byte_msh)
88
89bpf_error:
90# force a return 0 from jit handler
91 xor %eax,%eax
92 mov -8(%rbp),%rbx
93 leaveq
94 ret
95 91
96/* rsi contains offset and can be scratched */ 92/* rsi contains offset and can be scratched */
97#define bpf_slow_path_common(LEN) \ 93#define bpf_slow_path_common(LEN) \
@@ -138,3 +134,67 @@ bpf_slow_path_byte_msh:
138 shl $2,%al 134 shl $2,%al
139 xchg %eax,%ebx 135 xchg %eax,%ebx
140 ret 136 ret
137
138#define sk_negative_common(SIZE) \
139 push %rdi; /* save skb */ \
140 push %r9; \
141 push SKBDATA; \
142/* rsi already has offset */ \
143 mov $SIZE,%ecx; /* size */ \
144 call bpf_internal_load_pointer_neg_helper; \
145 test %rax,%rax; \
146 pop SKBDATA; \
147 pop %r9; \
148 pop %rdi; \
149 jz bpf_error
150
151
152bpf_slow_path_word_neg:
153 cmp SKF_MAX_NEG_OFF, %esi /* test range */
154 jl bpf_error /* offset lower -> error */
155sk_load_word_negative_offset:
156 .globl sk_load_word_negative_offset
157 sk_negative_common(4)
158 mov (%rax), %eax
159 bswap %eax
160 ret
161
162bpf_slow_path_half_neg:
163 cmp SKF_MAX_NEG_OFF, %esi
164 jl bpf_error
165sk_load_half_negative_offset:
166 .globl sk_load_half_negative_offset
167 sk_negative_common(2)
168 mov (%rax),%ax
169 rol $8,%ax
170 movzwl %ax,%eax
171 ret
172
173bpf_slow_path_byte_neg:
174 cmp SKF_MAX_NEG_OFF, %esi
175 jl bpf_error
176sk_load_byte_negative_offset:
177 .globl sk_load_byte_negative_offset
178 sk_negative_common(1)
179 movzbl (%rax), %eax
180 ret
181
182bpf_slow_path_byte_msh_neg:
183 cmp SKF_MAX_NEG_OFF, %esi
184 jl bpf_error
185sk_load_byte_msh_negative_offset:
186 .globl sk_load_byte_msh_negative_offset
187 xchg %eax,%ebx /* dont lose A , X is about to be scratched */
188 sk_negative_common(1)
189 movzbl (%rax),%eax
190 and $15,%al
191 shl $2,%al
192 xchg %eax,%ebx
193 ret
194
195bpf_error:
196# force a return 0 from jit handler
197 xor %eax,%eax
198 mov -8(%rbp),%rbx
199 leaveq
200 ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7c1b765ecc59..0597f95b6da6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
30 * assembly code in arch/x86/net/bpf_jit.S 30 * assembly code in arch/x86/net/bpf_jit.S
31 */ 31 */
32extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; 32extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
33extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; 33extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
34extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
35extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
36extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
34 37
35static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) 38static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
36{ 39{
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
117 set_fs(old_fs); 120 set_fs(old_fs);
118} 121}
119 122
123#define CHOOSE_LOAD_FUNC(K, func) \
124 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
120 125
121void bpf_jit_compile(struct sk_filter *fp) 126void bpf_jit_compile(struct sk_filter *fp)
122{ 127{
@@ -289,7 +294,7 @@ void bpf_jit_compile(struct sk_filter *fp)
289 EMIT2(0x24, K & 0xFF); /* and imm8,%al */ 294 EMIT2(0x24, K & 0xFF); /* and imm8,%al */
290 } else if (K >= 0xFFFF0000) { 295 } else if (K >= 0xFFFF0000) {
291 EMIT2(0x66, 0x25); /* and imm16,%ax */ 296 EMIT2(0x66, 0x25); /* and imm16,%ax */
292 EMIT2(K, 2); 297 EMIT(K, 2);
293 } else { 298 } else {
294 EMIT1_off32(0x25, K); /* and imm32,%eax */ 299 EMIT1_off32(0x25, K); /* and imm32,%eax */
295 } 300 }
@@ -473,48 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
473#endif 478#endif
474 break; 479 break;
475 case BPF_S_LD_W_ABS: 480 case BPF_S_LD_W_ABS:
476 func = sk_load_word; 481 func = CHOOSE_LOAD_FUNC(K, sk_load_word);
477common_load: seen |= SEEN_DATAREF; 482common_load: seen |= SEEN_DATAREF;
478 if ((int)K < 0)
479 goto out;
480 t_offset = func - (image + addrs[i]); 483 t_offset = func - (image + addrs[i]);
481 EMIT1_off32(0xbe, K); /* mov imm32,%esi */ 484 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
482 EMIT1_off32(0xe8, t_offset); /* call */ 485 EMIT1_off32(0xe8, t_offset); /* call */
483 break; 486 break;
484 case BPF_S_LD_H_ABS: 487 case BPF_S_LD_H_ABS:
485 func = sk_load_half; 488 func = CHOOSE_LOAD_FUNC(K, sk_load_half);
486 goto common_load; 489 goto common_load;
487 case BPF_S_LD_B_ABS: 490 case BPF_S_LD_B_ABS:
488 func = sk_load_byte; 491 func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
489 goto common_load; 492 goto common_load;
490 case BPF_S_LDX_B_MSH: 493 case BPF_S_LDX_B_MSH:
491 if ((int)K < 0) { 494 func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
492 if (pc_ret0 > 0) {
493 /* addrs[pc_ret0 - 1] is the start address */
494 EMIT_JMP(addrs[pc_ret0 - 1] - addrs[i]);
495 break;
496 }
497 CLEAR_A();
498 EMIT_JMP(cleanup_addr - addrs[i]);
499 break;
500 }
501 seen |= SEEN_DATAREF | SEEN_XREG; 495 seen |= SEEN_DATAREF | SEEN_XREG;
502 t_offset = sk_load_byte_msh - (image + addrs[i]); 496 t_offset = func - (image + addrs[i]);
503 EMIT1_off32(0xbe, K); /* mov imm32,%esi */ 497 EMIT1_off32(0xbe, K); /* mov imm32,%esi */
504 EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ 498 EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
505 break; 499 break;
506 case BPF_S_LD_W_IND: 500 case BPF_S_LD_W_IND:
507 func = sk_load_word_ind; 501 func = sk_load_word;
508common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; 502common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
509 t_offset = func - (image + addrs[i]); 503 t_offset = func - (image + addrs[i]);
510 EMIT1_off32(0xbe, K); /* mov imm32,%esi */ 504 if (K) {
505 if (is_imm8(K)) {
506 EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
507 } else {
508 EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
509 EMIT(K, 4);
510 }
511 } else {
512 EMIT2(0x89,0xde); /* mov %ebx,%esi */
513 }
511 EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ 514 EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
512 break; 515 break;
513 case BPF_S_LD_H_IND: 516 case BPF_S_LD_H_IND:
514 func = sk_load_half_ind; 517 func = sk_load_half;
515 goto common_load_ind; 518 goto common_load_ind;
516 case BPF_S_LD_B_IND: 519 case BPF_S_LD_B_IND:
517 func = sk_load_byte_ind; 520 func = sk_load_byte;
518 goto common_load_ind; 521 goto common_load_ind;
519 case BPF_S_JMP_JA: 522 case BPF_S_JMP_JA:
520 t_offset = addrs[i + K] - addrs[i]; 523 t_offset = addrs[i + K] - addrs[i];
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index a312e76063a7..ed2835e148b5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -60,6 +60,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
60 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), 60 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
61 }, 61 },
62 }, 62 },
63 /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
64 {
65 .callback = set_use_crs,
66 .ident = "MSI MS-7253",
67 .matches = {
68 DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
69 DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
70 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
71 },
72 },
63 73
64 /* Now for the blacklist.. */ 74 /* Now for the blacklist.. */
65 75
@@ -282,9 +292,6 @@ static void add_resources(struct pci_root_info *info)
282 int i; 292 int i;
283 struct resource *res, *root, *conflict; 293 struct resource *res, *root, *conflict;
284 294
285 if (!pci_use_crs)
286 return;
287
288 coalesce_windows(info, IORESOURCE_MEM); 295 coalesce_windows(info, IORESOURCE_MEM);
289 coalesce_windows(info, IORESOURCE_IO); 296 coalesce_windows(info, IORESOURCE_IO);
290 297
@@ -336,8 +343,13 @@ get_current_resources(struct acpi_device *device, int busnum,
336 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, 343 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
337 &info); 344 &info);
338 345
339 add_resources(&info); 346 if (pci_use_crs) {
340 return; 347 add_resources(&info);
348
349 return;
350 }
351
352 kfree(info.name);
341 353
342name_alloc_fail: 354name_alloc_fail:
343 kfree(info.res); 355 kfree(info.res);
@@ -404,7 +416,12 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
404 kfree(sd); 416 kfree(sd);
405 } else { 417 } else {
406 get_current_resources(device, busnum, domain, &resources); 418 get_current_resources(device, busnum, domain, &resources);
407 if (list_empty(&resources)) 419
420 /*
421 * _CRS with no apertures is normal, so only fall back to
422 * defaults or native bridge info if we're ignoring _CRS.
423 */
424 if (!pci_use_crs)
408 x86_pci_root_bus_resources(busnum, &resources); 425 x86_pci_root_bus_resources(busnum, &resources);
409 bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, 426 bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd,
410 &resources); 427 &resources);
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 6dd89555fbfa..d0e6e403b4f6 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -164,11 +164,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_
164 */ 164 */
165static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) 165static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev)
166{ 166{
167 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && 167 if ((dev->device & 0xff00) == 0x2400)
168 (dev->device & 0xff00) == 0x2400)
169 dev->transparent = 1; 168 dev->transparent = 1;
170} 169}
171DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge); 170DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
171 PCI_CLASS_BRIDGE_PCI, 8, pci_fixup_transparent_bridge);
172 172
173/* 173/*
174 * Fixup for C1 Halt Disconnect problem on nForce2 systems. 174 * Fixup for C1 Halt Disconnect problem on nForce2 systems.
@@ -322,9 +322,6 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
322 struct pci_bus *bus; 322 struct pci_bus *bus;
323 u16 config; 323 u16 config;
324 324
325 if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
326 return;
327
328 /* Is VGA routed to us? */ 325 /* Is VGA routed to us? */
329 bus = pdev->bus; 326 bus = pdev->bus;
330 while (bus) { 327 while (bus) {
@@ -353,7 +350,8 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
353 dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); 350 dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
354 } 351 }
355} 352}
356DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); 353DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
354 PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
357 355
358 356
359static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { 357static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 91821a1a0c3a..831971e731f7 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -39,6 +39,87 @@
39#include <asm/io_apic.h> 39#include <asm/io_apic.h>
40 40
41 41
42/*
43 * This list of dynamic mappings is for temporarily maintaining
44 * original BIOS BAR addresses for possible reinstatement.
45 */
46struct pcibios_fwaddrmap {
47 struct list_head list;
48 struct pci_dev *dev;
49 resource_size_t fw_addr[DEVICE_COUNT_RESOURCE];
50};
51
52static LIST_HEAD(pcibios_fwaddrmappings);
53static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock);
54
55/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */
56static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
57{
58 struct pcibios_fwaddrmap *map;
59
60 WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock));
61
62 list_for_each_entry(map, &pcibios_fwaddrmappings, list)
63 if (map->dev == dev)
64 return map;
65
66 return NULL;
67}
68
69static void
70pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr)
71{
72 unsigned long flags;
73 struct pcibios_fwaddrmap *map;
74
75 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
76 map = pcibios_fwaddrmap_lookup(dev);
77 if (!map) {
78 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
79 map = kzalloc(sizeof(*map), GFP_KERNEL);
80 if (!map)
81 return;
82
83 map->dev = pci_dev_get(dev);
84 map->fw_addr[idx] = fw_addr;
85 INIT_LIST_HEAD(&map->list);
86
87 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
88 list_add_tail(&map->list, &pcibios_fwaddrmappings);
89 } else
90 map->fw_addr[idx] = fw_addr;
91 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
92}
93
94resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
95{
96 unsigned long flags;
97 struct pcibios_fwaddrmap *map;
98 resource_size_t fw_addr = 0;
99
100 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
101 map = pcibios_fwaddrmap_lookup(dev);
102 if (map)
103 fw_addr = map->fw_addr[idx];
104 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
105
106 return fw_addr;
107}
108
109static void pcibios_fw_addr_list_del(void)
110{
111 unsigned long flags;
112 struct pcibios_fwaddrmap *entry, *next;
113
114 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
115 list_for_each_entry_safe(entry, next, &pcibios_fwaddrmappings, list) {
116 list_del(&entry->list);
117 pci_dev_put(entry->dev);
118 kfree(entry);
119 }
120 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
121}
122
42static int 123static int
43skip_isa_ioresource_align(struct pci_dev *dev) { 124skip_isa_ioresource_align(struct pci_dev *dev) {
44 125
@@ -182,7 +263,8 @@ static void __init pcibios_allocate_resources(int pass)
182 idx, r, disabled, pass); 263 idx, r, disabled, pass);
183 if (pci_claim_resource(dev, idx) < 0) { 264 if (pci_claim_resource(dev, idx) < 0) {
184 /* We'll assign a new address later */ 265 /* We'll assign a new address later */
185 dev->fw_addr[idx] = r->start; 266 pcibios_save_fw_addr(dev,
267 idx, r->start);
186 r->end -= r->start; 268 r->end -= r->start;
187 r->start = 0; 269 r->start = 0;
188 } 270 }
@@ -228,6 +310,7 @@ static int __init pcibios_assign_resources(void)
228 } 310 }
229 311
230 pci_assign_unassigned_resources(); 312 pci_assign_unassigned_resources();
313 pcibios_fw_addr_list_del();
231 314
232 return 0; 315 return 0;
233} 316}
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index cb29191cee58..140942f66b31 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -43,6 +43,8 @@
43#define PCI_FIXED_BAR_4_SIZE 0x14 43#define PCI_FIXED_BAR_4_SIZE 0x14
44#define PCI_FIXED_BAR_5_SIZE 0x1c 44#define PCI_FIXED_BAR_5_SIZE 0x1c
45 45
46static int pci_soc_mode = 0;
47
46/** 48/**
47 * fixed_bar_cap - return the offset of the fixed BAR cap if found 49 * fixed_bar_cap - return the offset of the fixed BAR cap if found
48 * @bus: PCI bus 50 * @bus: PCI bus
@@ -148,7 +150,9 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
148 */ 150 */
149 if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) 151 if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
150 return 0; 152 return 0;
151 if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) 153 if (bus == 0 && (devfn == PCI_DEVFN(2, 0)
154 || devfn == PCI_DEVFN(0, 0)
155 || devfn == PCI_DEVFN(3, 0)))
152 return 1; 156 return 1;
153 return 0; /* langwell on others */ 157 return 0; /* langwell on others */
154} 158}
@@ -231,14 +235,43 @@ struct pci_ops pci_mrst_ops = {
231 */ 235 */
232int __init pci_mrst_init(void) 236int __init pci_mrst_init(void)
233{ 237{
234 printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); 238 printk(KERN_INFO "Intel MID platform detected, using MID PCI ops\n");
235 pci_mmcfg_late_init(); 239 pci_mmcfg_late_init();
236 pcibios_enable_irq = mrst_pci_irq_enable; 240 pcibios_enable_irq = mrst_pci_irq_enable;
237 pci_root_ops = pci_mrst_ops; 241 pci_root_ops = pci_mrst_ops;
242 pci_soc_mode = 1;
238 /* Continue with standard init */ 243 /* Continue with standard init */
239 return 1; 244 return 1;
240} 245}
241 246
247/* Langwell devices are not true pci devices, they are not subject to 10 ms
248 * d3 to d0 delay required by pci spec.
249 */
250static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
251{
252 /* PCI fixups are effectively decided compile time. If we have a dual
253 SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */
254 if (!pci_soc_mode)
255 return;
256 /* true pci devices in lincroft should allow type 1 access, the rest
257 * are langwell fake pci devices.
258 */
259 if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID))
260 return;
261 dev->d3_delay = 0;
262}
263DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
264
265static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev)
266{
267 pci_set_power_state(dev, PCI_D3cold);
268}
269DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev);
270DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev);
271DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev);
272DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev);
273DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev);
274
242/* 275/*
243 * Langwell devices reside at fixed offsets, don't try to move them. 276 * Langwell devices reside at fixed offsets, don't try to move them.
244 */ 277 */
@@ -248,6 +281,9 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
248 u32 size; 281 u32 size;
249 int i; 282 int i;
250 283
284 if (!pci_soc_mode)
285 return;
286
251 /* Must have extended configuration space */ 287 /* Must have extended configuration space */
252 if (dev->cfg_size < PCIE_CAP_OFFSET + 4) 288 if (dev->cfg_size < PCIE_CAP_OFFSET + 4)
253 return; 289 return;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index d99346ea8fdb..7415aa927913 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -324,6 +324,32 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
324out: 324out:
325 return ret; 325 return ret;
326} 326}
327
328static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq)
329{
330 int ret = 0;
331
332 if (pci_seg_supported) {
333 struct physdev_pci_device restore_ext;
334
335 restore_ext.seg = pci_domain_nr(dev->bus);
336 restore_ext.bus = dev->bus->number;
337 restore_ext.devfn = dev->devfn;
338 ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
339 &restore_ext);
340 if (ret == -ENOSYS)
341 pci_seg_supported = false;
342 WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
343 }
344 if (!pci_seg_supported) {
345 struct physdev_restore_msi restore;
346
347 restore.bus = dev->bus->number;
348 restore.devfn = dev->devfn;
349 ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
350 WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
351 }
352}
327#endif 353#endif
328 354
329static void xen_teardown_msi_irqs(struct pci_dev *dev) 355static void xen_teardown_msi_irqs(struct pci_dev *dev)
@@ -446,6 +472,7 @@ int __init pci_xen_initial_domain(void)
446#ifdef CONFIG_PCI_MSI 472#ifdef CONFIG_PCI_MSI
447 x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; 473 x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
448 x86_msi.teardown_msi_irq = xen_teardown_msi_irq; 474 x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
475 x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
449#endif 476#endif
450 xen_setup_acpi_sci(); 477 xen_setup_acpi_sci();
451 __acpi_register_gsi = acpi_register_gsi_xen; 478 __acpi_register_gsi = acpi_register_gsi_xen;
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index e70be38ce039..ce874f872cc6 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -208,16 +208,19 @@
208 interrupts = <14 1>; 208 interrupts = <14 1>;
209 }; 209 };
210 210
211 gpio@b,1 { 211 pcigpio: gpio@b,1 {
212 #gpio-cells = <2>;
213 #interrupt-cells = <2>;
212 compatible = "pci8086,2e67.2", 214 compatible = "pci8086,2e67.2",
213 "pci8086,2e67", 215 "pci8086,2e67",
214 "pciclassff0000", 216 "pciclassff0000",
215 "pciclassff00"; 217 "pciclassff00";
216 218
217 #gpio-cells = <2>;
218 reg = <0x15900 0x0 0x0 0x0 0x0>; 219 reg = <0x15900 0x0 0x0 0x0 0x0>;
219 interrupts = <15 1>; 220 interrupts = <15 1>;
221 interrupt-controller;
220 gpio-controller; 222 gpio-controller;
223 intel,muxctl = <0>;
221 }; 224 };
222 225
223 i2c-controller@b,2 { 226 i2c-controller@b,2 {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 4cf9bd0a1653..92660edaa1e7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -26,6 +26,8 @@
26 * Skip non-WB memory and ignore empty memory ranges. 26 * Skip non-WB memory and ignore empty memory ranges.
27 */ 27 */
28 28
29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30
29#include <linux/kernel.h> 31#include <linux/kernel.h>
30#include <linux/init.h> 32#include <linux/init.h>
31#include <linux/efi.h> 33#include <linux/efi.h>
@@ -47,7 +49,6 @@
47#include <asm/x86_init.h> 49#include <asm/x86_init.h>
48 50
49#define EFI_DEBUG 1 51#define EFI_DEBUG 1
50#define PFX "EFI: "
51 52
52int efi_enabled; 53int efi_enabled;
53EXPORT_SYMBOL(efi_enabled); 54EXPORT_SYMBOL(efi_enabled);
@@ -67,6 +68,9 @@ EXPORT_SYMBOL(efi);
67 68
68struct efi_memory_map memmap; 69struct efi_memory_map memmap;
69 70
71bool efi_64bit;
72static bool efi_native;
73
70static struct efi efi_phys __initdata; 74static struct efi efi_phys __initdata;
71static efi_system_table_t efi_systab __initdata; 75static efi_system_table_t efi_systab __initdata;
72 76
@@ -254,7 +258,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
254 258
255 status = efi.get_time(&eft, &cap); 259 status = efi.get_time(&eft, &cap);
256 if (status != EFI_SUCCESS) { 260 if (status != EFI_SUCCESS) {
257 printk(KERN_ERR "Oops: efitime: can't read time!\n"); 261 pr_err("Oops: efitime: can't read time!\n");
258 return -1; 262 return -1;
259 } 263 }
260 264
@@ -268,7 +272,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
268 272
269 status = efi.set_time(&eft); 273 status = efi.set_time(&eft);
270 if (status != EFI_SUCCESS) { 274 if (status != EFI_SUCCESS) {
271 printk(KERN_ERR "Oops: efitime: can't write time!\n"); 275 pr_err("Oops: efitime: can't write time!\n");
272 return -1; 276 return -1;
273 } 277 }
274 return 0; 278 return 0;
@@ -282,7 +286,7 @@ unsigned long efi_get_time(void)
282 286
283 status = efi.get_time(&eft, &cap); 287 status = efi.get_time(&eft, &cap);
284 if (status != EFI_SUCCESS) 288 if (status != EFI_SUCCESS)
285 printk(KERN_ERR "Oops: efitime: can't read time!\n"); 289 pr_err("Oops: efitime: can't read time!\n");
286 290
287 return mktime(eft.year, eft.month, eft.day, eft.hour, 291 return mktime(eft.year, eft.month, eft.day, eft.hour,
288 eft.minute, eft.second); 292 eft.minute, eft.second);
@@ -338,11 +342,16 @@ static void __init do_add_efi_memmap(void)
338 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 342 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
339} 343}
340 344
341void __init efi_memblock_x86_reserve_range(void) 345int __init efi_memblock_x86_reserve_range(void)
342{ 346{
343 unsigned long pmap; 347 unsigned long pmap;
344 348
345#ifdef CONFIG_X86_32 349#ifdef CONFIG_X86_32
350 /* Can't handle data above 4GB at this time */
351 if (boot_params.efi_info.efi_memmap_hi) {
352 pr_err("Memory map is above 4GB, disabling EFI.\n");
353 return -EINVAL;
354 }
346 pmap = boot_params.efi_info.efi_memmap; 355 pmap = boot_params.efi_info.efi_memmap;
347#else 356#else
348 pmap = (boot_params.efi_info.efi_memmap | 357 pmap = (boot_params.efi_info.efi_memmap |
@@ -354,6 +363,8 @@ void __init efi_memblock_x86_reserve_range(void)
354 memmap.desc_version = boot_params.efi_info.efi_memdesc_version; 363 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
355 memmap.desc_size = boot_params.efi_info.efi_memdesc_size; 364 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
356 memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); 365 memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
366
367 return 0;
357} 368}
358 369
359#if EFI_DEBUG 370#if EFI_DEBUG
@@ -367,7 +378,7 @@ static void __init print_efi_memmap(void)
367 p < memmap.map_end; 378 p < memmap.map_end;
368 p += memmap.desc_size, i++) { 379 p += memmap.desc_size, i++) {
369 md = p; 380 md = p;
370 printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, " 381 pr_info("mem%02u: type=%u, attr=0x%llx, "
371 "range=[0x%016llx-0x%016llx) (%lluMB)\n", 382 "range=[0x%016llx-0x%016llx) (%lluMB)\n",
372 i, md->type, md->attribute, md->phys_addr, 383 i, md->type, md->attribute, md->phys_addr,
373 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), 384 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
@@ -400,7 +411,7 @@ void __init efi_reserve_boot_services(void)
400 memblock_is_region_reserved(start, size)) { 411 memblock_is_region_reserved(start, size)) {
401 /* Could not reserve, skip it */ 412 /* Could not reserve, skip it */
402 md->num_pages = 0; 413 md->num_pages = 0;
403 memblock_dbg(PFX "Could not reserve boot range " 414 memblock_dbg("Could not reserve boot range "
404 "[0x%010llx-0x%010llx]\n", 415 "[0x%010llx-0x%010llx]\n",
405 start, start+size-1); 416 start, start+size-1);
406 } else 417 } else
@@ -429,103 +440,172 @@ static void __init efi_free_boot_services(void)
429 } 440 }
430} 441}
431 442
432void __init efi_init(void) 443static int __init efi_systab_init(void *phys)
433{ 444{
434 efi_config_table_t *config_tables; 445 if (efi_64bit) {
435 efi_runtime_services_t *runtime; 446 efi_system_table_64_t *systab64;
436 efi_char16_t *c16; 447 u64 tmp = 0;
437 char vendor[100] = "unknown"; 448
438 int i = 0; 449 systab64 = early_ioremap((unsigned long)phys,
439 void *tmp; 450 sizeof(*systab64));
451 if (systab64 == NULL) {
452 pr_err("Couldn't map the system table!\n");
453 return -ENOMEM;
454 }
440 455
456 efi_systab.hdr = systab64->hdr;
457 efi_systab.fw_vendor = systab64->fw_vendor;
458 tmp |= systab64->fw_vendor;
459 efi_systab.fw_revision = systab64->fw_revision;
460 efi_systab.con_in_handle = systab64->con_in_handle;
461 tmp |= systab64->con_in_handle;
462 efi_systab.con_in = systab64->con_in;
463 tmp |= systab64->con_in;
464 efi_systab.con_out_handle = systab64->con_out_handle;
465 tmp |= systab64->con_out_handle;
466 efi_systab.con_out = systab64->con_out;
467 tmp |= systab64->con_out;
468 efi_systab.stderr_handle = systab64->stderr_handle;
469 tmp |= systab64->stderr_handle;
470 efi_systab.stderr = systab64->stderr;
471 tmp |= systab64->stderr;
472 efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
473 tmp |= systab64->runtime;
474 efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
475 tmp |= systab64->boottime;
476 efi_systab.nr_tables = systab64->nr_tables;
477 efi_systab.tables = systab64->tables;
478 tmp |= systab64->tables;
479
480 early_iounmap(systab64, sizeof(*systab64));
441#ifdef CONFIG_X86_32 481#ifdef CONFIG_X86_32
442 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; 482 if (tmp >> 32) {
443#else 483 pr_err("EFI data located above 4GB, disabling EFI.\n");
444 efi_phys.systab = (efi_system_table_t *) 484 return -EINVAL;
445 (boot_params.efi_info.efi_systab | 485 }
446 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
447#endif 486#endif
487 } else {
488 efi_system_table_32_t *systab32;
489
490 systab32 = early_ioremap((unsigned long)phys,
491 sizeof(*systab32));
492 if (systab32 == NULL) {
493 pr_err("Couldn't map the system table!\n");
494 return -ENOMEM;
495 }
496
497 efi_systab.hdr = systab32->hdr;
498 efi_systab.fw_vendor = systab32->fw_vendor;
499 efi_systab.fw_revision = systab32->fw_revision;
500 efi_systab.con_in_handle = systab32->con_in_handle;
501 efi_systab.con_in = systab32->con_in;
502 efi_systab.con_out_handle = systab32->con_out_handle;
503 efi_systab.con_out = systab32->con_out;
504 efi_systab.stderr_handle = systab32->stderr_handle;
505 efi_systab.stderr = systab32->stderr;
506 efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
507 efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
508 efi_systab.nr_tables = systab32->nr_tables;
509 efi_systab.tables = systab32->tables;
510
511 early_iounmap(systab32, sizeof(*systab32));
512 }
448 513
449 efi.systab = early_ioremap((unsigned long)efi_phys.systab,
450 sizeof(efi_system_table_t));
451 if (efi.systab == NULL)
452 printk(KERN_ERR "Couldn't map the EFI system table!\n");
453 memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
454 early_iounmap(efi.systab, sizeof(efi_system_table_t));
455 efi.systab = &efi_systab; 514 efi.systab = &efi_systab;
456 515
457 /* 516 /*
458 * Verify the EFI Table 517 * Verify the EFI Table
459 */ 518 */
460 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) 519 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
461 printk(KERN_ERR "EFI system table signature incorrect!\n"); 520 pr_err("System table signature incorrect!\n");
521 return -EINVAL;
522 }
462 if ((efi.systab->hdr.revision >> 16) == 0) 523 if ((efi.systab->hdr.revision >> 16) == 0)
463 printk(KERN_ERR "Warning: EFI system table version " 524 pr_err("Warning: System table version "
464 "%d.%02d, expected 1.00 or greater!\n", 525 "%d.%02d, expected 1.00 or greater!\n",
465 efi.systab->hdr.revision >> 16, 526 efi.systab->hdr.revision >> 16,
466 efi.systab->hdr.revision & 0xffff); 527 efi.systab->hdr.revision & 0xffff);
467 528
468 /* 529 return 0;
469 * Show what we know for posterity 530}
470 */
471 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
472 if (c16) {
473 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
474 vendor[i] = *c16++;
475 vendor[i] = '\0';
476 } else
477 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
478 early_iounmap(tmp, 2);
479 531
480 printk(KERN_INFO "EFI v%u.%.02u by %s\n", 532static int __init efi_config_init(u64 tables, int nr_tables)
481 efi.systab->hdr.revision >> 16, 533{
482 efi.systab->hdr.revision & 0xffff, vendor); 534 void *config_tables, *tablep;
535 int i, sz;
536
537 if (efi_64bit)
538 sz = sizeof(efi_config_table_64_t);
539 else
540 sz = sizeof(efi_config_table_32_t);
483 541
484 /* 542 /*
485 * Let's see what config tables the firmware passed to us. 543 * Let's see what config tables the firmware passed to us.
486 */ 544 */
487 config_tables = early_ioremap( 545 config_tables = early_ioremap(tables, nr_tables * sz);
488 efi.systab->tables, 546 if (config_tables == NULL) {
489 efi.systab->nr_tables * sizeof(efi_config_table_t)); 547 pr_err("Could not map Configuration table!\n");
490 if (config_tables == NULL) 548 return -ENOMEM;
491 printk(KERN_ERR "Could not map EFI Configuration Table!\n"); 549 }
492 550
493 printk(KERN_INFO); 551 tablep = config_tables;
552 pr_info("");
494 for (i = 0; i < efi.systab->nr_tables; i++) { 553 for (i = 0; i < efi.systab->nr_tables; i++) {
495 if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) { 554 efi_guid_t guid;
496 efi.mps = config_tables[i].table; 555 unsigned long table;
497 printk(" MPS=0x%lx ", config_tables[i].table); 556
498 } else if (!efi_guidcmp(config_tables[i].guid, 557 if (efi_64bit) {
499 ACPI_20_TABLE_GUID)) { 558 u64 table64;
500 efi.acpi20 = config_tables[i].table; 559 guid = ((efi_config_table_64_t *)tablep)->guid;
501 printk(" ACPI 2.0=0x%lx ", config_tables[i].table); 560 table64 = ((efi_config_table_64_t *)tablep)->table;
502 } else if (!efi_guidcmp(config_tables[i].guid, 561 table = table64;
503 ACPI_TABLE_GUID)) { 562#ifdef CONFIG_X86_32
504 efi.acpi = config_tables[i].table; 563 if (table64 >> 32) {
505 printk(" ACPI=0x%lx ", config_tables[i].table); 564 pr_cont("\n");
506 } else if (!efi_guidcmp(config_tables[i].guid, 565 pr_err("Table located above 4GB, disabling EFI.\n");
507 SMBIOS_TABLE_GUID)) { 566 early_iounmap(config_tables,
508 efi.smbios = config_tables[i].table; 567 efi.systab->nr_tables * sz);
509 printk(" SMBIOS=0x%lx ", config_tables[i].table); 568 return -EINVAL;
569 }
570#endif
571 } else {
572 guid = ((efi_config_table_32_t *)tablep)->guid;
573 table = ((efi_config_table_32_t *)tablep)->table;
574 }
575 if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
576 efi.mps = table;
577 pr_cont(" MPS=0x%lx ", table);
578 } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
579 efi.acpi20 = table;
580 pr_cont(" ACPI 2.0=0x%lx ", table);
581 } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
582 efi.acpi = table;
583 pr_cont(" ACPI=0x%lx ", table);
584 } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
585 efi.smbios = table;
586 pr_cont(" SMBIOS=0x%lx ", table);
510#ifdef CONFIG_X86_UV 587#ifdef CONFIG_X86_UV
511 } else if (!efi_guidcmp(config_tables[i].guid, 588 } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
512 UV_SYSTEM_TABLE_GUID)) { 589 efi.uv_systab = table;
513 efi.uv_systab = config_tables[i].table; 590 pr_cont(" UVsystab=0x%lx ", table);
514 printk(" UVsystab=0x%lx ", config_tables[i].table);
515#endif 591#endif
516 } else if (!efi_guidcmp(config_tables[i].guid, 592 } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
517 HCDP_TABLE_GUID)) { 593 efi.hcdp = table;
518 efi.hcdp = config_tables[i].table; 594 pr_cont(" HCDP=0x%lx ", table);
519 printk(" HCDP=0x%lx ", config_tables[i].table); 595 } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
520 } else if (!efi_guidcmp(config_tables[i].guid, 596 efi.uga = table;
521 UGA_IO_PROTOCOL_GUID)) { 597 pr_cont(" UGA=0x%lx ", table);
522 efi.uga = config_tables[i].table;
523 printk(" UGA=0x%lx ", config_tables[i].table);
524 } 598 }
599 tablep += sz;
525 } 600 }
526 printk("\n"); 601 pr_cont("\n");
527 early_iounmap(config_tables, 602 early_iounmap(config_tables, efi.systab->nr_tables * sz);
528 efi.systab->nr_tables * sizeof(efi_config_table_t)); 603 return 0;
604}
605
606static int __init efi_runtime_init(void)
607{
608 efi_runtime_services_t *runtime;
529 609
530 /* 610 /*
531 * Check out the runtime services table. We need to map 611 * Check out the runtime services table. We need to map
@@ -535,43 +615,116 @@ void __init efi_init(void)
535 */ 615 */
536 runtime = early_ioremap((unsigned long)efi.systab->runtime, 616 runtime = early_ioremap((unsigned long)efi.systab->runtime,
537 sizeof(efi_runtime_services_t)); 617 sizeof(efi_runtime_services_t));
538 if (runtime != NULL) { 618 if (!runtime) {
539 /* 619 pr_err("Could not map the runtime service table!\n");
540 * We will only need *early* access to the following 620 return -ENOMEM;
541 * two EFI runtime services before set_virtual_address_map 621 }
542 * is invoked. 622 /*
543 */ 623 * We will only need *early* access to the following
544 efi_phys.get_time = (efi_get_time_t *)runtime->get_time; 624 * two EFI runtime services before set_virtual_address_map
545 efi_phys.set_virtual_address_map = 625 * is invoked.
546 (efi_set_virtual_address_map_t *) 626 */
547 runtime->set_virtual_address_map; 627 efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
548 /* 628 efi_phys.set_virtual_address_map =
549 * Make efi_get_time can be called before entering 629 (efi_set_virtual_address_map_t *)
550 * virtual mode. 630 runtime->set_virtual_address_map;
551 */ 631 /*
552 efi.get_time = phys_efi_get_time; 632 * Make efi_get_time can be called before entering
553 } else 633 * virtual mode.
554 printk(KERN_ERR "Could not map the EFI runtime service " 634 */
555 "table!\n"); 635 efi.get_time = phys_efi_get_time;
556 early_iounmap(runtime, sizeof(efi_runtime_services_t)); 636 early_iounmap(runtime, sizeof(efi_runtime_services_t));
557 637
638 return 0;
639}
640
641static int __init efi_memmap_init(void)
642{
558 /* Map the EFI memory map */ 643 /* Map the EFI memory map */
559 memmap.map = early_ioremap((unsigned long)memmap.phys_map, 644 memmap.map = early_ioremap((unsigned long)memmap.phys_map,
560 memmap.nr_map * memmap.desc_size); 645 memmap.nr_map * memmap.desc_size);
561 if (memmap.map == NULL) 646 if (memmap.map == NULL) {
562 printk(KERN_ERR "Could not map the EFI memory map!\n"); 647 pr_err("Could not map the memory map!\n");
648 return -ENOMEM;
649 }
563 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); 650 memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
564 651
565 if (memmap.desc_size != sizeof(efi_memory_desc_t))
566 printk(KERN_WARNING
567 "Kernel-defined memdesc doesn't match the one from EFI!\n");
568
569 if (add_efi_memmap) 652 if (add_efi_memmap)
570 do_add_efi_memmap(); 653 do_add_efi_memmap();
571 654
655 return 0;
656}
657
658void __init efi_init(void)
659{
660 efi_char16_t *c16;
661 char vendor[100] = "unknown";
662 int i = 0;
663 void *tmp;
664
665#ifdef CONFIG_X86_32
666 if (boot_params.efi_info.efi_systab_hi ||
667 boot_params.efi_info.efi_memmap_hi) {
668 pr_info("Table located above 4GB, disabling EFI.\n");
669 efi_enabled = 0;
670 return;
671 }
672 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
673 efi_native = !efi_64bit;
674#else
675 efi_phys.systab = (efi_system_table_t *)
676 (boot_params.efi_info.efi_systab |
677 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
678 efi_native = efi_64bit;
679#endif
680
681 if (efi_systab_init(efi_phys.systab)) {
682 efi_enabled = 0;
683 return;
684 }
685
686 /*
687 * Show what we know for posterity
688 */
689 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
690 if (c16) {
691 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
692 vendor[i] = *c16++;
693 vendor[i] = '\0';
694 } else
695 pr_err("Could not map the firmware vendor!\n");
696 early_iounmap(tmp, 2);
697
698 pr_info("EFI v%u.%.02u by %s\n",
699 efi.systab->hdr.revision >> 16,
700 efi.systab->hdr.revision & 0xffff, vendor);
701
702 if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) {
703 efi_enabled = 0;
704 return;
705 }
706
707 /*
708 * Note: We currently don't support runtime services on an EFI
709 * that doesn't match the kernel 32/64-bit mode.
710 */
711
712 if (!efi_native)
713 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
714 else if (efi_runtime_init()) {
715 efi_enabled = 0;
716 return;
717 }
718
719 if (efi_memmap_init()) {
720 efi_enabled = 0;
721 return;
722 }
572#ifdef CONFIG_X86_32 723#ifdef CONFIG_X86_32
573 x86_platform.get_wallclock = efi_get_time; 724 if (efi_native) {
574 x86_platform.set_wallclock = efi_set_rtc_mmss; 725 x86_platform.get_wallclock = efi_get_time;
726 x86_platform.set_wallclock = efi_set_rtc_mmss;
727 }
575#endif 728#endif
576 729
577#if EFI_DEBUG 730#if EFI_DEBUG
@@ -629,6 +782,14 @@ void __init efi_enter_virtual_mode(void)
629 782
630 efi.systab = NULL; 783 efi.systab = NULL;
631 784
785 /*
786 * We don't do virtual mode, since we don't do runtime services, on
787 * non-native EFI
788 */
789
790 if (!efi_native)
791 goto out;
792
632 /* Merge contiguous regions of the same type and attribute */ 793 /* Merge contiguous regions of the same type and attribute */
633 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 794 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
634 u64 prev_size; 795 u64 prev_size;
@@ -677,7 +838,7 @@ void __init efi_enter_virtual_mode(void)
677 md->virt_addr = (u64) (unsigned long) va; 838 md->virt_addr = (u64) (unsigned long) va;
678 839
679 if (!va) { 840 if (!va) {
680 printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n", 841 pr_err("ioremap of 0x%llX failed!\n",
681 (unsigned long long)md->phys_addr); 842 (unsigned long long)md->phys_addr);
682 continue; 843 continue;
683 } 844 }
@@ -711,8 +872,8 @@ void __init efi_enter_virtual_mode(void)
711 (efi_memory_desc_t *)__pa(new_memmap)); 872 (efi_memory_desc_t *)__pa(new_memmap));
712 873
713 if (status != EFI_SUCCESS) { 874 if (status != EFI_SUCCESS) {
714 printk(KERN_ALERT "Unable to switch EFI into virtual mode " 875 pr_alert("Unable to switch EFI into virtual mode "
715 "(status=%lx)!\n", status); 876 "(status=%lx)!\n", status);
716 panic("EFI call to SetVirtualAddressMap() failed!"); 877 panic("EFI call to SetVirtualAddressMap() failed!");
717 } 878 }
718 879
@@ -744,6 +905,8 @@ void __init efi_enter_virtual_mode(void)
744 efi.query_capsule_caps = virt_efi_query_capsule_caps; 905 efi.query_capsule_caps = virt_efi_query_capsule_caps;
745 if (__supported_pte_mask & _PAGE_NX) 906 if (__supported_pte_mask & _PAGE_NX)
746 runtime_code_page_mkexec(); 907 runtime_code_page_mkexec();
908
909out:
747 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); 910 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
748 memmap.map = NULL; 911 memmap.map = NULL;
749 kfree(new_memmap); 912 kfree(new_memmap);
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile
index 07c9cd05021a..5b51194f4c8d 100644
--- a/arch/x86/platform/geode/Makefile
+++ b/arch/x86/platform/geode/Makefile
@@ -1 +1,3 @@
1obj-$(CONFIG_ALIX) += alix.o 1obj-$(CONFIG_ALIX) += alix.o
2obj-$(CONFIG_NET5501) += net5501.o
3obj-$(CONFIG_GEOS) += geos.o
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index dc5f1d32aced..90e23e7679a5 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -6,6 +6,7 @@
6 * 6 *
7 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> 7 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
8 * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> 8 * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
9 * and Philip Prindeville <philipp@redfish-solutions.com>
9 * 10 *
10 * TODO: There are large similarities with leds-net5501.c 11 * TODO: There are large similarities with leds-net5501.c
11 * by Alessandro Zummo <a.zummo@towertech.it> 12 * by Alessandro Zummo <a.zummo@towertech.it>
@@ -24,14 +25,47 @@
24#include <linux/leds.h> 25#include <linux/leds.h>
25#include <linux/platform_device.h> 26#include <linux/platform_device.h>
26#include <linux/gpio.h> 27#include <linux/gpio.h>
28#include <linux/input.h>
29#include <linux/gpio_keys.h>
30#include <linux/dmi.h>
27 31
28#include <asm/geode.h> 32#include <asm/geode.h>
29 33
34#define BIOS_SIGNATURE_TINYBIOS 0xf0000
35#define BIOS_SIGNATURE_COREBOOT 0x500
36#define BIOS_REGION_SIZE 0x10000
37
30static bool force = 0; 38static bool force = 0;
31module_param(force, bool, 0444); 39module_param(force, bool, 0444);
32/* FIXME: Award bios is not automatically detected as Alix platform */ 40/* FIXME: Award bios is not automatically detected as Alix platform */
33MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform"); 41MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform");
34 42
43static struct gpio_keys_button alix_gpio_buttons[] = {
44 {
45 .code = KEY_RESTART,
46 .gpio = 24,
47 .active_low = 1,
48 .desc = "Reset button",
49 .type = EV_KEY,
50 .wakeup = 0,
51 .debounce_interval = 100,
52 .can_disable = 0,
53 }
54};
55static struct gpio_keys_platform_data alix_buttons_data = {
56 .buttons = alix_gpio_buttons,
57 .nbuttons = ARRAY_SIZE(alix_gpio_buttons),
58 .poll_interval = 20,
59};
60
61static struct platform_device alix_buttons_dev = {
62 .name = "gpio-keys-polled",
63 .id = 1,
64 .dev = {
65 .platform_data = &alix_buttons_data,
66 }
67};
68
35static struct gpio_led alix_leds[] = { 69static struct gpio_led alix_leds[] = {
36 { 70 {
37 .name = "alix:1", 71 .name = "alix:1",
@@ -64,17 +98,22 @@ static struct platform_device alix_leds_dev = {
64 .dev.platform_data = &alix_leds_data, 98 .dev.platform_data = &alix_leds_data,
65}; 99};
66 100
101static struct __initdata platform_device *alix_devs[] = {
102 &alix_buttons_dev,
103 &alix_leds_dev,
104};
105
67static void __init register_alix(void) 106static void __init register_alix(void)
68{ 107{
69 /* Setup LED control through leds-gpio driver */ 108 /* Setup LED control through leds-gpio driver */
70 platform_device_register(&alix_leds_dev); 109 platform_add_devices(alix_devs, ARRAY_SIZE(alix_devs));
71} 110}
72 111
73static int __init alix_present(unsigned long bios_phys, 112static bool __init alix_present(unsigned long bios_phys,
74 const char *alix_sig, 113 const char *alix_sig,
75 size_t alix_sig_len) 114 size_t alix_sig_len)
76{ 115{
77 const size_t bios_len = 0x00010000; 116 const size_t bios_len = BIOS_REGION_SIZE;
78 const char *bios_virt; 117 const char *bios_virt;
79 const char *scan_end; 118 const char *scan_end;
80 const char *p; 119 const char *p;
@@ -84,7 +123,7 @@ static int __init alix_present(unsigned long bios_phys,
84 printk(KERN_NOTICE "%s: forced to skip BIOS test, " 123 printk(KERN_NOTICE "%s: forced to skip BIOS test, "
85 "assume system is ALIX.2/ALIX.3\n", 124 "assume system is ALIX.2/ALIX.3\n",
86 KBUILD_MODNAME); 125 KBUILD_MODNAME);
87 return 1; 126 return true;
88 } 127 }
89 128
90 bios_virt = phys_to_virt(bios_phys); 129 bios_virt = phys_to_virt(bios_phys);
@@ -109,15 +148,33 @@ static int __init alix_present(unsigned long bios_phys,
109 *a = '\0'; 148 *a = '\0';
110 149
111 tail = p + alix_sig_len; 150 tail = p + alix_sig_len;
112 if ((tail[0] == '2' || tail[0] == '3')) { 151 if ((tail[0] == '2' || tail[0] == '3' || tail[0] == '6')) {
113 printk(KERN_INFO 152 printk(KERN_INFO
114 "%s: system is recognized as \"%s\"\n", 153 "%s: system is recognized as \"%s\"\n",
115 KBUILD_MODNAME, name); 154 KBUILD_MODNAME, name);
116 return 1; 155 return true;
117 } 156 }
118 } 157 }
119 158
120 return 0; 159 return false;
160}
161
162static bool __init alix_present_dmi(void)
163{
164 const char *vendor, *product;
165
166 vendor = dmi_get_system_info(DMI_SYS_VENDOR);
167 if (!vendor || strcmp(vendor, "PC Engines"))
168 return false;
169
170 product = dmi_get_system_info(DMI_PRODUCT_NAME);
171 if (!product || (strcmp(product, "ALIX.2D") && strcmp(product, "ALIX.6")))
172 return false;
173
174 printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n",
175 KBUILD_MODNAME, vendor, product);
176
177 return true;
121} 178}
122 179
123static int __init alix_init(void) 180static int __init alix_init(void)
@@ -128,8 +185,9 @@ static int __init alix_init(void)
128 if (!is_geode()) 185 if (!is_geode())
129 return 0; 186 return 0;
130 187
131 if (alix_present(0xf0000, tinybios_sig, sizeof(tinybios_sig) - 1) || 188 if (alix_present(BIOS_SIGNATURE_TINYBIOS, tinybios_sig, sizeof(tinybios_sig) - 1) ||
132 alix_present(0x500, coreboot_sig, sizeof(coreboot_sig) - 1)) 189 alix_present(BIOS_SIGNATURE_COREBOOT, coreboot_sig, sizeof(coreboot_sig) - 1) ||
190 alix_present_dmi())
133 register_alix(); 191 register_alix();
134 192
135 return 0; 193 return 0;
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
new file mode 100644
index 000000000000..c2e6d53558be
--- /dev/null
+++ b/arch/x86/platform/geode/geos.c
@@ -0,0 +1,128 @@
1/*
2 * System Specific setup for Traverse Technologies GEOS.
3 * At the moment this means setup of GPIO control of LEDs.
4 *
5 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
6 * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
7 * and Philip Prindeville <philipp@redfish-solutions.com>
8 *
9 * TODO: There are large similarities with leds-net5501.c
10 * by Alessandro Zummo <a.zummo@towertech.it>
11 * In the future leds-net5501.c should be migrated over to platform
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation.
16 */
17
18#include <linux/kernel.h>
19#include <linux/init.h>
20#include <linux/io.h>
21#include <linux/string.h>
22#include <linux/module.h>
23#include <linux/leds.h>
24#include <linux/platform_device.h>
25#include <linux/gpio.h>
26#include <linux/input.h>
27#include <linux/gpio_keys.h>
28#include <linux/dmi.h>
29
30#include <asm/geode.h>
31
32static struct gpio_keys_button geos_gpio_buttons[] = {
33 {
34 .code = KEY_RESTART,
35 .gpio = 3,
36 .active_low = 1,
37 .desc = "Reset button",
38 .type = EV_KEY,
39 .wakeup = 0,
40 .debounce_interval = 100,
41 .can_disable = 0,
42 }
43};
44static struct gpio_keys_platform_data geos_buttons_data = {
45 .buttons = geos_gpio_buttons,
46 .nbuttons = ARRAY_SIZE(geos_gpio_buttons),
47 .poll_interval = 20,
48};
49
50static struct platform_device geos_buttons_dev = {
51 .name = "gpio-keys-polled",
52 .id = 1,
53 .dev = {
54 .platform_data = &geos_buttons_data,
55 }
56};
57
58static struct gpio_led geos_leds[] = {
59 {
60 .name = "geos:1",
61 .gpio = 6,
62 .default_trigger = "default-on",
63 .active_low = 1,
64 },
65 {
66 .name = "geos:2",
67 .gpio = 25,
68 .default_trigger = "default-off",
69 .active_low = 1,
70 },
71 {
72 .name = "geos:3",
73 .gpio = 27,
74 .default_trigger = "default-off",
75 .active_low = 1,
76 },
77};
78
79static struct gpio_led_platform_data geos_leds_data = {
80 .num_leds = ARRAY_SIZE(geos_leds),
81 .leds = geos_leds,
82};
83
84static struct platform_device geos_leds_dev = {
85 .name = "leds-gpio",
86 .id = -1,
87 .dev.platform_data = &geos_leds_data,
88};
89
90static struct __initdata platform_device *geos_devs[] = {
91 &geos_buttons_dev,
92 &geos_leds_dev,
93};
94
95static void __init register_geos(void)
96{
97 /* Setup LED control through leds-gpio driver */
98 platform_add_devices(geos_devs, ARRAY_SIZE(geos_devs));
99}
100
101static int __init geos_init(void)
102{
103 const char *vendor, *product;
104
105 if (!is_geode())
106 return 0;
107
108 vendor = dmi_get_system_info(DMI_SYS_VENDOR);
109 if (!vendor || strcmp(vendor, "Traverse Technologies"))
110 return 0;
111
112 product = dmi_get_system_info(DMI_PRODUCT_NAME);
113 if (!product || strcmp(product, "Geos"))
114 return 0;
115
116 printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n",
117 KBUILD_MODNAME, vendor, product);
118
119 register_geos();
120
121 return 0;
122}
123
124module_init(geos_init);
125
126MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
127MODULE_DESCRIPTION("Traverse Technologies Geos System Setup");
128MODULE_LICENSE("GPL");
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
new file mode 100644
index 000000000000..66d377e334f7
--- /dev/null
+++ b/arch/x86/platform/geode/net5501.c
@@ -0,0 +1,154 @@
1/*
2 * System Specific setup for Soekris net5501
3 * At the moment this means setup of GPIO control of LEDs and buttons
4 * on net5501 boards.
5 *
6 *
7 * Copyright (C) 2008-2009 Tower Technologies
8 * Written by Alessandro Zummo <a.zummo@towertech.it>
9 *
10 * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
11 * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
12 * and Philip Prindeville <philipp@redfish-solutions.com>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License version 2
16 * as published by the Free Software Foundation.
17 */
18
19#include <linux/kernel.h>
20#include <linux/init.h>
21#include <linux/io.h>
22#include <linux/string.h>
23#include <linux/module.h>
24#include <linux/leds.h>
25#include <linux/platform_device.h>
26#include <linux/gpio.h>
27#include <linux/input.h>
28#include <linux/gpio_keys.h>
29
30#include <asm/geode.h>
31
32#define BIOS_REGION_BASE 0xffff0000
33#define BIOS_REGION_SIZE 0x00010000
34
35static struct gpio_keys_button net5501_gpio_buttons[] = {
36 {
37 .code = KEY_RESTART,
38 .gpio = 24,
39 .active_low = 1,
40 .desc = "Reset button",
41 .type = EV_KEY,
42 .wakeup = 0,
43 .debounce_interval = 100,
44 .can_disable = 0,
45 }
46};
47static struct gpio_keys_platform_data net5501_buttons_data = {
48 .buttons = net5501_gpio_buttons,
49 .nbuttons = ARRAY_SIZE(net5501_gpio_buttons),
50 .poll_interval = 20,
51};
52
53static struct platform_device net5501_buttons_dev = {
54 .name = "gpio-keys-polled",
55 .id = 1,
56 .dev = {
57 .platform_data = &net5501_buttons_data,
58 }
59};
60
61static struct gpio_led net5501_leds[] = {
62 {
63 .name = "net5501:1",
64 .gpio = 6,
65 .default_trigger = "default-on",
66 .active_low = 1,
67 },
68};
69
70static struct gpio_led_platform_data net5501_leds_data = {
71 .num_leds = ARRAY_SIZE(net5501_leds),
72 .leds = net5501_leds,
73};
74
75static struct platform_device net5501_leds_dev = {
76 .name = "leds-gpio",
77 .id = -1,
78 .dev.platform_data = &net5501_leds_data,
79};
80
81static struct __initdata platform_device *net5501_devs[] = {
82 &net5501_buttons_dev,
83 &net5501_leds_dev,
84};
85
86static void __init register_net5501(void)
87{
88 /* Setup LED control through leds-gpio driver */
89 platform_add_devices(net5501_devs, ARRAY_SIZE(net5501_devs));
90}
91
92struct net5501_board {
93 u16 offset;
94 u16 len;
95 char *sig;
96};
97
98static struct net5501_board __initdata boards[] = {
99 { 0xb7b, 7, "net5501" }, /* net5501 v1.33/1.33c */
100 { 0xb1f, 7, "net5501" }, /* net5501 v1.32i */
101};
102
103static bool __init net5501_present(void)
104{
105 int i;
106 unsigned char *rombase, *bios;
107 bool found = false;
108
109 rombase = ioremap(BIOS_REGION_BASE, BIOS_REGION_SIZE - 1);
110 if (!rombase) {
111 printk(KERN_ERR "%s: failed to get rombase\n", KBUILD_MODNAME);
112 return found;
113 }
114
115 bios = rombase + 0x20; /* null terminated */
116
117 if (memcmp(bios, "comBIOS", 7))
118 goto unmap;
119
120 for (i = 0; i < ARRAY_SIZE(boards); i++) {
121 unsigned char *model = rombase + boards[i].offset;
122
123 if (!memcmp(model, boards[i].sig, boards[i].len)) {
124 printk(KERN_INFO "%s: system is recognized as \"%s\"\n",
125 KBUILD_MODNAME, model);
126
127 found = true;
128 break;
129 }
130 }
131
132unmap:
133 iounmap(rombase);
134 return found;
135}
136
137static int __init net5501_init(void)
138{
139 if (!is_geode())
140 return 0;
141
142 if (!net5501_present())
143 return 0;
144
145 register_net5501();
146
147 return 0;
148}
149
150module_init(net5501_init);
151
152MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
153MODULE_DESCRIPTION("Soekris net5501 System Setup");
154MODULE_LICENSE("GPL");
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index 7baed5135e0f..af1da7e623f9 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1,4 +1,3 @@
1obj-$(CONFIG_X86_INTEL_MID) += mrst.o 1obj-$(CONFIG_X86_INTEL_MID) += mrst.o
2obj-$(CONFIG_X86_INTEL_MID) += vrtc.o 2obj-$(CONFIG_X86_INTEL_MID) += vrtc.o
3obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o 3obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o
4obj-$(CONFIG_X86_MRST) += pmu.o
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 475e2cd0f3c3..e0a37233c0af 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -28,6 +28,8 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/notifier.h> 29#include <linux/notifier.h>
30#include <linux/mfd/intel_msic.h> 30#include <linux/mfd/intel_msic.h>
31#include <linux/gpio.h>
32#include <linux/i2c/tc35876x.h>
31 33
32#include <asm/setup.h> 34#include <asm/setup.h>
33#include <asm/mpspec_def.h> 35#include <asm/mpspec_def.h>
@@ -78,16 +80,11 @@ int sfi_mrtc_num;
78 80
79static void mrst_power_off(void) 81static void mrst_power_off(void)
80{ 82{
81 if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
82 intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 1);
83} 83}
84 84
85static void mrst_reboot(void) 85static void mrst_reboot(void)
86{ 86{
87 if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) 87 intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
88 intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
89 else
90 intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
91} 88}
92 89
93/* parse all the mtimer info to a static mtimer array */ 90/* parse all the mtimer info to a static mtimer array */
@@ -200,34 +197,28 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
200 197
201static unsigned long __init mrst_calibrate_tsc(void) 198static unsigned long __init mrst_calibrate_tsc(void)
202{ 199{
203 unsigned long flags, fast_calibrate; 200 unsigned long fast_calibrate;
204 if (__mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) { 201 u32 lo, hi, ratio, fsb;
205 u32 lo, hi, ratio, fsb; 202
206 203 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
207 rdmsr(MSR_IA32_PERF_STATUS, lo, hi); 204 pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
208 pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); 205 ratio = (hi >> 8) & 0x1f;
209 ratio = (hi >> 8) & 0x1f; 206 pr_debug("ratio is %d\n", ratio);
210 pr_debug("ratio is %d\n", ratio); 207 if (!ratio) {
211 if (!ratio) { 208 pr_err("read a zero ratio, should be incorrect!\n");
212 pr_err("read a zero ratio, should be incorrect!\n"); 209 pr_err("force tsc ratio to 16 ...\n");
213 pr_err("force tsc ratio to 16 ...\n"); 210 ratio = 16;
214 ratio = 16;
215 }
216 rdmsr(MSR_FSB_FREQ, lo, hi);
217 if ((lo & 0x7) == 0x7)
218 fsb = PENWELL_FSB_FREQ_83SKU;
219 else
220 fsb = PENWELL_FSB_FREQ_100SKU;
221 fast_calibrate = ratio * fsb;
222 pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
223 lapic_timer_frequency = fsb * 1000 / HZ;
224 /* mark tsc clocksource as reliable */
225 set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
226 } else {
227 local_irq_save(flags);
228 fast_calibrate = apbt_quick_calibrate();
229 local_irq_restore(flags);
230 } 211 }
212 rdmsr(MSR_FSB_FREQ, lo, hi);
213 if ((lo & 0x7) == 0x7)
214 fsb = PENWELL_FSB_FREQ_83SKU;
215 else
216 fsb = PENWELL_FSB_FREQ_100SKU;
217 fast_calibrate = ratio * fsb;
218 pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
219 lapic_timer_frequency = fsb * 1000 / HZ;
220 /* mark tsc clocksource as reliable */
221 set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
231 222
232 if (fast_calibrate) 223 if (fast_calibrate)
233 return fast_calibrate; 224 return fast_calibrate;
@@ -261,16 +252,11 @@ static void __cpuinit mrst_arch_setup(void)
261{ 252{
262 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) 253 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
263 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; 254 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
264 else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
265 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
266 else { 255 else {
267 pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n", 256 pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
268 boot_cpu_data.x86, boot_cpu_data.x86_model); 257 boot_cpu_data.x86, boot_cpu_data.x86_model);
269 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; 258 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
270 } 259 }
271 pr_debug("Moorestown CPU %s identified\n",
272 (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
273 "Lincroft" : "Penwell");
274} 260}
275 261
276/* MID systems don't have i8042 controller */ 262/* MID systems don't have i8042 controller */
@@ -686,6 +672,24 @@ static void *msic_ocd_platform_data(void *info)
686 return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD); 672 return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
687} 673}
688 674
675static void *msic_thermal_platform_data(void *info)
676{
677 return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
678}
679
680/* tc35876x DSI-LVDS bridge chip and panel platform data */
681static void *tc35876x_platform_data(void *data)
682{
683 static struct tc35876x_platform_data pdata;
684
685 /* gpio pins set to -1 will not be used by the driver */
686 pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
687 pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
688 pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
689
690 return &pdata;
691}
692
689static const struct devs_id __initconst device_ids[] = { 693static const struct devs_id __initconst device_ids[] = {
690 {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data}, 694 {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data},
691 {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, 695 {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
@@ -698,6 +702,7 @@ static const struct devs_id __initconst device_ids[] = {
698 {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, 702 {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
699 {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, 703 {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
700 {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data}, 704 {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data},
705 {"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data},
701 706
702 /* MSIC subdevices */ 707 /* MSIC subdevices */
703 {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data}, 708 {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
@@ -705,6 +710,7 @@ static const struct devs_id __initconst device_ids[] = {
705 {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data}, 710 {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data},
706 {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data}, 711 {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data},
707 {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data}, 712 {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data},
713 {"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data},
708 714
709 {}, 715 {},
710}; 716};
diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c
deleted file mode 100644
index c0ac06da57ac..000000000000
--- a/arch/x86/platform/mrst/pmu.c
+++ /dev/null
@@ -1,817 +0,0 @@
1/*
2 * mrst/pmu.c - driver for MRST Power Management Unit
3 *
4 * Copyright (c) 2011, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#include <linux/cpuidle.h>
21#include <linux/debugfs.h>
22#include <linux/delay.h>
23#include <linux/interrupt.h>
24#include <linux/module.h>
25#include <linux/pci.h>
26#include <linux/seq_file.h>
27#include <linux/sfi.h>
28#include <asm/intel_scu_ipc.h>
29#include "pmu.h"
30
31#define IPCMSG_FW_REVISION 0xF4
32
33struct mrst_device {
34 u16 pci_dev_num; /* DEBUG only */
35 u16 lss;
36 u16 latest_request;
37 unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */
38};
39
40/*
41 * comlete list of MRST PCI devices
42 */
43static struct mrst_device mrst_devs[] = {
44/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */
45/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */
46/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */
47/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */
48/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */
49/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */
50/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */
51/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */
52/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */
53/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */
54/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */
55/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */
56/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */
57/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */
58/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */
59/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */
60/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */
61/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */
62/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */
63/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */
64/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */
65/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */
66
67/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */
68
69/* 23 */ { 0x4102, 0 }, /* Lincroft */
70/* 24 */ { 0x4110, 0 }, /* Lincroft */
71};
72
73/* n.b. We ignore PCI-id 0x815 in LSS9 b/c Linux has no driver for it */
74static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
75static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
76 0x0804, 0x0805, 0x080f, 0};
77
78/* handle concurrent SMP invokations of pmu_pci_set_power_state() */
79static spinlock_t mrst_pmu_power_state_lock;
80
81static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */
82static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */
83
84static int graphics_is_off;
85static int lss_s0i3_enabled;
86static bool mrst_pmu_s0i3_enable;
87
88/* debug counters */
89static u32 pmu_wait_ready_calls;
90static u32 pmu_wait_ready_udelays;
91static u32 pmu_wait_ready_udelays_max;
92static u32 pmu_wait_done_calls;
93static u32 pmu_wait_done_udelays;
94static u32 pmu_wait_done_udelays_max;
95static u32 pmu_set_power_state_entry;
96static u32 pmu_set_power_state_send_cmd;
97
98static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
99{
100 int index = 0;
101
102 if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
103 index = pci_dev_num - 0x800;
104 else if (pci_dev_num == 0x084F)
105 index = 22;
106 else if (pci_dev_num == 0x4102)
107 index = 23;
108 else if (pci_dev_num == 0x4110)
109 index = 24;
110
111 if (pci_dev_num != mrst_devs[index].pci_dev_num) {
112 WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
113 return 0;
114 }
115
116 return &mrst_devs[index];
117}
118
119/**
120 * mrst_pmu_validate_cstates
121 * @dev: cpuidle_device
122 *
123 * Certain states are not appropriate for governor to pick in some cases.
124 * This function will be called as cpuidle_device's prepare callback and
125 * thus tells governor to ignore such states when selecting the next state
126 * to enter.
127 */
128
129#define IDLE_STATE4_IS_C6 4
130#define IDLE_STATE5_IS_S0I3 5
131
132int mrst_pmu_invalid_cstates(void)
133{
134 int cpu = smp_processor_id();
135
136 /*
137 * Demote to C4 if the PMU is busy.
138 * Since LSS changes leave the busy bit clear...
139 * busy means either the PMU is waiting for an ACK-C6 that
140 * isn't coming due to an MWAIT that returned immediately;
141 * or we returned from S0i3 successfully, and the PMU
142 * is not done sending us interrupts.
143 */
144 if (pmu_read_busy_status())
145 return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
146
147 /*
148 * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
149 * or if device LSS is insufficient, or the GPU is active,
150 * or if it has been explicitly disabled.
151 */
152 if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
153 !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
154 return 1 << IDLE_STATE5_IS_S0I3;
155 else
156 return 0;
157}
158
159/*
160 * pmu_update_wake_counters(): read PM_WKS, update wake_counters[]
161 * DEBUG only.
162 */
163static void pmu_update_wake_counters(void)
164{
165 int lss;
166 u32 wake_status;
167
168 wake_status = pmu_read_wks();
169
170 for (lss = 0; lss < MRST_NUM_LSS; ++lss) {
171 if (wake_status & (1 << lss))
172 wake_counters[lss]++;
173 }
174}
175
176int mrst_pmu_s0i3_entry(void)
177{
178 int status;
179
180 /* Clear any possible error conditions */
181 pmu_write_ics(0x300);
182
183 /* set wake control to current D-states */
184 pmu_write_wssc(S0I3_SSS_TARGET);
185
186 status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
187 pmu_update_wake_counters();
188 return status;
189}
190
191/* poll for maximum of 5ms for busy bit to clear */
192static int pmu_wait_ready(void)
193{
194 int udelays;
195
196 pmu_wait_ready_calls++;
197
198 for (udelays = 0; udelays < 500; ++udelays) {
199 if (udelays > pmu_wait_ready_udelays_max)
200 pmu_wait_ready_udelays_max = udelays;
201
202 if (pmu_read_busy_status() == 0)
203 return 0;
204
205 udelay(10);
206 pmu_wait_ready_udelays++;
207 }
208
209 /*
210 * if this fires, observe
211 * /sys/kernel/debug/mrst_pmu_wait_ready_calls
212 * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
213 */
214 WARN_ONCE(1, "SCU not ready for 5ms");
215 return -EBUSY;
216}
217/* poll for maximum of 50ms us for busy bit to clear */
218static int pmu_wait_done(void)
219{
220 int udelays;
221
222 pmu_wait_done_calls++;
223
224 for (udelays = 0; udelays < 500; ++udelays) {
225 if (udelays > pmu_wait_done_udelays_max)
226 pmu_wait_done_udelays_max = udelays;
227
228 if (pmu_read_busy_status() == 0)
229 return 0;
230
231 udelay(100);
232 pmu_wait_done_udelays++;
233 }
234
235 /*
236 * if this fires, observe
237 * /sys/kernel/debug/mrst_pmu_wait_done_calls
238 * /sys/kernel/debug/mrst_pmu_wait_done_udelays
239 */
240 WARN_ONCE(1, "SCU not done for 50ms");
241 return -EBUSY;
242}
243
244u32 mrst_pmu_msi_is_disabled(void)
245{
246 return pmu_msi_is_disabled();
247}
248
249void mrst_pmu_enable_msi(void)
250{
251 pmu_msi_enable();
252}
253
254/**
255 * pmu_irq - pmu driver interrupt handler
256 * Context: interrupt context
257 */
258static irqreturn_t pmu_irq(int irq, void *dummy)
259{
260 union pmu_pm_ics pmu_ics;
261
262 pmu_ics.value = pmu_read_ics();
263
264 if (!pmu_ics.bits.pending)
265 return IRQ_NONE;
266
267 switch (pmu_ics.bits.cause) {
268 case INT_SPURIOUS:
269 case INT_CMD_DONE:
270 case INT_CMD_ERR:
271 case INT_WAKE_RX:
272 case INT_SS_ERROR:
273 case INT_S0IX_MISS:
274 case INT_NO_ACKC6:
275 pmu_irq_stats[pmu_ics.bits.cause]++;
276 break;
277 default:
278 pmu_irq_stats[INT_INVALID]++;
279 }
280
281 pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
282
283 return IRQ_HANDLED;
284}
285
286/*
287 * Translate PCI power management to MRST LSS D-states
288 */
289static int pci_2_mrst_state(int lss, pci_power_t pci_state)
290{
291 switch (pci_state) {
292 case PCI_D0:
293 if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
294 return D0i1;
295 else
296 return D0;
297 case PCI_D1:
298 return D0i1;
299 case PCI_D2:
300 return D0i2;
301 case PCI_D3hot:
302 case PCI_D3cold:
303 return D0i3;
304 default:
305 WARN(1, "pci_state %d\n", pci_state);
306 return 0;
307 }
308}
309
310static int pmu_issue_command(u32 pm_ssc)
311{
312 union pmu_pm_set_cfg_cmd_t command;
313
314 if (pmu_read_busy_status()) {
315 pr_debug("pmu is busy, Operation not permitted\n");
316 return -1;
317 }
318
319 /*
320 * enable interrupts in PMU so that interrupts are
321 * propagated when ioc bit for a particular set
322 * command is set
323 */
324
325 pmu_irq_enable();
326
327 /* Configure the sub systems for pmu2 */
328
329 pmu_write_ssc(pm_ssc);
330
331 /*
332 * Send the set config command for pmu its configured
333 * for mode CM_IMMEDIATE & hence with No Trigger
334 */
335
336 command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
337 command.pmu2_params.d_param.cfg_delay = 0;
338 command.pmu2_params.d_param.rsvd = 0;
339
340 /* construct the command to send SET_CFG to particular PMU */
341 command.pmu2_params.d_param.cmd = SET_CFG_CMD;
342 command.pmu2_params.d_param.ioc = 0;
343 command.pmu2_params.d_param.mode_id = 0;
344 command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
345
346 /* write the value of PM_CMD into particular PMU */
347 pr_debug("pmu command being written %x\n",
348 command.pmu_pm_set_cfg_cmd_value);
349
350 pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value);
351
352 return 0;
353}
354
355static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
356{
357 u16 existing_request;
358 int i;
359
360 for (i = 0; ids[i]; ++i) {
361 struct mrst_device *mrst_dev;
362
363 mrst_dev = pci_id_2_mrst_dev(ids[i]);
364 if (unlikely(!mrst_dev))
365 continue;
366
367 existing_request = mrst_dev->latest_request;
368 if (existing_request < pci_state)
369 pci_state = existing_request;
370 }
371 return pci_state;
372}
373
374/**
375 * pmu_pci_set_power_state - Callback function is used by all the PCI devices
376 * for a platform specific device power on/shutdown.
377 */
378
379int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
380{
381 u32 old_sss, new_sss;
382 int status = 0;
383 struct mrst_device *mrst_dev;
384
385 pmu_set_power_state_entry++;
386
387 BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
388 BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
389
390 mrst_dev = pci_id_2_mrst_dev(pdev->device);
391 if (unlikely(!mrst_dev))
392 return -ENODEV;
393
394 mrst_dev->pci_state_counts[pci_state]++; /* count invocations */
395
396 /* PMU driver calls self as part of PCI initialization, ignore */
397 if (pdev->device == PCI_DEV_ID_MRST_PMU)
398 return 0;
399
400 BUG_ON(!pmu_reg); /* SW bug if called before initialized */
401
402 spin_lock(&mrst_pmu_power_state_lock);
403
404 if (pdev->d3_delay) {
405 dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
406 pdev->d3_delay);
407 pdev->d3_delay = 0;
408 }
409 /*
410 * If Lincroft graphics, simply remember state
411 */
412 if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY
413 && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) {
414 if (pci_state == PCI_D0)
415 graphics_is_off = 0;
416 else
417 graphics_is_off = 1;
418 goto ret;
419 }
420
421 if (!mrst_dev->lss)
422 goto ret; /* device with no LSS */
423
424 if (mrst_dev->latest_request == pci_state)
425 goto ret; /* no change */
426
427 mrst_dev->latest_request = pci_state; /* record latest request */
428
429 /*
430 * LSS9 and LSS10 contain multiple PCI devices.
431 * Use the lowest numbered (highest power) state in the LSS
432 */
433 if (mrst_dev->lss == 9)
434 pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state);
435 else if (mrst_dev->lss == 10)
436 pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
437
438 status = pmu_wait_ready();
439 if (status)
440 goto ret;
441
442 old_sss = pmu_read_sss();
443 new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
444 new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state),
445 mrst_dev->lss);
446
447 if (new_sss == old_sss)
448 goto ret; /* nothing to do */
449
450 pmu_set_power_state_send_cmd++;
451
452 status = pmu_issue_command(new_sss);
453
454 if (unlikely(status != 0)) {
455 dev_err(&pdev->dev, "Failed to Issue a PM command\n");
456 goto ret;
457 }
458
459 if (pmu_wait_done())
460 goto ret;
461
462 lss_s0i3_enabled =
463 ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
464ret:
465 spin_unlock(&mrst_pmu_power_state_lock);
466 return status;
467}
468
469#ifdef CONFIG_DEBUG_FS
470static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"};
471
472static inline const char *d0ix_name(int state)
473{
474 return d0ix_names[(int) state];
475}
476
477static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
478{
479 struct pci_dev *pdev = NULL;
480 u32 cur_pmsss;
481 int lss;
482
483 seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
484
485 cur_pmsss = pmu_read_sss();
486
487 seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
488
489 seq_printf(s, "0x%08X Current SSS ", cur_pmsss);
490 seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n");
491
492 if (cpumask_equal(cpu_online_mask, cpumask_of(0)))
493 seq_printf(s, "cpu0 is only cpu online\n");
494 else
495 seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n");
496
497 seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]");
498
499
500 for_each_pci_dev(pdev) {
501 int pos;
502 u16 pmcsr;
503 struct mrst_device *mrst_dev;
504 int i;
505
506 mrst_dev = pci_id_2_mrst_dev(pdev->device);
507
508 seq_printf(s, "%s %04x/%04X %-16.16s ",
509 dev_name(&pdev->dev),
510 pdev->vendor, pdev->device,
511 dev_driver_string(&pdev->dev));
512
513 if (unlikely (!mrst_dev)) {
514 seq_printf(s, " UNKNOWN\n");
515 continue;
516 }
517
518 if (mrst_dev->lss)
519 seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
520 d0ix_name(((cur_pmsss >>
521 (mrst_dev->lss * 2)) & 0x3)));
522 else
523 seq_printf(s, " ");
524
525 /* PCI PM config space setting */
526 pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
527 if (pos != 0) {
528 pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
529 seq_printf(s, "PCI-%-4s",
530 pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK));
531 } else {
532 seq_printf(s, " ");
533 }
534
535 seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request));
536 for (i = 0; i <= PCI_D3cold; ++i)
537 seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
538
539 if (mrst_dev->lss) {
540 unsigned int lssmask;
541
542 lssmask = SSMSK(D0i3, mrst_dev->lss);
543
544 if ((lssmask & S0I3_SSS_TARGET) &&
545 ((lssmask & cur_pmsss) !=
546 (lssmask & S0I3_SSS_TARGET)))
547 seq_printf(s , "[BLOCKS s0i3]");
548 }
549
550 seq_printf(s, "\n");
551 }
552 seq_printf(s, "Wake Counters:\n");
553 for (lss = 0; lss < MRST_NUM_LSS; ++lss)
554 seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]);
555
556 seq_printf(s, "Interrupt Counters:\n");
557 seq_printf(s,
558 "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n"
559 "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n"
560 "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n"
561 "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n",
562 pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE],
563 pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
564 pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
565 pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
566
567 seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n",
568 pmu_wait_ready_calls);
569 seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n",
570 pmu_wait_ready_udelays);
571 seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n",
572 pmu_wait_ready_udelays_max);
573 seq_printf(s, "mrst_pmu_wait_done_calls %8d\n",
574 pmu_wait_done_calls);
575 seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n",
576 pmu_wait_done_udelays);
577 seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n",
578 pmu_wait_done_udelays_max);
579 seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n",
580 pmu_set_power_state_entry);
581 seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n",
582 pmu_set_power_state_send_cmd);
583 seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
584
585 return 0;
586}
587
588static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
589{
590 return single_open(file, debug_mrst_pmu_show, NULL);
591}
592
593static const struct file_operations devices_state_operations = {
594 .open = debug_mrst_pmu_open,
595 .read = seq_read,
596 .llseek = seq_lseek,
597 .release = single_release,
598};
599#endif /* DEBUG_FS */
600
601/*
602 * Validate SCU PCI shim PCI vendor capability byte
603 * against LSS hard-coded in mrst_devs[] above.
604 * DEBUG only.
605 */
606static void pmu_scu_firmware_debug(void)
607{
608 struct pci_dev *pdev = NULL;
609
610 for_each_pci_dev(pdev) {
611 struct mrst_device *mrst_dev;
612 u8 pci_config_lss;
613 int pos;
614
615 mrst_dev = pci_id_2_mrst_dev(pdev->device);
616 if (unlikely(!mrst_dev)) {
617 printk(KERN_ERR FW_BUG "pmu: Unknown "
618 "PCI device 0x%04X\n", pdev->device);
619 continue;
620 }
621
622 if (mrst_dev->lss == 0)
623 continue; /* no LSS in our table */
624
625 pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
626 if (!pos != 0) {
627 printk(KERN_ERR FW_BUG "pmu: 0x%04X "
628 "missing PCI Vendor Capability\n",
629 pdev->device);
630 continue;
631 }
632 pci_read_config_byte(pdev, pos + 4, &pci_config_lss);
633 if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) {
634 printk(KERN_ERR FW_BUG "pmu: 0x%04X "
635 "invalid PCI Vendor Capability 0x%x "
636 " expected LSS 0x%X\n",
637 pdev->device, pci_config_lss, mrst_dev->lss);
638 continue;
639 }
640 pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK;
641
642 if (mrst_dev->lss == pci_config_lss)
643 continue;
644
645 printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n",
646 pdev->device, pci_config_lss, mrst_dev->lss);
647 }
648}
649
650/**
651 * pmu_probe
652 */
653static int __devinit pmu_probe(struct pci_dev *pdev,
654 const struct pci_device_id *pci_id)
655{
656 int ret;
657 struct mrst_pmu_reg *pmu;
658
659 /* Init the device */
660 ret = pci_enable_device(pdev);
661 if (ret) {
662 dev_err(&pdev->dev, "Unable to Enable PCI device\n");
663 return ret;
664 }
665
666 ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME);
667 if (ret < 0) {
668 dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n");
669 goto out_err1;
670 }
671
672 /* Map the memory of PMU reg base */
673 pmu = pci_iomap(pdev, 0, 0);
674 if (!pmu) {
675 dev_err(&pdev->dev, "Unable to map the PMU address space\n");
676 ret = -ENOMEM;
677 goto out_err2;
678 }
679
680#ifdef CONFIG_DEBUG_FS
681 /* /sys/kernel/debug/mrst_pmu */
682 (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO,
683 NULL, NULL, &devices_state_operations);
684#endif
685 pmu_reg = pmu; /* success */
686
687 if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) {
688 dev_err(&pdev->dev, "Registering isr has failed\n");
689 ret = -1;
690 goto out_err3;
691 }
692
693 pmu_scu_firmware_debug();
694
695 pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */
696
697 pmu_wait_ready();
698
699 pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */
700 pmu_write_cmd(0x201);
701
702 spin_lock_init(&mrst_pmu_power_state_lock);
703
704 /* Enable the hardware interrupt */
705 pmu_irq_enable();
706 return 0;
707
708out_err3:
709 free_irq(pdev->irq, NULL);
710 pci_iounmap(pdev, pmu_reg);
711 pmu_reg = NULL;
712out_err2:
713 pci_release_region(pdev, 0);
714out_err1:
715 pci_disable_device(pdev);
716 return ret;
717}
718
719static void __devexit pmu_remove(struct pci_dev *pdev)
720{
721 dev_err(&pdev->dev, "Mid PM pmu_remove called\n");
722
723 /* Freeing up the irq */
724 free_irq(pdev->irq, NULL);
725
726 pci_iounmap(pdev, pmu_reg);
727 pmu_reg = NULL;
728
729 /* disable the current PCI device */
730 pci_release_region(pdev, 0);
731 pci_disable_device(pdev);
732}
733
734static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = {
735 { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 },
736 { }
737};
738
739MODULE_DEVICE_TABLE(pci, pmu_pci_ids);
740
741static struct pci_driver driver = {
742 .name = MRST_PMU_DRV_NAME,
743 .id_table = pmu_pci_ids,
744 .probe = pmu_probe,
745 .remove = __devexit_p(pmu_remove),
746};
747
748/**
749 * pmu_pci_register - register the PMU driver as PCI device
750 */
751static int __init pmu_pci_register(void)
752{
753 return pci_register_driver(&driver);
754}
755
756/* Register and probe via fs_initcall() to preceed device_initcall() */
757fs_initcall(pmu_pci_register);
758
759static void __exit mid_pci_cleanup(void)
760{
761 pci_unregister_driver(&driver);
762}
763
764static int ia_major;
765static int ia_minor;
766
767static int pmu_sfi_parse_oem(struct sfi_table_header *table)
768{
769 struct sfi_table_simple *sb;
770
771 sb = (struct sfi_table_simple *)table;
772 ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
773 ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
774 printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
775 ia_major, ia_minor);
776
777 return 0;
778}
779
780static int __init scu_fw_check(void)
781{
782 int ret;
783 u32 fw_version;
784
785 if (!pmu_reg)
786 return 0; /* this driver didn't probe-out */
787
788 sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
789
790 if (ia_major < 0x6005 || ia_minor < 0x1525) {
791 WARN(1, "mrst_pmu: IA FW version too old\n");
792 return -1;
793 }
794
795 ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
796 &fw_version, 1);
797
798 if (ret) {
799 WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
800 } else {
801 int scu_major = (fw_version >> 8) & 0xFF;
802 int scu_minor = (fw_version >> 0) & 0xFF;
803
804 printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
805
806 if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
807 printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
808 mrst_pmu_s0i3_enable = true;
809 } else {
810 WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
811 scu_major, scu_minor);
812 }
813 }
814 return 0;
815}
816late_initcall(scu_fw_check);
817module_exit(mid_pci_cleanup);
diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h
deleted file mode 100644
index bfbfe64b167b..000000000000
--- a/arch/x86/platform/mrst/pmu.h
+++ /dev/null
@@ -1,234 +0,0 @@
1/*
2 * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c
3 *
4 * Copyright (c) 2011, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#ifndef _MRST_PMU_H_
21#define _MRST_PMU_H_
22
23#define PCI_DEV_ID_MRST_PMU 0x0810
24#define MRST_PMU_DRV_NAME "mrst_pmu"
25#define PCI_SUB_CLASS_MASK 0xFF00
26
27#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F
28#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80
29
30#define SUB_SYS_ALL_D0I1 0x01155555
31#define S0I3_WAKE_SOURCES 0x00001FFF
32
33#define PM_S0I3_COMMAND \
34 ((0 << 31) | /* Reserved */ \
35 (0 << 30) | /* Core must be idle */ \
36 (0xc2 << 22) | /* ACK C6 trigger */ \
37 (3 << 19) | /* Trigger on DMI message */ \
38 (3 << 16) | /* Enter S0i3 */ \
39 (0 << 13) | /* Numeric mode ID (sw) */ \
40 (3 << 9) | /* Trigger mode */ \
41 (0 << 8) | /* Do not interrupt */ \
42 (1 << 0)) /* Set configuration */
43
44#define LSS_DMI 0
45#define LSS_SD_HC0 1
46#define LSS_SD_HC1 2
47#define LSS_NAND 3
48#define LSS_IMAGING 4
49#define LSS_SECURITY 5
50#define LSS_DISPLAY 6
51#define LSS_USB_HC 7
52#define LSS_USB_OTG 8
53#define LSS_AUDIO 9
54#define LSS_AUDIO_LPE 9
55#define LSS_AUDIO_SSP 9
56#define LSS_I2C0 10
57#define LSS_I2C1 10
58#define LSS_I2C2 10
59#define LSS_KBD 10
60#define LSS_SPI0 10
61#define LSS_SPI1 10
62#define LSS_SPI2 10
63#define LSS_GPIO 10
64#define LSS_SRAM 11 /* used by SCU, do not touch */
65#define LSS_SD_HC2 12
66/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
67#define MRST_NUM_LSS 13
68
69#define MIN(a, b) (((a) < (b)) ? (a) : (b))
70
71#define SSMSK(mask, lss) ((mask) << ((lss) * 2))
72#define D0 0
73#define D0i1 1
74#define D0i2 2
75#define D0i3 3
76
77#define S0I3_SSS_TARGET ( \
78 SSMSK(D0i1, LSS_DMI) | \
79 SSMSK(D0i3, LSS_SD_HC0) | \
80 SSMSK(D0i3, LSS_SD_HC1) | \
81 SSMSK(D0i3, LSS_NAND) | \
82 SSMSK(D0i3, LSS_SD_HC2) | \
83 SSMSK(D0i3, LSS_IMAGING) | \
84 SSMSK(D0i3, LSS_SECURITY) | \
85 SSMSK(D0i3, LSS_DISPLAY) | \
86 SSMSK(D0i3, LSS_USB_HC) | \
87 SSMSK(D0i3, LSS_USB_OTG) | \
88 SSMSK(D0i3, LSS_AUDIO) | \
89 SSMSK(D0i1, LSS_I2C0))
90
91/*
92 * D0i1 on Langwell is Autonomous Clock Gating (ACG).
93 * Enable ACG on every LSS except camera and audio
94 */
95#define D0I1_ACG_SSS_TARGET \
96 (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
97
98enum cm_mode {
99 CM_NOP, /* ignore the config mode value */
100 CM_IMMEDIATE,
101 CM_DELAY,
102 CM_TRIGGER,
103 CM_INVALID
104};
105
106enum sys_state {
107 SYS_STATE_S0I0,
108 SYS_STATE_S0I1,
109 SYS_STATE_S0I2,
110 SYS_STATE_S0I3,
111 SYS_STATE_S3,
112 SYS_STATE_S5
113};
114
115#define SET_CFG_CMD 1
116
117enum int_status {
118 INT_SPURIOUS = 0,
119 INT_CMD_DONE = 1,
120 INT_CMD_ERR = 2,
121 INT_WAKE_RX = 3,
122 INT_SS_ERROR = 4,
123 INT_S0IX_MISS = 5,
124 INT_NO_ACKC6 = 6,
125 INT_INVALID = 7,
126};
127
128/* PMU register interface */
129static struct mrst_pmu_reg {
130 u32 pm_sts; /* 0x00 */
131 u32 pm_cmd; /* 0x04 */
132 u32 pm_ics; /* 0x08 */
133 u32 _resv1; /* 0x0C */
134 u32 pm_wkc[2]; /* 0x10 */
135 u32 pm_wks[2]; /* 0x18 */
136 u32 pm_ssc[4]; /* 0x20 */
137 u32 pm_sss[4]; /* 0x30 */
138 u32 pm_wssc[4]; /* 0x40 */
139 u32 pm_c3c4; /* 0x50 */
140 u32 pm_c5c6; /* 0x54 */
141 u32 pm_msi_disable; /* 0x58 */
142} *pmu_reg;
143
144static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); }
145static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); }
146static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); }
147static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); }
148
149static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
150static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
151static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
152static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
153static inline void pmu_write_wssc(u32 arg)
154 { writel(arg, &pmu_reg->pm_wssc[0]); }
155
156static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
157static inline u32 pmu_msi_is_disabled(void)
158 { return readl(&pmu_reg->pm_msi_disable); }
159
160union pmu_pm_ics {
161 struct {
162 u32 cause:8;
163 u32 enable:1;
164 u32 pending:1;
165 u32 reserved:22;
166 } bits;
167 u32 value;
168};
169
170static inline void pmu_irq_enable(void)
171{
172 union pmu_pm_ics pmu_ics;
173
174 pmu_ics.value = pmu_read_ics();
175 pmu_ics.bits.enable = 1;
176 pmu_write_ics(pmu_ics.value);
177}
178
179union pmu_pm_status {
180 struct {
181 u32 pmu_rev:8;
182 u32 pmu_busy:1;
183 u32 mode_id:4;
184 u32 Reserved:19;
185 } pmu_status_parts;
186 u32 pmu_status_value;
187};
188
189static inline int pmu_read_busy_status(void)
190{
191 union pmu_pm_status result;
192
193 result.pmu_status_value = pmu_read_sts();
194
195 return result.pmu_status_parts.pmu_busy;
196}
197
198/* pmu set config parameters */
199struct cfg_delay_param_t {
200 u32 cmd:8;
201 u32 ioc:1;
202 u32 cfg_mode:4;
203 u32 mode_id:3;
204 u32 sys_state:3;
205 u32 cfg_delay:8;
206 u32 rsvd:5;
207};
208
209struct cfg_trig_param_t {
210 u32 cmd:8;
211 u32 ioc:1;
212 u32 cfg_mode:4;
213 u32 mode_id:3;
214 u32 sys_state:3;
215 u32 cfg_trig_type:3;
216 u32 cfg_trig_val:8;
217 u32 cmbi:1;
218 u32 rsvd1:1;
219};
220
221union pmu_pm_set_cfg_cmd_t {
222 union {
223 struct cfg_delay_param_t d_param;
224 struct cfg_trig_param_t t_param;
225 } pmu2_params;
226 u32 pmu_pm_set_cfg_cmd_value;
227};
228
229#ifdef FUTURE_PATCH
230extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
231#else
232static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
233#endif
234#endif
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 2b235b77d9ab..23e5b9d7977b 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -23,7 +23,66 @@
23#define XO15_SCI_CLASS DRV_NAME 23#define XO15_SCI_CLASS DRV_NAME
24#define XO15_SCI_DEVICE_NAME "OLPC XO-1.5 SCI" 24#define XO15_SCI_DEVICE_NAME "OLPC XO-1.5 SCI"
25 25
26static unsigned long xo15_sci_gpe; 26static unsigned long xo15_sci_gpe;
27static bool lid_wake_on_close;
28
29/*
30 * The normal ACPI LID wakeup behavior is wake-on-open, but not
31 * wake-on-close. This is implemented as standard by the XO-1.5 DSDT.
32 *
33 * We provide here a sysfs attribute that will additionally enable
34 * wake-on-close behavior. This is useful (e.g.) when we oportunistically
35 * suspend with the display running; if the lid is then closed, we want to
36 * wake up to turn the display off.
37 *
38 * This is controlled through a custom method in the XO-1.5 DSDT.
39 */
40static int set_lid_wake_behavior(bool wake_on_close)
41{
42 struct acpi_object_list arg_list;
43 union acpi_object arg;
44 acpi_status status;
45
46 arg_list.count = 1;
47 arg_list.pointer = &arg;
48 arg.type = ACPI_TYPE_INTEGER;
49 arg.integer.value = wake_on_close;
50
51 status = acpi_evaluate_object(NULL, "\\_SB.PCI0.LID.LIDW", &arg_list, NULL);
52 if (ACPI_FAILURE(status)) {
53 pr_warning(PFX "failed to set lid behavior\n");
54 return 1;
55 }
56
57 lid_wake_on_close = wake_on_close;
58
59 return 0;
60}
61
62static ssize_t
63lid_wake_on_close_show(struct kobject *s, struct kobj_attribute *attr, char *buf)
64{
65 return sprintf(buf, "%u\n", lid_wake_on_close);
66}
67
68static ssize_t lid_wake_on_close_store(struct kobject *s,
69 struct kobj_attribute *attr,
70 const char *buf, size_t n)
71{
72 unsigned int val;
73
74 if (sscanf(buf, "%u", &val) != 1)
75 return -EINVAL;
76
77 set_lid_wake_behavior(!!val);
78
79 return n;
80}
81
82static struct kobj_attribute lid_wake_on_close_attr =
83 __ATTR(lid_wake_on_close, 0644,
84 lid_wake_on_close_show,
85 lid_wake_on_close_store);
27 86
28static void battery_status_changed(void) 87static void battery_status_changed(void)
29{ 88{
@@ -91,6 +150,7 @@ static int xo15_sci_add(struct acpi_device *device)
91{ 150{
92 unsigned long long tmp; 151 unsigned long long tmp;
93 acpi_status status; 152 acpi_status status;
153 int r;
94 154
95 if (!device) 155 if (!device)
96 return -EINVAL; 156 return -EINVAL;
@@ -112,6 +172,10 @@ static int xo15_sci_add(struct acpi_device *device)
112 172
113 dev_info(&device->dev, "Initialized, GPE = 0x%lx\n", xo15_sci_gpe); 173 dev_info(&device->dev, "Initialized, GPE = 0x%lx\n", xo15_sci_gpe);
114 174
175 r = sysfs_create_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
176 if (r)
177 goto err_sysfs;
178
115 /* Flush queue, and enable all SCI events */ 179 /* Flush queue, and enable all SCI events */
116 process_sci_queue(); 180 process_sci_queue();
117 olpc_ec_mask_write(EC_SCI_SRC_ALL); 181 olpc_ec_mask_write(EC_SCI_SRC_ALL);
@@ -123,6 +187,11 @@ static int xo15_sci_add(struct acpi_device *device)
123 device_init_wakeup(&device->dev, true); 187 device_init_wakeup(&device->dev, true);
124 188
125 return 0; 189 return 0;
190
191err_sysfs:
192 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
193 cancel_work_sync(&sci_work);
194 return r;
126} 195}
127 196
128static int xo15_sci_remove(struct acpi_device *device, int type) 197static int xo15_sci_remove(struct acpi_device *device, int type)
@@ -130,6 +199,7 @@ static int xo15_sci_remove(struct acpi_device *device, int type)
130 acpi_disable_gpe(NULL, xo15_sci_gpe); 199 acpi_disable_gpe(NULL, xo15_sci_gpe);
131 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler); 200 acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
132 cancel_work_sync(&sci_work); 201 cancel_work_sync(&sci_work);
202 sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
133 return 0; 203 return 0;
134} 204}
135 205
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 7cce722667b8..a4bee53c2e54 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -20,6 +20,8 @@
20#include <linux/platform_device.h> 20#include <linux/platform_device.h>
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/syscore_ops.h> 22#include <linux/syscore_ops.h>
23#include <linux/debugfs.h>
24#include <linux/mutex.h>
23 25
24#include <asm/geode.h> 26#include <asm/geode.h>
25#include <asm/setup.h> 27#include <asm/setup.h>
@@ -31,6 +33,15 @@ EXPORT_SYMBOL_GPL(olpc_platform_info);
31 33
32static DEFINE_SPINLOCK(ec_lock); 34static DEFINE_SPINLOCK(ec_lock);
33 35
36/* debugfs interface to EC commands */
37#define EC_MAX_CMD_ARGS (5 + 1) /* cmd byte + 5 args */
38#define EC_MAX_CMD_REPLY (8)
39
40static struct dentry *ec_debugfs_dir;
41static DEFINE_MUTEX(ec_debugfs_cmd_lock);
42static unsigned char ec_debugfs_resp[EC_MAX_CMD_REPLY];
43static unsigned int ec_debugfs_resp_bytes;
44
34/* EC event mask to be applied during suspend (defining wakeup sources). */ 45/* EC event mask to be applied during suspend (defining wakeup sources). */
35static u16 ec_wakeup_mask; 46static u16 ec_wakeup_mask;
36 47
@@ -269,6 +280,91 @@ int olpc_ec_sci_query(u16 *sci_value)
269} 280}
270EXPORT_SYMBOL_GPL(olpc_ec_sci_query); 281EXPORT_SYMBOL_GPL(olpc_ec_sci_query);
271 282
283static ssize_t ec_debugfs_cmd_write(struct file *file, const char __user *buf,
284 size_t size, loff_t *ppos)
285{
286 int i, m;
287 unsigned char ec_cmd[EC_MAX_CMD_ARGS];
288 unsigned int ec_cmd_int[EC_MAX_CMD_ARGS];
289 char cmdbuf[64];
290 int ec_cmd_bytes;
291
292 mutex_lock(&ec_debugfs_cmd_lock);
293
294 size = simple_write_to_buffer(cmdbuf, sizeof(cmdbuf), ppos, buf, size);
295
296 m = sscanf(cmdbuf, "%x:%u %x %x %x %x %x", &ec_cmd_int[0],
297 &ec_debugfs_resp_bytes,
298 &ec_cmd_int[1], &ec_cmd_int[2], &ec_cmd_int[3],
299 &ec_cmd_int[4], &ec_cmd_int[5]);
300 if (m < 2 || ec_debugfs_resp_bytes > EC_MAX_CMD_REPLY) {
301 /* reset to prevent overflow on read */
302 ec_debugfs_resp_bytes = 0;
303
304 printk(KERN_DEBUG "olpc-ec: bad ec cmd: "
305 "cmd:response-count [arg1 [arg2 ...]]\n");
306 size = -EINVAL;
307 goto out;
308 }
309
310 /* convert scanf'd ints to char */
311 ec_cmd_bytes = m - 2;
312 for (i = 0; i <= ec_cmd_bytes; i++)
313 ec_cmd[i] = ec_cmd_int[i];
314
315 printk(KERN_DEBUG "olpc-ec: debugfs cmd 0x%02x with %d args "
316 "%02x %02x %02x %02x %02x, want %d returns\n",
317 ec_cmd[0], ec_cmd_bytes, ec_cmd[1], ec_cmd[2], ec_cmd[3],
318 ec_cmd[4], ec_cmd[5], ec_debugfs_resp_bytes);
319
320 olpc_ec_cmd(ec_cmd[0], (ec_cmd_bytes == 0) ? NULL : &ec_cmd[1],
321 ec_cmd_bytes, ec_debugfs_resp, ec_debugfs_resp_bytes);
322
323 printk(KERN_DEBUG "olpc-ec: response "
324 "%02x %02x %02x %02x %02x %02x %02x %02x (%d bytes expected)\n",
325 ec_debugfs_resp[0], ec_debugfs_resp[1], ec_debugfs_resp[2],
326 ec_debugfs_resp[3], ec_debugfs_resp[4], ec_debugfs_resp[5],
327 ec_debugfs_resp[6], ec_debugfs_resp[7], ec_debugfs_resp_bytes);
328
329out:
330 mutex_unlock(&ec_debugfs_cmd_lock);
331 return size;
332}
333
334static ssize_t ec_debugfs_cmd_read(struct file *file, char __user *buf,
335 size_t size, loff_t *ppos)
336{
337 unsigned int i, r;
338 char *rp;
339 char respbuf[64];
340
341 mutex_lock(&ec_debugfs_cmd_lock);
342 rp = respbuf;
343 rp += sprintf(rp, "%02x", ec_debugfs_resp[0]);
344 for (i = 1; i < ec_debugfs_resp_bytes; i++)
345 rp += sprintf(rp, ", %02x", ec_debugfs_resp[i]);
346 mutex_unlock(&ec_debugfs_cmd_lock);
347 rp += sprintf(rp, "\n");
348
349 r = rp - respbuf;
350 return simple_read_from_buffer(buf, size, ppos, respbuf, r);
351}
352
353static const struct file_operations ec_debugfs_genops = {
354 .write = ec_debugfs_cmd_write,
355 .read = ec_debugfs_cmd_read,
356};
357
358static void setup_debugfs(void)
359{
360 ec_debugfs_dir = debugfs_create_dir("olpc-ec", 0);
361 if (ec_debugfs_dir == ERR_PTR(-ENODEV))
362 return;
363
364 debugfs_create_file("cmd", 0600, ec_debugfs_dir, NULL,
365 &ec_debugfs_genops);
366}
367
272static int olpc_ec_suspend(void) 368static int olpc_ec_suspend(void)
273{ 369{
274 return olpc_ec_mask_write(ec_wakeup_mask); 370 return olpc_ec_mask_write(ec_wakeup_mask);
@@ -372,6 +468,7 @@ static int __init olpc_init(void)
372 } 468 }
373 469
374 register_syscore_ops(&olpc_syscore_ops); 470 register_syscore_ops(&olpc_syscore_ops);
471 setup_debugfs();
375 472
376 return 0; 473 return 0;
377} 474}
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 7e004acbe526..7a9ad30d6c9f 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -17,8 +17,6 @@
17/* Verify that the configuration block really is there */ 17/* Verify that the configuration block really is there */
18#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) 18#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
19 19
20#define NAME "scx200"
21
22MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>"); 20MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
23MODULE_DESCRIPTION("NatSemi SCx200 Driver"); 21MODULE_DESCRIPTION("NatSemi SCx200 Driver");
24MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
@@ -29,10 +27,10 @@ unsigned long scx200_gpio_shadow[2];
29unsigned scx200_cb_base = 0; 27unsigned scx200_cb_base = 0;
30 28
31static struct pci_device_id scx200_tbl[] = { 29static struct pci_device_id scx200_tbl[] = {
32 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) }, 30 { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
33 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) }, 31 { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
34 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) }, 32 { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
35 { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) }, 33 { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
36 { }, 34 { },
37}; 35};
38MODULE_DEVICE_TABLE(pci,scx200_tbl); 36MODULE_DEVICE_TABLE(pci,scx200_tbl);
@@ -63,10 +61,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
63 if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || 61 if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
64 pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) { 62 pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
65 base = pci_resource_start(pdev, 0); 63 base = pci_resource_start(pdev, 0);
66 printk(KERN_INFO NAME ": GPIO base 0x%x\n", base); 64 pr_info("GPIO base 0x%x\n", base);
67 65
68 if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) { 66 if (!request_region(base, SCx200_GPIO_SIZE,
69 printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n"); 67 "NatSemi SCx200 GPIO")) {
68 pr_err("can't allocate I/O for GPIOs\n");
70 return -EBUSY; 69 return -EBUSY;
71 } 70 }
72 71
@@ -82,11 +81,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
82 if (scx200_cb_probe(base)) { 81 if (scx200_cb_probe(base)) {
83 scx200_cb_base = base; 82 scx200_cb_base = base;
84 } else { 83 } else {
85 printk(KERN_WARNING NAME ": Configuration Block not found\n"); 84 pr_warn("Configuration Block not found\n");
86 return -ENODEV; 85 return -ENODEV;
87 } 86 }
88 } 87 }
89 printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base); 88 pr_info("Configuration Block base 0x%x\n", scx200_cb_base);
90 } 89 }
91 90
92 return 0; 91 return 0;
@@ -111,8 +110,7 @@ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
111 110
112static int __init scx200_init(void) 111static int __init scx200_init(void)
113{ 112{
114 printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); 113 pr_info("NatSemi SCx200 Driver\n");
115
116 return pci_register_driver(&scx200_pci_driver); 114 return pci_register_driver(&scx200_pci_driver);
117} 115}
118 116
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 9f29a01ee1b3..5032e0d19b86 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -37,7 +37,7 @@ static void uv_rtc_timer_setup(enum clock_event_mode,
37 37
38static struct clocksource clocksource_uv = { 38static struct clocksource clocksource_uv = {
39 .name = RTC_NAME, 39 .name = RTC_NAME,
40 .rating = 400, 40 .rating = 299,
41 .read = uv_read_rtc, 41 .read = uv_read_rtc,
42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, 42 .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
43 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 43 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -379,10 +379,6 @@ static __init int uv_rtc_setup_clock(void)
379 if (!is_uv_system()) 379 if (!is_uv_system())
380 return -ENODEV; 380 return -ENODEV;
381 381
382 /* If single blade, prefer tsc */
383 if (uv_num_possible_blades() == 1)
384 clocksource_uv.rating = 250;
385
386 rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second); 382 rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
387 if (rc) 383 if (rc)
388 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); 384 printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index f10c0afa1cb4..218cdb16163c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -20,6 +20,7 @@
20#include <asm/xcr.h> 20#include <asm/xcr.h>
21#include <asm/suspend.h> 21#include <asm/suspend.h>
22#include <asm/debugreg.h> 22#include <asm/debugreg.h>
23#include <asm/fpu-internal.h> /* pcntxt_mask */
23 24
24#ifdef CONFIG_X86_32 25#ifdef CONFIG_X86_32
25static struct saved_context saved_context; 26static struct saved_context saved_context;
@@ -114,7 +115,7 @@ static void __save_processor_state(struct saved_context *ctxt)
114void save_processor_state(void) 115void save_processor_state(void)
115{ 116{
116 __save_processor_state(&saved_context); 117 __save_processor_state(&saved_context);
117 save_sched_clock_state(); 118 x86_platform.save_sched_clock_state();
118} 119}
119#ifdef CONFIG_X86_32 120#ifdef CONFIG_X86_32
120EXPORT_SYMBOL(save_processor_state); 121EXPORT_SYMBOL(save_processor_state);
@@ -224,6 +225,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
224 fix_processor_context(); 225 fix_processor_context();
225 226
226 do_fpu_end(); 227 do_fpu_end();
228 x86_platform.restore_sched_clock_state();
227 mtrr_bp_restore(); 229 mtrr_bp_restore();
228} 230}
229 231
@@ -231,7 +233,6 @@ static void __restore_processor_state(struct saved_context *ctxt)
231void restore_processor_state(void) 233void restore_processor_state(void)
232{ 234{
233 __restore_processor_state(&saved_context); 235 __restore_processor_state(&saved_context);
234 restore_sched_clock_state();
235} 236}
236#ifdef CONFIG_X86_32 237#ifdef CONFIG_X86_32
237EXPORT_SYMBOL(restore_processor_state); 238EXPORT_SYMBOL(restore_processor_state);
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 3769079874d8..74202c1910cd 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -10,7 +10,6 @@
10#include <linux/suspend.h> 10#include <linux/suspend.h>
11#include <linux/bootmem.h> 11#include <linux/bootmem.h>
12 12
13#include <asm/system.h>
14#include <asm/page.h> 13#include <asm/page.h>
15#include <asm/pgtable.h> 14#include <asm/pgtable.h>
16#include <asm/mmzone.h> 15#include <asm/mmzone.h>
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 031cef84fe43..29f9f0554f7d 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -288,7 +288,7 @@
288279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend 288279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
289280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive 289280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
290281 i386 mq_notify sys_mq_notify compat_sys_mq_notify 290281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
291282 i386 mq_getsetaddr sys_mq_getsetattr compat_sys_mq_getsetattr 291282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
292283 i386 kexec_load sys_kexec_load compat_sys_kexec_load 292283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
293284 i386 waitid sys_waitid compat_sys_waitid 293284 i386 waitid sys_waitid compat_sys_waitid
294# 285 sys_setaltroot 294# 285 sys_setaltroot
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index b2b54d2edf53..9926e11a772d 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -15,8 +15,8 @@ config UML_X86
15 select GENERIC_FIND_FIRST_BIT 15 select GENERIC_FIND_FIRST_BIT
16 16
17config 64BIT 17config 64BIT
18 bool 18 bool "64-bit kernel" if SUBARCH = "x86"
19 default SUBARCH = "x86_64" 19 default SUBARCH != "i386"
20 20
21config X86_32 21config X86_32
22 def_bool !64BIT 22 def_bool !64BIT
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index 2c32df6fe231..04f82e020f2b 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -17,6 +17,16 @@
17#define ARCH_IS_STACKGROW(address) \ 17#define ARCH_IS_STACKGROW(address) \
18 (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs)) 18 (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs))
19 19
20#include <asm/user.h>
21
22/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
23static inline void rep_nop(void)
24{
25 __asm__ __volatile__("rep;nop": : :"memory");
26}
27
28#define cpu_relax() rep_nop()
29
20#include <asm/processor-generic.h> 30#include <asm/processor-generic.h>
21 31
22#endif 32#endif
diff --git a/arch/x86/um/asm/processor_32.h b/arch/x86/um/asm/processor_32.h
index 018f732704dd..6c6689e574ce 100644
--- a/arch/x86/um/asm/processor_32.h
+++ b/arch/x86/um/asm/processor_32.h
@@ -45,16 +45,6 @@ static inline void arch_copy_thread(struct arch_thread *from,
45 memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array)); 45 memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array));
46} 46}
47 47
48#include <asm/user.h>
49
50/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
51static inline void rep_nop(void)
52{
53 __asm__ __volatile__("rep;nop": : :"memory");
54}
55
56#define cpu_relax() rep_nop()
57
58/* 48/*
59 * Default implementation of macro that returns current 49 * Default implementation of macro that returns current
60 * instruction pointer ("program counter"). Stolen 50 * instruction pointer ("program counter"). Stolen
diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h
index 61de92d916c3..4b02a8455bd1 100644
--- a/arch/x86/um/asm/processor_64.h
+++ b/arch/x86/um/asm/processor_64.h
@@ -14,14 +14,6 @@ struct arch_thread {
14 struct faultinfo faultinfo; 14 struct faultinfo faultinfo;
15}; 15};
16 16
17/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
18static inline void rep_nop(void)
19{
20 __asm__ __volatile__("rep;nop": : :"memory");
21}
22
23#define cpu_relax() rep_nop()
24
25#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \ 17#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \
26 .debugregs_seq = 0, \ 18 .debugregs_seq = 0, \
27 .fs = 0, \ 19 .fs = 0, \
@@ -37,8 +29,6 @@ static inline void arch_copy_thread(struct arch_thread *from,
37 to->fs = from->fs; 29 to->fs = from->fs;
38} 30}
39 31
40#include <asm/user.h>
41
42#define current_text_addr() \ 32#define current_text_addr() \
43 ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; }) 33 ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
44 34
diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c
index a1fba5fb9dbe..17d88cf2c6c4 100644
--- a/arch/x86/um/bugs_32.c
+++ b/arch/x86/um/bugs_32.c
@@ -13,8 +13,6 @@
13static int host_has_cmov = 1; 13static int host_has_cmov = 1;
14static jmp_buf cmov_test_return; 14static jmp_buf cmov_test_return;
15 15
16#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
17
18static void cmov_sigill_test_handler(int sig) 16static void cmov_sigill_test_handler(int sig)
19{ 17{
20 host_has_cmov = 0; 18 host_has_cmov = 0;
@@ -51,7 +49,7 @@ void arch_examine_signal(int sig, struct uml_pt_regs *regs)
51 * This is testing for a cmov (0x0f 0x4x) instruction causing a 49 * This is testing for a cmov (0x0f 0x4x) instruction causing a
52 * SIGILL in init. 50 * SIGILL in init.
53 */ 51 */
54 if ((sig != SIGILL) || (TASK_PID(get_current()) != 1)) 52 if ((sig != SIGILL) || (get_current_pid() != 1))
55 return; 53 return;
56 54
57 if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) { 55 if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) {
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
index 639900a6fde9..f40281e5d6a2 100644
--- a/arch/x86/um/mem_32.c
+++ b/arch/x86/um/mem_32.c
@@ -23,14 +23,6 @@ static int __init gate_vma_init(void)
23 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; 23 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
24 gate_vma.vm_page_prot = __P101; 24 gate_vma.vm_page_prot = __P101;
25 25
26 /*
27 * Make sure the vDSO gets into every core dump.
28 * Dumping its contents makes post-mortem fully interpretable later
29 * without matching up the same kernel and hardware config to see
30 * what PC values meant.
31 */
32 gate_vma.vm_flags |= VM_ALWAYSDUMP;
33
34 return 0; 26 return 0;
35} 27}
36__initcall(gate_vma_init); 28__initcall(gate_vma_init);
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 91f4ec9a0a56..af91901babb8 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -64,8 +64,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
64 64
65 err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, 65 err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
66 VM_READ|VM_EXEC| 66 VM_READ|VM_EXEC|
67 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 67 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
68 VM_ALWAYSDUMP,
69 vdsop); 68 vdsop);
70 69
71 up_write(&mm->mmap_sem); 70 up_write(&mm->mmap_sem);
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 6bc0e723b6e8..885eff49d6ab 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -70,100 +70,98 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
70 return ret; 70 return ret;
71} 71}
72 72
73notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
74{
75 long ret;
76
77 asm("syscall" : "=a" (ret) :
78 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
79 return ret;
80}
81
82
73notrace static inline long vgetns(void) 83notrace static inline long vgetns(void)
74{ 84{
75 long v; 85 long v;
76 cycles_t cycles; 86 cycles_t cycles;
77 if (gtod->clock.vclock_mode == VCLOCK_TSC) 87 if (gtod->clock.vclock_mode == VCLOCK_TSC)
78 cycles = vread_tsc(); 88 cycles = vread_tsc();
79 else 89 else if (gtod->clock.vclock_mode == VCLOCK_HPET)
80 cycles = vread_hpet(); 90 cycles = vread_hpet();
91 else
92 return 0;
81 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; 93 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
82 return (v * gtod->clock.mult) >> gtod->clock.shift; 94 return (v * gtod->clock.mult) >> gtod->clock.shift;
83} 95}
84 96
85notrace static noinline int do_realtime(struct timespec *ts) 97/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
98notrace static int __always_inline do_realtime(struct timespec *ts)
86{ 99{
87 unsigned long seq, ns; 100 unsigned long seq, ns;
101 int mode;
102
88 do { 103 do {
89 seq = read_seqbegin(&gtod->lock); 104 seq = read_seqcount_begin(&gtod->seq);
105 mode = gtod->clock.vclock_mode;
90 ts->tv_sec = gtod->wall_time_sec; 106 ts->tv_sec = gtod->wall_time_sec;
91 ts->tv_nsec = gtod->wall_time_nsec; 107 ts->tv_nsec = gtod->wall_time_nsec;
92 ns = vgetns(); 108 ns = vgetns();
93 } while (unlikely(read_seqretry(&gtod->lock, seq))); 109 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
110
94 timespec_add_ns(ts, ns); 111 timespec_add_ns(ts, ns);
95 return 0; 112 return mode;
96} 113}
97 114
98notrace static noinline int do_monotonic(struct timespec *ts) 115notrace static int do_monotonic(struct timespec *ts)
99{ 116{
100 unsigned long seq, ns, secs; 117 unsigned long seq, ns;
118 int mode;
119
101 do { 120 do {
102 seq = read_seqbegin(&gtod->lock); 121 seq = read_seqcount_begin(&gtod->seq);
103 secs = gtod->wall_time_sec; 122 mode = gtod->clock.vclock_mode;
104 ns = gtod->wall_time_nsec + vgetns(); 123 ts->tv_sec = gtod->monotonic_time_sec;
105 secs += gtod->wall_to_monotonic.tv_sec; 124 ts->tv_nsec = gtod->monotonic_time_nsec;
106 ns += gtod->wall_to_monotonic.tv_nsec; 125 ns = vgetns();
107 } while (unlikely(read_seqretry(&gtod->lock, seq))); 126 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
108 127 timespec_add_ns(ts, ns);
109 /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec
110 * are all guaranteed to be nonnegative.
111 */
112 while (ns >= NSEC_PER_SEC) {
113 ns -= NSEC_PER_SEC;
114 ++secs;
115 }
116 ts->tv_sec = secs;
117 ts->tv_nsec = ns;
118 128
119 return 0; 129 return mode;
120} 130}
121 131
122notrace static noinline int do_realtime_coarse(struct timespec *ts) 132notrace static int do_realtime_coarse(struct timespec *ts)
123{ 133{
124 unsigned long seq; 134 unsigned long seq;
125 do { 135 do {
126 seq = read_seqbegin(&gtod->lock); 136 seq = read_seqcount_begin(&gtod->seq);
127 ts->tv_sec = gtod->wall_time_coarse.tv_sec; 137 ts->tv_sec = gtod->wall_time_coarse.tv_sec;
128 ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; 138 ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
129 } while (unlikely(read_seqretry(&gtod->lock, seq))); 139 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
130 return 0; 140 return 0;
131} 141}
132 142
133notrace static noinline int do_monotonic_coarse(struct timespec *ts) 143notrace static int do_monotonic_coarse(struct timespec *ts)
134{ 144{
135 unsigned long seq, ns, secs; 145 unsigned long seq;
136 do { 146 do {
137 seq = read_seqbegin(&gtod->lock); 147 seq = read_seqcount_begin(&gtod->seq);
138 secs = gtod->wall_time_coarse.tv_sec; 148 ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
139 ns = gtod->wall_time_coarse.tv_nsec; 149 ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
140 secs += gtod->wall_to_monotonic.tv_sec; 150 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
141 ns += gtod->wall_to_monotonic.tv_nsec;
142 } while (unlikely(read_seqretry(&gtod->lock, seq)));
143
144 /* wall_time_nsec and wall_to_monotonic.tv_nsec are
145 * guaranteed to be between 0 and NSEC_PER_SEC.
146 */
147 if (ns >= NSEC_PER_SEC) {
148 ns -= NSEC_PER_SEC;
149 ++secs;
150 }
151 ts->tv_sec = secs;
152 ts->tv_nsec = ns;
153 151
154 return 0; 152 return 0;
155} 153}
156 154
157notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 155notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
158{ 156{
157 int ret = VCLOCK_NONE;
158
159 switch (clock) { 159 switch (clock) {
160 case CLOCK_REALTIME: 160 case CLOCK_REALTIME:
161 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) 161 ret = do_realtime(ts);
162 return do_realtime(ts);
163 break; 162 break;
164 case CLOCK_MONOTONIC: 163 case CLOCK_MONOTONIC:
165 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) 164 ret = do_monotonic(ts);
166 return do_monotonic(ts);
167 break; 165 break;
168 case CLOCK_REALTIME_COARSE: 166 case CLOCK_REALTIME_COARSE:
169 return do_realtime_coarse(ts); 167 return do_realtime_coarse(ts);
@@ -171,32 +169,33 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
171 return do_monotonic_coarse(ts); 169 return do_monotonic_coarse(ts);
172 } 170 }
173 171
174 return vdso_fallback_gettime(clock, ts); 172 if (ret == VCLOCK_NONE)
173 return vdso_fallback_gettime(clock, ts);
174 return 0;
175} 175}
176int clock_gettime(clockid_t, struct timespec *) 176int clock_gettime(clockid_t, struct timespec *)
177 __attribute__((weak, alias("__vdso_clock_gettime"))); 177 __attribute__((weak, alias("__vdso_clock_gettime")));
178 178
179notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 179notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
180{ 180{
181 long ret; 181 long ret = VCLOCK_NONE;
182 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) { 182
183 if (likely(tv != NULL)) { 183 if (likely(tv != NULL)) {
184 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != 184 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
185 offsetof(struct timespec, tv_nsec) || 185 offsetof(struct timespec, tv_nsec) ||
186 sizeof(*tv) != sizeof(struct timespec)); 186 sizeof(*tv) != sizeof(struct timespec));
187 do_realtime((struct timespec *)tv); 187 ret = do_realtime((struct timespec *)tv);
188 tv->tv_usec /= 1000; 188 tv->tv_usec /= 1000;
189 }
190 if (unlikely(tz != NULL)) {
191 /* Avoid memcpy. Some old compilers fail to inline it */
192 tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
193 tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
194 }
195 return 0;
196 } 189 }
197 asm("syscall" : "=a" (ret) : 190 if (unlikely(tz != NULL)) {
198 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); 191 /* Avoid memcpy. Some old compilers fail to inline it */
199 return ret; 192 tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
193 tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
194 }
195
196 if (ret == VCLOCK_NONE)
197 return vdso_fallback_gtod(tv, tz);
198 return 0;
200} 199}
201int gettimeofday(struct timeval *, struct timezone *) 200int gettimeofday(struct timeval *, struct timezone *)
202 __attribute__((weak, alias("__vdso_gettimeofday"))); 201 __attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 10f9f59477db..66e6d9359826 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -250,13 +250,7 @@ static int __init gate_vma_init(void)
250 gate_vma.vm_end = FIXADDR_USER_END; 250 gate_vma.vm_end = FIXADDR_USER_END;
251 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; 251 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
252 gate_vma.vm_page_prot = __P101; 252 gate_vma.vm_page_prot = __P101;
253 /* 253
254 * Make sure the vDSO gets into every core dump.
255 * Dumping its contents makes post-mortem fully interpretable later
256 * without matching up the same kernel and hardware config to see
257 * what PC values meant.
258 */
259 gate_vma.vm_flags |= VM_ALWAYSDUMP;
260 return 0; 254 return 0;
261} 255}
262 256
@@ -348,17 +342,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
348 if (compat_uses_vma || !compat) { 342 if (compat_uses_vma || !compat) {
349 /* 343 /*
350 * MAYWRITE to allow gdb to COW and set breakpoints 344 * MAYWRITE to allow gdb to COW and set breakpoints
351 *
352 * Make sure the vDSO gets into every core dump.
353 * Dumping its contents makes post-mortem fully
354 * interpretable later without matching up the same
355 * kernel and hardware config to see what PC values
356 * meant.
357 */ 345 */
358 ret = install_special_mapping(mm, addr, PAGE_SIZE, 346 ret = install_special_mapping(mm, addr, PAGE_SIZE,
359 VM_READ|VM_EXEC| 347 VM_READ|VM_EXEC|
360 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 348 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
361 VM_ALWAYSDUMP,
362 vdso32_pages); 349 vdso32_pages);
363 350
364 if (ret) 351 if (ret)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index d7dce1dbf8c9..00aaf047b39f 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -172,8 +172,7 @@ static int setup_additional_pages(struct linux_binprm *bprm,
172 172
173 ret = install_special_mapping(mm, addr, size, 173 ret = install_special_mapping(mm, addr, size,
174 VM_READ|VM_EXEC| 174 VM_READ|VM_EXEC|
175 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 175 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
176 VM_ALWAYSDUMP,
177 pages); 176 pages);
178 if (ret) { 177 if (ret) {
179 current->mm->context.vdso = NULL; 178 current->mm->context.vdso = NULL;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 4172af8ceeb3..4f51bebac02c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -62,6 +62,15 @@
62#include <asm/reboot.h> 62#include <asm/reboot.h>
63#include <asm/stackprotector.h> 63#include <asm/stackprotector.h>
64#include <asm/hypervisor.h> 64#include <asm/hypervisor.h>
65#include <asm/mwait.h>
66
67#ifdef CONFIG_ACPI
68#include <linux/acpi.h>
69#include <asm/acpi.h>
70#include <acpi/pdc_intel.h>
71#include <acpi/processor.h>
72#include <xen/interface/platform.h>
73#endif
65 74
66#include "xen-ops.h" 75#include "xen-ops.h"
67#include "mmu.h" 76#include "mmu.h"
@@ -200,13 +209,17 @@ static void __init xen_banner(void)
200static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; 209static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
201static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; 210static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
202 211
212static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
213static __read_mostly unsigned int cpuid_leaf5_ecx_val;
214static __read_mostly unsigned int cpuid_leaf5_edx_val;
215
203static void xen_cpuid(unsigned int *ax, unsigned int *bx, 216static void xen_cpuid(unsigned int *ax, unsigned int *bx,
204 unsigned int *cx, unsigned int *dx) 217 unsigned int *cx, unsigned int *dx)
205{ 218{
206 unsigned maskebx = ~0; 219 unsigned maskebx = ~0;
207 unsigned maskecx = ~0; 220 unsigned maskecx = ~0;
208 unsigned maskedx = ~0; 221 unsigned maskedx = ~0;
209 222 unsigned setecx = 0;
210 /* 223 /*
211 * Mask out inconvenient features, to try and disable as many 224 * Mask out inconvenient features, to try and disable as many
212 * unsupported kernel subsystems as possible. 225 * unsupported kernel subsystems as possible.
@@ -214,9 +227,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
214 switch (*ax) { 227 switch (*ax) {
215 case 1: 228 case 1:
216 maskecx = cpuid_leaf1_ecx_mask; 229 maskecx = cpuid_leaf1_ecx_mask;
230 setecx = cpuid_leaf1_ecx_set_mask;
217 maskedx = cpuid_leaf1_edx_mask; 231 maskedx = cpuid_leaf1_edx_mask;
218 break; 232 break;
219 233
234 case CPUID_MWAIT_LEAF:
235 /* Synthesize the values.. */
236 *ax = 0;
237 *bx = 0;
238 *cx = cpuid_leaf5_ecx_val;
239 *dx = cpuid_leaf5_edx_val;
240 return;
241
220 case 0xb: 242 case 0xb:
221 /* Suppress extended topology stuff */ 243 /* Suppress extended topology stuff */
222 maskebx = 0; 244 maskebx = 0;
@@ -232,9 +254,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
232 254
233 *bx &= maskebx; 255 *bx &= maskebx;
234 *cx &= maskecx; 256 *cx &= maskecx;
257 *cx |= setecx;
235 *dx &= maskedx; 258 *dx &= maskedx;
259
236} 260}
237 261
262static bool __init xen_check_mwait(void)
263{
264#ifdef CONFIG_ACPI
265 struct xen_platform_op op = {
266 .cmd = XENPF_set_processor_pminfo,
267 .u.set_pminfo.id = -1,
268 .u.set_pminfo.type = XEN_PM_PDC,
269 };
270 uint32_t buf[3];
271 unsigned int ax, bx, cx, dx;
272 unsigned int mwait_mask;
273
274 /* We need to determine whether it is OK to expose the MWAIT
275 * capability to the kernel to harvest deeper than C3 states from ACPI
276 * _CST using the processor_harvest_xen.c module. For this to work, we
277 * need to gather the MWAIT_LEAF values (which the cstate.c code
278 * checks against). The hypervisor won't expose the MWAIT flag because
279 * it would break backwards compatibility; so we will find out directly
280 * from the hardware and hypercall.
281 */
282 if (!xen_initial_domain())
283 return false;
284
285 ax = 1;
286 cx = 0;
287
288 native_cpuid(&ax, &bx, &cx, &dx);
289
290 mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
291 (1 << (X86_FEATURE_MWAIT % 32));
292
293 if ((cx & mwait_mask) != mwait_mask)
294 return false;
295
296 /* We need to emulate the MWAIT_LEAF and for that we need both
297 * ecx and edx. The hypercall provides only partial information.
298 */
299
300 ax = CPUID_MWAIT_LEAF;
301 bx = 0;
302 cx = 0;
303 dx = 0;
304
305 native_cpuid(&ax, &bx, &cx, &dx);
306
307 /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
308 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
309 */
310 buf[0] = ACPI_PDC_REVISION_ID;
311 buf[1] = 1;
312 buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
313
314 set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
315
316 if ((HYPERVISOR_dom0_op(&op) == 0) &&
317 (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
318 cpuid_leaf5_ecx_val = cx;
319 cpuid_leaf5_edx_val = dx;
320 }
321 return true;
322#else
323 return false;
324#endif
325}
238static void __init xen_init_cpuid_mask(void) 326static void __init xen_init_cpuid_mask(void)
239{ 327{
240 unsigned int ax, bx, cx, dx; 328 unsigned int ax, bx, cx, dx;
@@ -261,6 +349,9 @@ static void __init xen_init_cpuid_mask(void)
261 /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ 349 /* Xen will set CR4.OSXSAVE if supported and not disabled by force */
262 if ((cx & xsave_mask) != xsave_mask) 350 if ((cx & xsave_mask) != xsave_mask)
263 cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ 351 cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
352
353 if (xen_check_mwait())
354 cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
264} 355}
265 356
266static void xen_set_debugreg(int reg, unsigned long val) 357static void xen_set_debugreg(int reg, unsigned long val)
@@ -777,11 +868,11 @@ static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
777 868
778static unsigned long xen_read_cr0(void) 869static unsigned long xen_read_cr0(void)
779{ 870{
780 unsigned long cr0 = percpu_read(xen_cr0_value); 871 unsigned long cr0 = this_cpu_read(xen_cr0_value);
781 872
782 if (unlikely(cr0 == 0)) { 873 if (unlikely(cr0 == 0)) {
783 cr0 = native_read_cr0(); 874 cr0 = native_read_cr0();
784 percpu_write(xen_cr0_value, cr0); 875 this_cpu_write(xen_cr0_value, cr0);
785 } 876 }
786 877
787 return cr0; 878 return cr0;
@@ -791,7 +882,7 @@ static void xen_write_cr0(unsigned long cr0)
791{ 882{
792 struct multicall_space mcs; 883 struct multicall_space mcs;
793 884
794 percpu_write(xen_cr0_value, cr0); 885 this_cpu_write(xen_cr0_value, cr0);
795 886
796 /* Only pay attention to cr0.TS; everything else is 887 /* Only pay attention to cr0.TS; everything else is
797 ignored. */ 888 ignored. */
@@ -876,7 +967,7 @@ void xen_setup_shared_info(void)
876 xen_setup_mfn_list_list(); 967 xen_setup_mfn_list_list();
877} 968}
878 969
879/* This is called once we have the cpu_possible_map */ 970/* This is called once we have the cpu_possible_mask */
880void xen_setup_vcpu_info_placement(void) 971void xen_setup_vcpu_info_placement(void)
881{ 972{
882 int cpu; 973 int cpu;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 8bbb465b6f0a..157337657971 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -26,7 +26,7 @@ static unsigned long xen_save_fl(void)
26 struct vcpu_info *vcpu; 26 struct vcpu_info *vcpu;
27 unsigned long flags; 27 unsigned long flags;
28 28
29 vcpu = percpu_read(xen_vcpu); 29 vcpu = this_cpu_read(xen_vcpu);
30 30
31 /* flag has opposite sense of mask */ 31 /* flag has opposite sense of mask */
32 flags = !vcpu->evtchn_upcall_mask; 32 flags = !vcpu->evtchn_upcall_mask;
@@ -50,7 +50,7 @@ static void xen_restore_fl(unsigned long flags)
50 make sure we're don't switch CPUs between getting the vcpu 50 make sure we're don't switch CPUs between getting the vcpu
51 pointer and updating the mask. */ 51 pointer and updating the mask. */
52 preempt_disable(); 52 preempt_disable();
53 vcpu = percpu_read(xen_vcpu); 53 vcpu = this_cpu_read(xen_vcpu);
54 vcpu->evtchn_upcall_mask = flags; 54 vcpu->evtchn_upcall_mask = flags;
55 preempt_enable_no_resched(); 55 preempt_enable_no_resched();
56 56
@@ -72,7 +72,7 @@ static void xen_irq_disable(void)
72 make sure we're don't switch CPUs between getting the vcpu 72 make sure we're don't switch CPUs between getting the vcpu
73 pointer and updating the mask. */ 73 pointer and updating the mask. */
74 preempt_disable(); 74 preempt_disable();
75 percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; 75 this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
76 preempt_enable_no_resched(); 76 preempt_enable_no_resched();
77} 77}
78PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); 78PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
@@ -86,7 +86,7 @@ static void xen_irq_enable(void)
86 the caller is confused and is trying to re-enable interrupts 86 the caller is confused and is trying to re-enable interrupts
87 on an indeterminate processor. */ 87 on an indeterminate processor. */
88 88
89 vcpu = percpu_read(xen_vcpu); 89 vcpu = this_cpu_read(xen_vcpu);
90 vcpu->evtchn_upcall_mask = 0; 90 vcpu->evtchn_upcall_mask = 0;
91 91
92 /* Doesn't matter if we get preempted here, because any 92 /* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 95c1cf60c669..b8e279479a6b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1071,14 +1071,14 @@ static void drop_other_mm_ref(void *info)
1071 struct mm_struct *mm = info; 1071 struct mm_struct *mm = info;
1072 struct mm_struct *active_mm; 1072 struct mm_struct *active_mm;
1073 1073
1074 active_mm = percpu_read(cpu_tlbstate.active_mm); 1074 active_mm = this_cpu_read(cpu_tlbstate.active_mm);
1075 1075
1076 if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) 1076 if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
1077 leave_mm(smp_processor_id()); 1077 leave_mm(smp_processor_id());
1078 1078
1079 /* If this cpu still has a stale cr3 reference, then make sure 1079 /* If this cpu still has a stale cr3 reference, then make sure
1080 it has been flushed. */ 1080 it has been flushed. */
1081 if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) 1081 if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
1082 load_cr3(swapper_pg_dir); 1082 load_cr3(swapper_pg_dir);
1083} 1083}
1084 1084
@@ -1185,17 +1185,17 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
1185 1185
1186static void xen_write_cr2(unsigned long cr2) 1186static void xen_write_cr2(unsigned long cr2)
1187{ 1187{
1188 percpu_read(xen_vcpu)->arch.cr2 = cr2; 1188 this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
1189} 1189}
1190 1190
1191static unsigned long xen_read_cr2(void) 1191static unsigned long xen_read_cr2(void)
1192{ 1192{
1193 return percpu_read(xen_vcpu)->arch.cr2; 1193 return this_cpu_read(xen_vcpu)->arch.cr2;
1194} 1194}
1195 1195
1196unsigned long xen_read_cr2_direct(void) 1196unsigned long xen_read_cr2_direct(void)
1197{ 1197{
1198 return percpu_read(xen_vcpu_info.arch.cr2); 1198 return this_cpu_read(xen_vcpu_info.arch.cr2);
1199} 1199}
1200 1200
1201static void xen_flush_tlb(void) 1201static void xen_flush_tlb(void)
@@ -1278,12 +1278,12 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1278 1278
1279static unsigned long xen_read_cr3(void) 1279static unsigned long xen_read_cr3(void)
1280{ 1280{
1281 return percpu_read(xen_cr3); 1281 return this_cpu_read(xen_cr3);
1282} 1282}
1283 1283
1284static void set_current_cr3(void *v) 1284static void set_current_cr3(void *v)
1285{ 1285{
1286 percpu_write(xen_current_cr3, (unsigned long)v); 1286 this_cpu_write(xen_current_cr3, (unsigned long)v);
1287} 1287}
1288 1288
1289static void __xen_write_cr3(bool kernel, unsigned long cr3) 1289static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -1306,7 +1306,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
1306 xen_extend_mmuext_op(&op); 1306 xen_extend_mmuext_op(&op);
1307 1307
1308 if (kernel) { 1308 if (kernel) {
1309 percpu_write(xen_cr3, cr3); 1309 this_cpu_write(xen_cr3, cr3);
1310 1310
1311 /* Update xen_current_cr3 once the batch has actually 1311 /* Update xen_current_cr3 once the batch has actually
1312 been submitted. */ 1312 been submitted. */
@@ -1322,7 +1322,7 @@ static void xen_write_cr3(unsigned long cr3)
1322 1322
1323 /* Update while interrupts are disabled, so its atomic with 1323 /* Update while interrupts are disabled, so its atomic with
1324 respect to ipis */ 1324 respect to ipis */
1325 percpu_write(xen_cr3, cr3); 1325 this_cpu_write(xen_cr3, cr3);
1326 1326
1327 __xen_write_cr3(true, cr3); 1327 __xen_write_cr3(true, cr3);
1328 1328
@@ -1859,6 +1859,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1859#endif /* CONFIG_X86_64 */ 1859#endif /* CONFIG_X86_64 */
1860 1860
1861static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; 1861static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
1862static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
1862 1863
1863static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) 1864static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1864{ 1865{
@@ -1899,7 +1900,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1899 * We just don't map the IO APIC - all access is via 1900 * We just don't map the IO APIC - all access is via
1900 * hypercalls. Keep the address in the pte for reference. 1901 * hypercalls. Keep the address in the pte for reference.
1901 */ 1902 */
1902 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); 1903 pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL);
1903 break; 1904 break;
1904#endif 1905#endif
1905 1906
@@ -2064,6 +2065,7 @@ void __init xen_init_mmu_ops(void)
2064 pv_mmu_ops = xen_mmu_ops; 2065 pv_mmu_ops = xen_mmu_ops;
2065 2066
2066 memset(dummy_mapping, 0xff, PAGE_SIZE); 2067 memset(dummy_mapping, 0xff, PAGE_SIZE);
2068 memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE);
2067} 2069}
2068 2070
2069/* Protected by xen_reservation_lock. */ 2071/* Protected by xen_reservation_lock. */
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index dee79b78a90f..9c2e74f9096c 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -47,7 +47,7 @@ static inline void xen_mc_issue(unsigned mode)
47 xen_mc_flush(); 47 xen_mc_flush();
48 48
49 /* restore flags saved in xen_mc_batch */ 49 /* restore flags saved in xen_mc_batch */
50 local_irq_restore(percpu_read(xen_mc_irq_flags)); 50 local_irq_restore(this_cpu_read(xen_mc_irq_flags));
51} 51}
52 52
53/* Set up a callback to be called when the current batch is flushed */ 53/* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index b480d4207a4c..967633ad98c4 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -12,8 +12,8 @@ int xen_swiotlb __read_mostly;
12 12
13static struct dma_map_ops xen_swiotlb_dma_ops = { 13static struct dma_map_ops xen_swiotlb_dma_ops = {
14 .mapping_error = xen_swiotlb_dma_mapping_error, 14 .mapping_error = xen_swiotlb_dma_mapping_error,
15 .alloc_coherent = xen_swiotlb_alloc_coherent, 15 .alloc = xen_swiotlb_alloc_coherent,
16 .free_coherent = xen_swiotlb_free_coherent, 16 .free = xen_swiotlb_free_coherent,
17 .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, 17 .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
18 .sync_single_for_device = xen_swiotlb_sync_single_for_device, 18 .sync_single_for_device = xen_swiotlb_sync_single_for_device,
19 .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, 19 .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e03c63692176..1ba8dff26753 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -10,6 +10,7 @@
10#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/memblock.h> 11#include <linux/memblock.h>
12#include <linux/cpuidle.h> 12#include <linux/cpuidle.h>
13#include <linux/cpufreq.h>
13 14
14#include <asm/elf.h> 15#include <asm/elf.h>
15#include <asm/vdso.h> 16#include <asm/vdso.h>
@@ -420,7 +421,7 @@ void __init xen_arch_setup(void)
420 boot_cpu_data.hlt_works_ok = 1; 421 boot_cpu_data.hlt_works_ok = 1;
421#endif 422#endif
422 disable_cpuidle(); 423 disable_cpuidle();
423 boot_option_idle_override = IDLE_HALT; 424 disable_cpufreq();
424 WARN_ON(set_pm_idle_to_default()); 425 WARN_ON(set_pm_idle_to_default());
425 fiddle_vdso(); 426 fiddle_vdso();
426} 427}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 501d4e0244ba..5fac6919b957 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -59,7 +59,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
59 59
60static void __cpuinit cpu_bringup(void) 60static void __cpuinit cpu_bringup(void)
61{ 61{
62 int cpu = smp_processor_id(); 62 int cpu;
63 63
64 cpu_init(); 64 cpu_init();
65 touch_softlockup_watchdog(); 65 touch_softlockup_watchdog();
@@ -75,8 +75,14 @@ static void __cpuinit cpu_bringup(void)
75 75
76 xen_setup_cpu_clockevents(); 76 xen_setup_cpu_clockevents();
77 77
78 notify_cpu_starting(cpu);
79
80 ipi_call_lock();
78 set_cpu_online(cpu, true); 81 set_cpu_online(cpu, true);
79 percpu_write(cpu_state, CPU_ONLINE); 82 ipi_call_unlock();
83
84 this_cpu_write(cpu_state, CPU_ONLINE);
85
80 wmb(); 86 wmb();
81 87
82 /* We can take interrupts now: we're officially "up". */ 88 /* We can take interrupts now: we're officially "up". */