Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/boot/Makefile16
-rw-r--r--arch/alpha/boot/main.c1
-rw-r--r--arch/alpha/boot/stdio.c306
-rw-r--r--arch/alpha/boot/tools/objstrip.c3
-rw-r--r--arch/alpha/include/asm/cmpxchg.h2
-rw-r--r--arch/alpha/include/asm/types.h1
-rw-r--r--arch/alpha/include/asm/unistd.h2
-rw-r--r--arch/alpha/include/uapi/asm/unistd.h3
-rw-r--r--arch/alpha/kernel/err_ev6.c1
-rw-r--r--arch/alpha/kernel/irq.c1
-rw-r--r--arch/alpha/kernel/osf_sys.c3
-rw-r--r--arch/alpha/kernel/process.c7
-rw-r--r--arch/alpha/kernel/smp.c8
-rw-r--r--arch/alpha/kernel/srmcons.c3
-rw-r--r--arch/alpha/kernel/sys_marvel.c2
-rw-r--r--arch/alpha/kernel/systbls.S3
-rw-r--r--arch/alpha/kernel/traps.c1
-rw-r--r--arch/alpha/oprofile/op_model_ev4.c1
-rw-r--r--arch/alpha/oprofile/op_model_ev5.c1
-rw-r--r--arch/alpha/oprofile/op_model_ev6.c1
-rw-r--r--arch/alpha/oprofile/op_model_ev67.c1
-rw-r--r--arch/arc/include/asm/io.h1
-rw-r--r--arch/arm/boot/dts/Makefile2
-rw-r--r--arch/arm/boot/dts/am335x-boneblack.dts4
-rw-r--r--arch/arm/boot/dts/am335x-evmsk.dts2
-rw-r--r--arch/arm/boot/dts/exynos4412-trats2.dts2
-rw-r--r--arch/arm/boot/dts/imx27.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000.dts2
-rw-r--r--arch/arm/boot/dts/zynq-7000.dtsi4
-rw-r--r--arch/arm/configs/multi_v7_defconfig2
-rw-r--r--arch/arm/include/asm/barrier.h2
-rw-r--r--arch/arm/include/asm/io.h1
-rw-r--r--arch/arm/kernel/entry-common.S4
-rw-r--r--arch/arm/kernel/perf_event_cpu.c9
-rw-r--r--arch/arm/mach-imx/gpc.c16
-rw-r--r--arch/arm/mach-pxa/pxa_cplds_irqs.c2
-rw-r--r--arch/arm/mm/mmu.c20
-rw-r--r--arch/arm/xen/enlighten.c1
-rw-r--r--arch/arm64/include/asm/barrier.h2
-rw-r--r--arch/arm64/include/asm/io.h1
-rw-r--r--arch/avr32/include/asm/cmpxchg.h2
-rw-r--r--arch/avr32/include/asm/io.h1
-rw-r--r--arch/frv/include/asm/io.h4
-rw-r--r--arch/hexagon/include/asm/cmpxchg.h1
-rw-r--r--arch/ia64/include/asm/barrier.h7
-rw-r--r--arch/ia64/include/uapi/asm/cmpxchg.h2
-rw-r--r--arch/ia64/pci/pci.c13
-rw-r--r--arch/m32r/include/asm/cmpxchg.h2
-rw-r--r--arch/m32r/include/asm/io.h1
-rw-r--r--arch/m68k/include/asm/cmpxchg.h1
-rw-r--r--arch/m68k/include/asm/io_mm.h4
-rw-r--r--arch/m68k/include/asm/io_no.h4
-rw-r--r--arch/metag/include/asm/barrier.h2
-rw-r--r--arch/metag/include/asm/cmpxchg.h2
-rw-r--r--arch/metag/include/asm/io.h3
-rw-r--r--arch/microblaze/include/asm/io.h2
-rw-r--r--arch/mips/ath79/prom.c3
-rw-r--r--arch/mips/configs/fuloong2e_defconfig2
-rw-r--r--arch/mips/include/asm/barrier.h4
-rw-r--r--arch/mips/include/asm/cmpxchg.h2
-rw-r--r--arch/mips/kernel/irq.c2
-rw-r--r--arch/mips/kernel/smp-bmips.c2
-rw-r--r--arch/mips/lib/strnlen_user.S15
-rw-r--r--arch/mn10300/include/asm/io.h1
-rw-r--r--arch/nios2/include/asm/io.h1
-rw-r--r--arch/parisc/include/asm/cmpxchg.h2
-rw-r--r--arch/powerpc/include/asm/barrier.h2
-rw-r--r--arch/powerpc/include/asm/cmpxchg.h1
-rw-r--r--arch/powerpc/kernel/mce.c4
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/kvm/book3s_hv.c5
-rw-r--r--arch/powerpc/mm/hugetlbpage.c25
-rw-r--r--arch/powerpc/mm/pgtable_64.c11
-rw-r--r--arch/s390/crypto/ghash_s390.c25
-rw-r--r--arch/s390/crypto/prng.c2
-rw-r--r--arch/s390/include/asm/barrier.h2
-rw-r--r--arch/s390/include/asm/cmpxchg.h2
-rw-r--r--arch/s390/include/asm/io.h1
-rw-r--r--arch/s390/include/asm/pgtable.h2
-rw-r--r--arch/s390/net/bpf_jit_comp.c19
-rw-r--r--arch/score/include/asm/cmpxchg.h2
-rw-r--r--arch/sh/include/asm/barrier.h2
-rw-r--r--arch/sh/include/asm/cmpxchg.h2
-rw-r--r--arch/sparc/include/asm/barrier_64.h4
-rw-r--r--arch/sparc/include/asm/cmpxchg_32.h1
-rw-r--r--arch/sparc/include/asm/cmpxchg_64.h2
-rw-r--r--arch/sparc/include/asm/cpudata_64.h3
-rw-r--r--arch/sparc/include/asm/io_32.h1
-rw-r--r--arch/sparc/include/asm/io_64.h1
-rw-r--r--arch/sparc/include/asm/pgtable_64.h22
-rw-r--r--arch/sparc/include/asm/topology_64.h3
-rw-r--r--arch/sparc/include/asm/trap_block.h2
-rw-r--r--arch/sparc/kernel/entry.h2
-rw-r--r--arch/sparc/kernel/leon_pci_grpci2.c1
-rw-r--r--arch/sparc/kernel/mdesc.c136
-rw-r--r--arch/sparc/kernel/pci.c59
-rw-r--r--arch/sparc/kernel/setup_64.c21
-rw-r--r--arch/sparc/kernel/smp_64.c13
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S5
-rw-r--r--arch/sparc/mm/init_64.c74
-rw-r--r--arch/tile/include/asm/atomic_64.h3
-rw-r--r--arch/tile/include/asm/io.h2
-rw-r--r--arch/x86/Kbuild5
-rw-r--r--arch/x86/Kconfig225
-rw-r--r--arch/x86/Kconfig.debug11
-rw-r--r--arch/x86/Makefile14
-rw-r--r--arch/x86/entry/Makefile10
-rw-r--r--arch/x86/entry/calling.h (renamed from arch/x86/include/asm/calling.h)98
-rw-r--r--arch/x86/entry/entry_32.S1248
-rw-r--r--arch/x86/entry/entry_64.S (renamed from arch/x86/kernel/entry_64.S)1074
-rw-r--r--arch/x86/entry/entry_64_compat.S556
-rw-r--r--arch/x86/entry/syscall_32.c (renamed from arch/x86/kernel/syscall_32.c)6
-rw-r--r--arch/x86/entry/syscall_64.c (renamed from arch/x86/kernel/syscall_64.c)0
-rw-r--r--arch/x86/entry/syscalls/Makefile (renamed from arch/x86/syscalls/Makefile)4
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl (renamed from arch/x86/syscalls/syscall_32.tbl)0
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl (renamed from arch/x86/syscalls/syscall_64.tbl)0
-rw-r--r--arch/x86/entry/syscalls/syscallhdr.sh (renamed from arch/x86/syscalls/syscallhdr.sh)0
-rw-r--r--arch/x86/entry/syscalls/syscalltbl.sh (renamed from arch/x86/syscalls/syscalltbl.sh)0
-rw-r--r--arch/x86/entry/thunk_32.S (renamed from arch/x86/lib/thunk_32.S)15
-rw-r--r--arch/x86/entry/thunk_64.S (renamed from arch/x86/lib/thunk_64.S)46
-rw-r--r--arch/x86/entry/vdso/.gitignore (renamed from arch/x86/vdso/.gitignore)0
-rw-r--r--arch/x86/entry/vdso/Makefile (renamed from arch/x86/vdso/Makefile)0
-rwxr-xr-xarch/x86/entry/vdso/checkundef.sh (renamed from arch/x86/vdso/checkundef.sh)0
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c (renamed from arch/x86/vdso/vclock_gettime.c)0
-rw-r--r--arch/x86/entry/vdso/vdso-layout.lds.S (renamed from arch/x86/vdso/vdso-layout.lds.S)0
-rw-r--r--arch/x86/entry/vdso/vdso-note.S (renamed from arch/x86/vdso/vdso-note.S)0
-rw-r--r--arch/x86/entry/vdso/vdso.lds.S (renamed from arch/x86/vdso/vdso.lds.S)0
-rw-r--r--arch/x86/entry/vdso/vdso2c.c (renamed from arch/x86/vdso/vdso2c.c)0
-rw-r--r--arch/x86/entry/vdso/vdso2c.h (renamed from arch/x86/vdso/vdso2c.h)0
-rw-r--r--arch/x86/entry/vdso/vdso32-setup.c (renamed from arch/x86/vdso/vdso32-setup.c)0
-rw-r--r--arch/x86/entry/vdso/vdso32/.gitignore (renamed from arch/x86/vdso/vdso32/.gitignore)0
-rw-r--r--arch/x86/entry/vdso/vdso32/int80.S (renamed from arch/x86/vdso/vdso32/int80.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/note.S (renamed from arch/x86/vdso/vdso32/note.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/sigreturn.S (renamed from arch/x86/vdso/vdso32/sigreturn.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/syscall.S (renamed from arch/x86/vdso/vdso32/syscall.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/sysenter.S (renamed from arch/x86/vdso/vdso32/sysenter.S)0
-rw-r--r--arch/x86/entry/vdso/vdso32/vclock_gettime.c (renamed from arch/x86/vdso/vdso32/vclock_gettime.c)0
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso-fakesections.c (renamed from arch/x86/vdso/vdso32/vdso-fakesections.c)0
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso32.lds.S (renamed from arch/x86/vdso/vdso32/vdso32.lds.S)0
-rw-r--r--arch/x86/entry/vdso/vdsox32.lds.S (renamed from arch/x86/vdso/vdsox32.lds.S)0
-rw-r--r--arch/x86/entry/vdso/vgetcpu.c (renamed from arch/x86/vdso/vgetcpu.c)0
-rw-r--r--arch/x86/entry/vdso/vma.c (renamed from arch/x86/vdso/vma.c)0
-rw-r--r--arch/x86/entry/vsyscall/Makefile7
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c (renamed from arch/x86/kernel/vsyscall_64.c)0
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_emu_64.S (renamed from arch/x86/kernel/vsyscall_emu_64.S)0
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_gtod.c (renamed from arch/x86/kernel/vsyscall_gtod.c)0
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_trace.h (renamed from arch/x86/kernel/vsyscall_trace.h)2
-rw-r--r--arch/x86/ia32/Makefile2
-rw-r--r--arch/x86/ia32/ia32entry.S591
-rw-r--r--arch/x86/include/asm/barrier.h4
-rw-r--r--arch/x86/include/asm/cacheflush.h6
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/dwarf2.h170
-rw-r--r--arch/x86/include/asm/entry_arch.h3
-rw-r--r--arch/x86/include/asm/frame.h7
-rw-r--r--arch/x86/include/asm/hardirq.h3
-rw-r--r--arch/x86/include/asm/hw_irq.h2
-rw-r--r--arch/x86/include/asm/io.h9
-rw-r--r--arch/x86/include/asm/irq_vectors.h11
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/mce.h28
-rw-r--r--arch/x86/include/asm/msr-index.h (renamed from arch/x86/include/uapi/asm/msr-index.h)3
-rw-r--r--arch/x86/include/asm/msr.h12
-rw-r--r--arch/x86/include/asm/mtrr.h15
-rw-r--r--arch/x86/include/asm/paravirt.h29
-rw-r--r--arch/x86/include/asm/paravirt_types.h10
-rw-r--r--arch/x86/include/asm/pat.h9
-rw-r--r--arch/x86/include/asm/pgtable.h8
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/include/asm/proto.h10
-rw-r--r--arch/x86/include/asm/qspinlock.h57
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h6
-rw-r--r--arch/x86/include/asm/segment.h14
-rw-r--r--arch/x86/include/asm/special_insns.h38
-rw-r--r--arch/x86/include/asm/spinlock.h5
-rw-r--r--arch/x86/include/asm/spinlock_types.h4
-rw-r--r--arch/x86/include/asm/topology.h2
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h6
-rw-r--r--arch/x86/include/asm/traps.h3
-rw-r--r--arch/x86/include/uapi/asm/msr.h2
-rw-r--r--arch/x86/include/uapi/asm/mtrr.h8
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c6
-rw-r--r--arch/x86/kernel/cpu/common.c12
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c57
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c141
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c44
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c209
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c48
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h2
-rw-r--r--arch/x86/kernel/crash.c1
-rw-r--r--arch/x86/kernel/early-quirks.c8
-rw-r--r--arch/x86/kernel/entry_32.S1401
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S33
-rw-r--r--arch/x86/kernel/head_64.S20
-rw-r--r--arch/x86/kernel/i387.c15
-rw-r--r--arch/x86/kernel/irq.c6
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kernel/kvm.c43
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c24
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c22
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c22
-rw-r--r--arch/x86/kernel/traps.c19
-rw-r--r--arch/x86/kvm/cpuid.c4
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/mmu.c16
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/paging_tmpl.h7
-rw-r--r--arch/x86/kvm/svm.c1
-rw-r--r--arch/x86/kvm/vmx.c1
-rw-r--r--arch/x86/kvm/x86.c26
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/atomic64_386_32.S7
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S61
-rw-r--r--arch/x86/lib/checksum_32.S52
-rw-r--r--arch/x86/lib/clear_page_64.S7
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S12
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S11
-rw-r--r--arch/x86/lib/copy_page_64.S11
-rw-r--r--arch/x86/lib/copy_user_64.S15
-rw-r--r--arch/x86/lib/csum-copy_64.S17
-rw-r--r--arch/x86/lib/getuser.S13
-rw-r--r--arch/x86/lib/iomap_copy_64.S3
-rw-r--r--arch/x86/lib/memcpy_64.S3
-rw-r--r--arch/x86/lib/memmove_64.S3
-rw-r--r--arch/x86/lib/memset_64.S5
-rw-r--r--arch/x86/lib/msr-reg.S44
-rw-r--r--arch/x86/lib/putuser.S8
-rw-r--r--arch/x86/lib/rwsem.S49
-rw-r--r--arch/x86/mm/init.c6
-rw-r--r--arch/x86/mm/iomap_32.c12
-rw-r--r--arch/x86/mm/ioremap.c71
-rw-r--r--arch/x86/mm/pageattr-test.c1
-rw-r--r--arch/x86/mm/pageattr.c84
-rw-r--r--arch/x86/mm/pat.c337
-rw-r--r--arch/x86/mm/pat_internal.h2
-rw-r--r--arch/x86/mm/pat_rbtree.c6
-rw-r--r--arch/x86/mm/pgtable.c60
-rw-r--r--arch/x86/net/bpf_jit.S1
-rw-r--r--arch/x86/net/bpf_jit_comp.c7
-rw-r--r--arch/x86/pci/acpi.c13
-rw-r--r--arch/x86/pci/i386.c6
-rw-r--r--arch/x86/platform/Makefile1
-rw-r--r--arch/x86/platform/atom/Makefile1
-rw-r--r--arch/x86/platform/atom/punit_atom_debug.c183
-rw-r--r--arch/x86/um/Makefile2
-rw-r--r--arch/x86/um/asm/barrier.h3
-rw-r--r--arch/x86/xen/enlighten.c5
-rw-r--r--arch/x86/xen/p2m.c1
-rw-r--r--arch/x86/xen/spinlock.c64
-rw-r--r--arch/x86/xen/xen-asm_64.S6
-rw-r--r--arch/xtensa/include/asm/dma-mapping.h13
-rw-r--r--arch/xtensa/include/asm/io.h1
258 files changed, 4919 insertions, 3914 deletions
diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile
index cd143887380a..8399bd0e68e8 100644
--- a/arch/alpha/boot/Makefile
+++ b/arch/alpha/boot/Makefile
@@ -14,6 +14,9 @@ targets := vmlinux.gz vmlinux \
 	 tools/bootpzh bootloader bootpheader bootpzheader
 OBJSTRIP := $(obj)/tools/objstrip
 
+HOSTCFLAGS := -Wall -I$(objtree)/usr/include
+BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
+
 # SRM bootable image. Copy to offset 512 of a partition.
 $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh
 	( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@
@@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE
 $(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE
 	$(call if_changed,objstrip)
 
-LDFLAGS_bootloader := -static -uvsprintf -T #-N -relax
-LDFLAGS_bootpheader := -static -uvsprintf -T #-N -relax
-LDFLAGS_bootpzheader := -static -uvsprintf -T #-N -relax
+LDFLAGS_bootloader := -static -T # -N -relax
+LDFLAGS_bootloader := -static -T # -N -relax
+LDFLAGS_bootpheader := -static -T # -N -relax
+LDFLAGS_bootpzheader := -static -T # -N -relax
 
-OBJ_bootlx := $(obj)/head.o $(obj)/main.o
-OBJ_bootph := $(obj)/head.o $(obj)/bootp.o
-OBJ_bootpzh := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o
+OBJ_bootlx := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o
+OBJ_bootph := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o
+OBJ_bootpzh := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o
 
 $(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE
 	$(call if_changed,ld)
diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c
index 3baf2d1e908d..dd6eb4a33582 100644
--- a/arch/alpha/boot/main.c
+++ b/arch/alpha/boot/main.c
@@ -19,7 +19,6 @@
 
 #include "ksize.h"
 
-extern int vsprintf(char *, const char *, va_list);
 extern unsigned long switch_to_osf_pal(unsigned long nr,
 	struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
 	unsigned long *vptb);
diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c
new file mode 100644
index 000000000000..f844dae8a54a
--- /dev/null
+++ b/arch/alpha/boot/stdio.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+
+size_t strnlen(const char * s, size_t count)
+{
+	const char *sc;
+
+	for (sc = s; count-- && *sc != '\0'; ++sc)
+		/* nothing */;
+	return sc - s;
+}
+
+# define do_div(n, base) ({						\
+	unsigned int __base = (base);					\
+	unsigned int __rem;						\
+	__rem = ((unsigned long long)(n)) % __base;			\
+	(n) = ((unsigned long long)(n)) / __base;			\
+	__rem;								\
+})
+
+
+static int skip_atoi(const char **s)
+{
+	int i, c;
+
+	for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+		i = i*10 + c - '0';
+	return i;
+}
+
+#define ZEROPAD	1		/* pad with zero */
+#define SIGN	2		/* unsigned/signed long */
+#define PLUS	4		/* show plus */
+#define SPACE	8		/* space if plus */
+#define LEFT	16		/* left justified */
+#define SPECIAL	32		/* 0x */
+#define LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * str, unsigned long long num, int base, int size, int precision, int type)
+{
+	char c,sign,tmp[66];
+	const char *digits="0123456789abcdefghijklmnopqrstuvwxyz";
+	int i;
+
+	if (type & LARGE)
+		digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	if (type & LEFT)
+		type &= ~ZEROPAD;
+	if (base < 2 || base > 36)
+		return 0;
+	c = (type & ZEROPAD) ? '0' : ' ';
+	sign = 0;
+	if (type & SIGN) {
+		if ((signed long long)num < 0) {
+			sign = '-';
+			num = - (signed long long)num;
+			size--;
+		} else if (type & PLUS) {
+			sign = '+';
+			size--;
+		} else if (type & SPACE) {
+			sign = ' ';
+			size--;
+		}
+	}
+	if (type & SPECIAL) {
+		if (base == 16)
+			size -= 2;
+		else if (base == 8)
+			size--;
+	}
+	i = 0;
+	if (num == 0)
+		tmp[i++]='0';
+	else while (num != 0) {
+		tmp[i++] = digits[do_div(num, base)];
+	}
+	if (i > precision)
+		precision = i;
+	size -= precision;
+	if (!(type&(ZEROPAD+LEFT)))
+		while(size-->0)
+			*str++ = ' ';
+	if (sign)
+		*str++ = sign;
+	if (type & SPECIAL) {
+		if (base==8)
+			*str++ = '0';
+		else if (base==16) {
+			*str++ = '0';
+			*str++ = digits[33];
+		}
+	}
+	if (!(type & LEFT))
+		while (size-- > 0)
+			*str++ = c;
+	while (i < precision--)
+		*str++ = '0';
+	while (i-- > 0)
+		*str++ = tmp[i];
+	while (size-- > 0)
+		*str++ = ' ';
+	return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+	int len;
+	unsigned long long num;
+	int i, base;
+	char * str;
+	const char *s;
+
+	int flags;		/* flags to number() */
+
+	int field_width;	/* width of output field */
+	int precision;		/* min. # of digits for integers; max
+				   number of chars for from string */
+	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
+				/* 'z' support added 23/7/1999 S.H.    */
+				/* 'z' changed to 'Z' --davidm 1/25/99 */
+
+
+	for (str=buf ; *fmt ; ++fmt) {
+		if (*fmt != '%') {
+			*str++ = *fmt;
+			continue;
+		}
+
+		/* process flags */
+		flags = 0;
+		repeat:
+			++fmt;		/* this also skips first '%' */
+			switch (*fmt) {
+				case '-': flags |= LEFT; goto repeat;
+				case '+': flags |= PLUS; goto repeat;
+				case ' ': flags |= SPACE; goto repeat;
+				case '#': flags |= SPECIAL; goto repeat;
+				case '0': flags |= ZEROPAD; goto repeat;
+			}
+
+		/* get field width */
+		field_width = -1;
+		if ('0' <= *fmt && *fmt <= '9')
+			field_width = skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			field_width = va_arg(args, int);
+			if (field_width < 0) {
+				field_width = -field_width;
+				flags |= LEFT;
+			}
+		}
+
+		/* get the precision */
+		precision = -1;
+		if (*fmt == '.') {
+			++fmt;
+			if ('0' <= *fmt && *fmt <= '9')
+				precision = skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				precision = va_arg(args, int);
+			}
+			if (precision < 0)
+				precision = 0;
+		}
+
+		/* get the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'l' && *(fmt + 1) == 'l') {
+			qualifier = 'q';
+			fmt += 2;
+		} else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L'
+			|| *fmt == 'Z') {
+			qualifier = *fmt;
+			++fmt;
+		}
+
+		/* default base */
+		base = 10;
+
+		switch (*fmt) {
+		case 'c':
+			if (!(flags & LEFT))
+				while (--field_width > 0)
+					*str++ = ' ';
+			*str++ = (unsigned char) va_arg(args, int);
+			while (--field_width > 0)
+				*str++ = ' ';
+			continue;
+
+		case 's':
+			s = va_arg(args, char *);
+			if (!s)
+				s = "<NULL>";
+
+			len = strnlen(s, precision);
+
+			if (!(flags & LEFT))
+				while (len < field_width--)
+					*str++ = ' ';
+			for (i = 0; i < len; ++i)
+				*str++ = *s++;
+			while (len < field_width--)
+				*str++ = ' ';
+			continue;
+
+		case 'p':
+			if (field_width == -1) {
+				field_width = 2*sizeof(void *);
+				flags |= ZEROPAD;
+			}
+			str = number(str,
+				(unsigned long) va_arg(args, void *), 16,
+				field_width, precision, flags);
+			continue;
+
+
+		case 'n':
+			if (qualifier == 'l') {
+				long * ip = va_arg(args, long *);
+				*ip = (str - buf);
+			} else if (qualifier == 'Z') {
+				size_t * ip = va_arg(args, size_t *);
+				*ip = (str - buf);
+			} else {
+				int * ip = va_arg(args, int *);
+				*ip = (str - buf);
+			}
+			continue;
+
+		case '%':
+			*str++ = '%';
+			continue;
+
+		/* integer number formats - set up the flags and "break" */
+		case 'o':
+			base = 8;
+			break;
+
+		case 'X':
+			flags |= LARGE;
+		case 'x':
+			base = 16;
+			break;
+
+		case 'd':
+		case 'i':
+			flags |= SIGN;
+		case 'u':
+			break;
+
+		default:
+			*str++ = '%';
+			if (*fmt)
+				*str++ = *fmt;
+			else
+				--fmt;
+			continue;
+		}
+		if (qualifier == 'l') {
+			num = va_arg(args, unsigned long);
+			if (flags & SIGN)
+				num = (signed long) num;
+		} else if (qualifier == 'q') {
+			num = va_arg(args, unsigned long long);
+			if (flags & SIGN)
+				num = (signed long long) num;
+		} else if (qualifier == 'Z') {
+			num = va_arg(args, size_t);
+		} else if (qualifier == 'h') {
+			num = (unsigned short) va_arg(args, int);
+			if (flags & SIGN)
+				num = (signed short) num;
+		} else {
+			num = va_arg(args, unsigned int);
+			if (flags & SIGN)
+				num = (signed int) num;
+		}
+		str = number(str, num, base, field_width, precision, flags);
+	}
+	*str = '\0';
+	return str-buf;
+}
+
+int sprintf(char * buf, const char *fmt, ...)
+{
+	va_list args;
+	int i;
+
+	va_start(args, fmt);
+	i=vsprintf(buf,fmt,args);
+	va_end(args);
+	return i;
+}
diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c
index 367d53d031fc..dee82695f48b 100644
--- a/arch/alpha/boot/tools/objstrip.c
+++ b/arch/alpha/boot/tools/objstrip.c
@@ -27,6 +27,9 @@
 #include <linux/param.h>
 #ifdef __ELF__
 # include <linux/elf.h>
+# define elfhdr elf64_hdr
+# define elf_phdr elf64_phdr
+# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
 #endif
 
 /* bootfile size must be multiple of BLOCK_SIZE: */
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 429e8cd0d78e..e5117766529e 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -66,6 +66,4 @@
 #undef __ASM__MB
 #undef ____cmpxchg
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 #endif /* _ALPHA_CMPXCHG_H */
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index f61e1a56c378..4cb4b6d3452c 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -2,6 +2,5 @@
 #define _ALPHA_TYPES_H
 
 #include <asm-generic/int-ll64.h>
-#include <uapi/asm/types.h>
 
 #endif /* _ALPHA_TYPES_H */
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index c509d306db45..a56e608db2f9 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -3,7 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define NR_SYSCALLS 511
+#define NR_SYSCALLS 514
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
index d214a0358100..aa33bf5aacb6 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -472,5 +472,8 @@
 #define __NR_sched_setattr 508
 #define __NR_sched_getattr 509
 #define __NR_renameat2 510
+#define __NR_getrandom 511
+#define __NR_memfd_create 512
+#define __NR_execveat 513
 
 #endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c
index 253cf1a87481..51267ac5729b 100644
--- a/arch/alpha/kernel/err_ev6.c
+++ b/arch/alpha/kernel/err_ev6.c
@@ -6,7 +6,6 @@
  * Error handling code supporting Alpha systems
  */
 
-#include <linux/init.h>
 #include <linux/sched.h>
 
 #include <asm/io.h>
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 7b2be251c30f..51f2c8654253 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -19,7 +19,6 @@
 #include <linux/ptrace.h>
 #include <linux/interrupt.h>
 #include <linux/random.h>
-#include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index e51f578636a5..36dc91ace83a 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
 	if (tv) {
 		if (get_tv32((struct timeval *)&kts, tv))
 			return -EFAULT;
+		kts.tv_nsec *= 1000;
 	}
 	if (tz) {
 		if (copy_from_user(&ktz, tz, sizeof(*tz)))
 			return -EFAULT;
 	}
 
-	kts.tv_nsec *= 1000;
-
 	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 1941a07b5811..84d13263ce46 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task)
 }
 
 /*
- * Copy an alpha thread..
+ * Copy architecture-specific thread state
  */
-
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	    unsigned long arg,
+	    unsigned long kthread_arg,
 	    struct task_struct *p)
 {
 	extern void ret_from_fork(void);
@@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 		sizeof(struct switch_stack) + sizeof(struct pt_regs));
 	childstack->r26 = (unsigned long) ret_from_kernel_thread;
 	childstack->r9 = usp;	/* function */
-	childstack->r10 = arg;
+	childstack->r10 = kthread_arg;
 	childregs->hae = alpha_mv.hae_cache,
 	childti->pcb.usp = 0;
 	return 0;
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 99ac36d5de4e..2f24447fef92 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -63,7 +63,6 @@ static struct {
 enum ipi_message_type {
 	IPI_RESCHEDULE,
 	IPI_CALL_FUNC,
-	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP,
 };
 
@@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier)
 	return -EINVAL;
 }
 
-
 static void
 send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
 {
@@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs)
 		generic_smp_call_function_interrupt();
 		break;
 
-	case IPI_CALL_FUNC_SINGLE:
-		generic_smp_call_function_single_interrupt();
-		break;
-
 	case IPI_CPU_STOP:
 		halt();
 
@@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
 static void
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 6f01d9ad7b81..72b59511e59a 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -237,8 +237,7 @@ srmcons_init(void)
 
 	return -ENODEV;
 }
-
-module_init(srmcons_init);
+device_initcall(srmcons_init);
 
 
 /*
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index f21d61fab678..24e41bd7d3c9 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
 	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline);
 	irq = intline;
 
-	msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	msi_loc = dev->msi_cap;
 	msg_ctl = 0;
 	if (msi_loc)
 		pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl);
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 24789713f1ea..9b62e3fd4f03 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -529,6 +529,9 @@ sys_call_table:
 	.quad sys_sched_setattr
 	.quad sys_sched_getattr
 	.quad sys_renameat2			/* 510 */
+	.quad sys_getrandom
+	.quad sys_memfd_create
+	.quad sys_execveat
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 9c4c189eb22f..74aceead06e9 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -14,7 +14,6 @@
 #include <linux/tty.h>
 #include <linux/delay.h>
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/ratelimit.h>
 
diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c
index 18aa9b4f94f1..086a0d5445c5 100644
--- a/arch/alpha/oprofile/op_model_ev4.c
+++ b/arch/alpha/oprofile/op_model_ev4.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c
index c32f8a0ad925..c300f5ef3482 100644
--- a/arch/alpha/oprofile/op_model_ev5.c
+++ b/arch/alpha/oprofile/op_model_ev5.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c
index 1c84cc257fc7..02edf5971614 100644
--- a/arch/alpha/oprofile/op_model_ev6.c
+++ b/arch/alpha/oprofile/op_model_ev6.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c
index 34a57a126553..adb1744d20f3 100644
--- a/arch/alpha/oprofile/op_model_ev67.c
+++ b/arch/alpha/oprofile/op_model_ev67.c
@@ -9,7 +9,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index cabd518cb253..7cc4ced5dbf4 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -20,6 +20,7 @@ extern void iounmap(const void __iomem *addr);
 
 #define ioremap_nocache(phy, sz) ioremap(phy, sz)
 #define ioremap_wc(phy, sz) ioremap(phy, sz)
+#define ioremap_wt(phy, sz) ioremap(phy, sz)
 
 /* Change struct page to physical address */
 #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 86217db2937a..992736b5229b 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \
 	imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \
 	imx25-karo-tx25.dtb \
 	imx25-pdk.dtb
-dtb-$(CONFIG_SOC_IMX31) += \
+dtb-$(CONFIG_SOC_IMX27) += \
 	imx27-apf27.dtb \
 	imx27-apf27dev.dtb \
 	imx27-eukrea-mbimxsd27-baseboard.dtb \
diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts
index 5c42d259fa68..901739fcb85a 100644
--- a/arch/arm/boot/dts/am335x-boneblack.dts
+++ b/arch/arm/boot/dts/am335x-boneblack.dts
@@ -80,7 +80,3 @@
 	status = "okay";
 	};
 };
-
-&rtc {
-	system-power-controller;
-};
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 87fc7a35e802..156d05efcb70 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -654,7 +654,7 @@
 		wlcore: wlcore@2 {
 			compatible = "ti,wl1271";
 			reg = <2>;
-			interrupt-parent = <&gpio1>;
+			interrupt-parent = <&gpio0>;
 			interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */
 			ref-clock-frequency = <38400000>;
 		};
diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts
index 173ffa479ad3..792394dd0f2a 100644
--- a/arch/arm/boot/dts/exynos4412-trats2.dts
+++ b/arch/arm/boot/dts/exynos4412-trats2.dts
@@ -736,7 +736,7 @@
 
 		display-timings {
 			timing-0 {
-				clock-frequency = <0>;
+				clock-frequency = <57153600>;
 				hactive = <720>;
 				vactive = <1280>;
 				hfront-porch = <5>;
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index 6951b66d1ab7..bc215e4b75fd 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -533,7 +533,7 @@
 
 		fec: ethernet@1002b000 {
 			compatible = "fsl,imx27-fec";
-			reg = <0x1002b000 0x4000>;
+			reg = <0x1002b000 0x1000>;
 			interrupts = <50>;
 			clocks = <&clks IMX27_CLK_FEC_IPG_GATE>,
 				 <&clks IMX27_CLK_FEC_AHB_GATE>;
diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts
index 134d3f27a8ec..921de6605f07 100644
--- a/arch/arm/boot/dts/omap3-devkit8000.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000.dts
@@ -110,6 +110,8 @@
 	nand@0,0 {
 		reg = <0 0 4>; /* CS0, offset 0, IO size 4 */
 		nand-bus-width = <16>;
+		gpmc,device-width = <2>;
+		ti,nand-ecc-opt = "sw";
 
 		gpmc,sync-clk-ps = <0>;
 		gpmc,cs-on-ns = <0>;
diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi
index a5cd2eda3edf..9ea54b3dba09 100644
--- a/arch/arm/boot/dts/zynq-7000.dtsi
+++ b/arch/arm/boot/dts/zynq-7000.dtsi
@@ -193,7 +193,7 @@
 		};
 
 		gem0: ethernet@e000b000 {
-			compatible = "cdns,gem";
+			compatible = "cdns,zynq-gem";
 			reg = <0xe000b000 0x1000>;
 			status = "disabled";
 			interrupts = <0 22 4>;
@@ -204,7 +204,7 @@
 		};
 
 		gem1: ethernet@e000c000 {
-			compatible = "cdns,gem";
+			compatible = "cdns,zynq-gem";
 			reg = <0xe000c000 0x1000>;
 			status = "disabled";
 			interrupts = <0 45 4>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 0ca4a3eaf65d..fbbb1915c6a9 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -429,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y
 CONFIG_USB_EHCI_TEGRA=y
 CONFIG_USB_EHCI_HCD_STI=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_ISP1760=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_STI=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index d2f81e6b8c1c..6c2327e1c732 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -81,7 +81,7 @@ do { \
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)
 
-#define set_mb(var, value)	do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value)	do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 
 #define smp_mb__before_atomic()	smp_mb()
 #define smp_mb__after_atomic()	smp_mb()
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index db58deb00aa7..1b7677d1e5e1 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -336,6 +336,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
 #define ioremap_nocache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE)
 #define ioremap_cache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE_CACHED)
 #define ioremap_wc(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE_WC)
+#define ioremap_wt(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE)
 #define iounmap				__arm_iounmap
 
 /*
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index f8ccc21fa032..4e7f40c577e6 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -33,7 +33,9 @@ ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq				@ disable interrupts
-	ldr	r1, [tsk, #TI_FLAGS]
+	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
+	tst	r1, #_TIF_SYSCALL_WORK
+	bne	__sys_trace_return
 	tst	r1, #_TIF_WORK_MASK
 	bne	fast_work_pending
 	asm_trace_hardirqs_on
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 213919ba326f..3b8c2833c537 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -304,16 +304,17 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 static int of_pmu_irq_cfg(struct platform_device *pdev)
 {
 	int i, irq;
-	int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-
-	if (!irqs)
-		return -ENOMEM;
+	int *irqs;
 
 	/* Don't bother with PPIs; they're already affine */
 	irq = platform_get_irq(pdev, 0);
 	if (irq >= 0 && irq_is_percpu(irq))
 		return 0;
 
+	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
 	for (i = 0; i < pdev->num_resources; ++i) {
 		struct device_node *dn;
 		int cpu;
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 4d60005e9277..6d0893a3828e 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void)
 	struct device_node *np;
 
 	np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc");
-	if (WARN_ON(!np ||
-		    !of_find_property(np, "interrupt-controller", NULL)))
-		pr_warn("Outdated DT detected, system is about to crash!!!\n");
+	if (WARN_ON(!np))
+		return;
+
+	if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
+		pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+
+		/* map GPC, so that at least CPUidle and WARs keep working */
+		gpc_base = of_iomap(np, 0);
+	}
 }
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS
@@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev)
 	struct regulator *pu_reg;
 	int ret;
 
+	/* bail out if DT too old and doesn't provide the necessary info */
+	if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells"))
+		return 0;
+
 	pu_reg = devm_regulator_get_optional(&pdev->dev, "pu");
 	if (PTR_ERR(pu_reg) == -ENODEV)
 		pu_reg = NULL;
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
index f1aeb54fabe3..2385052b0ce1 100644
--- a/arch/arm/mach-pxa/pxa_cplds_irqs.c
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -107,7 +107,7 @@ static int cplds_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct cplds *fpga;
 	int ret;
-	unsigned int base_irq = 0;
+	int base_irq;
 	unsigned long irqflags = 0;
 
 	fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4e6ef896c619..7186382672b5 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void)
 		}
 
 		/*
-		 * Find the first non-section-aligned page, and point
+		 * Find the first non-pmd-aligned page, and point
 		 * memblock_limit at it. This relies on rounding the
-		 * limit down to be section-aligned, which happens at
-		 * the end of this function.
+		 * limit down to be pmd-aligned, which happens at the
+		 * end of this function.
 		 *
 		 * With this algorithm, the start or end of almost any
-		 * bank can be non-section-aligned. The only exception
-		 * is that the start of the bank 0 must be section-
+		 * bank can be non-pmd-aligned. The only exception is
+		 * that the start of the bank 0 must be section-
 		 * aligned, since otherwise memory would need to be
 		 * allocated when mapping the start of bank 0, which
 		 * occurs before any free memory is mapped.
 		 */
 		if (!memblock_limit) {
-			if (!IS_ALIGNED(block_start, SECTION_SIZE))
+			if (!IS_ALIGNED(block_start, PMD_SIZE))
 				memblock_limit = block_start;
-			else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+			else if (!IS_ALIGNED(block_end, PMD_SIZE))
 				memblock_limit = arm_lowmem_limit;
 		}
 
@@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void)
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
 
 	/*
-	 * Round the memblock limit down to a section size. This
+	 * Round the memblock limit down to a pmd size. This
 	 * helps to ensure that we will allocate memory from the
-	 * last full section, which should be mapped.
+	 * last full pmd, which should be mapped.
 	 */
 	if (memblock_limit)
-		memblock_limit = round_down(memblock_limit, SECTION_SIZE);
+		memblock_limit = round_down(memblock_limit, PMD_SIZE);
 	if (!memblock_limit)
 		memblock_limit = arm_lowmem_limit;
 
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 224081ccc92f..7d0f07020c80 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -272,6 +272,7 @@ void xen_arch_pre_suspend(void) { }
 void xen_arch_post_suspend(int suspend_cancelled) { }
 void xen_timer_resume(void) { }
 void xen_arch_resume(void) { }
+void xen_arch_suspend(void) { }
 
 
 /* In the hypervisor.S file. */
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 71f19c4dc0de..0fa47c4275cb 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -114,7 +114,7 @@ do { \
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)
 
-#define set_mb(var, value)	do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value)	do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 #define nop()		asm volatile("nop");
 
 #define smp_mb__before_atomic()	smp_mb()
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 540f7c0aea82..7116d3973058 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -170,6 +170,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 #define ioremap(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define ioremap_nocache(addr, size)	__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define ioremap_wc(addr, size)		__ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
+#define ioremap_wt(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define iounmap				__iounmap
 
 /*
diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h
index 962a6aeab787..366bbeaeb405 100644
--- a/arch/avr32/include/asm/cmpxchg.h
+++ b/arch/avr32/include/asm/cmpxchg.h
@@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
    if something tries to do an invalid cmpxchg(). */
 extern void __cmpxchg_called_with_bad_pointer(void);
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 				      unsigned long new, int size)
 {
diff --git a/arch/avr32/include/asm/io.h b/arch/avr32/include/asm/io.h
index 4f5ec2bb7172..e998ff5d8e1a 100644
--- a/arch/avr32/include/asm/io.h
+++ b/arch/avr32/include/asm/io.h
@@ -296,6 +296,7 @@ extern void __iounmap(void __iomem *addr);
 	__iounmap(addr)
 
 #define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
 
 #define cached(addr) P1SEGADDR(addr)
 #define uncached(addr) P2SEGADDR(addr)
diff --git a/arch/frv/include/asm/io.h b/arch/frv/include/asm/io.h
index 0b78bc89e840..a31b63ec4930 100644
--- a/arch/frv/include/asm/io.h
+++ b/arch/frv/include/asm/io.h
@@ -17,6 +17,8 @@
 
 #ifdef __KERNEL__
 
+#define ARCH_HAS_IOREMAP_WT
+
 #include <linux/types.h>
 #include <asm/virtconvert.h>
 #include <asm/string.h>
@@ -265,7 +267,7 @@ static inline void __iomem *ioremap_nocache(unsigned long physaddr, unsigned lon
 	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
 }
 
-static inline void __iomem *ioremap_writethrough(unsigned long physaddr, unsigned long size)
+static inline void __iomem *ioremap_wt(unsigned long physaddr, unsigned long size)
 {
 	return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
 }
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 9e7802911a57..a6e34e2acbba 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
  * looks just like atomic_cmpxchg on our arch currently with a bunch of
  * variable casting.
  */
-#define __HAVE_ARCH_CMPXCHG 1
 
 #define cmpxchg(ptr, old, new) \
 ({ \
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index f6769eb2bbf9..843ba435e43b 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -77,12 +77,7 @@ do { \
 	___p1;								\
 })
 
-/*
- * XXX check on this ---I suspect what Linus really wants here is
- * acquire vs release semantics but we can't discuss this stuff with
- * Linus just yet. Grrr...
- */
-#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
 
 /*
  * The group barrier in front of the rsm & ssm are necessary to ensure
diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h
index f35109b1d907..a0e3620f8f13 100644
--- a/arch/ia64/include/uapi/asm/cmpxchg.h
+++ b/arch/ia64/include/uapi/asm/cmpxchg.h
@@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void);
  * indicated by comparing RETURN with OLD.
  */
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 /*
  * This function doesn't exist, so you'll get a linker error
  * if something tries to do an invalid cmpxchg().
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index d4e162d35b34..7cc3be9fa7c6 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-	struct pci_controller *controller = bridge->bus->sysdata;
-
-	ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+	/*
+	 * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+	 * here, pci_create_root_bus() has been called by someone else and
+	 * sysdata is likely to be different from what we expect. Let it go in
+	 * that case.
+	 */
+	if (!bridge->dev.parent) {
+		struct pci_controller *controller = bridge->bus->sysdata;
+		ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+	}
 	return 0;
 }
 
diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h
index de651db20b43..14bf9b739dd2 100644
--- a/arch/m32r/include/asm/cmpxchg.h
+++ b/arch/m32r/include/asm/cmpxchg.h
@@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size)
 	((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \
 			sizeof(*(ptr))))
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 static inline unsigned long
 __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new)
 {
diff --git a/arch/m32r/include/asm/io.h b/arch/m32r/include/asm/io.h
index 9cc00dbd59ce..0c3f25ee3381 100644
--- a/arch/m32r/include/asm/io.h
+++ b/arch/m32r/include/asm/io.h
@@ -68,6 +68,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 extern void iounmap(volatile void __iomem *addr);
 #define ioremap_nocache(off,size) ioremap(off,size)
 #define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
 
 /*
  * IO bus memory addresses are also 1:1 with the physical address
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index bc755bc620ad..83b1df80f0ac 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *,
  * indicated by comparing RETURN with OLD.
  */
 #ifdef CONFIG_RMW_INSNS
-#define __HAVE_ARCH_CMPXCHG 1
 
 static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
 				      unsigned long new, int size)
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index 8955b40a5dc4..618c85d3c786 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -20,6 +20,8 @@
20 20
21#ifdef __KERNEL__ 21#ifdef __KERNEL__
22 22
23#define ARCH_HAS_IOREMAP_WT
24
23#include <linux/compiler.h> 25#include <linux/compiler.h>
24#include <asm/raw_io.h> 26#include <asm/raw_io.h>
25#include <asm/virtconvert.h> 27#include <asm/virtconvert.h>
@@ -465,7 +467,7 @@ static inline void __iomem *ioremap_nocache(unsigned long physaddr, unsigned lon
465{ 467{
466 return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); 468 return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
467} 469}
468static inline void __iomem *ioremap_writethrough(unsigned long physaddr, 470static inline void __iomem *ioremap_wt(unsigned long physaddr,
469 unsigned long size) 471 unsigned long size)
470{ 472{
471 return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); 473 return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h
index a93c8cde4d38..ad7bd40e6742 100644
--- a/arch/m68k/include/asm/io_no.h
+++ b/arch/m68k/include/asm/io_no.h
@@ -3,6 +3,8 @@
3 3
4#ifdef __KERNEL__ 4#ifdef __KERNEL__
5 5
6#define ARCH_HAS_IOREMAP_WT
7
6#include <asm/virtconvert.h> 8#include <asm/virtconvert.h>
7#include <asm-generic/iomap.h> 9#include <asm-generic/iomap.h>
8 10
@@ -153,7 +155,7 @@ static inline void *ioremap_nocache(unsigned long physaddr, unsigned long size)
153{ 155{
154 return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); 156 return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
155} 157}
156static inline void *ioremap_writethrough(unsigned long physaddr, unsigned long size) 158static inline void *ioremap_wt(unsigned long physaddr, unsigned long size)
157{ 159{
158 return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); 160 return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
159} 161}
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index d703d8e26a65..5a696e507930 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -84,7 +84,7 @@ static inline void fence(void)
84#define read_barrier_depends() do { } while (0) 84#define read_barrier_depends() do { } while (0)
85#define smp_read_barrier_depends() do { } while (0) 85#define smp_read_barrier_depends() do { } while (0)
86 86
87#define set_mb(var, value) do { var = value; smp_mb(); } while (0) 87#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
88 88
89#define smp_store_release(p, v) \ 89#define smp_store_release(p, v) \
90do { \ 90do { \
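
The barrier.h hunks in this series (metag here, and mips, powerpc, s390, sh and sparc below) rename set_mb() to smp_store_mb() and route the assignment through WRITE_ONCE(). A minimal sketch of the intended semantics, written in the style of the asm-generic fallback rather than any one architecture's exact definition:

/*
 * Sketch only: WRITE_ONCE() keeps the compiler from tearing or reordering
 * the plain store; smp_mb() then orders it against every later access on
 * this CPU.  Typical caller: publish a state change before sleeping, e.g.
 * smp_store_mb(current->state, TASK_UNINTERRUPTIBLE); before schedule().
 */
#define smp_store_mb(var, value)				\
	do { WRITE_ONCE(var, value); smp_mb(); } while (0)
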
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
index b1bc1be8540f..be29e3e44321 100644
--- a/arch/metag/include/asm/cmpxchg.h
+++ b/arch/metag/include/asm/cmpxchg.h
@@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
51 return old; 51 return old;
52} 52}
53 53
54#define __HAVE_ARCH_CMPXCHG 1
55
56#define cmpxchg(ptr, o, n) \ 54#define cmpxchg(ptr, o, n) \
57 ({ \ 55 ({ \
58 __typeof__(*(ptr)) _o_ = (o); \ 56 __typeof__(*(ptr)) _o_ = (o); \
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
index d5779b0ec573..9890f21eadbe 100644
--- a/arch/metag/include/asm/io.h
+++ b/arch/metag/include/asm/io.h
@@ -160,6 +160,9 @@ extern void __iounmap(void __iomem *addr);
160#define ioremap_wc(offset, size) \ 160#define ioremap_wc(offset, size) \
161 __ioremap((offset), (size), _PAGE_WR_COMBINE) 161 __ioremap((offset), (size), _PAGE_WR_COMBINE)
162 162
163#define ioremap_wt(offset, size) \
164 __ioremap((offset), (size), 0)
165
163#define iounmap(addr) \ 166#define iounmap(addr) \
164 __iounmap(addr) 167 __iounmap(addr)
165 168
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 940f5fc1d1da..39b6315db82e 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -39,10 +39,10 @@ extern resource_size_t isa_mem_base;
39extern void iounmap(void __iomem *addr); 39extern void iounmap(void __iomem *addr);
40 40
41extern void __iomem *ioremap(phys_addr_t address, unsigned long size); 41extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
42#define ioremap_writethrough(addr, size) ioremap((addr), (size))
43#define ioremap_nocache(addr, size) ioremap((addr), (size)) 42#define ioremap_nocache(addr, size) ioremap((addr), (size))
44#define ioremap_fullcache(addr, size) ioremap((addr), (size)) 43#define ioremap_fullcache(addr, size) ioremap((addr), (size))
45#define ioremap_wc(addr, size) ioremap((addr), (size)) 44#define ioremap_wc(addr, size) ioremap((addr), (size))
45#define ioremap_wt(addr, size) ioremap((addr), (size))
46 46
47#endif /* CONFIG_MMU */ 47#endif /* CONFIG_MMU */
48 48
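
The ioremap_wt() hunks scattered through this series (m32r and metag above, microblaze here, mn10300, nios2, s390 and sparc below, plus the m68k ioremap_writethrough renames) give every architecture a write-through mapping helper; where the hardware has no write-through attribute it simply aliases the uncached ioremap(), so only performance differs. A hedged usage sketch, where the device, fb_phys and FB_SIZE are all hypothetical:

#include <linux/io.h>
#include <linux/errno.h>

#define FB_SIZE 0x100000	/* placeholder size for the hypothetical device */

static int map_fb(phys_addr_t fb_phys, void __iomem **out)
{
	void __iomem *fb = ioremap_wt(fb_phys, FB_SIZE);

	if (!fb)
		return -ENOMEM;

	iowrite32(0, fb);	/* touch the first word of the mapping */
	*out = fb;
	return 0;
}
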
diff --git a/arch/mips/ath79/prom.c b/arch/mips/ath79/prom.c
index e1fe63051136..597899ad5438 100644
--- a/arch/mips/ath79/prom.c
+++ b/arch/mips/ath79/prom.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Atheros AR71XX/AR724X/AR913X specific prom routines 2 * Atheros AR71XX/AR724X/AR913X specific prom routines
3 * 3 *
4 * Copyright (C) 2015 Laurent Fasnacht <l@libres.ch>
4 * Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org> 5 * Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
5 * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org> 6 * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
6 * 7 *
@@ -25,12 +26,14 @@ void __init prom_init(void)
25{ 26{
26 fw_init_cmdline(); 27 fw_init_cmdline();
27 28
29#ifdef CONFIG_BLK_DEV_INITRD
28 /* Read the initrd address from the firmware environment */ 30 /* Read the initrd address from the firmware environment */
29 initrd_start = fw_getenvl("initrd_start"); 31 initrd_start = fw_getenvl("initrd_start");
30 if (initrd_start) { 32 if (initrd_start) {
31 initrd_start = KSEG0ADDR(initrd_start); 33 initrd_start = KSEG0ADDR(initrd_start);
32 initrd_end = initrd_start + fw_getenvl("initrd_size"); 34 initrd_end = initrd_start + fw_getenvl("initrd_size");
33 } 35 }
36#endif
34} 37}
35 38
36void __init prom_free_prom_memory(void) 39void __init prom_free_prom_memory(void)
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index 002680648dcb..b2a577ebce0b 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m
194CONFIG_USB_C67X00_HCD=m 194CONFIG_USB_C67X00_HCD=m
195CONFIG_USB_EHCI_HCD=y 195CONFIG_USB_EHCI_HCD=y
196CONFIG_USB_EHCI_ROOT_HUB_TT=y 196CONFIG_USB_EHCI_ROOT_HUB_TT=y
197CONFIG_USB_ISP1760_HCD=m 197CONFIG_USB_ISP1760=m
198CONFIG_USB_OHCI_HCD=y 198CONFIG_USB_OHCI_HCD=y
199CONFIG_USB_UHCI_HCD=m 199CONFIG_USB_UHCI_HCD=m
200CONFIG_USB_R8A66597_HCD=m 200CONFIG_USB_R8A66597_HCD=m
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 2b8bbbcb9be0..7ecba84656d4 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -112,8 +112,8 @@
112#define __WEAK_LLSC_MB " \n" 112#define __WEAK_LLSC_MB " \n"
113#endif 113#endif
114 114
115#define set_mb(var, value) \ 115#define smp_store_mb(var, value) \
116 do { var = value; smp_mb(); } while (0) 116 do { WRITE_ONCE(var, value); smp_mb(); } while (0)
117 117
118#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") 118#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
119 119
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 412f945f1f5e..b71ab4a5fd50 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
138 __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ 138 __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \
139}) 139})
140 140
141#define __HAVE_ARCH_CMPXCHG 1
142
143#define __cmpxchg_asm(ld, st, m, old, new) \ 141#define __cmpxchg_asm(ld, st, m, old, new) \
144({ \ 142({ \
145 __typeof(*(m)) __ret; \ 143 __typeof(*(m)) __ret; \
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index d2bfbc2e8995..51f57d841662 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -29,7 +29,7 @@
29int kgdb_early_setup; 29int kgdb_early_setup;
30#endif 30#endif
31 31
32static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; 32static DECLARE_BITMAP(irq_map, NR_IRQS);
33 33
34int allocate_irqno(void) 34int allocate_irqno(void)
35{ 35{
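
The irq_map change swaps a hand-sized array for DECLARE_BITMAP(). The hand-rolled NR_IRQS / BITS_PER_LONG sizing truncates whenever NR_IRQS is not a multiple of the word size, so the highest IRQ numbers would index past the array; DECLARE_BITMAP() rounds up instead. A minimal illustration with an example value:

#include <linux/bitmap.h>

/* Suppose a configuration ended up with 71 IRQs on a 64-bit build. */
static unsigned long truncated_map[71 / BITS_PER_LONG];	/* 1 word: bits 64..70 lost  */
static DECLARE_BITMAP(rounded_map, 71);			/* 2 words: all 71 bits held */
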
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index fd528d7ea278..336708ae5c5b 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = {
444static void bmips_wr_vec(unsigned long dst, char *start, char *end) 444static void bmips_wr_vec(unsigned long dst, char *start, char *end)
445{ 445{
446 memcpy((void *)dst, start, end - start); 446 memcpy((void *)dst, start, end - start);
447 dma_cache_wback((unsigned long)start, end - start); 447 dma_cache_wback(dst, end - start);
448 local_flush_icache_range(dst, dst + (end - start)); 448 local_flush_icache_range(dst, dst + (end - start));
449 instruction_hazard(); 449 instruction_hazard();
450} 450}
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index 7d12c0dded3d..77e64942f004 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm)
34FEXPORT(__strnlen_\func\()_nocheck_asm) 34FEXPORT(__strnlen_\func\()_nocheck_asm)
35 move v0, a0 35 move v0, a0
36 PTR_ADDU a1, a0 # stop pointer 36 PTR_ADDU a1, a0 # stop pointer
371: beq v0, a1, 1f # limit reached? 371:
38#ifdef CONFIG_CPU_DADDI_WORKAROUNDS
39 .set noat
40 li AT, 1
41#endif
42 beq v0, a1, 1f # limit reached?
38.ifeqs "\func", "kernel" 43.ifeqs "\func", "kernel"
39 EX(lb, t0, (v0), .Lfault\@) 44 EX(lb, t0, (v0), .Lfault\@)
40.else 45.else
@@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm)
42.endif 47.endif
43 .set noreorder 48 .set noreorder
44 bnez t0, 1b 49 bnez t0, 1b
451: PTR_ADDIU v0, 1 501:
51#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
52 PTR_ADDIU v0, 1
53#else
54 PTR_ADDU v0, AT
55 .set at
56#endif
46 .set reorder 57 .set reorder
47 PTR_SUBU v0, a0 58 PTR_SUBU v0, a0
48 jr ra 59 jr ra
diff --git a/arch/mn10300/include/asm/io.h b/arch/mn10300/include/asm/io.h
index cc4a2ba9e228..07c5b4a3903b 100644
--- a/arch/mn10300/include/asm/io.h
+++ b/arch/mn10300/include/asm/io.h
@@ -282,6 +282,7 @@ static inline void __iomem *ioremap_nocache(unsigned long offset, unsigned long
282} 282}
283 283
284#define ioremap_wc ioremap_nocache 284#define ioremap_wc ioremap_nocache
285#define ioremap_wt ioremap_nocache
285 286
286static inline void iounmap(void __iomem *addr) 287static inline void iounmap(void __iomem *addr)
287{ 288{
diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h
index 6e24d7cceb0c..c5a62da22cd2 100644
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h
@@ -46,6 +46,7 @@ static inline void iounmap(void __iomem *addr)
46} 46}
47 47
48#define ioremap_wc ioremap_nocache 48#define ioremap_wc ioremap_nocache
49#define ioremap_wt ioremap_nocache
49 50
50/* Pages to physical address... */ 51/* Pages to physical address... */
51#define page_to_phys(page) virt_to_phys(page_to_virt(page)) 52#define page_to_phys(page) virt_to_phys(page_to_virt(page))
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index dbd13354ec41..0a90b965cccb 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
46#define xchg(ptr, x) \ 46#define xchg(ptr, x) \
47 ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) 47 ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
48 48
49#define __HAVE_ARCH_CMPXCHG 1
50
51/* bug catcher for when unsupported size is used - won't link */ 49/* bug catcher for when unsupported size is used - won't link */
52extern void __cmpxchg_called_with_bad_pointer(void); 50extern void __cmpxchg_called_with_bad_pointer(void);
53 51
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index a3bf5be111ff..39505d660a70 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
34#define rmb() __asm__ __volatile__ ("sync" : : : "memory") 34#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
35#define wmb() __asm__ __volatile__ ("sync" : : : "memory") 35#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
36 36
37#define set_mb(var, value) do { var = value; mb(); } while (0) 37#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
38 38
39#ifdef __SUBARCH_HAS_LWSYNC 39#ifdef __SUBARCH_HAS_LWSYNC
40# define SMPWMB LWSYNC 40# define SMPWMB LWSYNC
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index d463c68fe7f0..ad6263cffb0f 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
144 * Compare and exchange - if *p == old, set it to new, 144 * Compare and exchange - if *p == old, set it to new,
145 * and return the old value of *p. 145 * and return the old value of *p.
146 */ 146 */
147#define __HAVE_ARCH_CMPXCHG 1
148 147
149static __always_inline unsigned long 148static __always_inline unsigned long
150__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) 149__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 15c99b649b04..b2eb4686bd8f 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -73,7 +73,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
73 uint64_t nip, uint64_t addr) 73 uint64_t nip, uint64_t addr)
74{ 74{
75 uint64_t srr1; 75 uint64_t srr1;
76 int index = __this_cpu_inc_return(mce_nest_count); 76 int index = __this_cpu_inc_return(mce_nest_count) - 1;
77 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); 77 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
78 78
79 /* 79 /*
@@ -184,7 +184,7 @@ void machine_check_queue_event(void)
184 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 184 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
185 return; 185 return;
186 186
187 index = __this_cpu_inc_return(mce_queue_count); 187 index = __this_cpu_inc_return(mce_queue_count) - 1;
188 /* If queue is full, just return for now. */ 188 /* If queue is full, just return for now. */
189 if (index >= MAX_MC_EVT) { 189 if (index >= MAX_MC_EVT) {
190 __this_cpu_dec(mce_queue_count); 190 __this_cpu_dec(mce_queue_count);
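
Both mce.c hunks fix the same off-by-one: __this_cpu_inc_return() returns the counter value after the increment, so using it directly as an array index skips slot 0 and eventually lands one element past the end of the per-CPU event array. A small sketch of the corrected pattern, with a hypothetical event type and queue depth:

#include <linux/types.h>
#include <linux/percpu.h>

#define MAX_EVT 10				/* hypothetical queue depth */

struct my_evt { u64 addr; };			/* hypothetical payload */

static DEFINE_PER_CPU(int, evt_count);
static DEFINE_PER_CPU(struct my_evt[MAX_EVT], evt_queue);

static void queue_evt(u64 addr)			/* assumed: preemption already off */
{
	/* inc_return yields the count *after* the increment: 1.. */
	int index = __this_cpu_inc_return(evt_count) - 1;

	if (index >= MAX_EVT) {
		__this_cpu_dec(evt_count);	/* queue full, drop the event */
		return;
	}
	this_cpu_ptr(&evt_queue[index])->addr = addr;
}
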
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index f096e72262f4..1db685104ffc 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -213,6 +213,7 @@ SECTIONS
213 *(.opd) 213 *(.opd)
214 } 214 }
215 215
216 . = ALIGN(256);
216 .got : AT(ADDR(.got) - LOAD_OFFSET) { 217 .got : AT(ADDR(.got) - LOAD_OFFSET) {
217 __toc_start = .; 218 __toc_start = .;
218#ifndef CONFIG_RELOCATABLE 219#ifndef CONFIG_RELOCATABLE
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48d3c5d2ecc9..df81caab7383 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc)
1952 */ 1952 */
1953static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) 1953static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
1954{ 1954{
1955 struct kvm_vcpu *vcpu; 1955 struct kvm_vcpu *vcpu, *vnext;
1956 int i; 1956 int i;
1957 int srcu_idx; 1957 int srcu_idx;
1958 1958
@@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
1982 */ 1982 */
1983 if ((threads_per_core > 1) && 1983 if ((threads_per_core > 1) &&
1984 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { 1984 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
1985 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1985 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
1986 arch.run_list) {
1986 vcpu->arch.ret = -EBUSY; 1987 vcpu->arch.ret = -EBUSY;
1987 kvmppc_remove_runnable(vc, vcpu); 1988 kvmppc_remove_runnable(vc, vcpu);
1988 wake_up(&vcpu->arch.cpu_run); 1989 wake_up(&vcpu->arch.cpu_run);
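
The book3s_hv hunk is the standard deleting-while-iterating fix: kvmppc_remove_runnable() takes the current vcpu off the runnable_threads list, so the plain iterator would chase a stale ->next pointer. list_for_each_entry_safe() caches the next element before the loop body runs. The generic shape, as a minimal sketch:

#include <linux/list.h>
#include <linux/slab.h>

struct item {
	struct list_head node;
};

static void drain(struct list_head *head)
{
	struct item *it, *next;

	/* "next" is fetched before the body can list_del() the current entry */
	list_for_each_entry_safe(it, next, head, node) {
		list_del(&it->node);
		kfree(it);
	}
}
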
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ce968b00b7c..3385e3d0506e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -689,27 +689,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
689struct page * 689struct page *
690follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 690follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
691{ 691{
692 pte_t *ptep; 692 pte_t *ptep, pte;
693 struct page *page;
694 unsigned shift; 693 unsigned shift;
695 unsigned long mask, flags; 694 unsigned long mask, flags;
695 struct page *page = ERR_PTR(-EINVAL);
696
697 local_irq_save(flags);
698 ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
699 if (!ptep)
700 goto no_page;
701 pte = READ_ONCE(*ptep);
696 /* 702 /*
703 * Verify it is a huge page else bail.
697 * Transparent hugepages are handled by generic code. We can skip them 704 * Transparent hugepages are handled by generic code. We can skip them
698 * here. 705 * here.
699 */ 706 */
700 local_irq_save(flags); 707 if (!shift || pmd_trans_huge(__pmd(pte_val(pte))))
701 ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); 708 goto no_page;
702 709
703 /* Verify it is a huge page else bail. */ 710 if (!pte_present(pte)) {
704 if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) { 711 page = NULL;
705 local_irq_restore(flags); 712 goto no_page;
706 return ERR_PTR(-EINVAL);
707 } 713 }
708 mask = (1UL << shift) - 1; 714 mask = (1UL << shift) - 1;
709 page = pte_page(*ptep); 715 page = pte_page(pte);
710 if (page) 716 if (page)
711 page += (address & mask) / PAGE_SIZE; 717 page += (address & mask) / PAGE_SIZE;
712 718
719no_page:
713 local_irq_restore(flags); 720 local_irq_restore(flags);
714 return page; 721 return page;
715} 722}
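
The follow_huge_addr() rework is easier to see outside the diff markup: the PTE is read once with READ_ONCE() and every later test runs against that snapshot, since re-dereferencing *ptep between the huge/present checks and pte_page() could observe a concurrently modified entry. A condensed restatement of the new flow, with the THP special case and error details elided:

#include <linux/mm.h>

static struct page *huge_lookup(struct mm_struct *mm, unsigned long addr)
{
	struct page *page = ERR_PTR(-EINVAL);
	unsigned long flags, mask;
	unsigned shift;
	pte_t *ptep, pte;

	local_irq_save(flags);		/* pins the page tables for the walk */
	ptep = find_linux_pte_or_hugepte(mm->pgd, addr, &shift);
	if (ptep) {
		pte = READ_ONCE(*ptep);			/* one snapshot ...       */
		if (shift && pte_present(pte)) {	/* ... checks on the copy */
			mask = (1UL << shift) - 1;
			page = pte_page(pte) + (addr & mask) / PAGE_SIZE;
		}
	}
	local_irq_restore(flags);
	return page;
}
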
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 59daa5eeec25..6bfadf1aa5cb 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -839,6 +839,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
839 * hash fault look at them. 839 * hash fault look at them.
840 */ 840 */
841 memset(pgtable, 0, PTE_FRAG_SIZE); 841 memset(pgtable, 0, PTE_FRAG_SIZE);
842 /*
843 * Serialize against find_linux_pte_or_hugepte which does lock-less
844 * lookup in page tables with local interrupts disabled. For huge pages
845 * it casts pmd_t to pte_t. Since format of pte_t is different from
846 * pmd_t we want to prevent transit from pmd pointing to page table
847 * to pmd pointing to huge page (and back) while interrupts are disabled.
848 * We clear pmd to possibly replace it with page table pointer in
849 * different code paths. So make sure we wait for the parallel
850 * find_linux_pte_or_hugepage to finish.
851 */
852 kick_all_cpus_sync();
842 return old_pmd; 853 return old_pmd;
843} 854}
844 855
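
The kick_all_cpus_sync() added to pmdp_get_and_clear() works because the lockless walkers it races with (find_linux_pte_or_hugepte() callers, as in the hunk above) keep local interrupts disabled for the whole lookup: the synchronous IPI cannot complete on a CPU until that CPU re-enables interrupts, so when the call returns no walker can still be looking at the old pmd. A hedged sketch of the idiom (the real code clears the pmd atomically; this is only the shape):

static pmd_t unhook_and_sync(pmd_t *pmdp)
{
	pmd_t old = *pmdp;

	pmd_clear(pmdp);	/* 1: new walkers can no longer reach it          */
	kick_all_cpus_sync();	/* 2: returns only once every other CPU has       */
				/*    taken the IPI, i.e. left its IRQs-off walk  */
	return old;		/* 3: the old page table may now be repurposed    */
}
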
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 7940dc90e80b..b258110da952 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -16,11 +16,12 @@
16#define GHASH_DIGEST_SIZE 16 16#define GHASH_DIGEST_SIZE 16
17 17
18struct ghash_ctx { 18struct ghash_ctx {
19 u8 icv[16]; 19 u8 key[GHASH_BLOCK_SIZE];
20 u8 key[16];
21}; 20};
22 21
23struct ghash_desc_ctx { 22struct ghash_desc_ctx {
23 u8 icv[GHASH_BLOCK_SIZE];
24 u8 key[GHASH_BLOCK_SIZE];
24 u8 buffer[GHASH_BLOCK_SIZE]; 25 u8 buffer[GHASH_BLOCK_SIZE];
25 u32 bytes; 26 u32 bytes;
26}; 27};
@@ -28,8 +29,10 @@ struct ghash_desc_ctx {
28static int ghash_init(struct shash_desc *desc) 29static int ghash_init(struct shash_desc *desc)
29{ 30{
30 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); 31 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
32 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
31 33
32 memset(dctx, 0, sizeof(*dctx)); 34 memset(dctx, 0, sizeof(*dctx));
35 memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
33 36
34 return 0; 37 return 0;
35} 38}
@@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm,
45 } 48 }
46 49
47 memcpy(ctx->key, key, GHASH_BLOCK_SIZE); 50 memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
48 memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
49 51
50 return 0; 52 return 0;
51} 53}
@@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc,
54 const u8 *src, unsigned int srclen) 56 const u8 *src, unsigned int srclen)
55{ 57{
56 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); 58 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
57 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
58 unsigned int n; 59 unsigned int n;
59 u8 *buf = dctx->buffer; 60 u8 *buf = dctx->buffer;
60 int ret; 61 int ret;
@@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc,
70 src += n; 71 src += n;
71 72
72 if (!dctx->bytes) { 73 if (!dctx->bytes) {
73 ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, 74 ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
74 GHASH_BLOCK_SIZE); 75 GHASH_BLOCK_SIZE);
75 if (ret != GHASH_BLOCK_SIZE) 76 if (ret != GHASH_BLOCK_SIZE)
76 return -EIO; 77 return -EIO;
@@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc,
79 80
80 n = srclen & ~(GHASH_BLOCK_SIZE - 1); 81 n = srclen & ~(GHASH_BLOCK_SIZE - 1);
81 if (n) { 82 if (n) {
82 ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); 83 ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
83 if (ret != n) 84 if (ret != n)
84 return -EIO; 85 return -EIO;
85 src += n; 86 src += n;
@@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc,
94 return 0; 95 return 0;
95} 96}
96 97
97static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) 98static int ghash_flush(struct ghash_desc_ctx *dctx)
98{ 99{
99 u8 *buf = dctx->buffer; 100 u8 *buf = dctx->buffer;
100 int ret; 101 int ret;
@@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
104 105
105 memset(pos, 0, dctx->bytes); 106 memset(pos, 0, dctx->bytes);
106 107
107 ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); 108 ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
108 if (ret != GHASH_BLOCK_SIZE) 109 if (ret != GHASH_BLOCK_SIZE)
109 return -EIO; 110 return -EIO;
111
112 dctx->bytes = 0;
110 } 113 }
111 114
112 dctx->bytes = 0;
113 return 0; 115 return 0;
114} 116}
115 117
116static int ghash_final(struct shash_desc *desc, u8 *dst) 118static int ghash_final(struct shash_desc *desc, u8 *dst)
117{ 119{
118 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); 120 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
119 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
120 int ret; 121 int ret;
121 122
122 ret = ghash_flush(ctx, dctx); 123 ret = ghash_flush(dctx);
123 if (!ret) 124 if (!ret)
124 memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); 125 memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
125 return ret; 126 return ret;
126} 127}
127 128
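
The ghash_s390 change moves all per-message state, the running icv plus a private copy of the key, out of the tfm-wide ghash_ctx and into the per-request ghash_desc_ctx, because one transform object may back several hash requests in flight at once. A usage sketch of exactly that situation through the synchronous shash API; allocation error handling is omitted and key, buf1/buf2, len1/len2 and out1/out2 are placeholders:

#include <crypto/hash.h>

static void two_requests_one_tfm(const u8 *key,
				 const u8 *buf1, unsigned int len1, u8 *out1,
				 const u8 *buf2, unsigned int len2, u8 *out2)
{
	struct crypto_shash *tfm = crypto_alloc_shash("ghash", 0, 0);

	crypto_shash_setkey(tfm, key, 16);

	{
		SHASH_DESC_ON_STACK(a, tfm);
		SHASH_DESC_ON_STACK(b, tfm);

		a->tfm = tfm;  a->flags = 0;
		b->tfm = tfm;  b->flags = 0;

		crypto_shash_init(a);
		crypto_shash_init(b);		/* must not reset a's running state */
		crypto_shash_update(a, buf1, len1);
		crypto_shash_update(b, buf2, len2);
		crypto_shash_final(a, out1);	/* both digests must come out right */
		crypto_shash_final(b, out2);
	}

	crypto_free_shash(tfm);
}
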
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 1f374b39a4ec..9d5192c94963 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -125,7 +125,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
125 /* fill page with urandom bytes */ 125 /* fill page with urandom bytes */
126 get_random_bytes(pg, PAGE_SIZE); 126 get_random_bytes(pg, PAGE_SIZE);
127 /* exor page with stckf values */ 127 /* exor page with stckf values */
128 for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) { 128 for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
129 u64 *p = ((u64 *)pg) + n; 129 u64 *p = ((u64 *)pg) + n;
130 *p ^= get_tod_clock_fast(); 130 *p ^= get_tod_clock_fast();
131 } 131 }
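
The prng.c hunk is a pure sizeof slip: sizeof(PAGE_SIZE / sizeof(u64)) is the storage size of that integer expression (typically 8), not the number of u64 words in a page, so the loop XORed only the first few words of the entropy page with timer values. The two expressions side by side:

#include <linux/mm.h>
#include <linux/printk.h>

static void sizeof_pitfall(void)
{
	size_t nwords = PAGE_SIZE / sizeof(u64);	/* 512 on 4 KiB pages     */
	size_t oops = sizeof(PAGE_SIZE / sizeof(u64));	/* 8: size of the result  */

	pr_info("words per page: %zu, words touched by the old loop: %zu\n",
		nwords, oops);
}
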
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 8d724718ec21..e6f8615a11eb 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
36#define smp_mb__before_atomic() smp_mb() 36#define smp_mb__before_atomic() smp_mb()
37#define smp_mb__after_atomic() smp_mb() 37#define smp_mb__after_atomic() smp_mb()
38 38
39#define set_mb(var, value) do { var = value; mb(); } while (0) 39#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
40 40
41#define smp_store_release(p, v) \ 41#define smp_store_release(p, v) \
42do { \ 42do { \
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 4eadec466b8c..411464f4c97a 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -32,8 +32,6 @@
32 __old; \ 32 __old; \
33}) 33})
34 34
35#define __HAVE_ARCH_CMPXCHG
36
37#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ 35#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
38({ \ 36({ \
39 register __typeof__(*(p1)) __old1 asm("2") = (o1); \ 37 register __typeof__(*(p1)) __old1 asm("2") = (o1); \
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 30fd5c84680e..cb5fdf3a78fc 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -29,6 +29,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
29 29
30#define ioremap_nocache(addr, size) ioremap(addr, size) 30#define ioremap_nocache(addr, size) ioremap(addr, size)
31#define ioremap_wc ioremap_nocache 31#define ioremap_wc ioremap_nocache
32#define ioremap_wt ioremap_nocache
32 33
33static inline void __iomem *ioremap(unsigned long offset, unsigned long size) 34static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
34{ 35{
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index fc642399b489..ef24a212eeb7 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -494,7 +494,7 @@ static inline int pmd_large(pmd_t pmd)
494 return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; 494 return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
495} 495}
496 496
497static inline int pmd_pfn(pmd_t pmd) 497static inline unsigned long pmd_pfn(pmd_t pmd)
498{ 498{
499 unsigned long origin_mask; 499 unsigned long origin_mask;
500 500
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 7690dc8e1ab5..20c146d1251a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -443,8 +443,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
443 443
444/* 444/*
445 * Compile one eBPF instruction into s390x code 445 * Compile one eBPF instruction into s390x code
446 *
447 * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
448 * stack space for the large switch statement.
446 */ 449 */
447static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) 450static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
448{ 451{
449 struct bpf_insn *insn = &fp->insnsi[i]; 452 struct bpf_insn *insn = &fp->insnsi[i];
450 int jmp_off, last, insn_count = 1; 453 int jmp_off, last, insn_count = 1;
@@ -588,8 +591,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
588 EMIT4(0xb9160000, dst_reg, rc_reg); 591 EMIT4(0xb9160000, dst_reg, rc_reg);
589 break; 592 break;
590 } 593 }
591 case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */ 594 case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
592 case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */ 595 case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
593 { 596 {
594 int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; 597 int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
595 598
@@ -602,10 +605,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
602 EMIT4_IMM(0xa7090000, REG_W0, 0); 605 EMIT4_IMM(0xa7090000, REG_W0, 0);
603 /* lgr %w1,%dst */ 606 /* lgr %w1,%dst */
604 EMIT4(0xb9040000, REG_W1, dst_reg); 607 EMIT4(0xb9040000, REG_W1, dst_reg);
605 /* llgfr %dst,%src (u32 cast) */
606 EMIT4(0xb9160000, dst_reg, src_reg);
607 /* dlgr %w0,%dst */ 608 /* dlgr %w0,%dst */
608 EMIT4(0xb9870000, REG_W0, dst_reg); 609 EMIT4(0xb9870000, REG_W0, src_reg);
609 /* lgr %dst,%rc */ 610 /* lgr %dst,%rc */
610 EMIT4(0xb9040000, dst_reg, rc_reg); 611 EMIT4(0xb9040000, dst_reg, rc_reg);
611 break; 612 break;
@@ -632,8 +633,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
632 EMIT4(0xb9160000, dst_reg, rc_reg); 633 EMIT4(0xb9160000, dst_reg, rc_reg);
633 break; 634 break;
634 } 635 }
635 case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */ 636 case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
636 case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */ 637 case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
637 { 638 {
638 int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; 639 int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
639 640
@@ -649,7 +650,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
649 EMIT4(0xb9040000, REG_W1, dst_reg); 650 EMIT4(0xb9040000, REG_W1, dst_reg);
650 /* dlg %w0,<d(imm)>(%l) */ 651 /* dlg %w0,<d(imm)>(%l) */
651 EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, 652 EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
652 EMIT_CONST_U64((u32) imm)); 653 EMIT_CONST_U64(imm));
653 /* lgr %dst,%rc */ 654 /* lgr %dst,%rc */
654 EMIT4(0xb9040000, dst_reg, rc_reg); 655 EMIT4(0xb9040000, dst_reg, rc_reg);
655 break; 656 break;
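
The two DIV/MOD hunks restore eBPF's defined semantics: BPF_ALU64 | BPF_DIV and BPF_MOD operate on the full 64-bit divisor, but the old JIT squeezed the source register (and the K immediate) through a u32 cast before the dlgr/dlg divide, so any divisor with bits above 31 produced a wrong quotient or remainder. A small C illustration of the difference:

#include <linux/types.h>

static void alu64_div_demo(void)
{
	u64 dst = 1ULL << 40;
	u64 src = (1ULL << 32) | 4;	/* divisor with bits set above 31 */

	u64 full  = dst / src;		/* 255: what BPF_ALU64|BPF_DIV means   */
	u64 trunc = dst / (u32)src;	/* 1 << 38: what the old cast computed */

	(void)full;
	(void)trunc;
}
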
diff --git a/arch/score/include/asm/cmpxchg.h b/arch/score/include/asm/cmpxchg.h
index f384839c3ee5..cc3f6420b71c 100644
--- a/arch/score/include/asm/cmpxchg.h
+++ b/arch/score/include/asm/cmpxchg.h
@@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
42 (unsigned long)(o), \ 42 (unsigned long)(o), \
43 (unsigned long)(n))) 43 (unsigned long)(n)))
44 44
45#define __HAVE_ARCH_CMPXCHG 1
46
47#include <asm-generic/cmpxchg-local.h> 45#include <asm-generic/cmpxchg-local.h>
48 46
49#endif /* _ASM_SCORE_CMPXCHG_H */ 47#endif /* _ASM_SCORE_CMPXCHG_H */
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 43715308b068..bf91037db4e0 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -32,7 +32,7 @@
32#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") 32#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
33#endif 33#endif
34 34
35#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) 35#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
36 36
37#include <asm-generic/barrier.h> 37#include <asm-generic/barrier.h>
38 38
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index f6bd1406b897..85c97b188d71 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void);
46 * if something tries to do an invalid cmpxchg(). */ 46 * if something tries to do an invalid cmpxchg(). */
47extern void __cmpxchg_called_with_bad_pointer(void); 47extern void __cmpxchg_called_with_bad_pointer(void);
48 48
49#define __HAVE_ARCH_CMPXCHG 1
50
51static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, 49static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
52 unsigned long new, int size) 50 unsigned long new, int size)
53{ 51{
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 76648941fea7..809941e33e12 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -40,8 +40,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
40#define dma_rmb() rmb() 40#define dma_rmb() rmb()
41#define dma_wmb() wmb() 41#define dma_wmb() wmb()
42 42
43#define set_mb(__var, __value) \ 43#define smp_store_mb(__var, __value) \
44 do { __var = __value; membar_safe("#StoreLoad"); } while(0) 44 do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)
45 45
46#ifdef CONFIG_SMP 46#ifdef CONFIG_SMP
47#define smp_mb() mb() 47#define smp_mb() mb()
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index d38b52dca216..83ffb83c5397 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
34 * 34 *
35 * Cribbed from <asm-parisc/atomic.h> 35 * Cribbed from <asm-parisc/atomic.h>
36 */ 36 */
37#define __HAVE_ARCH_CMPXCHG 1
38 37
39/* bug catcher for when unsupported size is used - won't link */ 38/* bug catcher for when unsupported size is used - won't link */
40void __cmpxchg_called_with_bad_pointer(void); 39void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h
index 0e1ed6cfbf68..faa2f61058c2 100644
--- a/arch/sparc/include/asm/cmpxchg_64.h
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
65 65
66#include <asm-generic/cmpxchg-local.h> 66#include <asm-generic/cmpxchg-local.h>
67 67
68#define __HAVE_ARCH_CMPXCHG 1
69
70static inline unsigned long 68static inline unsigned long
71__cmpxchg_u32(volatile int *m, int old, int new) 69__cmpxchg_u32(volatile int *m, int old, int new)
72{ 70{
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index a6e424d185d0..a6cfdabb6054 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -24,7 +24,8 @@ typedef struct {
24 unsigned int icache_line_size; 24 unsigned int icache_line_size;
25 unsigned int ecache_size; 25 unsigned int ecache_size;
26 unsigned int ecache_line_size; 26 unsigned int ecache_line_size;
27 int core_id; 27 unsigned short sock_id;
28 unsigned short core_id;
28 int proc_id; 29 int proc_id;
29} cpuinfo_sparc; 30} cpuinfo_sparc;
30 31
diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h
index 407ac14295f4..57f26c398dc9 100644
--- a/arch/sparc/include/asm/io_32.h
+++ b/arch/sparc/include/asm/io_32.h
@@ -129,6 +129,7 @@ static inline void sbus_memcpy_toio(volatile void __iomem *dst,
129void __iomem *ioremap(unsigned long offset, unsigned long size); 129void __iomem *ioremap(unsigned long offset, unsigned long size);
130#define ioremap_nocache(X,Y) ioremap((X),(Y)) 130#define ioremap_nocache(X,Y) ioremap((X),(Y))
131#define ioremap_wc(X,Y) ioremap((X),(Y)) 131#define ioremap_wc(X,Y) ioremap((X),(Y))
132#define ioremap_wt(X,Y) ioremap((X),(Y))
132void iounmap(volatile void __iomem *addr); 133void iounmap(volatile void __iomem *addr);
133 134
134/* Create a virtual mapping cookie for an IO port range */ 135/* Create a virtual mapping cookie for an IO port range */
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 50d4840d9aeb..c32fa3f752c8 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -402,6 +402,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
402 402
403#define ioremap_nocache(X,Y) ioremap((X),(Y)) 403#define ioremap_nocache(X,Y) ioremap((X),(Y))
404#define ioremap_wc(X,Y) ioremap((X),(Y)) 404#define ioremap_wc(X,Y) ioremap((X),(Y))
405#define ioremap_wt(X,Y) ioremap((X),(Y))
405 406
406static inline void iounmap(volatile void __iomem *addr) 407static inline void iounmap(volatile void __iomem *addr)
407{ 408{
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index dc165ebdf05a..2a52c91d2c8a 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
308 " sllx %1, 32, %1\n" 308 " sllx %1, 32, %1\n"
309 " or %0, %1, %0\n" 309 " or %0, %1, %0\n"
310 " .previous\n" 310 " .previous\n"
311 " .section .sun_m7_2insn_patch, \"ax\"\n"
312 " .word 661b\n"
313 " sethi %%uhi(%4), %1\n"
314 " sethi %%hi(%4), %0\n"
315 " .word 662b\n"
316 " or %1, %%ulo(%4), %1\n"
317 " or %0, %%lo(%4), %0\n"
318 " .word 663b\n"
319 " sllx %1, 32, %1\n"
320 " or %0, %1, %0\n"
321 " .previous\n"
311 : "=r" (mask), "=r" (tmp) 322 : "=r" (mask), "=r" (tmp)
312 : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | 323 : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
313 _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | 324 _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U |
314 _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), 325 _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U),
315 "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | 326 "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
316 _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | 327 _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V |
328 _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V),
329 "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
330 _PAGE_CP_4V | _PAGE_E_4V |
317 _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); 331 _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V));
318 332
319 return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); 333 return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
@@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
342 " andn %0, %4, %0\n" 356 " andn %0, %4, %0\n"
343 " or %0, %5, %0\n" 357 " or %0, %5, %0\n"
344 " .previous\n" 358 " .previous\n"
359 " .section .sun_m7_2insn_patch, \"ax\"\n"
360 " .word 661b\n"
361 " andn %0, %6, %0\n"
362 " or %0, %5, %0\n"
363 " .previous\n"
345 : "=r" (val) 364 : "=r" (val)
346 : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), 365 : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U),
347 "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V)); 366 "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V),
367 "i" (_PAGE_CP_4V));
348 368
349 return __pgprot(val); 369 return __pgprot(val);
350} 370}
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index ed8f071132e4..d1761df5cca6 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
40#ifdef CONFIG_SMP 40#ifdef CONFIG_SMP
41#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) 41#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
42#define topology_core_id(cpu) (cpu_data(cpu).core_id) 42#define topology_core_id(cpu) (cpu_data(cpu).core_id)
43#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) 43#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu])
44#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) 44#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
45#endif /* CONFIG_SMP */ 45#endif /* CONFIG_SMP */
46 46
47extern cpumask_t cpu_core_map[NR_CPUS]; 47extern cpumask_t cpu_core_map[NR_CPUS];
48extern cpumask_t cpu_core_sib_map[NR_CPUS];
48static inline const struct cpumask *cpu_coregroup_mask(int cpu) 49static inline const struct cpumask *cpu_coregroup_mask(int cpu)
49{ 50{
50 return &cpu_core_map[cpu]; 51 return &cpu_core_map[cpu];
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
index 6fd4436d32f0..ec9c04de3664 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry {
79}; 79};
80extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, 80extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
81 __sun4v_2insn_patch_end; 81 __sun4v_2insn_patch_end;
82extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch,
83 __sun_m7_2insn_patch_end;
82 84
83 85
84#endif /* !(__ASSEMBLY__) */ 86#endif /* !(__ASSEMBLY__) */
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 07cc49e541f4..0f679421b468 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
69 struct sun4v_1insn_patch_entry *); 69 struct sun4v_1insn_patch_entry *);
70void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, 70void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
71 struct sun4v_2insn_patch_entry *); 71 struct sun4v_2insn_patch_entry *);
72void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *,
73 struct sun4v_2insn_patch_entry *);
72extern unsigned int dcache_parity_tl1_occurred; 74extern unsigned int dcache_parity_tl1_occurred;
73extern unsigned int icache_parity_tl1_occurred; 75extern unsigned int icache_parity_tl1_occurred;
74 76
diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c
index 94e392bdee7d..814fb1729b12 100644
--- a/arch/sparc/kernel/leon_pci_grpci2.c
+++ b/arch/sparc/kernel/leon_pci_grpci2.c
@@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev)
723 err = -ENOMEM; 723 err = -ENOMEM;
724 goto err1; 724 goto err1;
725 } 725 }
726 memset(grpci2priv, 0, sizeof(*grpci2priv));
727 priv->regs = regs; 726 priv->regs = regs;
728 priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */ 727 priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */
729 priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT; 728 priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT;
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 26c80e18d7b1..6f80936e0eea 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp)
614 } 614 }
615} 615}
616 616
617static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) 617static void find_back_node_value(struct mdesc_handle *hp, u64 node,
618 char *srch_val,
619 void (*func)(struct mdesc_handle *, u64, int),
620 u64 val, int depth)
618{ 621{
619 u64 a; 622 u64 arc;
620
621 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
622 u64 t = mdesc_arc_target(hp, a);
623 const char *name;
624 const u64 *id;
625 623
626 name = mdesc_node_name(hp, t); 624 /* Since we have an estimate of recursion depth, do a sanity check. */
627 if (!strcmp(name, "cpu")) { 625 if (depth == 0)
628 id = mdesc_get_property(hp, t, "id", NULL); 626 return;
629 if (*id < NR_CPUS)
630 cpu_data(*id).core_id = core_id;
631 } else {
632 u64 j;
633 627
634 mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { 628 mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) {
635 u64 n = mdesc_arc_target(hp, j); 629 u64 n = mdesc_arc_target(hp, arc);
636 const char *n_name; 630 const char *name = mdesc_node_name(hp, n);
637 631
638 n_name = mdesc_node_name(hp, n); 632 if (!strcmp(srch_val, name))
639 if (strcmp(n_name, "cpu")) 633 (*func)(hp, n, val);
640 continue;
641 634
642 id = mdesc_get_property(hp, n, "id", NULL); 635 find_back_node_value(hp, n, srch_val, func, val, depth-1);
643 if (*id < NR_CPUS)
644 cpu_data(*id).core_id = core_id;
645 }
646 }
647 } 636 }
648} 637}
649 638
639static void __mark_core_id(struct mdesc_handle *hp, u64 node,
640 int core_id)
641{
642 const u64 *id = mdesc_get_property(hp, node, "id", NULL);
643
644 if (*id < num_possible_cpus())
645 cpu_data(*id).core_id = core_id;
646}
647
648static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
649 int sock_id)
650{
651 const u64 *id = mdesc_get_property(hp, node, "id", NULL);
652
653 if (*id < num_possible_cpus())
654 cpu_data(*id).sock_id = sock_id;
655}
656
657static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
658 int core_id)
659{
660 find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
661}
662
663static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
664 int sock_id)
665{
666 find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
667}
668
650static void set_core_ids(struct mdesc_handle *hp) 669static void set_core_ids(struct mdesc_handle *hp)
651{ 670{
652 int idx; 671 int idx;
653 u64 mp; 672 u64 mp;
654 673
655 idx = 1; 674 idx = 1;
675
676 /* Identify unique cores by looking for cpus backpointed to by
677 * level 1 instruction caches.
678 */
656 mdesc_for_each_node_by_name(hp, mp, "cache") { 679 mdesc_for_each_node_by_name(hp, mp, "cache") {
657 const u64 *level; 680 const u64 *level;
658 const char *type; 681 const char *type;
@@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp)
667 continue; 690 continue;
668 691
669 mark_core_ids(hp, mp, idx); 692 mark_core_ids(hp, mp, idx);
693 idx++;
694 }
695}
696
697static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
698{
699 u64 mp;
700 int idx = 1;
701 int fnd = 0;
702
703 /* Identify unique sockets by looking for cpus backpointed to by
704 * shared level n caches.
705 */
706 mdesc_for_each_node_by_name(hp, mp, "cache") {
707 const u64 *cur_lvl;
708
709 cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
710 if (*cur_lvl != level)
711 continue;
712
713 mark_sock_ids(hp, mp, idx);
714 idx++;
715 fnd = 1;
716 }
717 return fnd;
718}
719
720static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp)
721{
722 int idx = 1;
670 723
724 mdesc_for_each_node_by_name(hp, mp, "socket") {
725 u64 a;
726
727 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
728 u64 t = mdesc_arc_target(hp, a);
729 const char *name;
730 const u64 *id;
731
732 name = mdesc_node_name(hp, t);
733 if (strcmp(name, "cpu"))
734 continue;
735
736 id = mdesc_get_property(hp, t, "id", NULL);
737 if (*id < num_possible_cpus())
738 cpu_data(*id).sock_id = idx;
739 }
671 idx++; 740 idx++;
672 } 741 }
673} 742}
674 743
744static void set_sock_ids(struct mdesc_handle *hp)
745{
746 u64 mp;
747
748 /* If machine description exposes sockets data use it.
749 * Otherwise fallback to use shared L3 or L2 caches.
750 */
751 mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
752 if (mp != MDESC_NODE_NULL)
753 return set_sock_ids_by_socket(hp, mp);
754
755 if (!set_sock_ids_by_cache(hp, 3))
756 set_sock_ids_by_cache(hp, 2);
757}
758
675static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) 759static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
676{ 760{
677 u64 a; 761 u64 a;
@@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name)
707 continue; 791 continue;
708 792
709 mark_proc_ids(hp, mp, idx); 793 mark_proc_ids(hp, mp, idx);
710
711 idx++; 794 idx++;
712 } 795 }
713} 796}
@@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask)
900 983
901 set_core_ids(hp); 984 set_core_ids(hp);
902 set_proc_ids(hp); 985 set_proc_ids(hp);
986 set_sock_ids(hp);
903 987
904 mdesc_release(hp); 988 mdesc_release(hp);
905 989
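
The mdesc.c rewrite replaces the fixed two-level back-arc walk with find_back_node_value(), a depth-limited recursion that can tag both core ids and the new socket ids through the callback it is handed; the depth argument is a cycle/robustness guard rather than an exact bound. The same shape on an ordinary tree, with hypothetical node and callback types:

#include <linux/string.h>

struct tnode {				/* hypothetical tree node */
	const char *name;
	struct tnode **children;
	int nr_children;
};

static void visit_named(struct tnode *n, const char *want,
			void (*func)(struct tnode *, int), int val, int depth)
{
	int i;

	if (depth == 0)			/* sanity bound, as in find_back_node_value() */
		return;

	for (i = 0; i < n->nr_children; i++) {
		struct tnode *c = n->children[i];

		if (!strcmp(c->name, want))
			func(c, val);

		visit_named(c, want, func, val, depth - 1);
	}
}
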
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 6f7251fd2eab..c928bc64b4ba 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -1002,6 +1002,38 @@ static int __init pcibios_init(void)
1002subsys_initcall(pcibios_init); 1002subsys_initcall(pcibios_init);
1003 1003
1004#ifdef CONFIG_SYSFS 1004#ifdef CONFIG_SYSFS
1005
1006#define SLOT_NAME_SIZE 11 /* Max decimal digits + null in u32 */
1007
1008static void pcie_bus_slot_names(struct pci_bus *pbus)
1009{
1010 struct pci_dev *pdev;
1011 struct pci_bus *bus;
1012
1013 list_for_each_entry(pdev, &pbus->devices, bus_list) {
1014 char name[SLOT_NAME_SIZE];
1015 struct pci_slot *pci_slot;
1016 const u32 *slot_num;
1017 int len;
1018
1019 slot_num = of_get_property(pdev->dev.of_node,
1020 "physical-slot#", &len);
1021
1022 if (slot_num == NULL || len != 4)
1023 continue;
1024
1025 snprintf(name, sizeof(name), "%u", slot_num[0]);
1026 pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL);
1027
1028 if (IS_ERR(pci_slot))
1029 pr_err("PCI: pci_create_slot returned %ld.\n",
1030 PTR_ERR(pci_slot));
1031 }
1032
1033 list_for_each_entry(bus, &pbus->children, node)
1034 pcie_bus_slot_names(bus);
1035}
1036
1005static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus) 1037static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus)
1006{ 1038{
1007 const struct pci_slot_names { 1039 const struct pci_slot_names {
@@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void)
1053 1085
1054 while ((pbus = pci_find_next_bus(pbus)) != NULL) { 1086 while ((pbus = pci_find_next_bus(pbus)) != NULL) {
1055 struct device_node *node; 1087 struct device_node *node;
1088 struct pci_dev *pdev;
1089
1090 pdev = list_first_entry(&pbus->devices, struct pci_dev,
1091 bus_list);
1056 1092
1057 if (pbus->self) { 1093 if (pdev && pci_is_pcie(pdev)) {
1058 /* PCI->PCI bridge */ 1094 pcie_bus_slot_names(pbus);
1059 node = pbus->self->dev.of_node;
1060 } else { 1095 } else {
1061 struct pci_pbm_info *pbm = pbus->sysdata;
1062 1096
1063 /* Host PCI controller */ 1097 if (pbus->self) {
1064 node = pbm->op->dev.of_node; 1098
1065 } 1099 /* PCI->PCI bridge */
1100 node = pbus->self->dev.of_node;
1101
1102 } else {
1103 struct pci_pbm_info *pbm = pbus->sysdata;
1066 1104
1067 pci_bus_slot_names(node, pbus); 1105 /* Host PCI controller */
1106 node = pbm->op->dev.of_node;
1107 }
1108
1109 pci_bus_slot_names(node, pbus);
1110 }
1068 } 1111 }
1069 1112
1070 return 0; 1113 return 0;
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index c38d19fc27ba..f7b261749383 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
255 } 255 }
256} 256}
257 257
258void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
259 struct sun4v_2insn_patch_entry *end)
260{
261 while (start < end) {
262 unsigned long addr = start->addr;
263
264 *(unsigned int *) (addr + 0) = start->insns[0];
265 wmb();
266 __asm__ __volatile__("flush %0" : : "r" (addr + 0));
267
268 *(unsigned int *) (addr + 4) = start->insns[1];
269 wmb();
270 __asm__ __volatile__("flush %0" : : "r" (addr + 4));
271
272 start++;
273 }
274}
275
258static void __init sun4v_patch(void) 276static void __init sun4v_patch(void)
259{ 277{
260 extern void sun4v_hvapi_init(void); 278 extern void sun4v_hvapi_init(void);
@@ -267,6 +285,9 @@ static void __init sun4v_patch(void)
267 285
268 sun4v_patch_2insn_range(&__sun4v_2insn_patch, 286 sun4v_patch_2insn_range(&__sun4v_2insn_patch,
269 &__sun4v_2insn_patch_end); 287 &__sun4v_2insn_patch_end);
288 if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7)
289 sun_m7_patch_2insn_range(&__sun_m7_2insn_patch,
290 &__sun_m7_2insn_patch_end);
270 291
271 sun4v_hvapi_init(); 292 sun4v_hvapi_init();
272} 293}
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 61139d9924ca..19cd08d18672 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
60cpumask_t cpu_core_map[NR_CPUS] __read_mostly = 60cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
61 { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 61 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
62 62
63cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
64 [0 ... NR_CPUS-1] = CPU_MASK_NONE };
65
63EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 66EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
64EXPORT_SYMBOL(cpu_core_map); 67EXPORT_SYMBOL(cpu_core_map);
68EXPORT_SYMBOL(cpu_core_sib_map);
65 69
66static cpumask_t smp_commenced_mask; 70static cpumask_t smp_commenced_mask;
67 71
@@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void)
1243 } 1247 }
1244 } 1248 }
1245 1249
1250 for_each_present_cpu(i) {
1251 unsigned int j;
1252
1253 for_each_present_cpu(j) {
1254 if (cpu_data(i).sock_id == cpu_data(j).sock_id)
1255 cpumask_set_cpu(j, &cpu_core_sib_map[i]);
1256 }
1257 }
1258
1246 for_each_present_cpu(i) { 1259 for_each_present_cpu(i) {
1247 unsigned int j; 1260 unsigned int j;
1248 1261
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 09243057cb0b..f1a2f688b28a 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -138,6 +138,11 @@ SECTIONS
138 *(.pause_3insn_patch) 138 *(.pause_3insn_patch)
139 __pause_3insn_patch_end = .; 139 __pause_3insn_patch_end = .;
140 } 140 }
141 .sun_m7_2insn_patch : {
142 __sun_m7_2insn_patch = .;
143 *(.sun_m7_2insn_patch)
144 __sun_m7_2insn_patch_end = .;
145 }
141 PERCPU_SECTION(SMP_CACHE_BYTES) 146 PERCPU_SECTION(SMP_CACHE_BYTES)
142 147
143 . = ALIGN(PAGE_SIZE); 148 . = ALIGN(PAGE_SIZE);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 4ca0d6ba5ec8..559cb744112c 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -54,6 +54,7 @@
54#include "init_64.h" 54#include "init_64.h"
55 55
56unsigned long kern_linear_pte_xor[4] __read_mostly; 56unsigned long kern_linear_pte_xor[4] __read_mostly;
57static unsigned long page_cache4v_flag;
57 58
58/* A bitmap, two bits for every 256MB of physical memory. These two 59/* A bitmap, two bits for every 256MB of physical memory. These two
59 * bits determine what page size we use for kernel linear 60 * bits determine what page size we use for kernel linear
@@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void)
1909 1910
1910static void __init sun4v_linear_pte_xor_finalize(void) 1911static void __init sun4v_linear_pte_xor_finalize(void)
1911{ 1912{
1913 unsigned long pagecv_flag;
1914
1915 /* Bit 9 of TTE is no longer CV bit on M7 processor and it instead
1916 * enables MCD error. Do not set bit 9 on M7 processor.
1917 */
1918 switch (sun4v_chip_type) {
1919 case SUN4V_CHIP_SPARC_M7:
1920 pagecv_flag = 0x00;
1921 break;
1922 default:
1923 pagecv_flag = _PAGE_CV_4V;
1924 break;
1925 }
1912#ifndef CONFIG_DEBUG_PAGEALLOC 1926#ifndef CONFIG_DEBUG_PAGEALLOC
1913 if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { 1927 if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
1914 kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ 1928 kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
1915 PAGE_OFFSET; 1929 PAGE_OFFSET;
1916 kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | 1930 kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag |
1917 _PAGE_P_4V | _PAGE_W_4V); 1931 _PAGE_P_4V | _PAGE_W_4V);
1918 } else { 1932 } else {
1919 kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; 1933 kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
@@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
1922 if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { 1936 if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
1923 kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ 1937 kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
1924 PAGE_OFFSET; 1938 PAGE_OFFSET;
1925 kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | 1939 kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag |
1926 _PAGE_P_4V | _PAGE_W_4V); 1940 _PAGE_P_4V | _PAGE_W_4V);
1927 } else { 1941 } else {
1928 kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; 1942 kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
@@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
1931 if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { 1945 if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
1932 kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ 1946 kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
1933 PAGE_OFFSET; 1947 PAGE_OFFSET;
1934 kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | 1948 kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag |
1935 _PAGE_P_4V | _PAGE_W_4V); 1949 _PAGE_P_4V | _PAGE_W_4V);
1936 } else { 1950 } else {
1937 kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; 1951 kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
@@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void)
1958 return available; 1972 return available;
1959} 1973}
1960 1974
1975#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
1976#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
1977#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
1978#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
1979#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
1980#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
1981
1961/* We need to exclude reserved regions. This exclusion will include 1982/* We need to exclude reserved regions. This exclusion will include
1962 * vmlinux and initrd. To be more precise the initrd size could be used to 1983 * vmlinux and initrd. To be more precise the initrd size could be used to
1963 * compute a new lower limit because it is freed later during initialization. 1984 * compute a new lower limit because it is freed later during initialization.
@@ -2034,6 +2055,25 @@ void __init paging_init(void)
2034 memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); 2055 memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
2035#endif 2056#endif
2036 2057
2058 /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde
2059 * bit on M7 processor. This is a conflicting usage of the same
2060 * bit. Enabling TTE.cv on M7 would turn on Memory Corruption
2061 * Detection error on all pages and this will lead to problems
2062 * later. Kernel does not run with MCD enabled and hence the rest
2063 * of the required steps to fully configure memory corruption
2064 * detection are not taken. We need to ensure TTE.mcde is not
2065 * set on M7 processor. Compute the value of cacheability
2066 * flag for use later taking this into consideration.
2067 */
2068 switch (sun4v_chip_type) {
2069 case SUN4V_CHIP_SPARC_M7:
2070 page_cache4v_flag = _PAGE_CP_4V;
2071 break;
2072 default:
2073 page_cache4v_flag = _PAGE_CACHE_4V;
2074 break;
2075 }
2076
2037 if (tlb_type == hypervisor) 2077 if (tlb_type == hypervisor)
2038 sun4v_pgprot_init(); 2078 sun4v_pgprot_init();
2039 else 2079 else
@@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
2274} 2314}
2275#endif 2315#endif
2276 2316
2277#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
2278#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
2279#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
2280#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
2281#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
2282#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
2283
2284pgprot_t PAGE_KERNEL __read_mostly; 2317pgprot_t PAGE_KERNEL __read_mostly;
2285EXPORT_SYMBOL(PAGE_KERNEL); 2318EXPORT_SYMBOL(PAGE_KERNEL);
2286 2319
@@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
2312 _PAGE_P_4U | _PAGE_W_4U); 2345 _PAGE_P_4U | _PAGE_W_4U);
2313 if (tlb_type == hypervisor) 2346 if (tlb_type == hypervisor)
2314 pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | 2347 pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
2315 _PAGE_CP_4V | _PAGE_CV_4V | 2348 page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V);
2316 _PAGE_P_4V | _PAGE_W_4V);
2317 2349
2318 pte_base |= _PAGE_PMD_HUGE; 2350 pte_base |= _PAGE_PMD_HUGE;
2319 2351
@@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void)
2450 int i; 2482 int i;
2451 2483
2452 PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | 2484 PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
2453 _PAGE_CACHE_4V | _PAGE_P_4V | 2485 page_cache4v_flag | _PAGE_P_4V |
2454 __ACCESS_BITS_4V | __DIRTY_BITS_4V | 2486 __ACCESS_BITS_4V | __DIRTY_BITS_4V |
2455 _PAGE_EXEC_4V); 2487 _PAGE_EXEC_4V);
2456 PAGE_KERNEL_LOCKED = PAGE_KERNEL; 2488 PAGE_KERNEL_LOCKED = PAGE_KERNEL;
2457 2489
2458 _PAGE_IE = _PAGE_IE_4V; 2490 _PAGE_IE = _PAGE_IE_4V;
2459 _PAGE_E = _PAGE_E_4V; 2491 _PAGE_E = _PAGE_E_4V;
2460 _PAGE_CACHE = _PAGE_CACHE_4V; 2492 _PAGE_CACHE = page_cache4v_flag;
2461 2493
2462#ifdef CONFIG_DEBUG_PAGEALLOC 2494#ifdef CONFIG_DEBUG_PAGEALLOC
2463 kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; 2495 kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
@@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void)
2465 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ 2497 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
2466 PAGE_OFFSET; 2498 PAGE_OFFSET;
2467#endif 2499#endif
2468 kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | 2500 kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V |
2469 _PAGE_P_4V | _PAGE_W_4V); 2501 _PAGE_W_4V);
2470 2502
2471 for (i = 1; i < 4; i++) 2503 for (i = 1; i < 4; i++)
2472 kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; 2504 kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
@@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void)
2479 _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | 2511 _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
2480 _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); 2512 _PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
2481 2513
2482 page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; 2514 page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag;
2483 page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | 2515 page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
2484 __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); 2516 __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
2485 page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | 2517 page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
2486 __ACCESS_BITS_4V | _PAGE_EXEC_4V); 2518 __ACCESS_BITS_4V | _PAGE_EXEC_4V);
2487 page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | 2519 page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
2488 __ACCESS_BITS_4V | _PAGE_EXEC_4V); 2520 __ACCESS_BITS_4V | _PAGE_EXEC_4V);
2489 2521
2490 page_exec_bit = _PAGE_EXEC_4V; 2522 page_exec_bit = _PAGE_EXEC_4V;
@@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr)
2542 _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); 2574 _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
2543 if (tlb_type == hypervisor) 2575 if (tlb_type == hypervisor)
2544 val = (_PAGE_VALID | _PAGE_SZ4MB_4V | 2576 val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
2545 _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | 2577 page_cache4v_flag | _PAGE_P_4V |
2546 _PAGE_EXEC_4V | _PAGE_W_4V); 2578 _PAGE_EXEC_4V | _PAGE_W_4V);
2547 2579
2548 return val | paddr; 2580 return val | paddr;
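The sparc change above boils down to computing one cacheability mask per chip type (page_cache4v_flag / pagecv_flag) and reusing it everywhere _PAGE_CV_4V used to be hard-coded, because the CV bit position doubles as the MCD-enable bit on M7. A minimal user-space C sketch of that selection; the flag values below are stand-ins, not the real asm/pgtable_64.h constants:

#include <stdio.h>

/* Stand-in values; the real constants live in asm/pgtable_64.h. */
#define _PAGE_CP_4V 0x0000000000000400UL /* cacheable in physical cache */
#define _PAGE_CV_4V 0x0000000000000200UL /* cacheable in virtual cache; MCD enable on M7 */

enum chip { CHIP_GENERIC_SUN4V, CHIP_SPARC_M7 };

/* Mirrors the switch in paging_init()/sun4v_linear_pte_xor_finalize():
 * on M7 only CP is set, so TTE.mcde is never turned on by accident. */
static unsigned long page_cache4v_flag(enum chip c)
{
	return (c == CHIP_SPARC_M7) ? _PAGE_CP_4V : (_PAGE_CP_4V | _PAGE_CV_4V);
}

int main(void)
{
	printf("generic sun4v: %#lx\n", page_cache4v_flag(CHIP_GENERIC_SUN4V));
	printf("sparc M7:      %#lx\n", page_cache4v_flag(CHIP_SPARC_M7));
	return 0;
}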
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 7b11c5fadd42..0496970cef82 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
105 105
106#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) 106#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
107 107
108/* Define this to indicate that cmpxchg is an efficient operation. */
109#define __HAVE_ARCH_CMPXCHG
110
111#endif /* !__ASSEMBLY__ */ 108#endif /* !__ASSEMBLY__ */
112 109
113#endif /* _ASM_TILE_ATOMIC_64_H */ 110#endif /* _ASM_TILE_ATOMIC_64_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index 6ef4ecab1df2..dc61de15c1f9 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -54,7 +54,7 @@ extern void iounmap(volatile void __iomem *addr);
54 54
55#define ioremap_nocache(physaddr, size) ioremap(physaddr, size) 55#define ioremap_nocache(physaddr, size) ioremap(physaddr, size)
56#define ioremap_wc(physaddr, size) ioremap(physaddr, size) 56#define ioremap_wc(physaddr, size) ioremap(physaddr, size)
57#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) 57#define ioremap_wt(physaddr, size) ioremap(physaddr, size)
58#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) 58#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size)
59 59
60#define mmiowb() 60#define mmiowb()
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 3942f74c92d7..1538562cc720 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,3 +1,6 @@
1
2obj-y += entry/
3
1obj-$(CONFIG_KVM) += kvm/ 4obj-$(CONFIG_KVM) += kvm/
2 5
3# Xen paravirtualization support 6# Xen paravirtualization support
@@ -11,7 +14,7 @@ obj-y += kernel/
11obj-y += mm/ 14obj-y += mm/
12 15
13obj-y += crypto/ 16obj-y += crypto/
14obj-y += vdso/ 17
15obj-$(CONFIG_IA32_EMULATION) += ia32/ 18obj-$(CONFIG_IA32_EMULATION) += ia32/
16 19
17obj-y += platform/ 20obj-y += platform/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6bbb991d0f3c..7e39f9b22705 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -9,140 +9,141 @@ config 64BIT
9config X86_32 9config X86_32
10 def_bool y 10 def_bool y
11 depends on !64BIT 11 depends on !64BIT
12 select CLKSRC_I8253
13 select HAVE_UID16
14 12
15config X86_64 13config X86_64
16 def_bool y 14 def_bool y
17 depends on 64BIT 15 depends on 64BIT
18 select X86_DEV_DMA_OPS
19 select ARCH_USE_CMPXCHG_LOCKREF
20 select HAVE_LIVEPATCH
21 16
22### Arch settings 17### Arch settings
23config X86 18config X86
24 def_bool y 19 def_bool y
25 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI 20 select ACPI_LEGACY_TABLES_LOOKUP if ACPI
26 select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI 21 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
22 select ANON_INODES
23 select ARCH_CLOCKSOURCE_DATA
24 select ARCH_DISCARD_MEMBLOCK
25 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
27 select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS 26 select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
27 select ARCH_HAS_ELF_RANDOMIZE
28 select ARCH_HAS_FAST_MULTIPLIER 28 select ARCH_HAS_FAST_MULTIPLIER
29 select ARCH_HAS_GCOV_PROFILE_ALL 29 select ARCH_HAS_GCOV_PROFILE_ALL
30 select ARCH_HAS_SG_CHAIN
31 select ARCH_HAVE_NMI_SAFE_CMPXCHG
32 select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
30 select ARCH_MIGHT_HAVE_PC_PARPORT 33 select ARCH_MIGHT_HAVE_PC_PARPORT
31 select ARCH_MIGHT_HAVE_PC_SERIO 34 select ARCH_MIGHT_HAVE_PC_SERIO
32 select HAVE_AOUT if X86_32 35 select ARCH_SUPPORTS_ATOMIC_RMW
33 select HAVE_UNSTABLE_SCHED_CLOCK 36 select ARCH_SUPPORTS_INT128 if X86_64
34 select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 37 select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
35 select ARCH_SUPPORTS_INT128 if X86_64 38 select ARCH_USE_BUILTIN_BSWAP
36 select HAVE_IDE 39 select ARCH_USE_CMPXCHG_LOCKREF if X86_64
37 select HAVE_OPROFILE 40 select ARCH_USE_QUEUED_RWLOCKS
38 select HAVE_PCSPKR_PLATFORM 41 select ARCH_USE_QUEUED_SPINLOCKS
39 select HAVE_PERF_EVENTS
40 select HAVE_IOREMAP_PROT
41 select HAVE_KPROBES
42 select HAVE_MEMBLOCK
43 select HAVE_MEMBLOCK_NODE_MAP
44 select ARCH_DISCARD_MEMBLOCK
45 select ARCH_WANT_OPTIONAL_GPIOLIB
46 select ARCH_WANT_FRAME_POINTERS 42 select ARCH_WANT_FRAME_POINTERS
47 select HAVE_DMA_ATTRS 43 select ARCH_WANT_IPC_PARSE_VERSION if X86_32
48 select HAVE_DMA_CONTIGUOUS 44 select ARCH_WANT_OPTIONAL_GPIOLIB
49 select HAVE_KRETPROBES 45 select BUILDTIME_EXTABLE_SORT
46 select CLKEVT_I8253
47 select CLKSRC_I8253 if X86_32
48 select CLOCKSOURCE_VALIDATE_LAST_CYCLE
49 select CLOCKSOURCE_WATCHDOG
50 select CLONE_BACKWARDS if X86_32
51 select COMPAT_OLD_SIGACTION if IA32_EMULATION
52 select DCACHE_WORD_ACCESS
53 select GENERIC_CLOCKEVENTS
54 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
55 select GENERIC_CLOCKEVENTS_MIN_ADJUST
56 select GENERIC_CMOS_UPDATE
57 select GENERIC_CPU_AUTOPROBE
50 select GENERIC_EARLY_IOREMAP 58 select GENERIC_EARLY_IOREMAP
51 select HAVE_OPTPROBES 59 select GENERIC_FIND_FIRST_BIT
52 select HAVE_KPROBES_ON_FTRACE 60 select GENERIC_IOMAP
53 select HAVE_FTRACE_MCOUNT_RECORD 61 select GENERIC_IRQ_PROBE
54 select HAVE_FENTRY if X86_64 62 select GENERIC_IRQ_SHOW
63 select GENERIC_PENDING_IRQ if SMP
64 select GENERIC_SMP_IDLE_THREAD
65 select GENERIC_STRNCPY_FROM_USER
66 select GENERIC_STRNLEN_USER
67 select GENERIC_TIME_VSYSCALL
68 select HAVE_ACPI_APEI if ACPI
69 select HAVE_ACPI_APEI_NMI if ACPI
70 select HAVE_ALIGNED_STRUCT_PAGE if SLUB
71 select HAVE_AOUT if X86_32
72 select HAVE_ARCH_AUDITSYSCALL
73 select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
74 select HAVE_ARCH_JUMP_LABEL
75 select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
76 select HAVE_ARCH_KGDB
77 select HAVE_ARCH_KMEMCHECK
78 select HAVE_ARCH_SECCOMP_FILTER
79 select HAVE_ARCH_SOFT_DIRTY if X86_64
80 select HAVE_ARCH_TRACEHOOK
81 select HAVE_ARCH_TRANSPARENT_HUGEPAGE
82 select HAVE_BPF_JIT if X86_64
83 select HAVE_CC_STACKPROTECTOR
84 select HAVE_CMPXCHG_DOUBLE
85 select HAVE_CMPXCHG_LOCAL
86 select HAVE_CONTEXT_TRACKING if X86_64
55 select HAVE_C_RECORDMCOUNT 87 select HAVE_C_RECORDMCOUNT
88 select HAVE_DEBUG_KMEMLEAK
89 select HAVE_DEBUG_STACKOVERFLOW
90 select HAVE_DMA_API_DEBUG
91 select HAVE_DMA_ATTRS
92 select HAVE_DMA_CONTIGUOUS
56 select HAVE_DYNAMIC_FTRACE 93 select HAVE_DYNAMIC_FTRACE
57 select HAVE_DYNAMIC_FTRACE_WITH_REGS 94 select HAVE_DYNAMIC_FTRACE_WITH_REGS
58 select HAVE_FUNCTION_TRACER
59 select HAVE_FUNCTION_GRAPH_TRACER
60 select HAVE_FUNCTION_GRAPH_FP_TEST
61 select HAVE_SYSCALL_TRACEPOINTS
62 select SYSCTL_EXCEPTION_TRACE
63 select HAVE_KVM
64 select HAVE_ARCH_KGDB
65 select HAVE_ARCH_TRACEHOOK
66 select HAVE_GENERIC_DMA_COHERENT if X86_32
67 select HAVE_EFFICIENT_UNALIGNED_ACCESS 95 select HAVE_EFFICIENT_UNALIGNED_ACCESS
68 select USER_STACKTRACE_SUPPORT 96 select HAVE_FENTRY if X86_64
69 select HAVE_REGS_AND_STACK_ACCESS_API 97 select HAVE_FTRACE_MCOUNT_RECORD
70 select HAVE_DMA_API_DEBUG 98 select HAVE_FUNCTION_GRAPH_FP_TEST
71 select HAVE_KERNEL_GZIP 99 select HAVE_FUNCTION_GRAPH_TRACER
100 select HAVE_FUNCTION_TRACER
101 select HAVE_GENERIC_DMA_COHERENT if X86_32
102 select HAVE_HW_BREAKPOINT
103 select HAVE_IDE
104 select HAVE_IOREMAP_PROT
105 select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
106 select HAVE_IRQ_TIME_ACCOUNTING
72 select HAVE_KERNEL_BZIP2 107 select HAVE_KERNEL_BZIP2
108 select HAVE_KERNEL_GZIP
109 select HAVE_KERNEL_LZ4
73 select HAVE_KERNEL_LZMA 110 select HAVE_KERNEL_LZMA
74 select HAVE_KERNEL_XZ
75 select HAVE_KERNEL_LZO 111 select HAVE_KERNEL_LZO
76 select HAVE_KERNEL_LZ4 112 select HAVE_KERNEL_XZ
77 select HAVE_HW_BREAKPOINT 113 select HAVE_KPROBES
114 select HAVE_KPROBES_ON_FTRACE
115 select HAVE_KRETPROBES
116 select HAVE_KVM
117 select HAVE_LIVEPATCH if X86_64
118 select HAVE_MEMBLOCK
119 select HAVE_MEMBLOCK_NODE_MAP
78 select HAVE_MIXED_BREAKPOINTS_REGS 120 select HAVE_MIXED_BREAKPOINTS_REGS
79 select PERF_EVENTS 121 select HAVE_OPROFILE
122 select HAVE_OPTPROBES
123 select HAVE_PCSPKR_PLATFORM
124 select HAVE_PERF_EVENTS
80 select HAVE_PERF_EVENTS_NMI 125 select HAVE_PERF_EVENTS_NMI
81 select HAVE_PERF_REGS 126 select HAVE_PERF_REGS
82 select HAVE_PERF_USER_STACK_DUMP 127 select HAVE_PERF_USER_STACK_DUMP
83 select HAVE_DEBUG_KMEMLEAK 128 select HAVE_REGS_AND_STACK_ACCESS_API
84 select ANON_INODES 129 select HAVE_SYSCALL_TRACEPOINTS
85 select HAVE_ALIGNED_STRUCT_PAGE if SLUB 130 select HAVE_UID16 if X86_32
86 select HAVE_CMPXCHG_LOCAL 131 select HAVE_UNSTABLE_SCHED_CLOCK
87 select HAVE_CMPXCHG_DOUBLE
88 select HAVE_ARCH_KMEMCHECK
89 select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
90 select HAVE_USER_RETURN_NOTIFIER 132 select HAVE_USER_RETURN_NOTIFIER
91 select ARCH_HAS_ELF_RANDOMIZE
92 select HAVE_ARCH_JUMP_LABEL
93 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
94 select SPARSE_IRQ
95 select GENERIC_FIND_FIRST_BIT
96 select GENERIC_IRQ_PROBE
97 select GENERIC_PENDING_IRQ if SMP
98 select GENERIC_IRQ_SHOW
99 select GENERIC_CLOCKEVENTS_MIN_ADJUST
100 select IRQ_FORCED_THREADING 133 select IRQ_FORCED_THREADING
101 select HAVE_BPF_JIT if X86_64 134 select MODULES_USE_ELF_RELA if X86_64
102 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 135 select MODULES_USE_ELF_REL if X86_32
103 select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE) 136 select OLD_SIGACTION if X86_32
104 select ARCH_HAS_SG_CHAIN 137 select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
105 select CLKEVT_I8253 138 select PERF_EVENTS
106 select ARCH_HAVE_NMI_SAFE_CMPXCHG
107 select GENERIC_IOMAP
108 select DCACHE_WORD_ACCESS
109 select GENERIC_SMP_IDLE_THREAD
110 select ARCH_WANT_IPC_PARSE_VERSION if X86_32
111 select HAVE_ARCH_SECCOMP_FILTER
112 select BUILDTIME_EXTABLE_SORT
113 select GENERIC_CMOS_UPDATE
114 select HAVE_ARCH_SOFT_DIRTY if X86_64
115 select CLOCKSOURCE_WATCHDOG
116 select GENERIC_CLOCKEVENTS
117 select ARCH_CLOCKSOURCE_DATA
118 select CLOCKSOURCE_VALIDATE_LAST_CYCLE
119 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
120 select GENERIC_TIME_VSYSCALL
121 select GENERIC_STRNCPY_FROM_USER
122 select GENERIC_STRNLEN_USER
123 select HAVE_CONTEXT_TRACKING if X86_64
124 select HAVE_IRQ_TIME_ACCOUNTING
125 select VIRT_TO_BUS
126 select MODULES_USE_ELF_REL if X86_32
127 select MODULES_USE_ELF_RELA if X86_64
128 select CLONE_BACKWARDS if X86_32
129 select ARCH_USE_BUILTIN_BSWAP
130 select ARCH_USE_QUEUE_RWLOCK
131 select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
132 select OLD_SIGACTION if X86_32
133 select COMPAT_OLD_SIGACTION if IA32_EMULATION
134 select RTC_LIB 139 select RTC_LIB
135 select HAVE_DEBUG_STACKOVERFLOW 140 select SPARSE_IRQ
136 select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
137 select HAVE_CC_STACKPROTECTOR
138 select GENERIC_CPU_AUTOPROBE
139 select HAVE_ARCH_AUDITSYSCALL
140 select ARCH_SUPPORTS_ATOMIC_RMW
141 select HAVE_ACPI_APEI if ACPI
142 select HAVE_ACPI_APEI_NMI if ACPI
143 select ACPI_LEGACY_TABLES_LOOKUP if ACPI
144 select X86_FEATURE_NAMES if PROC_FS
145 select SRCU 141 select SRCU
142 select SYSCTL_EXCEPTION_TRACE
143 select USER_STACKTRACE_SUPPORT
144 select VIRT_TO_BUS
145 select X86_DEV_DMA_OPS if X86_64
146 select X86_FEATURE_NAMES if PROC_FS
146 147
147config INSTRUCTION_DECODER 148config INSTRUCTION_DECODER
148 def_bool y 149 def_bool y
@@ -260,10 +261,6 @@ config X86_64_SMP
260 def_bool y 261 def_bool y
261 depends on X86_64 && SMP 262 depends on X86_64 && SMP
262 263
263config X86_HT
264 def_bool y
265 depends on SMP
266
267config X86_32_LAZY_GS 264config X86_32_LAZY_GS
268 def_bool y 265 def_bool y
269 depends on X86_32 && !CC_STACKPROTECTOR 266 depends on X86_32 && !CC_STACKPROTECTOR
@@ -441,6 +438,7 @@ config X86_UV
441 depends on X86_EXTENDED_PLATFORM 438 depends on X86_EXTENDED_PLATFORM
442 depends on NUMA 439 depends on NUMA
443 depends on X86_X2APIC 440 depends on X86_X2APIC
441 depends on PCI
444 ---help--- 442 ---help---
445 This option is needed in order to support SGI Ultraviolet systems. 443 This option is needed in order to support SGI Ultraviolet systems.
446 If you don't have one of these, you should say N here. 444 If you don't have one of these, you should say N here.
@@ -665,7 +663,7 @@ config PARAVIRT_DEBUG
665config PARAVIRT_SPINLOCKS 663config PARAVIRT_SPINLOCKS
666 bool "Paravirtualization layer for spinlocks" 664 bool "Paravirtualization layer for spinlocks"
667 depends on PARAVIRT && SMP 665 depends on PARAVIRT && SMP
668 select UNINLINE_SPIN_UNLOCK 666 select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
669 ---help--- 667 ---help---
670 Paravirtualized spinlocks allow a pvops backend to replace the 668 Paravirtualized spinlocks allow a pvops backend to replace the
671 spinlock implementation with something virtualization-friendly 669 spinlock implementation with something virtualization-friendly
@@ -850,11 +848,12 @@ config NR_CPUS
850 default "1" if !SMP 848 default "1" if !SMP
851 default "8192" if MAXSMP 849 default "8192" if MAXSMP
852 default "32" if SMP && X86_BIGSMP 850 default "32" if SMP && X86_BIGSMP
853 default "8" if SMP 851 default "8" if SMP && X86_32
852 default "64" if SMP
854 ---help--- 853 ---help---
855 This allows you to specify the maximum number of CPUs which this 854 This allows you to specify the maximum number of CPUs which this
856 kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum 855 kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
857 supported value is 4096, otherwise the maximum value is 512. The 856 supported value is 8192, otherwise the maximum value is 512. The
858 minimum value which makes sense is 2. 857 minimum value which makes sense is 2.
859 858
860 This is purely to save memory - each supported CPU adds 859 This is purely to save memory - each supported CPU adds
@@ -862,7 +861,7 @@ config NR_CPUS
862 861
863config SCHED_SMT 862config SCHED_SMT
864 bool "SMT (Hyperthreading) scheduler support" 863 bool "SMT (Hyperthreading) scheduler support"
865 depends on X86_HT 864 depends on SMP
866 ---help--- 865 ---help---
867 SMT scheduler support improves the CPU scheduler's decision making 866 SMT scheduler support improves the CPU scheduler's decision making
868 when dealing with Intel Pentium 4 chips with HyperThreading at a 867 when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -872,7 +871,7 @@ config SCHED_SMT
872config SCHED_MC 871config SCHED_MC
873 def_bool y 872 def_bool y
874 prompt "Multi-core scheduler support" 873 prompt "Multi-core scheduler support"
875 depends on X86_HT 874 depends on SMP
876 ---help--- 875 ---help---
877 Multi-core scheduler support improves the CPU scheduler's decision 876 Multi-core scheduler support improves the CPU scheduler's decision
878 making when dealing with multi-core CPU chips at a cost of slightly 877 making when dealing with multi-core CPU chips at a cost of slightly
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 72484a645f05..a5973f851750 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -332,4 +332,15 @@ config X86_DEBUG_STATIC_CPU_HAS
332 332
333 If unsure, say N. 333 If unsure, say N.
334 334
335config PUNIT_ATOM_DEBUG
336 tristate "ATOM Punit debug driver"
337 select DEBUG_FS
338 select IOSF_MBI
339 ---help---
340 This is a debug driver, which gets the power states
341 of all Punit North Complex devices. The power states of
342 each device are exposed as part of the debugfs interface.
343 The current power state can be read from
344 /sys/kernel/debug/punit_atom/dev_power_state
345
335endmenu 346endmenu
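The new help text names the debugfs file directly, so a trivial reader is enough to inspect the Punit state. A sketch only, assuming debugfs is mounted at /sys/kernel/debug and the new driver is built in or loaded:

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/punit_atom/dev_power_state";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* dump the per-device power states */
	fclose(f);
	return 0;
}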
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 57996ee840dd..118e6debc483 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -149,12 +149,6 @@ endif
149sp-$(CONFIG_X86_32) := esp 149sp-$(CONFIG_X86_32) := esp
150sp-$(CONFIG_X86_64) := rsp 150sp-$(CONFIG_X86_64) := rsp
151 151
152# do binutils support CFI?
153cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
154# is .cfi_signal_frame supported too?
155cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
156cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
157
158# does binutils support specific instructions? 152# does binutils support specific instructions?
159asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) 153asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
160asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) 154asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
@@ -162,8 +156,8 @@ asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
162avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) 156avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
163avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) 157avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
164 158
165KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) 159KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr)
166KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) 160KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr)
167 161
168LDFLAGS := -m elf_$(UTS_MACHINE) 162LDFLAGS := -m elf_$(UTS_MACHINE)
169 163
@@ -187,7 +181,7 @@ archscripts: scripts_basic
187# Syscall table generation 181# Syscall table generation
188 182
189archheaders: 183archheaders:
190 $(Q)$(MAKE) $(build)=arch/x86/syscalls all 184 $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
191 185
192archprepare: 186archprepare:
193ifeq ($(CONFIG_KEXEC_FILE),y) 187ifeq ($(CONFIG_KEXEC_FILE),y)
@@ -250,7 +244,7 @@ install:
250 244
251PHONY += vdso_install 245PHONY += vdso_install
252vdso_install: 246vdso_install:
253 $(Q)$(MAKE) $(build)=arch/x86/vdso $@ 247 $(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@
254 248
255archclean: 249archclean:
256 $(Q)rm -rf $(objtree)/arch/i386 250 $(Q)rm -rf $(objtree)/arch/i386
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
new file mode 100644
index 000000000000..7a144971db79
--- /dev/null
+++ b/arch/x86/entry/Makefile
@@ -0,0 +1,10 @@
1#
2# Makefile for the x86 low level entry code
3#
4obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
5
6obj-y += vdso/
7obj-y += vsyscall/
8
9obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
10
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/entry/calling.h
index 1c8b50edb2db..f4e6308c4200 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/entry/calling.h
@@ -46,8 +46,6 @@ For 32-bit we have the following conventions - kernel is built with
46 46
47*/ 47*/
48 48
49#include <asm/dwarf2.h>
50
51#ifdef CONFIG_X86_64 49#ifdef CONFIG_X86_64
52 50
53/* 51/*
@@ -91,28 +89,27 @@ For 32-bit we have the following conventions - kernel is built with
91#define SIZEOF_PTREGS 21*8 89#define SIZEOF_PTREGS 21*8
92 90
93 .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 91 .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
94 subq $15*8+\addskip, %rsp 92 addq $-(15*8+\addskip), %rsp
95 CFI_ADJUST_CFA_OFFSET 15*8+\addskip
96 .endm 93 .endm
97 94
98 .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 95 .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
99 .if \r11 96 .if \r11
100 movq_cfi r11, 6*8+\offset 97 movq %r11, 6*8+\offset(%rsp)
101 .endif 98 .endif
102 .if \r8910 99 .if \r8910
103 movq_cfi r10, 7*8+\offset 100 movq %r10, 7*8+\offset(%rsp)
104 movq_cfi r9, 8*8+\offset 101 movq %r9, 8*8+\offset(%rsp)
105 movq_cfi r8, 9*8+\offset 102 movq %r8, 9*8+\offset(%rsp)
106 .endif 103 .endif
107 .if \rax 104 .if \rax
108 movq_cfi rax, 10*8+\offset 105 movq %rax, 10*8+\offset(%rsp)
109 .endif 106 .endif
110 .if \rcx 107 .if \rcx
111 movq_cfi rcx, 11*8+\offset 108 movq %rcx, 11*8+\offset(%rsp)
112 .endif 109 .endif
113 movq_cfi rdx, 12*8+\offset 110 movq %rdx, 12*8+\offset(%rsp)
114 movq_cfi rsi, 13*8+\offset 111 movq %rsi, 13*8+\offset(%rsp)
115 movq_cfi rdi, 14*8+\offset 112 movq %rdi, 14*8+\offset(%rsp)
116 .endm 113 .endm
117 .macro SAVE_C_REGS offset=0 114 .macro SAVE_C_REGS offset=0
118 SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 115 SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
@@ -131,24 +128,24 @@ For 32-bit we have the following conventions - kernel is built with
131 .endm 128 .endm
132 129
133 .macro SAVE_EXTRA_REGS offset=0 130 .macro SAVE_EXTRA_REGS offset=0
134 movq_cfi r15, 0*8+\offset 131 movq %r15, 0*8+\offset(%rsp)
135 movq_cfi r14, 1*8+\offset 132 movq %r14, 1*8+\offset(%rsp)
136 movq_cfi r13, 2*8+\offset 133 movq %r13, 2*8+\offset(%rsp)
137 movq_cfi r12, 3*8+\offset 134 movq %r12, 3*8+\offset(%rsp)
138 movq_cfi rbp, 4*8+\offset 135 movq %rbp, 4*8+\offset(%rsp)
139 movq_cfi rbx, 5*8+\offset 136 movq %rbx, 5*8+\offset(%rsp)
140 .endm 137 .endm
141 .macro SAVE_EXTRA_REGS_RBP offset=0 138 .macro SAVE_EXTRA_REGS_RBP offset=0
142 movq_cfi rbp, 4*8+\offset 139 movq %rbp, 4*8+\offset(%rsp)
143 .endm 140 .endm
144 141
145 .macro RESTORE_EXTRA_REGS offset=0 142 .macro RESTORE_EXTRA_REGS offset=0
146 movq_cfi_restore 0*8+\offset, r15 143 movq 0*8+\offset(%rsp), %r15
147 movq_cfi_restore 1*8+\offset, r14 144 movq 1*8+\offset(%rsp), %r14
148 movq_cfi_restore 2*8+\offset, r13 145 movq 2*8+\offset(%rsp), %r13
149 movq_cfi_restore 3*8+\offset, r12 146 movq 3*8+\offset(%rsp), %r12
150 movq_cfi_restore 4*8+\offset, rbp 147 movq 4*8+\offset(%rsp), %rbp
151 movq_cfi_restore 5*8+\offset, rbx 148 movq 5*8+\offset(%rsp), %rbx
152 .endm 149 .endm
153 150
154 .macro ZERO_EXTRA_REGS 151 .macro ZERO_EXTRA_REGS
@@ -162,24 +159,24 @@ For 32-bit we have the following conventions - kernel is built with
162 159
163 .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 160 .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
164 .if \rstor_r11 161 .if \rstor_r11
165 movq_cfi_restore 6*8, r11 162 movq 6*8(%rsp), %r11
166 .endif 163 .endif
167 .if \rstor_r8910 164 .if \rstor_r8910
168 movq_cfi_restore 7*8, r10 165 movq 7*8(%rsp), %r10
169 movq_cfi_restore 8*8, r9 166 movq 8*8(%rsp), %r9
170 movq_cfi_restore 9*8, r8 167 movq 9*8(%rsp), %r8
171 .endif 168 .endif
172 .if \rstor_rax 169 .if \rstor_rax
173 movq_cfi_restore 10*8, rax 170 movq 10*8(%rsp), %rax
174 .endif 171 .endif
175 .if \rstor_rcx 172 .if \rstor_rcx
176 movq_cfi_restore 11*8, rcx 173 movq 11*8(%rsp), %rcx
177 .endif 174 .endif
178 .if \rstor_rdx 175 .if \rstor_rdx
179 movq_cfi_restore 12*8, rdx 176 movq 12*8(%rsp), %rdx
180 .endif 177 .endif
181 movq_cfi_restore 13*8, rsi 178 movq 13*8(%rsp), %rsi
182 movq_cfi_restore 14*8, rdi 179 movq 14*8(%rsp), %rdi
183 .endm 180 .endm
184 .macro RESTORE_C_REGS 181 .macro RESTORE_C_REGS
185 RESTORE_C_REGS_HELPER 1,1,1,1,1 182 RESTORE_C_REGS_HELPER 1,1,1,1,1
@@ -204,8 +201,7 @@ For 32-bit we have the following conventions - kernel is built with
204 .endm 201 .endm
205 202
206 .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 203 .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
207 addq $15*8+\addskip, %rsp 204 subq $-(15*8+\addskip), %rsp
208 CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
209 .endm 205 .endm
210 206
211 .macro icebp 207 .macro icebp
@@ -224,23 +220,23 @@ For 32-bit we have the following conventions - kernel is built with
224 */ 220 */
225 221
226 .macro SAVE_ALL 222 .macro SAVE_ALL
227 pushl_cfi_reg eax 223 pushl %eax
228 pushl_cfi_reg ebp 224 pushl %ebp
229 pushl_cfi_reg edi 225 pushl %edi
230 pushl_cfi_reg esi 226 pushl %esi
231 pushl_cfi_reg edx 227 pushl %edx
232 pushl_cfi_reg ecx 228 pushl %ecx
233 pushl_cfi_reg ebx 229 pushl %ebx
234 .endm 230 .endm
235 231
236 .macro RESTORE_ALL 232 .macro RESTORE_ALL
237 popl_cfi_reg ebx 233 popl %ebx
238 popl_cfi_reg ecx 234 popl %ecx
239 popl_cfi_reg edx 235 popl %edx
240 popl_cfi_reg esi 236 popl %esi
241 popl_cfi_reg edi 237 popl %edi
242 popl_cfi_reg ebp 238 popl %ebp
243 popl_cfi_reg eax 239 popl %eax
244 .endm 240 .endm
245 241
246#endif /* CONFIG_X86_64 */ 242#endif /* CONFIG_X86_64 */
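The rewritten macros above only spell out what was already implicit in movq_cfi: each general-purpose register has a fixed 8-byte slot counted up from the freshly allocated %rsp (r15 at 0*8 through rdi at 14*8), and SIZEOF_PTREGS additionally covers orig_ax plus the 5-word hardware frame. A small stand-alone illustration of that slot arithmetic (illustration only, not the kernel's pt_regs definition):

#include <assert.h>
#include <stdio.h>

/* Slot indices used by SAVE_EXTRA_REGS / SAVE_C_REGS; each slot is 8 bytes. */
enum slot {
	R15, R14, R13, R12, RBP, RBX,               /* callee-saved "extra" regs */
	R11, R10, R9, R8, RAX, RCX, RDX, RSI, RDI,  /* C-clobbered regs */
	NSAVED                                      /* 15 GP registers in total */
};

int main(void)
{
	assert(NSAVED == 15);
	/* ALLOC_PT_GPREGS_ON_STACK makes room for exactly these 15 slots... */
	printf("GP save area:  %d bytes\n", NSAVED * 8);
	/* ...while SIZEOF_PTREGS (21*8) also counts orig_ax and the
	 * ip/cs/flags/sp/ss frame pushed by the hardware. */
	printf("SIZEOF_PTREGS: %d bytes\n", 21 * 8);
	printf("rdi lives at %d(%%rsp)\n", RDI * 8);
	return 0;
}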
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
new file mode 100644
index 000000000000..21dc60a60b5f
--- /dev/null
+++ b/arch/x86/entry/entry_32.S
@@ -0,0 +1,1248 @@
1/*
2 * Copyright (C) 1991,1992 Linus Torvalds
3 *
4 * entry_32.S contains the system-call and low-level fault and trap handling routines.
5 *
6 * Stack layout in 'syscall_exit':
7 * ptrace needs to have all registers on the stack.
8 * If the order here is changed, it needs to be
9 * updated in fork.c:copy_process(), signal.c:do_signal(),
10 * ptrace.c and ptrace.h
11 *
12 * 0(%esp) - %ebx
13 * 4(%esp) - %ecx
14 * 8(%esp) - %edx
15 * C(%esp) - %esi
16 * 10(%esp) - %edi
17 * 14(%esp) - %ebp
18 * 18(%esp) - %eax
19 * 1C(%esp) - %ds
20 * 20(%esp) - %es
21 * 24(%esp) - %fs
22 * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
23 * 2C(%esp) - orig_eax
24 * 30(%esp) - %eip
25 * 34(%esp) - %cs
26 * 38(%esp) - %eflags
27 * 3C(%esp) - %oldesp
28 * 40(%esp) - %oldss
29 */
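A compact way to read the layout table in the comment above is as a packed struct of 4-byte slots, which is exactly the order SAVE_ALL pushes (the last push lands at 0(%esp)). The sketch below is an illustration with made-up field names, not the kernel's struct pt_regs declaration:

#include <stddef.h>
#include <stdio.h>

struct frame32 {
	unsigned int bx, cx, dx, si, di, bp, ax;  /* 0x00 .. 0x18 */
	unsigned int ds, es, fs, gs;              /* 0x1c .. 0x28; gs slot is a dummy 0 with LAZY_GS */
	unsigned int orig_ax;                     /* 0x2c */
	unsigned int ip, cs, flags, sp, ss;       /* 0x30 .. 0x40 */
};

int main(void)
{
	printf("eax at %#zx, orig_eax at %#zx, oldss at %#zx\n",
	       offsetof(struct frame32, ax),
	       offsetof(struct frame32, orig_ax),
	       offsetof(struct frame32, ss));
	return 0;
}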
30
31#include <linux/linkage.h>
32#include <linux/err.h>
33#include <asm/thread_info.h>
34#include <asm/irqflags.h>
35#include <asm/errno.h>
36#include <asm/segment.h>
37#include <asm/smp.h>
38#include <asm/page_types.h>
39#include <asm/percpu.h>
40#include <asm/processor-flags.h>
41#include <asm/ftrace.h>
42#include <asm/irq_vectors.h>
43#include <asm/cpufeature.h>
44#include <asm/alternative-asm.h>
45#include <asm/asm.h>
46#include <asm/smap.h>
47
48/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
49#include <linux/elf-em.h>
50#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
51#define __AUDIT_ARCH_LE 0x40000000
52
53#ifndef CONFIG_AUDITSYSCALL
54# define sysenter_audit syscall_trace_entry
55# define sysexit_audit syscall_exit_work
56#endif
57
58 .section .entry.text, "ax"
59
60/*
61 * We use macros for low-level operations which need to be overridden
62 * for paravirtualization. The following will never clobber any registers:
63 * INTERRUPT_RETURN (aka. "iret")
64 * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
65 * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
66 *
67 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
68 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
69 * Allowing a register to be clobbered can shrink the paravirt replacement
70 * enough to patch inline, increasing performance.
71 */
72
73#ifdef CONFIG_PREEMPT
74# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
75#else
76# define preempt_stop(clobbers)
77# define resume_kernel restore_all
78#endif
79
80.macro TRACE_IRQS_IRET
81#ifdef CONFIG_TRACE_IRQFLAGS
82 testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off?
83 jz 1f
84 TRACE_IRQS_ON
851:
86#endif
87.endm
88
89/*
90 * User gs save/restore
91 *
92 * %gs is used for userland TLS and kernel only uses it for stack
93 * canary which is required to be at %gs:20 by gcc. Read the comment
94 * at the top of stackprotector.h for more info.
95 *
96 * Local labels 98 and 99 are used.
97 */
98#ifdef CONFIG_X86_32_LAZY_GS
99
100 /* unfortunately push/pop can't be no-op */
101.macro PUSH_GS
102 pushl $0
103.endm
104.macro POP_GS pop=0
105 addl $(4 + \pop), %esp
106.endm
107.macro POP_GS_EX
108.endm
109
110 /* all the rest are no-op */
111.macro PTGS_TO_GS
112.endm
113.macro PTGS_TO_GS_EX
114.endm
115.macro GS_TO_REG reg
116.endm
117.macro REG_TO_PTGS reg
118.endm
119.macro SET_KERNEL_GS reg
120.endm
121
122#else /* CONFIG_X86_32_LAZY_GS */
123
124.macro PUSH_GS
125 pushl %gs
126.endm
127
128.macro POP_GS pop=0
12998: popl %gs
130 .if \pop <> 0
131 add $\pop, %esp
132 .endif
133.endm
134.macro POP_GS_EX
135.pushsection .fixup, "ax"
13699: movl $0, (%esp)
137 jmp 98b
138.popsection
139 _ASM_EXTABLE(98b, 99b)
140.endm
141
142.macro PTGS_TO_GS
14398: mov PT_GS(%esp), %gs
144.endm
145.macro PTGS_TO_GS_EX
146.pushsection .fixup, "ax"
14799: movl $0, PT_GS(%esp)
148 jmp 98b
149.popsection
150 _ASM_EXTABLE(98b, 99b)
151.endm
152
153.macro GS_TO_REG reg
154 movl %gs, \reg
155.endm
156.macro REG_TO_PTGS reg
157 movl \reg, PT_GS(%esp)
158.endm
159.macro SET_KERNEL_GS reg
160 movl $(__KERNEL_STACK_CANARY), \reg
161 movl \reg, %gs
162.endm
163
164#endif /* CONFIG_X86_32_LAZY_GS */
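For reference, the %gs:20 requirement mentioned above comes from gcc reading the 32-bit stack-protector canary at that fixed %gs offset, which is why the non-lazy variant must keep %gs pointing at per-cpu data. A sketch of an object laid out to satisfy that offset (a stand-in, not the kernel's declaration):

#include <stddef.h>
#include <stdio.h>

struct canary_slot {
	char pad[20];           /* keeps the canary at the offset gcc expects */
	unsigned int canary;
};

int main(void)
{
	printf("canary offset: %zu\n", offsetof(struct canary_slot, canary));
	return 0;
}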
165
166.macro SAVE_ALL
167 cld
168 PUSH_GS
169 pushl %fs
170 pushl %es
171 pushl %ds
172 pushl %eax
173 pushl %ebp
174 pushl %edi
175 pushl %esi
176 pushl %edx
177 pushl %ecx
178 pushl %ebx
179 movl $(__USER_DS), %edx
180 movl %edx, %ds
181 movl %edx, %es
182 movl $(__KERNEL_PERCPU), %edx
183 movl %edx, %fs
184 SET_KERNEL_GS %edx
185.endm
186
187.macro RESTORE_INT_REGS
188 popl %ebx
189 popl %ecx
190 popl %edx
191 popl %esi
192 popl %edi
193 popl %ebp
194 popl %eax
195.endm
196
197.macro RESTORE_REGS pop=0
198 RESTORE_INT_REGS
1991: popl %ds
2002: popl %es
2013: popl %fs
202 POP_GS \pop
203.pushsection .fixup, "ax"
2044: movl $0, (%esp)
205 jmp 1b
2065: movl $0, (%esp)
207 jmp 2b
2086: movl $0, (%esp)
209 jmp 3b
210.popsection
211 _ASM_EXTABLE(1b, 4b)
212 _ASM_EXTABLE(2b, 5b)
213 _ASM_EXTABLE(3b, 6b)
214 POP_GS_EX
215.endm
216
217ENTRY(ret_from_fork)
218 pushl %eax
219 call schedule_tail
220 GET_THREAD_INFO(%ebp)
221 popl %eax
222 pushl $0x0202 # Reset kernel eflags
223 popfl
224 jmp syscall_exit
225END(ret_from_fork)
226
227ENTRY(ret_from_kernel_thread)
228 pushl %eax
229 call schedule_tail
230 GET_THREAD_INFO(%ebp)
231 popl %eax
232 pushl $0x0202 # Reset kernel eflags
233 popfl
234 movl PT_EBP(%esp), %eax
235 call *PT_EBX(%esp)
236 movl $0, PT_EAX(%esp)
237 jmp syscall_exit
238ENDPROC(ret_from_kernel_thread)
239
240/*
241 * Return to user mode is not as complex as all this looks,
242 * but we want the default path for a system call return to
243 * go as quickly as possible which is why some of this is
244 * less clear than it otherwise should be.
245 */
246
247 # userspace resumption stub bypassing syscall exit tracing
248 ALIGN
249ret_from_exception:
250 preempt_stop(CLBR_ANY)
251ret_from_intr:
252 GET_THREAD_INFO(%ebp)
253#ifdef CONFIG_VM86
254 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
255 movb PT_CS(%esp), %al
256 andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
257#else
258 /*
259 * We can be coming here from child spawned by kernel_thread().
260 */
261 movl PT_CS(%esp), %eax
262 andl $SEGMENT_RPL_MASK, %eax
263#endif
264 cmpl $USER_RPL, %eax
265 jb resume_kernel # not returning to v8086 or userspace
266
267ENTRY(resume_userspace)
268 LOCKDEP_SYS_EXIT
269 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
270 # setting need_resched or sigpending
271 # between sampling and the iret
272 TRACE_IRQS_OFF
273 movl TI_flags(%ebp), %ecx
274 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
275 # int/exception return?
276 jne work_pending
277 jmp restore_all
278END(ret_from_exception)
279
280#ifdef CONFIG_PREEMPT
281ENTRY(resume_kernel)
282 DISABLE_INTERRUPTS(CLBR_ANY)
283need_resched:
284 cmpl $0, PER_CPU_VAR(__preempt_count)
285 jnz restore_all
286 testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
287 jz restore_all
288 call preempt_schedule_irq
289 jmp need_resched
290END(resume_kernel)
291#endif
292
293/*
294 * SYSENTER_RETURN points to after the SYSENTER instruction
295 * in the vsyscall page. See vsyscall-sysentry.S, which defines
296 * the symbol.
297 */
298
299 # SYSENTER call handler stub
300ENTRY(entry_SYSENTER_32)
301 movl TSS_sysenter_sp0(%esp), %esp
302sysenter_past_esp:
303 /*
304 * Interrupts are disabled here, but we can't trace it until
305 * enough kernel state to call TRACE_IRQS_OFF can be called - but
306 * we immediately enable interrupts at that point anyway.
307 */
308 pushl $__USER_DS
309 pushl %ebp
310 pushfl
311 orl $X86_EFLAGS_IF, (%esp)
312 pushl $__USER_CS
313 /*
314 * Push current_thread_info()->sysenter_return to the stack.
315 * A tiny bit of offset fixup is necessary: TI_sysenter_return
316 * is relative to thread_info, which is at the bottom of the
317 * kernel stack page. 4*4 means the 4 words pushed above;
318 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
319 * and THREAD_SIZE takes us to the bottom.
320 */
321 pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
322
323 pushl %eax
324 SAVE_ALL
325 ENABLE_INTERRUPTS(CLBR_NONE)
326
327/*
328 * Load the potential sixth argument from user stack.
329 * Careful about security.
330 */
331 cmpl $__PAGE_OFFSET-3, %ebp
332 jae syscall_fault
333 ASM_STAC
3341: movl (%ebp), %ebp
335 ASM_CLAC
336 movl %ebp, PT_EBP(%esp)
337 _ASM_EXTABLE(1b, syscall_fault)
338
339 GET_THREAD_INFO(%ebp)
340
341 testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
342 jnz sysenter_audit
343sysenter_do_call:
344 cmpl $(NR_syscalls), %eax
345 jae sysenter_badsys
346 call *sys_call_table(, %eax, 4)
347sysenter_after_call:
348 movl %eax, PT_EAX(%esp)
349 LOCKDEP_SYS_EXIT
350 DISABLE_INTERRUPTS(CLBR_ANY)
351 TRACE_IRQS_OFF
352 movl TI_flags(%ebp), %ecx
353 testl $_TIF_ALLWORK_MASK, %ecx
354 jnz sysexit_audit
355sysenter_exit:
356/* if something modifies registers it must also disable sysexit */
357 movl PT_EIP(%esp), %edx
358 movl PT_OLDESP(%esp), %ecx
359 xorl %ebp, %ebp
360 TRACE_IRQS_ON
3611: mov PT_FS(%esp), %fs
362 PTGS_TO_GS
363 ENABLE_INTERRUPTS_SYSEXIT
364
365#ifdef CONFIG_AUDITSYSCALL
366sysenter_audit:
367 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp)
368 jnz syscall_trace_entry
369 /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
370 movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
371	/* movl PT_ECX(%esp), %ecx already set, a1: 3rd arg to audit */
372 pushl PT_ESI(%esp) /* a3: 5th arg */
373 pushl PT_EDX+4(%esp) /* a2: 4th arg */
374 call __audit_syscall_entry
375 popl %ecx /* get that remapped edx off the stack */
376 popl %ecx /* get that remapped esi off the stack */
377 movl PT_EAX(%esp), %eax /* reload syscall number */
378 jmp sysenter_do_call
379
380sysexit_audit:
381 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
382 jnz syscall_exit_work
383 TRACE_IRQS_ON
384 ENABLE_INTERRUPTS(CLBR_ANY)
385 movl %eax, %edx /* second arg, syscall return value */
386 cmpl $-MAX_ERRNO, %eax /* is it an error ? */
387 setbe %al /* 1 if so, 0 if not */
388 movzbl %al, %eax /* zero-extend that */
389 call __audit_syscall_exit
390 DISABLE_INTERRUPTS(CLBR_ANY)
391 TRACE_IRQS_OFF
392 movl TI_flags(%ebp), %ecx
393 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
394 jnz syscall_exit_work
395 movl PT_EAX(%esp), %eax /* reload syscall return value */
396 jmp sysenter_exit
397#endif
398
399.pushsection .fixup, "ax"
4002: movl $0, PT_FS(%esp)
401 jmp 1b
402.popsection
403 _ASM_EXTABLE(1b, 2b)
404 PTGS_TO_GS_EX
405ENDPROC(entry_SYSENTER_32)
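The sixth-argument fetch inside entry_SYSENTER_32 is a bounds check plus a load that is allowed to fault: the cmpl against __PAGE_OFFSET-3 rejects pointers whose 4-byte read would reach kernel space, and _ASM_EXTABLE diverts an actual fault to syscall_fault. A user-space sketch of just the bounds check, with KERNEL_BASE standing in for __PAGE_OFFSET:

#include <stdint.h>
#include <stdio.h>

#define KERNEL_BASE 0xc0000000u   /* stand-in for __PAGE_OFFSET */

static int sixth_arg_ok(uint32_t user_bp)
{
	/* asm: cmpl $__PAGE_OFFSET-3, %ebp; jae syscall_fault
	 * i.e. the 4-byte load at user_bp must end below KERNEL_BASE. */
	return user_bp < KERNEL_BASE - 3;
}

int main(void)
{
	printf("0xbffffffc ok? %d\n", sixth_arg_ok(0xbffffffcu)); /* read ends at 0xbfffffff */
	printf("0xbffffffd ok? %d\n", sixth_arg_ok(0xbffffffdu)); /* would touch 0xc0000000 */
	return 0;
}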
406
407 # system call handler stub
408ENTRY(entry_INT80_32)
409 ASM_CLAC
410 pushl %eax # save orig_eax
411 SAVE_ALL
412 GET_THREAD_INFO(%ebp)
413 # system call tracing in operation / emulation
414 testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
415 jnz syscall_trace_entry
416 cmpl $(NR_syscalls), %eax
417 jae syscall_badsys
418syscall_call:
419 call *sys_call_table(, %eax, 4)
420syscall_after_call:
421 movl %eax, PT_EAX(%esp) # store the return value
422syscall_exit:
423 LOCKDEP_SYS_EXIT
424 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
425 # setting need_resched or sigpending
426 # between sampling and the iret
427 TRACE_IRQS_OFF
428 movl TI_flags(%ebp), %ecx
429 testl $_TIF_ALLWORK_MASK, %ecx # current->work
430 jnz syscall_exit_work
431
432restore_all:
433 TRACE_IRQS_IRET
434restore_all_notrace:
435#ifdef CONFIG_X86_ESPFIX32
436 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
437 /*
438 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
439 * are returning to the kernel.
440 * See comments in process.c:copy_thread() for details.
441 */
442 movb PT_OLDSS(%esp), %ah
443 movb PT_CS(%esp), %al
444 andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
445 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
446 je ldt_ss # returning to user-space with LDT SS
447#endif
448restore_nocheck:
449 RESTORE_REGS 4 # skip orig_eax/error_code
450irq_return:
451 INTERRUPT_RETURN
452.section .fixup, "ax"
453ENTRY(iret_exc )
454 pushl $0 # no error code
455 pushl $do_iret_error
456 jmp error_code
457.previous
458 _ASM_EXTABLE(irq_return, iret_exc)
459
460#ifdef CONFIG_X86_ESPFIX32
461ldt_ss:
462#ifdef CONFIG_PARAVIRT
463 /*
464 * The kernel can't run on a non-flat stack if paravirt mode
465 * is active. Rather than try to fixup the high bits of
466 * ESP, bypass this code entirely. This may break DOSemu
467 * and/or Wine support in a paravirt VM, although the option
468 * is still available to implement the setting of the high
469 * 16-bits in the INTERRUPT_RETURN paravirt-op.
470 */
471 cmpl $0, pv_info+PARAVIRT_enabled
472 jne restore_nocheck
473#endif
474
475/*
476 * Setup and switch to ESPFIX stack
477 *
478 * We're returning to userspace with a 16 bit stack. The CPU will not
479 * restore the high word of ESP for us on executing iret... This is an
480 * "official" bug of all the x86-compatible CPUs, which we can work
481 * around to make dosemu and wine happy. We do this by preloading the
482 * high word of ESP with the high word of the userspace ESP while
483 * compensating for the offset by changing to the ESPFIX segment with
484 * a base address that matches for the difference.
485 */
486#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
487 mov %esp, %edx /* load kernel esp */
488 mov PT_OLDESP(%esp), %eax /* load userspace esp */
489 mov %dx, %ax /* eax: new kernel esp */
490 sub %eax, %edx /* offset (low word is 0) */
491 shr $16, %edx
492 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
493 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
494 pushl $__ESPFIX_SS
495 pushl %eax /* new kernel esp */
496 /*
497 * Disable interrupts, but do not irqtrace this section: we
498 * will soon execute iret and the tracer was already set to
499 * the irqstate after the IRET:
500 */
501 DISABLE_INTERRUPTS(CLBR_EAX)
502 lss (%esp), %esp /* switch to espfix segment */
503 jmp restore_nocheck
504#endif
505ENDPROC(entry_INT80_32)
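The ldt_ss path above patches two base bytes of the espfix GDT entry so that segment base plus the new ESP lands back on the kernel stack, while the high word of ESP already matches the userspace value that iret would otherwise lose. The same arithmetic, replayed with arbitrary example values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t kernel_esp = 0xc1234abc;   /* example values only */
	uint32_t user_esp   = 0x7ffe1234;

	/* mov %dx, %ax: low word from the kernel esp, high word from user esp */
	uint32_t new_esp = (user_esp & 0xffff0000u) | (kernel_esp & 0xffffu);
	uint32_t base    = kernel_esp - new_esp;    /* low word is always 0 */

	printf("new esp: %#010x\n", new_esp);
	printf("GDT base byte 4 (bits 16..23): %#04x\n", (base >> 16) & 0xff);
	printf("GDT base byte 7 (bits 24..31): %#04x\n", (base >> 24) & 0xff);
	printf("base + new_esp == kernel_esp?  %s\n",
	       base + new_esp == kernel_esp ? "yes" : "no");
	return 0;
}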
506
507 # perform work that needs to be done immediately before resumption
508 ALIGN
509work_pending:
510 testb $_TIF_NEED_RESCHED, %cl
511 jz work_notifysig
512work_resched:
513 call schedule
514 LOCKDEP_SYS_EXIT
515 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
516 # setting need_resched or sigpending
517 # between sampling and the iret
518 TRACE_IRQS_OFF
519 movl TI_flags(%ebp), %ecx
520 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
521 # than syscall tracing?
522 jz restore_all
523 testb $_TIF_NEED_RESCHED, %cl
524 jnz work_resched
525
526work_notifysig: # deal with pending signals and
527 # notify-resume requests
528#ifdef CONFIG_VM86
529 testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
530 movl %esp, %eax
531 jnz work_notifysig_v86 # returning to kernel-space or
532 # vm86-space
5331:
534#else
535 movl %esp, %eax
536#endif
537 TRACE_IRQS_ON
538 ENABLE_INTERRUPTS(CLBR_NONE)
539 movb PT_CS(%esp), %bl
540 andb $SEGMENT_RPL_MASK, %bl
541 cmpb $USER_RPL, %bl
542 jb resume_kernel
543 xorl %edx, %edx
544 call do_notify_resume
545 jmp resume_userspace
546
547#ifdef CONFIG_VM86
548 ALIGN
549work_notifysig_v86:
550 pushl %ecx # save ti_flags for do_notify_resume
551 call save_v86_state # %eax contains pt_regs pointer
552 popl %ecx
553 movl %eax, %esp
554 jmp 1b
555#endif
556END(work_pending)
557
558 # perform syscall exit tracing
559 ALIGN
560syscall_trace_entry:
561 movl $-ENOSYS, PT_EAX(%esp)
562 movl %esp, %eax
563 call syscall_trace_enter
564 /* What it returned is what we'll actually use. */
565 cmpl $(NR_syscalls), %eax
566 jnae syscall_call
567 jmp syscall_exit
568END(syscall_trace_entry)
569
570 # perform syscall exit tracing
571 ALIGN
572syscall_exit_work:
573 testl $_TIF_WORK_SYSCALL_EXIT, %ecx
574 jz work_pending
575 TRACE_IRQS_ON
576 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
577 # schedule() instead
578 movl %esp, %eax
579 call syscall_trace_leave
580 jmp resume_userspace
581END(syscall_exit_work)
582
583syscall_fault:
584 ASM_CLAC
585 GET_THREAD_INFO(%ebp)
586 movl $-EFAULT, PT_EAX(%esp)
587 jmp resume_userspace
588END(syscall_fault)
589
590syscall_badsys:
591 movl $-ENOSYS, %eax
592 jmp syscall_after_call
593END(syscall_badsys)
594
595sysenter_badsys:
596 movl $-ENOSYS, %eax
597 jmp sysenter_after_call
598END(sysenter_badsys)
599
600.macro FIXUP_ESPFIX_STACK
601/*
602 * Switch back for ESPFIX stack to the normal zerobased stack
603 *
604 * We can't call C functions using the ESPFIX stack. This code reads
605 * the high word of the segment base from the GDT and switches to the
606 * normal stack and adjusts ESP with the matching offset.
607 */
608#ifdef CONFIG_X86_ESPFIX32
609 /* fixup the stack */
610 mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
611 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
612 shl $16, %eax
613 addl %esp, %eax /* the adjusted stack pointer */
614 pushl $__KERNEL_DS
615 pushl %eax
616 lss (%esp), %esp /* switch to the normal stack segment */
617#endif
618.endm
619.macro UNWIND_ESPFIX_STACK
620#ifdef CONFIG_X86_ESPFIX32
621 movl %ss, %eax
622 /* see if on espfix stack */
623 cmpw $__ESPFIX_SS, %ax
624 jne 27f
625 movl $__KERNEL_DS, %eax
626 movl %eax, %ds
627 movl %eax, %es
628 /* switch to normal stack */
629 FIXUP_ESPFIX_STACK
63027:
631#endif
632.endm
633
634/*
635 * Build the entry stubs with some assembler magic.
636 * We pack 1 stub into every 8-byte block.
637 */
638 .align 8
639ENTRY(irq_entries_start)
640 vector=FIRST_EXTERNAL_VECTOR
641 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
642 pushl $(~vector+0x80) /* Note: always in signed byte range */
643 vector=vector+1
644 jmp common_interrupt
645 .align 8
646 .endr
647END(irq_entries_start)
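The 8-byte stub packing works because ~vector + 0x80 always fits a sign-extended byte, so every "pushl" stays short; common_interrupt then adds -0x80 to recover ~vector in the [-256, -1] range. A quick check of that encoding, taking FIRST_EXTERNAL_VECTOR to be 0x20 (an assumption stated here, not shown in the hunk):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	int vector;

	for (vector = 0x20; vector <= 0xff; vector++) {
		int pushed = ~vector + 0x80;           /* what the stub pushes */

		assert(pushed >= -128 && pushed <= 127); /* fits a push imm8 */
		assert(pushed - 0x80 == ~vector);        /* handler's addl $-0x80 */
		assert(~(pushed - 0x80) == vector);      /* do_IRQ can recover it */
	}
	printf("all vectors 0x20..0xff encode into a signed byte\n");
	return 0;
}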
648
649/*
650 * the CPU automatically disables interrupts when executing an IRQ vector,
651 * so IRQ-flags tracing has to follow that:
652 */
653 .p2align CONFIG_X86_L1_CACHE_SHIFT
654common_interrupt:
655 ASM_CLAC
656 addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
657 SAVE_ALL
658 TRACE_IRQS_OFF
659 movl %esp, %eax
660 call do_IRQ
661 jmp ret_from_intr
662ENDPROC(common_interrupt)
663
664#define BUILD_INTERRUPT3(name, nr, fn) \
665ENTRY(name) \
666 ASM_CLAC; \
667 pushl $~(nr); \
668 SAVE_ALL; \
669 TRACE_IRQS_OFF \
670 movl %esp, %eax; \
671 call fn; \
672 jmp ret_from_intr; \
673ENDPROC(name)
674
675
676#ifdef CONFIG_TRACING
677# define TRACE_BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
678#else
679# define TRACE_BUILD_INTERRUPT(name, nr)
680#endif
681
682#define BUILD_INTERRUPT(name, nr) \
683 BUILD_INTERRUPT3(name, nr, smp_##name); \
684 TRACE_BUILD_INTERRUPT(name, nr)
685
686/* The include is where all of the SMP etc. interrupts come from */
687#include <asm/entry_arch.h>
688
689ENTRY(coprocessor_error)
690 ASM_CLAC
691 pushl $0
692 pushl $do_coprocessor_error
693 jmp error_code
694END(coprocessor_error)
695
696ENTRY(simd_coprocessor_error)
697 ASM_CLAC
698 pushl $0
699#ifdef CONFIG_X86_INVD_BUG
700 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
701 ALTERNATIVE "pushl $do_general_protection", \
702 "pushl $do_simd_coprocessor_error", \
703 X86_FEATURE_XMM
704#else
705 pushl $do_simd_coprocessor_error
706#endif
707 jmp error_code
708END(simd_coprocessor_error)
709
710ENTRY(device_not_available)
711 ASM_CLAC
712 pushl $-1 # mark this as an int
713 pushl $do_device_not_available
714 jmp error_code
715END(device_not_available)
716
717#ifdef CONFIG_PARAVIRT
718ENTRY(native_iret)
719 iret
720 _ASM_EXTABLE(native_iret, iret_exc)
721END(native_iret)
722
723ENTRY(native_irq_enable_sysexit)
724 sti
725 sysexit
726END(native_irq_enable_sysexit)
727#endif
728
729ENTRY(overflow)
730 ASM_CLAC
731 pushl $0
732 pushl $do_overflow
733 jmp error_code
734END(overflow)
735
736ENTRY(bounds)
737 ASM_CLAC
738 pushl $0
739 pushl $do_bounds
740 jmp error_code
741END(bounds)
742
743ENTRY(invalid_op)
744 ASM_CLAC
745 pushl $0
746 pushl $do_invalid_op
747 jmp error_code
748END(invalid_op)
749
750ENTRY(coprocessor_segment_overrun)
751 ASM_CLAC
752 pushl $0
753 pushl $do_coprocessor_segment_overrun
754 jmp error_code
755END(coprocessor_segment_overrun)
756
757ENTRY(invalid_TSS)
758 ASM_CLAC
759 pushl $do_invalid_TSS
760 jmp error_code
761END(invalid_TSS)
762
763ENTRY(segment_not_present)
764 ASM_CLAC
765 pushl $do_segment_not_present
766 jmp error_code
767END(segment_not_present)
768
769ENTRY(stack_segment)
770 ASM_CLAC
771 pushl $do_stack_segment
772 jmp error_code
773END(stack_segment)
774
775ENTRY(alignment_check)
776 ASM_CLAC
777 pushl $do_alignment_check
778 jmp error_code
779END(alignment_check)
780
781ENTRY(divide_error)
782 ASM_CLAC
783 pushl $0 # no error code
784 pushl $do_divide_error
785 jmp error_code
786END(divide_error)
787
788#ifdef CONFIG_X86_MCE
789ENTRY(machine_check)
790 ASM_CLAC
791 pushl $0
792 pushl machine_check_vector
793 jmp error_code
794END(machine_check)
795#endif
796
797ENTRY(spurious_interrupt_bug)
798 ASM_CLAC
799 pushl $0
800 pushl $do_spurious_interrupt_bug
801 jmp error_code
802END(spurious_interrupt_bug)
803
804#ifdef CONFIG_XEN
805/*
806 * Xen doesn't set %esp to be precisely what the normal SYSENTER
807 * entry point expects, so fix it up before using the normal path.
808 */
809ENTRY(xen_sysenter_target)
810 addl $5*4, %esp /* remove xen-provided frame */
811 jmp sysenter_past_esp
812
813ENTRY(xen_hypervisor_callback)
814 pushl $-1 /* orig_ax = -1 => not a system call */
815 SAVE_ALL
816 TRACE_IRQS_OFF
817
818 /*
819 * Check to see if we got the event in the critical
820 * region in xen_iret_direct, after we've reenabled
821 * events and checked for pending events. This simulates
822 * iret instruction's behaviour where it delivers a
823 * pending interrupt when enabling interrupts:
824 */
825 movl PT_EIP(%esp), %eax
826 cmpl $xen_iret_start_crit, %eax
827 jb 1f
828 cmpl $xen_iret_end_crit, %eax
829 jae 1f
830
831 jmp xen_iret_crit_fixup
832
833ENTRY(xen_do_upcall)
8341: mov %esp, %eax
835 call xen_evtchn_do_upcall
836#ifndef CONFIG_PREEMPT
837 call xen_maybe_preempt_hcall
838#endif
839 jmp ret_from_intr
840ENDPROC(xen_hypervisor_callback)
841
842/*
843 * Hypervisor uses this for application faults while it executes.
844 * We get here for two reasons:
845 * 1. Fault while reloading DS, ES, FS or GS
846 * 2. Fault while executing IRET
847 * Category 1 we fix up by reattempting the load, and zeroing the segment
848 * register if the load fails.
849 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
850 * normal Linux return path in this case because if we use the IRET hypercall
851 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
852 * We distinguish between categories by maintaining a status value in EAX.
853 */
854ENTRY(xen_failsafe_callback)
855 pushl %eax
856 movl $1, %eax
8571: mov 4(%esp), %ds
8582: mov 8(%esp), %es
8593: mov 12(%esp), %fs
8604: mov 16(%esp), %gs
861 /* EAX == 0 => Category 1 (Bad segment)
862 EAX != 0 => Category 2 (Bad IRET) */
863 testl %eax, %eax
864 popl %eax
865 lea 16(%esp), %esp
866 jz 5f
867 jmp iret_exc
8685: pushl $-1 /* orig_ax = -1 => not a system call */
869 SAVE_ALL
870 jmp ret_from_exception
871
872.section .fixup, "ax"
8736: xorl %eax, %eax
874 movl %eax, 4(%esp)
875 jmp 1b
8767: xorl %eax, %eax
877 movl %eax, 8(%esp)
878 jmp 2b
8798: xorl %eax, %eax
880 movl %eax, 12(%esp)
881 jmp 3b
8829: xorl %eax, %eax
883 movl %eax, 16(%esp)
884 jmp 4b
885.previous
886 _ASM_EXTABLE(1b, 6b)
887 _ASM_EXTABLE(2b, 7b)
888 _ASM_EXTABLE(3b, 8b)
889 _ASM_EXTABLE(4b, 9b)
890ENDPROC(xen_failsafe_callback)
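The failsafe callback's comment describes a two-way classification carried in EAX: it starts at 1, and each segment-reload fixup clears it to 0 after zeroing the offending selector. As a plain C decision table (sketch only), it reads:

#include <stdio.h>

static const char *classify(int eax)
{
	return eax == 0 ? "category 1: bad segment, selector zeroed, resume normally"
			: "category 2: bad IRET, route to do_iret_error via iret_exc";
}

int main(void)
{
	printf("%s\n", classify(0));
	printf("%s\n", classify(1));
	return 0;
}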
891
892BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
893 xen_evtchn_do_upcall)
894
895#endif /* CONFIG_XEN */
896
897#if IS_ENABLED(CONFIG_HYPERV)
898
899BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
900 hyperv_vector_handler)
901
902#endif /* CONFIG_HYPERV */
903
904#ifdef CONFIG_FUNCTION_TRACER
905#ifdef CONFIG_DYNAMIC_FTRACE
906
907ENTRY(mcount)
908 ret
909END(mcount)
910
911ENTRY(ftrace_caller)
912 pushl %eax
913 pushl %ecx
914 pushl %edx
915 pushl $0 /* Pass NULL as regs pointer */
916 movl 4*4(%esp), %eax
917 movl 0x4(%ebp), %edx
918 movl function_trace_op, %ecx
919 subl $MCOUNT_INSN_SIZE, %eax
920
921.globl ftrace_call
922ftrace_call:
923 call ftrace_stub
924
925 addl $4, %esp /* skip NULL pointer */
926 popl %edx
927 popl %ecx
928 popl %eax
929ftrace_ret:
930#ifdef CONFIG_FUNCTION_GRAPH_TRACER
931.globl ftrace_graph_call
932ftrace_graph_call:
933 jmp ftrace_stub
934#endif
935
936.globl ftrace_stub
937ftrace_stub:
938 ret
939END(ftrace_caller)
940
941ENTRY(ftrace_regs_caller)
942 pushf /* push flags before compare (in cs location) */
943
944 /*
945 * i386 does not save SS and ESP when coming from kernel.
946 * Instead, to get sp, &regs->sp is used (see ptrace.h).
947 * Unfortunately, that means eflags must be at the same location
948 * as the current return ip is. We move the return ip into the
949 * ip location, and move flags into the return ip location.
950 */
951 pushl 4(%esp) /* save return ip into ip slot */
952
953 pushl $0 /* Load 0 into orig_ax */
954 pushl %gs
955 pushl %fs
956 pushl %es
957 pushl %ds
958 pushl %eax
959 pushl %ebp
960 pushl %edi
961 pushl %esi
962 pushl %edx
963 pushl %ecx
964 pushl %ebx
965
966 movl 13*4(%esp), %eax /* Get the saved flags */
967 movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
968 /* clobbering return ip */
969 movl $__KERNEL_CS, 13*4(%esp)
970
971 movl 12*4(%esp), %eax /* Load ip (1st parameter) */
972 subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
973 movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
974 movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
975 pushl %esp /* Save pt_regs as 4th parameter */
976
977GLOBAL(ftrace_regs_call)
978 call ftrace_stub
979
980 addl $4, %esp /* Skip pt_regs */
981 movl 14*4(%esp), %eax /* Move flags back into cs */
982 movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
983 movl 12*4(%esp), %eax /* Get return ip from regs->ip */
984 movl %eax, 14*4(%esp) /* Put return ip back for ret */
985
986 popl %ebx
987 popl %ecx
988 popl %edx
989 popl %esi
990 popl %edi
991 popl %ebp
992 popl %eax
993 popl %ds
994 popl %es
995 popl %fs
996 popl %gs
997 addl $8, %esp /* Skip orig_ax and ip */
998 popf /* Pop flags at end (no addl to corrupt flags) */
999 jmp ftrace_ret
1000
1001 popf
1002 jmp ftrace_stub
1003#else /* ! CONFIG_DYNAMIC_FTRACE */
1004
1005ENTRY(mcount)
1006 cmpl $__PAGE_OFFSET, %esp
1007 jb ftrace_stub /* Paging not enabled yet? */
1008
1009 cmpl $ftrace_stub, ftrace_trace_function
1010 jnz trace
1011#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1012 cmpl $ftrace_stub, ftrace_graph_return
1013 jnz ftrace_graph_caller
1014
1015 cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
1016 jnz ftrace_graph_caller
1017#endif
1018.globl ftrace_stub
1019ftrace_stub:
1020 ret
1021
1022 /* taken from glibc */
1023trace:
1024 pushl %eax
1025 pushl %ecx
1026 pushl %edx
1027 movl 0xc(%esp), %eax
1028 movl 0x4(%ebp), %edx
1029 subl $MCOUNT_INSN_SIZE, %eax
1030
1031 call *ftrace_trace_function
1032
1033 popl %edx
1034 popl %ecx
1035 popl %eax
1036 jmp ftrace_stub
1037END(mcount)
1038#endif /* CONFIG_DYNAMIC_FTRACE */
1039#endif /* CONFIG_FUNCTION_TRACER */
1040
1041#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1042ENTRY(ftrace_graph_caller)
1043 pushl %eax
1044 pushl %ecx
1045 pushl %edx
1046 movl 0xc(%esp), %eax
1047 lea 0x4(%ebp), %edx
1048 movl (%ebp), %ecx
1049 subl $MCOUNT_INSN_SIZE, %eax
1050 call prepare_ftrace_return
1051 popl %edx
1052 popl %ecx
1053 popl %eax
1054 ret
1055END(ftrace_graph_caller)
1056
1057.globl return_to_handler
1058return_to_handler:
1059 pushl %eax
1060 pushl %edx
1061 movl %ebp, %eax
1062 call ftrace_return_to_handler
1063 movl %eax, %ecx
1064 popl %edx
1065 popl %eax
1066 jmp *%ecx
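	/*
	 * (Annotation.) ftrace_return_to_handler() hands back the original
	 * return address that the graph tracer replaced with
	 * return_to_handler; jumping through %ecx sends the traced function
	 * back to its real caller.
	 */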
1067#endif
1068
1069#ifdef CONFIG_TRACING
1070ENTRY(trace_page_fault)
1071 ASM_CLAC
1072 pushl $trace_do_page_fault
1073 jmp error_code
1074END(trace_page_fault)
1075#endif
1076
1077ENTRY(page_fault)
1078 ASM_CLAC
1079 pushl $do_page_fault
1080 ALIGN
1081error_code:
1082 /* the function address is in %gs's slot on the stack */
1083 pushl %fs
1084 pushl %es
1085 pushl %ds
1086 pushl %eax
1087 pushl %ebp
1088 pushl %edi
1089 pushl %esi
1090 pushl %edx
1091 pushl %ecx
1092 pushl %ebx
1093 cld
1094 movl $(__KERNEL_PERCPU), %ecx
1095 movl %ecx, %fs
1096 UNWIND_ESPFIX_STACK
1097 GS_TO_REG %ecx
1098 movl PT_GS(%esp), %edi # get the function address
1099 movl PT_ORIG_EAX(%esp), %edx # get the error code
1100 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1101 REG_TO_PTGS %ecx
1102 SET_KERNEL_GS %ecx
1103 movl $(__USER_DS), %ecx
1104 movl %ecx, %ds
1105 movl %ecx, %es
1106 TRACE_IRQS_OFF
1107 movl %esp, %eax # pt_regs pointer
1108 call *%edi
1109 jmp ret_from_exception
1110END(page_fault)
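/*
 * (Annotation, assuming the usual i386 register-parameter calling
 * convention.) The handler invoked via "call *%edi" above receives the
 * pt_regs pointer in %eax and the error code in %edx, i.e. roughly
 * handler(struct pt_regs *regs, unsigned long error_code).
 */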
1111
1112/*
1113 * Debug traps and NMI can happen at the one SYSENTER instruction
1114 * that sets up the real kernel stack. Check here, since we can't
1115 * allow the wrong stack to be used.
1116 *
1117 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1118 * already pushed 3 words if it hits on the sysenter instruction:
1119 * eflags, cs and eip.
1120 *
1121 * We just load the right stack, and push the three (known) values
1122 * by hand onto the new stack - while updating the return eip past
1123 * the instruction that would have done it for sysenter.
1124 */
1125.macro FIX_STACK offset ok label
1126 cmpw $__KERNEL_CS, 4(%esp)
1127 jne \ok
1128\label:
1129 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1130 pushfl
1131 pushl $__KERNEL_CS
1132 pushl $sysenter_past_esp
1133.endm
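/*
 * (Annotation.) In the users below, the +12 compensates for the three
 * words (eflags, cs, eip) that the debug/NMI entry already pushed, so
 * TSS_sysenter_sp0 is still found relative to %esp; the NMI-on-debug
 * case uses FIX_STACK 24 because two such three-word frames are stacked
 * at that point.
 */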
1134
1135ENTRY(debug)
1136 ASM_CLAC
1137 cmpl $entry_SYSENTER_32, (%esp)
1138 jne debug_stack_correct
1139 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1140debug_stack_correct:
1141 pushl $-1 # mark this as an int
1142 SAVE_ALL
1143 TRACE_IRQS_OFF
1144 xorl %edx, %edx # error code 0
1145 movl %esp, %eax # pt_regs pointer
1146 call do_debug
1147 jmp ret_from_exception
1148END(debug)
1149
1150/*
1151 * NMI is doubly nasty. It can happen _while_ we're handling
1152 * a debug fault, and the debug fault hasn't yet been able to
1153 * clear up the stack. So we first check whether we got an
1154 * NMI on the sysenter entry path, but after that we need to
1155 * check whether we got an NMI on the debug path where the debug
1156 * fault happened on the sysenter path.
1157 */
1158ENTRY(nmi)
1159 ASM_CLAC
1160#ifdef CONFIG_X86_ESPFIX32
1161 pushl %eax
1162 movl %ss, %eax
1163 cmpw $__ESPFIX_SS, %ax
1164 popl %eax
1165 je nmi_espfix_stack
1166#endif
1167 cmpl $entry_SYSENTER_32, (%esp)
1168 je nmi_stack_fixup
1169 pushl %eax
1170 movl %esp, %eax
1171 /*
1172 * Do not access memory above the end of our stack page,
1173 * it might not exist.
1174 */
1175 andl $(THREAD_SIZE-1), %eax
1176 cmpl $(THREAD_SIZE-20), %eax
1177 popl %eax
1178 jae nmi_stack_correct
1179 cmpl $entry_SYSENTER_32, 12(%esp)
1180 je nmi_debug_stack_check
1181nmi_stack_correct:
1182 pushl %eax
1183 SAVE_ALL
1184 xorl %edx, %edx # zero error code
1185 movl %esp, %eax # pt_regs pointer
1186 call do_nmi
1187 jmp restore_all_notrace
1188
1189nmi_stack_fixup:
1190 FIX_STACK 12, nmi_stack_correct, 1
1191 jmp nmi_stack_correct
1192
1193nmi_debug_stack_check:
1194 cmpw $__KERNEL_CS, 16(%esp)
1195 jne nmi_stack_correct
1196 cmpl $debug, (%esp)
1197 jb nmi_stack_correct
1198 cmpl $debug_esp_fix_insn, (%esp)
1199 ja nmi_stack_correct
1200 FIX_STACK 24, nmi_stack_correct, 1
1201 jmp nmi_stack_correct
1202
1203#ifdef CONFIG_X86_ESPFIX32
1204nmi_espfix_stack:
1205 /*
1206	 * create the ss:esp far pointer that lss uses to switch back
1207 */
1208 pushl %ss
1209 pushl %esp
1210 addl $4, (%esp)
1211 /* copy the iret frame of 12 bytes */
1212 .rept 3
1213 pushl 16(%esp)
1214 .endr
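	/*
	 * (Annotation.) The three pushes above duplicate the iret frame
	 * (eflags, cs, eip are re-pushed so eip ends up on top) below the
	 * ss:esp pair saved first; the "lss 12+4(%esp), %esp" after do_nmi
	 * reloads that pair to land back on the espfix stack for iret.
	 */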
1215 pushl %eax
1216 SAVE_ALL
1217 FIXUP_ESPFIX_STACK # %eax == %esp
1218 xorl %edx, %edx # zero error code
1219 call do_nmi
1220 RESTORE_REGS
1221 lss 12+4(%esp), %esp # back to espfix stack
1222 jmp irq_return
1223#endif
1224END(nmi)
1225
1226ENTRY(int3)
1227 ASM_CLAC
1228 pushl $-1 # mark this as an int
1229 SAVE_ALL
1230 TRACE_IRQS_OFF
1231 xorl %edx, %edx # zero error code
1232 movl %esp, %eax # pt_regs pointer
1233 call do_int3
1234 jmp ret_from_exception
1235END(int3)
1236
1237ENTRY(general_protection)
1238 pushl $do_general_protection
1239 jmp error_code
1240END(general_protection)
1241
1242#ifdef CONFIG_KVM_GUEST
1243ENTRY(async_page_fault)
1244 ASM_CLAC
1245 pushl $do_async_page_fault
1246 jmp error_code
1247END(async_page_fault)
1248#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/entry/entry_64.S
index 22aadc917868..3bb2c4302df1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -4,34 +4,25 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs 5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */ 7 *
8
9/*
10 * entry.S contains the system-call and fault low-level handling routines. 8 * entry.S contains the system-call and fault low-level handling routines.
11 * 9 *
12 * Some of this is documented in Documentation/x86/entry_64.txt 10 * Some of this is documented in Documentation/x86/entry_64.txt
13 * 11 *
14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call.
16 *
17 * A note on terminology: 12 * A note on terminology:
18 * - iret frame: Architecture defined interrupt frame from SS to RIP 13 * - iret frame: Architecture defined interrupt frame from SS to RIP
19 * at the top of the kernel process stack. 14 * at the top of the kernel process stack.
20 * 15 *
21 * Some macro usage: 16 * Some macro usage:
22 * - CFI macros are used to generate dwarf2 unwind information for better 17 * - ENTRY/END: Define functions in the symbol table.
23 * backtraces. They don't change any code. 18 * - TRACE_IRQ_*: Trace hardirq state for lock debugging.
24 * - ENTRY/END Define functions in the symbol table. 19 * - idtentry: Define exception entry points.
25 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
26 * - idtentry - Define exception entry points.
27 */ 20 */
28
29#include <linux/linkage.h> 21#include <linux/linkage.h>
30#include <asm/segment.h> 22#include <asm/segment.h>
31#include <asm/cache.h> 23#include <asm/cache.h>
32#include <asm/errno.h> 24#include <asm/errno.h>
33#include <asm/dwarf2.h> 25#include "calling.h"
34#include <asm/calling.h>
35#include <asm/asm-offsets.h> 26#include <asm/asm-offsets.h>
36#include <asm/msr.h> 27#include <asm/msr.h>
37#include <asm/unistd.h> 28#include <asm/unistd.h>
@@ -49,13 +40,12 @@
49 40
50/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 41/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
51#include <linux/elf-em.h> 42#include <linux/elf-em.h>
52#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) 43#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
53#define __AUDIT_ARCH_64BIT 0x80000000 44#define __AUDIT_ARCH_64BIT 0x80000000
54#define __AUDIT_ARCH_LE 0x40000000 45#define __AUDIT_ARCH_LE 0x40000000
55
56 .code64
57 .section .entry.text, "ax"
58 46
47.code64
48.section .entry.text, "ax"
59 49
60#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
61ENTRY(native_usergs_sysret64) 51ENTRY(native_usergs_sysret64)
@@ -64,11 +54,10 @@ ENTRY(native_usergs_sysret64)
64ENDPROC(native_usergs_sysret64) 54ENDPROC(native_usergs_sysret64)
65#endif /* CONFIG_PARAVIRT */ 55#endif /* CONFIG_PARAVIRT */
66 56
67
68.macro TRACE_IRQS_IRETQ 57.macro TRACE_IRQS_IRETQ
69#ifdef CONFIG_TRACE_IRQFLAGS 58#ifdef CONFIG_TRACE_IRQFLAGS
70 bt $9,EFLAGS(%rsp) /* interrupts off? */ 59 bt $9, EFLAGS(%rsp) /* interrupts off? */
71 jnc 1f 60 jnc 1f
72 TRACE_IRQS_ON 61 TRACE_IRQS_ON
731: 621:
74#endif 63#endif
@@ -88,89 +77,34 @@ ENDPROC(native_usergs_sysret64)
88#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) 77#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
89 78
90.macro TRACE_IRQS_OFF_DEBUG 79.macro TRACE_IRQS_OFF_DEBUG
91 call debug_stack_set_zero 80 call debug_stack_set_zero
92 TRACE_IRQS_OFF 81 TRACE_IRQS_OFF
93 call debug_stack_reset 82 call debug_stack_reset
94.endm 83.endm
95 84
96.macro TRACE_IRQS_ON_DEBUG 85.macro TRACE_IRQS_ON_DEBUG
97 call debug_stack_set_zero 86 call debug_stack_set_zero
98 TRACE_IRQS_ON 87 TRACE_IRQS_ON
99 call debug_stack_reset 88 call debug_stack_reset
100.endm 89.endm
101 90
102.macro TRACE_IRQS_IRETQ_DEBUG 91.macro TRACE_IRQS_IRETQ_DEBUG
103 bt $9,EFLAGS(%rsp) /* interrupts off? */ 92 bt $9, EFLAGS(%rsp) /* interrupts off? */
104 jnc 1f 93 jnc 1f
105 TRACE_IRQS_ON_DEBUG 94 TRACE_IRQS_ON_DEBUG
1061: 951:
107.endm 96.endm
108 97
109#else 98#else
110# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF 99# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
111# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON 100# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
112# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ 101# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
113#endif 102#endif
114 103
115/* 104/*
116 * empty frame 105 * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
117 */
118 .macro EMPTY_FRAME start=1 offset=0
119 .if \start
120 CFI_STARTPROC simple
121 CFI_SIGNAL_FRAME
122 CFI_DEF_CFA rsp,8+\offset
123 .else
124 CFI_DEF_CFA_OFFSET 8+\offset
125 .endif
126 .endm
127
128/*
129 * initial frame state for interrupts (and exceptions without error code)
130 */
131 .macro INTR_FRAME start=1 offset=0
132 EMPTY_FRAME \start, 5*8+\offset
133 /*CFI_REL_OFFSET ss, 4*8+\offset*/
134 CFI_REL_OFFSET rsp, 3*8+\offset
135 /*CFI_REL_OFFSET rflags, 2*8+\offset*/
136 /*CFI_REL_OFFSET cs, 1*8+\offset*/
137 CFI_REL_OFFSET rip, 0*8+\offset
138 .endm
139
140/*
141 * initial frame state for exceptions with error code (and interrupts
142 * with vector already pushed)
143 */
144 .macro XCPT_FRAME start=1 offset=0
145 INTR_FRAME \start, 1*8+\offset
146 .endm
147
148/*
149 * frame that enables passing a complete pt_regs to a C function.
150 */
151 .macro DEFAULT_FRAME start=1 offset=0
152 XCPT_FRAME \start, ORIG_RAX+\offset
153 CFI_REL_OFFSET rdi, RDI+\offset
154 CFI_REL_OFFSET rsi, RSI+\offset
155 CFI_REL_OFFSET rdx, RDX+\offset
156 CFI_REL_OFFSET rcx, RCX+\offset
157 CFI_REL_OFFSET rax, RAX+\offset
158 CFI_REL_OFFSET r8, R8+\offset
159 CFI_REL_OFFSET r9, R9+\offset
160 CFI_REL_OFFSET r10, R10+\offset
161 CFI_REL_OFFSET r11, R11+\offset
162 CFI_REL_OFFSET rbx, RBX+\offset
163 CFI_REL_OFFSET rbp, RBP+\offset
164 CFI_REL_OFFSET r12, R12+\offset
165 CFI_REL_OFFSET r13, R13+\offset
166 CFI_REL_OFFSET r14, R14+\offset
167 CFI_REL_OFFSET r15, R15+\offset
168 .endm
169
170/*
171 * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
172 * 106 *
173 * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, 107 * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
174 * then loads new ss, cs, and rip from previously programmed MSRs. 108 * then loads new ss, cs, and rip from previously programmed MSRs.
175 * rflags gets masked by a value from another MSR (so CLD and CLAC 109 * rflags gets masked by a value from another MSR (so CLD and CLAC
176 * are not needed). SYSCALL does not save anything on the stack 110 * are not needed). SYSCALL does not save anything on the stack
@@ -186,7 +120,7 @@ ENDPROC(native_usergs_sysret64)
186 * r10 arg3 (needs to be moved to rcx to conform to C ABI) 120 * r10 arg3 (needs to be moved to rcx to conform to C ABI)
187 * r8 arg4 121 * r8 arg4
188 * r9 arg5 122 * r9 arg5
189 * (note: r12-r15,rbp,rbx are callee-preserved in C ABI) 123 * (note: r12-r15, rbp, rbx are callee-preserved in C ABI)
190 * 124 *
191 * Only called from user space. 125 * Only called from user space.
192 * 126 *
@@ -195,13 +129,7 @@ ENDPROC(native_usergs_sysret64)
195 * with them due to bugs in both AMD and Intel CPUs. 129 * with them due to bugs in both AMD and Intel CPUs.
196 */ 130 */
197 131
198ENTRY(system_call) 132ENTRY(entry_SYSCALL_64)
199 CFI_STARTPROC simple
200 CFI_SIGNAL_FRAME
201 CFI_DEF_CFA rsp,0
202 CFI_REGISTER rip,rcx
203 /*CFI_REGISTER rflags,r11*/
204
205 /* 133 /*
206 * Interrupts are off on entry. 134 * Interrupts are off on entry.
207 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, 135 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
@@ -213,14 +141,14 @@ ENTRY(system_call)
213 * after the swapgs, so that it can do the swapgs 141 * after the swapgs, so that it can do the swapgs
214 * for the guest and jump here on syscall. 142 * for the guest and jump here on syscall.
215 */ 143 */
216GLOBAL(system_call_after_swapgs) 144GLOBAL(entry_SYSCALL_64_after_swapgs)
217 145
218 movq %rsp,PER_CPU_VAR(rsp_scratch) 146 movq %rsp, PER_CPU_VAR(rsp_scratch)
219 movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp 147 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
220 148
221 /* Construct struct pt_regs on stack */ 149 /* Construct struct pt_regs on stack */
222 pushq_cfi $__USER_DS /* pt_regs->ss */ 150 pushq $__USER_DS /* pt_regs->ss */
223 pushq_cfi PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ 151 pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
224 /* 152 /*
225 * Re-enable interrupts. 153 * Re-enable interrupts.
226 * We use 'rsp_scratch' as a scratch space, hence irq-off block above 154 * We use 'rsp_scratch' as a scratch space, hence irq-off block above
@@ -229,36 +157,34 @@ GLOBAL(system_call_after_swapgs)
229 * with using rsp_scratch: 157 * with using rsp_scratch:
230 */ 158 */
231 ENABLE_INTERRUPTS(CLBR_NONE) 159 ENABLE_INTERRUPTS(CLBR_NONE)
232 pushq_cfi %r11 /* pt_regs->flags */ 160 pushq %r11 /* pt_regs->flags */
233 pushq_cfi $__USER_CS /* pt_regs->cs */ 161 pushq $__USER_CS /* pt_regs->cs */
234 pushq_cfi %rcx /* pt_regs->ip */ 162 pushq %rcx /* pt_regs->ip */
235 CFI_REL_OFFSET rip,0 163 pushq %rax /* pt_regs->orig_ax */
236 pushq_cfi_reg rax /* pt_regs->orig_ax */ 164 pushq %rdi /* pt_regs->di */
237 pushq_cfi_reg rdi /* pt_regs->di */ 165 pushq %rsi /* pt_regs->si */
238 pushq_cfi_reg rsi /* pt_regs->si */ 166 pushq %rdx /* pt_regs->dx */
239 pushq_cfi_reg rdx /* pt_regs->dx */ 167 pushq %rcx /* pt_regs->cx */
240 pushq_cfi_reg rcx /* pt_regs->cx */ 168 pushq $-ENOSYS /* pt_regs->ax */
241 pushq_cfi $-ENOSYS /* pt_regs->ax */ 169 pushq %r8 /* pt_regs->r8 */
242 pushq_cfi_reg r8 /* pt_regs->r8 */ 170 pushq %r9 /* pt_regs->r9 */
243 pushq_cfi_reg r9 /* pt_regs->r9 */ 171 pushq %r10 /* pt_regs->r10 */
244 pushq_cfi_reg r10 /* pt_regs->r10 */ 172 pushq %r11 /* pt_regs->r11 */
245 pushq_cfi_reg r11 /* pt_regs->r11 */ 173 sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
246 sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ 174
247 CFI_ADJUST_CFA_OFFSET 6*8 175 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
248 176 jnz tracesys
249 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) 177entry_SYSCALL_64_fastpath:
250 jnz tracesys
251system_call_fastpath:
252#if __SYSCALL_MASK == ~0 178#if __SYSCALL_MASK == ~0
253 cmpq $__NR_syscall_max,%rax 179 cmpq $__NR_syscall_max, %rax
254#else 180#else
255 andl $__SYSCALL_MASK,%eax 181 andl $__SYSCALL_MASK, %eax
256 cmpl $__NR_syscall_max,%eax 182 cmpl $__NR_syscall_max, %eax
257#endif 183#endif
258 ja 1f /* return -ENOSYS (already in pt_regs->ax) */ 184 ja 1f /* return -ENOSYS (already in pt_regs->ax) */
259 movq %r10,%rcx 185 movq %r10, %rcx
260 call *sys_call_table(,%rax,8) 186 call *sys_call_table(, %rax, 8)
261 movq %rax,RAX(%rsp) 187 movq %rax, RAX(%rsp)
2621: 1881:
263/* 189/*
264 * Syscall return path ending with SYSRET (fast path). 190 * Syscall return path ending with SYSRET (fast path).
@@ -279,19 +205,15 @@ system_call_fastpath:
279 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is 205 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
280 * very bad. 206 * very bad.
281 */ 207 */
282 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) 208 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
283 jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ 209 jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
284
285 CFI_REMEMBER_STATE
286 210
287 RESTORE_C_REGS_EXCEPT_RCX_R11 211 RESTORE_C_REGS_EXCEPT_RCX_R11
288 movq RIP(%rsp),%rcx 212 movq RIP(%rsp), %rcx
289 CFI_REGISTER rip,rcx 213 movq EFLAGS(%rsp), %r11
290 movq EFLAGS(%rsp),%r11 214 movq RSP(%rsp), %rsp
291 /*CFI_REGISTER rflags,r11*/
292 movq RSP(%rsp),%rsp
293 /* 215 /*
294 * 64bit SYSRET restores rip from rcx, 216 * 64-bit SYSRET restores rip from rcx,
295 * rflags from r11 (but RF and VM bits are forced to 0), 217 * rflags from r11 (but RF and VM bits are forced to 0),
296 * cs and ss are loaded from MSRs. 218 * cs and ss are loaded from MSRs.
297 * Restoration of rflags re-enables interrupts. 219 * Restoration of rflags re-enables interrupts.
@@ -307,25 +229,23 @@ system_call_fastpath:
307 */ 229 */
308 USERGS_SYSRET64 230 USERGS_SYSRET64
309 231
310 CFI_RESTORE_STATE
311
312 /* Do syscall entry tracing */ 232 /* Do syscall entry tracing */
313tracesys: 233tracesys:
314 movq %rsp, %rdi 234 movq %rsp, %rdi
315 movl $AUDIT_ARCH_X86_64, %esi 235 movl $AUDIT_ARCH_X86_64, %esi
316 call syscall_trace_enter_phase1 236 call syscall_trace_enter_phase1
317 test %rax, %rax 237 test %rax, %rax
318 jnz tracesys_phase2 /* if needed, run the slow path */ 238 jnz tracesys_phase2 /* if needed, run the slow path */
319 RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ 239 RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
320 movq ORIG_RAX(%rsp), %rax 240 movq ORIG_RAX(%rsp), %rax
321 jmp system_call_fastpath /* and return to the fast path */ 241 jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
322 242
323tracesys_phase2: 243tracesys_phase2:
324 SAVE_EXTRA_REGS 244 SAVE_EXTRA_REGS
325 movq %rsp, %rdi 245 movq %rsp, %rdi
326 movl $AUDIT_ARCH_X86_64, %esi 246 movl $AUDIT_ARCH_X86_64, %esi
327 movq %rax,%rdx 247 movq %rax, %rdx
328 call syscall_trace_enter_phase2 248 call syscall_trace_enter_phase2
329 249
330 /* 250 /*
331 * Reload registers from stack in case ptrace changed them. 251 * Reload registers from stack in case ptrace changed them.
@@ -335,15 +255,15 @@ tracesys_phase2:
335 RESTORE_C_REGS_EXCEPT_RAX 255 RESTORE_C_REGS_EXCEPT_RAX
336 RESTORE_EXTRA_REGS 256 RESTORE_EXTRA_REGS
337#if __SYSCALL_MASK == ~0 257#if __SYSCALL_MASK == ~0
338 cmpq $__NR_syscall_max,%rax 258 cmpq $__NR_syscall_max, %rax
339#else 259#else
340 andl $__SYSCALL_MASK,%eax 260 andl $__SYSCALL_MASK, %eax
341 cmpl $__NR_syscall_max,%eax 261 cmpl $__NR_syscall_max, %eax
342#endif 262#endif
343 ja 1f /* return -ENOSYS (already in pt_regs->ax) */ 263 ja 1f /* return -ENOSYS (already in pt_regs->ax) */
344 movq %r10,%rcx /* fixup for C */ 264 movq %r10, %rcx /* fixup for C */
345 call *sys_call_table(,%rax,8) 265 call *sys_call_table(, %rax, 8)
346 movq %rax,RAX(%rsp) 266 movq %rax, RAX(%rsp)
3471: 2671:
348 /* Use IRET because user could have changed pt_regs->foo */ 268 /* Use IRET because user could have changed pt_regs->foo */
349 269
@@ -355,31 +275,33 @@ GLOBAL(int_ret_from_sys_call)
355 DISABLE_INTERRUPTS(CLBR_NONE) 275 DISABLE_INTERRUPTS(CLBR_NONE)
356int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */ 276int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
357 TRACE_IRQS_OFF 277 TRACE_IRQS_OFF
358 movl $_TIF_ALLWORK_MASK,%edi 278 movl $_TIF_ALLWORK_MASK, %edi
359 /* edi: mask to check */ 279 /* edi: mask to check */
360GLOBAL(int_with_check) 280GLOBAL(int_with_check)
361 LOCKDEP_SYS_EXIT_IRQ 281 LOCKDEP_SYS_EXIT_IRQ
362 GET_THREAD_INFO(%rcx) 282 GET_THREAD_INFO(%rcx)
363 movl TI_flags(%rcx),%edx 283 movl TI_flags(%rcx), %edx
364 andl %edi,%edx 284 andl %edi, %edx
365 jnz int_careful 285 jnz int_careful
366 andl $~TS_COMPAT,TI_status(%rcx) 286 andl $~TS_COMPAT, TI_status(%rcx)
367 jmp syscall_return 287 jmp syscall_return
368 288
369 /* Either reschedule or signal or syscall exit tracking needed. */ 289 /*
370 /* First do a reschedule test. */ 290 * Either reschedule or signal or syscall exit tracking needed.
371 /* edx: work, edi: workmask */ 291 * First do a reschedule test.
292 * edx: work, edi: workmask
293 */
372int_careful: 294int_careful:
373 bt $TIF_NEED_RESCHED,%edx 295 bt $TIF_NEED_RESCHED, %edx
374 jnc int_very_careful 296 jnc int_very_careful
375 TRACE_IRQS_ON 297 TRACE_IRQS_ON
376 ENABLE_INTERRUPTS(CLBR_NONE) 298 ENABLE_INTERRUPTS(CLBR_NONE)
377 pushq_cfi %rdi 299 pushq %rdi
378 SCHEDULE_USER 300 SCHEDULE_USER
379 popq_cfi %rdi 301 popq %rdi
380 DISABLE_INTERRUPTS(CLBR_NONE) 302 DISABLE_INTERRUPTS(CLBR_NONE)
381 TRACE_IRQS_OFF 303 TRACE_IRQS_OFF
382 jmp int_with_check 304 jmp int_with_check
383 305
384 /* handle signals and tracing -- both require a full pt_regs */ 306 /* handle signals and tracing -- both require a full pt_regs */
385int_very_careful: 307int_very_careful:
@@ -387,27 +309,27 @@ int_very_careful:
387 ENABLE_INTERRUPTS(CLBR_NONE) 309 ENABLE_INTERRUPTS(CLBR_NONE)
388 SAVE_EXTRA_REGS 310 SAVE_EXTRA_REGS
389 /* Check for syscall exit trace */ 311 /* Check for syscall exit trace */
390 testl $_TIF_WORK_SYSCALL_EXIT,%edx 312 testl $_TIF_WORK_SYSCALL_EXIT, %edx
391 jz int_signal 313 jz int_signal
392 pushq_cfi %rdi 314 pushq %rdi
393 leaq 8(%rsp),%rdi # &ptregs -> arg1 315 leaq 8(%rsp), %rdi /* &ptregs -> arg1 */
394 call syscall_trace_leave 316 call syscall_trace_leave
395 popq_cfi %rdi 317 popq %rdi
396 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 318 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU), %edi
397 jmp int_restore_rest 319 jmp int_restore_rest
398 320
399int_signal: 321int_signal:
400 testl $_TIF_DO_NOTIFY_MASK,%edx 322 testl $_TIF_DO_NOTIFY_MASK, %edx
401 jz 1f 323 jz 1f
402 movq %rsp,%rdi # &ptregs -> arg1 324 movq %rsp, %rdi /* &ptregs -> arg1 */
403 xorl %esi,%esi # oldset -> arg2 325 xorl %esi, %esi /* oldset -> arg2 */
404 call do_notify_resume 326 call do_notify_resume
4051: movl $_TIF_WORK_MASK,%edi 3271: movl $_TIF_WORK_MASK, %edi
406int_restore_rest: 328int_restore_rest:
407 RESTORE_EXTRA_REGS 329 RESTORE_EXTRA_REGS
408 DISABLE_INTERRUPTS(CLBR_NONE) 330 DISABLE_INTERRUPTS(CLBR_NONE)
409 TRACE_IRQS_OFF 331 TRACE_IRQS_OFF
410 jmp int_with_check 332 jmp int_with_check
411 333
412syscall_return: 334syscall_return:
413 /* The IRETQ could re-enable interrupts: */ 335 /* The IRETQ could re-enable interrupts: */
@@ -418,10 +340,10 @@ syscall_return:
418 * Try to use SYSRET instead of IRET if we're returning to 340 * Try to use SYSRET instead of IRET if we're returning to
419 * a completely clean 64-bit userspace context. 341 * a completely clean 64-bit userspace context.
420 */ 342 */
421 movq RCX(%rsp),%rcx 343 movq RCX(%rsp), %rcx
422 movq RIP(%rsp),%r11 344 movq RIP(%rsp), %r11
423 cmpq %rcx,%r11 /* RCX == RIP */ 345 cmpq %rcx, %r11 /* RCX == RIP */
424 jne opportunistic_sysret_failed 346 jne opportunistic_sysret_failed
425 347
426 /* 348 /*
427 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP 349 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -434,19 +356,21 @@ syscall_return:
434 .ifne __VIRTUAL_MASK_SHIFT - 47 356 .ifne __VIRTUAL_MASK_SHIFT - 47
435 .error "virtual address width changed -- SYSRET checks need update" 357 .error "virtual address width changed -- SYSRET checks need update"
436 .endif 358 .endif
359
437 /* Change top 16 bits to be the sign-extension of 47th bit */ 360 /* Change top 16 bits to be the sign-extension of 47th bit */
438 shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx 361 shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
439 sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx 362 sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
363
440 /* If this changed %rcx, it was not canonical */ 364 /* If this changed %rcx, it was not canonical */
441 cmpq %rcx, %r11 365 cmpq %rcx, %r11
442 jne opportunistic_sysret_failed 366 jne opportunistic_sysret_failed
443 367
444 cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ 368 cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
445 jne opportunistic_sysret_failed 369 jne opportunistic_sysret_failed
446 370
447 movq R11(%rsp),%r11 371 movq R11(%rsp), %r11
448 cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */ 372 cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
449 jne opportunistic_sysret_failed 373 jne opportunistic_sysret_failed
450 374
451 /* 375 /*
452 * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET, 376 * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
@@ -455,47 +379,41 @@ syscall_return:
455 * with register state that satisfies the opportunistic SYSRET 379 * with register state that satisfies the opportunistic SYSRET
456 * conditions. For example, single-stepping this user code: 380 * conditions. For example, single-stepping this user code:
457 * 381 *
458 * movq $stuck_here,%rcx 382 * movq $stuck_here, %rcx
459 * pushfq 383 * pushfq
460 * popq %r11 384 * popq %r11
461 * stuck_here: 385 * stuck_here:
462 * 386 *
463 * would never get past 'stuck_here'. 387 * would never get past 'stuck_here'.
464 */ 388 */
465 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 389 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
466 jnz opportunistic_sysret_failed 390 jnz opportunistic_sysret_failed
467 391
468 /* nothing to check for RSP */ 392 /* nothing to check for RSP */
469 393
470 cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */ 394 cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
471 jne opportunistic_sysret_failed 395 jne opportunistic_sysret_failed
472 396
473 /* 397 /*
474 * We win! This label is here just for ease of understanding 398 * We win! This label is here just for ease of understanding
475 * perf profiles. Nothing jumps here. 399 * perf profiles. Nothing jumps here.
476 */ 400 */
477syscall_return_via_sysret: 401syscall_return_via_sysret:
478 CFI_REMEMBER_STATE
479 /* rcx and r11 are already restored (see code above) */ 402 /* rcx and r11 are already restored (see code above) */
480 RESTORE_C_REGS_EXCEPT_RCX_R11 403 RESTORE_C_REGS_EXCEPT_RCX_R11
481 movq RSP(%rsp),%rsp 404 movq RSP(%rsp), %rsp
482 USERGS_SYSRET64 405 USERGS_SYSRET64
483 CFI_RESTORE_STATE
484 406
485opportunistic_sysret_failed: 407opportunistic_sysret_failed:
486 SWAPGS 408 SWAPGS
487 jmp restore_c_regs_and_iret 409 jmp restore_c_regs_and_iret
488 CFI_ENDPROC 410END(entry_SYSCALL_64)
489END(system_call)
490 411
491 412
492 .macro FORK_LIKE func 413 .macro FORK_LIKE func
493ENTRY(stub_\func) 414ENTRY(stub_\func)
494 CFI_STARTPROC
495 DEFAULT_FRAME 0, 8 /* offset 8: return address */
496 SAVE_EXTRA_REGS 8 415 SAVE_EXTRA_REGS 8
497 jmp sys_\func 416 jmp sys_\func
498 CFI_ENDPROC
499END(stub_\func) 417END(stub_\func)
500 .endm 418 .endm
501 419
@@ -504,8 +422,6 @@ END(stub_\func)
504 FORK_LIKE vfork 422 FORK_LIKE vfork
505 423
506ENTRY(stub_execve) 424ENTRY(stub_execve)
507 CFI_STARTPROC
508 DEFAULT_FRAME 0, 8
509 call sys_execve 425 call sys_execve
510return_from_execve: 426return_from_execve:
511 testl %eax, %eax 427 testl %eax, %eax
@@ -515,11 +431,9 @@ return_from_execve:
5151: 4311:
516 /* must use IRET code path (pt_regs->cs may have changed) */ 432 /* must use IRET code path (pt_regs->cs may have changed) */
517 addq $8, %rsp 433 addq $8, %rsp
518 CFI_ADJUST_CFA_OFFSET -8
519 ZERO_EXTRA_REGS 434 ZERO_EXTRA_REGS
520 movq %rax,RAX(%rsp) 435 movq %rax, RAX(%rsp)
521 jmp int_ret_from_sys_call 436 jmp int_ret_from_sys_call
522 CFI_ENDPROC
523END(stub_execve) 437END(stub_execve)
524/* 438/*
525 * Remaining execve stubs are only 7 bytes long. 439 * Remaining execve stubs are only 7 bytes long.
@@ -527,32 +441,23 @@ END(stub_execve)
527 */ 441 */
528 .align 8 442 .align 8
529GLOBAL(stub_execveat) 443GLOBAL(stub_execveat)
530 CFI_STARTPROC
531 DEFAULT_FRAME 0, 8
532 call sys_execveat 444 call sys_execveat
533 jmp return_from_execve 445 jmp return_from_execve
534 CFI_ENDPROC
535END(stub_execveat) 446END(stub_execveat)
536 447
537#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) 448#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
538 .align 8 449 .align 8
539GLOBAL(stub_x32_execve) 450GLOBAL(stub_x32_execve)
540GLOBAL(stub32_execve) 451GLOBAL(stub32_execve)
541 CFI_STARTPROC
542 DEFAULT_FRAME 0, 8
543 call compat_sys_execve 452 call compat_sys_execve
544 jmp return_from_execve 453 jmp return_from_execve
545 CFI_ENDPROC
546END(stub32_execve) 454END(stub32_execve)
547END(stub_x32_execve) 455END(stub_x32_execve)
548 .align 8 456 .align 8
549GLOBAL(stub_x32_execveat) 457GLOBAL(stub_x32_execveat)
550GLOBAL(stub32_execveat) 458GLOBAL(stub32_execveat)
551 CFI_STARTPROC
552 DEFAULT_FRAME 0, 8
553 call compat_sys_execveat 459 call compat_sys_execveat
554 jmp return_from_execve 460 jmp return_from_execve
555 CFI_ENDPROC
556END(stub32_execveat) 461END(stub32_execveat)
557END(stub_x32_execveat) 462END(stub_x32_execveat)
558#endif 463#endif
@@ -562,8 +467,6 @@ END(stub_x32_execveat)
562 * This cannot be done with SYSRET, so use the IRET return path instead. 467 * This cannot be done with SYSRET, so use the IRET return path instead.
563 */ 468 */
564ENTRY(stub_rt_sigreturn) 469ENTRY(stub_rt_sigreturn)
565 CFI_STARTPROC
566 DEFAULT_FRAME 0, 8
567 /* 470 /*
568 * SAVE_EXTRA_REGS result is not normally needed: 471 * SAVE_EXTRA_REGS result is not normally needed:
569 * sigreturn overwrites all pt_regs->GPREGS. 472 * sigreturn overwrites all pt_regs->GPREGS.
@@ -572,24 +475,19 @@ ENTRY(stub_rt_sigreturn)
572 * we SAVE_EXTRA_REGS here. 475 * we SAVE_EXTRA_REGS here.
573 */ 476 */
574 SAVE_EXTRA_REGS 8 477 SAVE_EXTRA_REGS 8
575 call sys_rt_sigreturn 478 call sys_rt_sigreturn
576return_from_stub: 479return_from_stub:
577 addq $8, %rsp 480 addq $8, %rsp
578 CFI_ADJUST_CFA_OFFSET -8
579 RESTORE_EXTRA_REGS 481 RESTORE_EXTRA_REGS
580 movq %rax,RAX(%rsp) 482 movq %rax, RAX(%rsp)
581 jmp int_ret_from_sys_call 483 jmp int_ret_from_sys_call
582 CFI_ENDPROC
583END(stub_rt_sigreturn) 484END(stub_rt_sigreturn)
584 485
585#ifdef CONFIG_X86_X32_ABI 486#ifdef CONFIG_X86_X32_ABI
586ENTRY(stub_x32_rt_sigreturn) 487ENTRY(stub_x32_rt_sigreturn)
587 CFI_STARTPROC
588 DEFAULT_FRAME 0, 8
589 SAVE_EXTRA_REGS 8 488 SAVE_EXTRA_REGS 8
590 call sys32_x32_rt_sigreturn 489 call sys32_x32_rt_sigreturn
591 jmp return_from_stub 490 jmp return_from_stub
592 CFI_ENDPROC
593END(stub_x32_rt_sigreturn) 491END(stub_x32_rt_sigreturn)
594#endif 492#endif
595 493
@@ -599,36 +497,36 @@ END(stub_x32_rt_sigreturn)
599 * rdi: prev task we switched from 497 * rdi: prev task we switched from
600 */ 498 */
601ENTRY(ret_from_fork) 499ENTRY(ret_from_fork)
602 DEFAULT_FRAME
603 500
604 LOCK ; btr $TIF_FORK,TI_flags(%r8) 501 LOCK ; btr $TIF_FORK, TI_flags(%r8)
605 502
606 pushq_cfi $0x0002 503 pushq $0x0002
607 popfq_cfi # reset kernel eflags 504 popfq /* reset kernel eflags */
608 505
609 call schedule_tail # rdi: 'prev' task parameter 506 call schedule_tail /* rdi: 'prev' task parameter */
610 507
611 RESTORE_EXTRA_REGS 508 RESTORE_EXTRA_REGS
612 509
613 testb $3, CS(%rsp) # from kernel_thread? 510 testb $3, CS(%rsp) /* from kernel_thread? */
614 511
615 /* 512 /*
616 * By the time we get here, we have no idea whether our pt_regs, 513 * By the time we get here, we have no idea whether our pt_regs,
617 * ti flags, and ti status came from the 64-bit SYSCALL fast path, 514 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
618 * the slow path, or one of the ia32entry paths. 515 * the slow path, or one of the 32-bit compat paths.
619 * Use IRET code path to return, since it can safely handle 516 * Use IRET code path to return, since it can safely handle
620 * all of the above. 517 * all of the above.
621 */ 518 */
622 jnz int_ret_from_sys_call 519 jnz int_ret_from_sys_call
623 520
624 /* We came from kernel_thread */ 521 /*
625 /* nb: we depend on RESTORE_EXTRA_REGS above */ 522 * We came from kernel_thread
626 movq %rbp, %rdi 523 * nb: we depend on RESTORE_EXTRA_REGS above
627 call *%rbx 524 */
628 movl $0, RAX(%rsp) 525 movq %rbp, %rdi
526 call *%rbx
527 movl $0, RAX(%rsp)
629 RESTORE_EXTRA_REGS 528 RESTORE_EXTRA_REGS
630 jmp int_ret_from_sys_call 529 jmp int_ret_from_sys_call
631 CFI_ENDPROC
632END(ret_from_fork) 530END(ret_from_fork)
633 531
634/* 532/*
@@ -637,16 +535,13 @@ END(ret_from_fork)
637 */ 535 */
638 .align 8 536 .align 8
639ENTRY(irq_entries_start) 537ENTRY(irq_entries_start)
640 INTR_FRAME
641 vector=FIRST_EXTERNAL_VECTOR 538 vector=FIRST_EXTERNAL_VECTOR
642 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) 539 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
643 pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ 540 pushq $(~vector+0x80) /* Note: always in signed byte range */
644 vector=vector+1 541 vector=vector+1
645 jmp common_interrupt 542 jmp common_interrupt
646 CFI_ADJUST_CFA_OFFSET -8
647 .align 8 543 .align 8
648 .endr 544 .endr
649 CFI_ENDPROC
650END(irq_entries_start) 545END(irq_entries_start)
651 546
652/* 547/*
@@ -672,7 +567,7 @@ END(irq_entries_start)
672 /* this goes to 0(%rsp) for unwinder, not for saving the value: */ 567 /* this goes to 0(%rsp) for unwinder, not for saving the value: */
673 SAVE_EXTRA_REGS_RBP -RBP 568 SAVE_EXTRA_REGS_RBP -RBP
674 569
675 leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ 570 leaq -RBP(%rsp), %rdi /* arg1 for \func (pointer to pt_regs) */
676 571
677 testb $3, CS-RBP(%rsp) 572 testb $3, CS-RBP(%rsp)
678 jz 1f 573 jz 1f
@@ -685,24 +580,14 @@ END(irq_entries_start)
685 * a little cheaper to use a separate counter in the PDA (short of 580 * a little cheaper to use a separate counter in the PDA (short of
686 * moving irq_enter into assembly, which would be too much work) 581 * moving irq_enter into assembly, which would be too much work)
687 */ 582 */
688 movq %rsp, %rsi 583 movq %rsp, %rsi
689 incl PER_CPU_VAR(irq_count) 584 incl PER_CPU_VAR(irq_count)
690 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp 585 cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
691 CFI_DEF_CFA_REGISTER rsi 586 pushq %rsi
692 pushq %rsi
693 /*
694 * For debugger:
695 * "CFA (Current Frame Address) is the value on stack + offset"
696 */
697 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
698 0x77 /* DW_OP_breg7 (rsp) */, 0, \
699 0x06 /* DW_OP_deref */, \
700 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
701 0x22 /* DW_OP_plus */
702 /* We entered an interrupt context - irqs are off: */ 587 /* We entered an interrupt context - irqs are off: */
703 TRACE_IRQS_OFF 588 TRACE_IRQS_OFF
704 589
705 call \func 590 call \func
706 .endm 591 .endm
707 592
708 /* 593 /*
@@ -711,42 +596,36 @@ END(irq_entries_start)
711 */ 596 */
712 .p2align CONFIG_X86_L1_CACHE_SHIFT 597 .p2align CONFIG_X86_L1_CACHE_SHIFT
713common_interrupt: 598common_interrupt:
714 XCPT_FRAME
715 ASM_CLAC 599 ASM_CLAC
716 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 600 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
717 interrupt do_IRQ 601 interrupt do_IRQ
718 /* 0(%rsp): old RSP */ 602 /* 0(%rsp): old RSP */
719ret_from_intr: 603ret_from_intr:
720 DISABLE_INTERRUPTS(CLBR_NONE) 604 DISABLE_INTERRUPTS(CLBR_NONE)
721 TRACE_IRQS_OFF 605 TRACE_IRQS_OFF
722 decl PER_CPU_VAR(irq_count) 606 decl PER_CPU_VAR(irq_count)
723 607
724 /* Restore saved previous stack */ 608 /* Restore saved previous stack */
725 popq %rsi 609 popq %rsi
726 CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
727 /* return code expects complete pt_regs - adjust rsp accordingly: */ 610 /* return code expects complete pt_regs - adjust rsp accordingly: */
728 leaq -RBP(%rsi),%rsp 611 leaq -RBP(%rsi), %rsp
729 CFI_DEF_CFA_REGISTER rsp
730 CFI_ADJUST_CFA_OFFSET RBP
731 612
732 testb $3, CS(%rsp) 613 testb $3, CS(%rsp)
733 jz retint_kernel 614 jz retint_kernel
734 /* Interrupt came from user space */ 615 /* Interrupt came from user space */
735 616retint_user:
736 GET_THREAD_INFO(%rcx) 617 GET_THREAD_INFO(%rcx)
737 /* 618
738 * %rcx: thread info. Interrupts off. 619 /* %rcx: thread info. Interrupts are off. */
739 */
740retint_with_reschedule: 620retint_with_reschedule:
741 movl $_TIF_WORK_MASK,%edi 621 movl $_TIF_WORK_MASK, %edi
742retint_check: 622retint_check:
743 LOCKDEP_SYS_EXIT_IRQ 623 LOCKDEP_SYS_EXIT_IRQ
744 movl TI_flags(%rcx),%edx 624 movl TI_flags(%rcx), %edx
745 andl %edi,%edx 625 andl %edi, %edx
746 CFI_REMEMBER_STATE 626 jnz retint_careful
747 jnz retint_careful
748 627
749retint_swapgs: /* return to user-space */ 628retint_swapgs: /* return to user-space */
750 /* 629 /*
751 * The iretq could re-enable interrupts: 630 * The iretq could re-enable interrupts:
752 */ 631 */
@@ -761,9 +640,9 @@ retint_kernel:
761#ifdef CONFIG_PREEMPT 640#ifdef CONFIG_PREEMPT
762 /* Interrupts are off */ 641 /* Interrupts are off */
763 /* Check if we need preemption */ 642 /* Check if we need preemption */
764 bt $9,EFLAGS(%rsp) /* interrupts were off? */ 643 bt $9, EFLAGS(%rsp) /* were interrupts off? */
765 jnc 1f 644 jnc 1f
7660: cmpl $0,PER_CPU_VAR(__preempt_count) 6450: cmpl $0, PER_CPU_VAR(__preempt_count)
767 jnz 1f 646 jnz 1f
768 call preempt_schedule_irq 647 call preempt_schedule_irq
769 jmp 0b 648 jmp 0b
@@ -781,8 +660,6 @@ retint_kernel:
781restore_c_regs_and_iret: 660restore_c_regs_and_iret:
782 RESTORE_C_REGS 661 RESTORE_C_REGS
783 REMOVE_PT_GPREGS_FROM_STACK 8 662 REMOVE_PT_GPREGS_FROM_STACK 8
784
785irq_return:
786 INTERRUPT_RETURN 663 INTERRUPT_RETURN
787 664
788ENTRY(native_iret) 665ENTRY(native_iret)
@@ -791,8 +668,8 @@ ENTRY(native_iret)
791 * 64-bit mode SS:RSP on the exception stack is always valid. 668 * 64-bit mode SS:RSP on the exception stack is always valid.
792 */ 669 */
793#ifdef CONFIG_X86_ESPFIX64 670#ifdef CONFIG_X86_ESPFIX64
794 testb $4,(SS-RIP)(%rsp) 671 testb $4, (SS-RIP)(%rsp)
795 jnz native_irq_return_ldt 672 jnz native_irq_return_ldt
796#endif 673#endif
797 674
798.global native_irq_return_iret 675.global native_irq_return_iret
@@ -807,62 +684,60 @@ native_irq_return_iret:
807 684
808#ifdef CONFIG_X86_ESPFIX64 685#ifdef CONFIG_X86_ESPFIX64
809native_irq_return_ldt: 686native_irq_return_ldt:
810 pushq_cfi %rax 687 pushq %rax
811 pushq_cfi %rdi 688 pushq %rdi
812 SWAPGS 689 SWAPGS
813 movq PER_CPU_VAR(espfix_waddr),%rdi 690 movq PER_CPU_VAR(espfix_waddr), %rdi
814 movq %rax,(0*8)(%rdi) /* RAX */ 691 movq %rax, (0*8)(%rdi) /* RAX */
815 movq (2*8)(%rsp),%rax /* RIP */ 692 movq (2*8)(%rsp), %rax /* RIP */
816 movq %rax,(1*8)(%rdi) 693 movq %rax, (1*8)(%rdi)
817 movq (3*8)(%rsp),%rax /* CS */ 694 movq (3*8)(%rsp), %rax /* CS */
818 movq %rax,(2*8)(%rdi) 695 movq %rax, (2*8)(%rdi)
819 movq (4*8)(%rsp),%rax /* RFLAGS */ 696 movq (4*8)(%rsp), %rax /* RFLAGS */
820 movq %rax,(3*8)(%rdi) 697 movq %rax, (3*8)(%rdi)
821 movq (6*8)(%rsp),%rax /* SS */ 698 movq (6*8)(%rsp), %rax /* SS */
822 movq %rax,(5*8)(%rdi) 699 movq %rax, (5*8)(%rdi)
823 movq (5*8)(%rsp),%rax /* RSP */ 700 movq (5*8)(%rsp), %rax /* RSP */
824 movq %rax,(4*8)(%rdi) 701 movq %rax, (4*8)(%rdi)
825 andl $0xffff0000,%eax 702 andl $0xffff0000, %eax
826 popq_cfi %rdi 703 popq %rdi
827 orq PER_CPU_VAR(espfix_stack),%rax 704 orq PER_CPU_VAR(espfix_stack), %rax
828 SWAPGS 705 SWAPGS
829 movq %rax,%rsp 706 movq %rax, %rsp
830 popq_cfi %rax 707 popq %rax
831 jmp native_irq_return_iret 708 jmp native_irq_return_iret
832#endif 709#endif
833 710
834 /* edi: workmask, edx: work */ 711 /* edi: workmask, edx: work */
835retint_careful: 712retint_careful:
836 CFI_RESTORE_STATE 713 bt $TIF_NEED_RESCHED, %edx
837 bt $TIF_NEED_RESCHED,%edx 714 jnc retint_signal
838 jnc retint_signal
839 TRACE_IRQS_ON 715 TRACE_IRQS_ON
840 ENABLE_INTERRUPTS(CLBR_NONE) 716 ENABLE_INTERRUPTS(CLBR_NONE)
841 pushq_cfi %rdi 717 pushq %rdi
842 SCHEDULE_USER 718 SCHEDULE_USER
843 popq_cfi %rdi 719 popq %rdi
844 GET_THREAD_INFO(%rcx) 720 GET_THREAD_INFO(%rcx)
845 DISABLE_INTERRUPTS(CLBR_NONE) 721 DISABLE_INTERRUPTS(CLBR_NONE)
846 TRACE_IRQS_OFF 722 TRACE_IRQS_OFF
847 jmp retint_check 723 jmp retint_check
848 724
849retint_signal: 725retint_signal:
850 testl $_TIF_DO_NOTIFY_MASK,%edx 726 testl $_TIF_DO_NOTIFY_MASK, %edx
851 jz retint_swapgs 727 jz retint_swapgs
852 TRACE_IRQS_ON 728 TRACE_IRQS_ON
853 ENABLE_INTERRUPTS(CLBR_NONE) 729 ENABLE_INTERRUPTS(CLBR_NONE)
854 SAVE_EXTRA_REGS 730 SAVE_EXTRA_REGS
855 movq $-1,ORIG_RAX(%rsp) 731 movq $-1, ORIG_RAX(%rsp)
856 xorl %esi,%esi # oldset 732 xorl %esi, %esi /* oldset */
857 movq %rsp,%rdi # &pt_regs 733 movq %rsp, %rdi /* &pt_regs */
858 call do_notify_resume 734 call do_notify_resume
859 RESTORE_EXTRA_REGS 735 RESTORE_EXTRA_REGS
860 DISABLE_INTERRUPTS(CLBR_NONE) 736 DISABLE_INTERRUPTS(CLBR_NONE)
861 TRACE_IRQS_OFF 737 TRACE_IRQS_OFF
862 GET_THREAD_INFO(%rcx) 738 GET_THREAD_INFO(%rcx)
863 jmp retint_with_reschedule 739 jmp retint_with_reschedule
864 740
865 CFI_ENDPROC
866END(common_interrupt) 741END(common_interrupt)
867 742
868/* 743/*
@@ -870,13 +745,11 @@ END(common_interrupt)
870 */ 745 */
871.macro apicinterrupt3 num sym do_sym 746.macro apicinterrupt3 num sym do_sym
872ENTRY(\sym) 747ENTRY(\sym)
873 INTR_FRAME
874 ASM_CLAC 748 ASM_CLAC
875 pushq_cfi $~(\num) 749 pushq $~(\num)
876.Lcommon_\sym: 750.Lcommon_\sym:
877 interrupt \do_sym 751 interrupt \do_sym
878 jmp ret_from_intr 752 jmp ret_from_intr
879 CFI_ENDPROC
880END(\sym) 753END(\sym)
881.endm 754.endm
882 755
@@ -898,55 +771,45 @@ trace_apicinterrupt \num \sym
898.endm 771.endm
899 772
900#ifdef CONFIG_SMP 773#ifdef CONFIG_SMP
901apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \ 774apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
902 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 775apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt
903apicinterrupt3 REBOOT_VECTOR \
904 reboot_interrupt smp_reboot_interrupt
905#endif 776#endif
906 777
907#ifdef CONFIG_X86_UV 778#ifdef CONFIG_X86_UV
908apicinterrupt3 UV_BAU_MESSAGE \ 779apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
909 uv_bau_message_intr1 uv_bau_message_interrupt
910#endif 780#endif
911apicinterrupt LOCAL_TIMER_VECTOR \ 781
912 apic_timer_interrupt smp_apic_timer_interrupt 782apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt
913apicinterrupt X86_PLATFORM_IPI_VECTOR \ 783apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi
914 x86_platform_ipi smp_x86_platform_ipi
915 784
916#ifdef CONFIG_HAVE_KVM 785#ifdef CONFIG_HAVE_KVM
917apicinterrupt3 POSTED_INTR_VECTOR \ 786apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
918 kvm_posted_intr_ipi smp_kvm_posted_intr_ipi 787apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
919apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
920 kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
921#endif 788#endif
922 789
923#ifdef CONFIG_X86_MCE_THRESHOLD 790#ifdef CONFIG_X86_MCE_THRESHOLD
924apicinterrupt THRESHOLD_APIC_VECTOR \ 791apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt
925 threshold_interrupt smp_threshold_interrupt 792#endif
793
794#ifdef CONFIG_X86_MCE_AMD
795apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt
926#endif 796#endif
927 797
928#ifdef CONFIG_X86_THERMAL_VECTOR 798#ifdef CONFIG_X86_THERMAL_VECTOR
929apicinterrupt THERMAL_APIC_VECTOR \ 799apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt
930 thermal_interrupt smp_thermal_interrupt
931#endif 800#endif
932 801
933#ifdef CONFIG_SMP 802#ifdef CONFIG_SMP
934apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ 803apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt
935 call_function_single_interrupt smp_call_function_single_interrupt 804apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt
936apicinterrupt CALL_FUNCTION_VECTOR \ 805apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
937 call_function_interrupt smp_call_function_interrupt
938apicinterrupt RESCHEDULE_VECTOR \
939 reschedule_interrupt smp_reschedule_interrupt
940#endif 806#endif
941 807
942apicinterrupt ERROR_APIC_VECTOR \ 808apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
943 error_interrupt smp_error_interrupt 809apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
944apicinterrupt SPURIOUS_APIC_VECTOR \
945 spurious_interrupt smp_spurious_interrupt
946 810
947#ifdef CONFIG_IRQ_WORK 811#ifdef CONFIG_IRQ_WORK
948apicinterrupt IRQ_WORK_VECTOR \ 812apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
949 irq_work_interrupt smp_irq_work_interrupt
950#endif 813#endif
951 814
952/* 815/*
@@ -961,100 +824,87 @@ ENTRY(\sym)
961 .error "using shift_ist requires paranoid=1" 824 .error "using shift_ist requires paranoid=1"
962 .endif 825 .endif
963 826
964 .if \has_error_code
965 XCPT_FRAME
966 .else
967 INTR_FRAME
968 .endif
969
970 ASM_CLAC 827 ASM_CLAC
971 PARAVIRT_ADJUST_EXCEPTION_FRAME 828 PARAVIRT_ADJUST_EXCEPTION_FRAME
972 829
973 .ifeq \has_error_code 830 .ifeq \has_error_code
974 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 831 pushq $-1 /* ORIG_RAX: no syscall to restart */
975 .endif 832 .endif
976 833
977 ALLOC_PT_GPREGS_ON_STACK 834 ALLOC_PT_GPREGS_ON_STACK
978 835
979 .if \paranoid 836 .if \paranoid
980 .if \paranoid == 1 837 .if \paranoid == 1
981 CFI_REMEMBER_STATE 838 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
982 testb $3, CS(%rsp) /* If coming from userspace, switch */ 839 jnz 1f
983 jnz 1f /* stacks. */
984 .endif 840 .endif
985 call paranoid_entry 841 call paranoid_entry
986 .else 842 .else
987 call error_entry 843 call error_entry
988 .endif 844 .endif
989 /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ 845 /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
990 846
991 DEFAULT_FRAME 0
992
993 .if \paranoid 847 .if \paranoid
994 .if \shift_ist != -1 848 .if \shift_ist != -1
995 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ 849 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
996 .else 850 .else
997 TRACE_IRQS_OFF 851 TRACE_IRQS_OFF
998 .endif 852 .endif
999 .endif 853 .endif
1000 854
1001 movq %rsp,%rdi /* pt_regs pointer */ 855 movq %rsp, %rdi /* pt_regs pointer */
1002 856
1003 .if \has_error_code 857 .if \has_error_code
1004 movq ORIG_RAX(%rsp),%rsi /* get error code */ 858 movq ORIG_RAX(%rsp), %rsi /* get error code */
1005 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ 859 movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
1006 .else 860 .else
1007 xorl %esi,%esi /* no error code */ 861 xorl %esi, %esi /* no error code */
1008 .endif 862 .endif
1009 863
1010 .if \shift_ist != -1 864 .if \shift_ist != -1
1011 subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) 865 subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
1012 .endif 866 .endif
1013 867
1014 call \do_sym 868 call \do_sym
1015 869
1016 .if \shift_ist != -1 870 .if \shift_ist != -1
1017 addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) 871 addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
1018 .endif 872 .endif
1019 873
1020 /* these procedures expect "no swapgs" flag in ebx */ 874 /* these procedures expect "no swapgs" flag in ebx */
1021 .if \paranoid 875 .if \paranoid
1022 jmp paranoid_exit 876 jmp paranoid_exit
1023 .else 877 .else
1024 jmp error_exit 878 jmp error_exit
1025 .endif 879 .endif
1026 880
1027 .if \paranoid == 1 881 .if \paranoid == 1
1028 CFI_RESTORE_STATE
1029 /* 882 /*
1030 * Paranoid entry from userspace. Switch stacks and treat it 883 * Paranoid entry from userspace. Switch stacks and treat it
1031 * as a normal entry. This means that paranoid handlers 884 * as a normal entry. This means that paranoid handlers
1032 * run in real process context if user_mode(regs). 885 * run in real process context if user_mode(regs).
1033 */ 886 */
10341: 8871:
1035 call error_entry 888 call error_entry
1036 889
1037 DEFAULT_FRAME 0
1038 890
1039 movq %rsp,%rdi /* pt_regs pointer */ 891 movq %rsp, %rdi /* pt_regs pointer */
1040 call sync_regs 892 call sync_regs
1041 movq %rax,%rsp /* switch stack */ 893 movq %rax, %rsp /* switch stack */
1042 894
1043 movq %rsp,%rdi /* pt_regs pointer */ 895 movq %rsp, %rdi /* pt_regs pointer */
1044 896
1045 .if \has_error_code 897 .if \has_error_code
1046 movq ORIG_RAX(%rsp),%rsi /* get error code */ 898 movq ORIG_RAX(%rsp), %rsi /* get error code */
1047 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ 899 movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
1048 .else 900 .else
1049 xorl %esi,%esi /* no error code */ 901 xorl %esi, %esi /* no error code */
1050 .endif 902 .endif
1051 903
1052 call \do_sym 904 call \do_sym
1053 905
1054 jmp error_exit /* %ebx: no swapgs flag */ 906 jmp error_exit /* %ebx: no swapgs flag */
1055 .endif 907 .endif
1056
1057 CFI_ENDPROC
1058END(\sym) 908END(\sym)
1059.endm 909.endm
1060 910
@@ -1069,65 +919,58 @@ idtentry \sym \do_sym has_error_code=\has_error_code
1069.endm 919.endm
1070#endif 920#endif
1071 921
1072idtentry divide_error do_divide_error has_error_code=0 922idtentry divide_error do_divide_error has_error_code=0
1073idtentry overflow do_overflow has_error_code=0 923idtentry overflow do_overflow has_error_code=0
1074idtentry bounds do_bounds has_error_code=0 924idtentry bounds do_bounds has_error_code=0
1075idtentry invalid_op do_invalid_op has_error_code=0 925idtentry invalid_op do_invalid_op has_error_code=0
1076idtentry device_not_available do_device_not_available has_error_code=0 926idtentry device_not_available do_device_not_available has_error_code=0
1077idtentry double_fault do_double_fault has_error_code=1 paranoid=2 927idtentry double_fault do_double_fault has_error_code=1 paranoid=2
1078idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 928idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
1079idtentry invalid_TSS do_invalid_TSS has_error_code=1 929idtentry invalid_TSS do_invalid_TSS has_error_code=1
1080idtentry segment_not_present do_segment_not_present has_error_code=1 930idtentry segment_not_present do_segment_not_present has_error_code=1
1081idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 931idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
1082idtentry coprocessor_error do_coprocessor_error has_error_code=0 932idtentry coprocessor_error do_coprocessor_error has_error_code=0
1083idtentry alignment_check do_alignment_check has_error_code=1 933idtentry alignment_check do_alignment_check has_error_code=1
1084idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 934idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
1085 935
1086 936
1087 /* Reload gs selector with exception handling */ 937 /*
1088 /* edi: new selector */ 938 * Reload gs selector with exception handling
939 * edi: new selector
940 */
1089ENTRY(native_load_gs_index) 941ENTRY(native_load_gs_index)
1090 CFI_STARTPROC 942 pushfq
1091 pushfq_cfi
1092 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) 943 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1093 SWAPGS 944 SWAPGS
1094gs_change: 945gs_change:
1095 movl %edi,%gs 946 movl %edi, %gs
10962: mfence /* workaround */ 9472: mfence /* workaround */
1097 SWAPGS 948 SWAPGS
1098 popfq_cfi 949 popfq
1099 ret 950 ret
1100 CFI_ENDPROC
1101END(native_load_gs_index) 951END(native_load_gs_index)
1102 952
1103 _ASM_EXTABLE(gs_change,bad_gs) 953 _ASM_EXTABLE(gs_change, bad_gs)
1104 .section .fixup,"ax" 954 .section .fixup, "ax"
1105 /* running with kernelgs */ 955 /* running with kernelgs */
1106bad_gs: 956bad_gs:
1107 SWAPGS /* switch back to user gs */ 957 SWAPGS /* switch back to user gs */
1108 xorl %eax,%eax 958 xorl %eax, %eax
1109 movl %eax,%gs 959 movl %eax, %gs
1110 jmp 2b 960 jmp 2b
1111 .previous 961 .previous
1112 962
1113/* Call softirq on interrupt stack. Interrupts are off. */ 963/* Call softirq on interrupt stack. Interrupts are off. */
1114ENTRY(do_softirq_own_stack) 964ENTRY(do_softirq_own_stack)
1115 CFI_STARTPROC 965 pushq %rbp
1116 pushq_cfi %rbp 966 mov %rsp, %rbp
1117 CFI_REL_OFFSET rbp,0 967 incl PER_CPU_VAR(irq_count)
1118 mov %rsp,%rbp 968 cmove PER_CPU_VAR(irq_stack_ptr), %rsp
1119 CFI_DEF_CFA_REGISTER rbp 969 push %rbp /* frame pointer backlink */
1120 incl PER_CPU_VAR(irq_count) 970 call __do_softirq
1121 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1122 push %rbp # backlink for old unwinder
1123 call __do_softirq
1124 leaveq 971 leaveq
1125 CFI_RESTORE rbp 972 decl PER_CPU_VAR(irq_count)
1126 CFI_DEF_CFA_REGISTER rsp
1127 CFI_ADJUST_CFA_OFFSET -8
1128 decl PER_CPU_VAR(irq_count)
1129 ret 973 ret
1130 CFI_ENDPROC
1131END(do_softirq_own_stack) 974END(do_softirq_own_stack)
1132 975
1133#ifdef CONFIG_XEN 976#ifdef CONFIG_XEN
@@ -1146,29 +989,24 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
1146 * existing activation in its critical region -- if so, we pop the current 989 * existing activation in its critical region -- if so, we pop the current
1147 * activation and restart the handler using the previous one. 990 * activation and restart the handler using the previous one.
1148 */ 991 */
1149ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 992ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
1150 CFI_STARTPROC 993
1151/* 994/*
1152 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will 995 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
1153 * see the correct pointer to the pt_regs 996 * see the correct pointer to the pt_regs
1154 */ 997 */
1155 movq %rdi, %rsp # we don't return, adjust the stack frame 998 movq %rdi, %rsp /* we don't return, adjust the stack frame */
1156 CFI_ENDPROC 99911: incl PER_CPU_VAR(irq_count)
1157 DEFAULT_FRAME 1000 movq %rsp, %rbp
115811: incl PER_CPU_VAR(irq_count) 1001 cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
1159 movq %rsp,%rbp 1002 pushq %rbp /* frame pointer backlink */
1160 CFI_DEF_CFA_REGISTER rbp 1003 call xen_evtchn_do_upcall
1161 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp 1004 popq %rsp
1162 pushq %rbp # backlink for old unwinder 1005 decl PER_CPU_VAR(irq_count)
1163 call xen_evtchn_do_upcall
1164 popq %rsp
1165 CFI_DEF_CFA_REGISTER rsp
1166 decl PER_CPU_VAR(irq_count)
1167#ifndef CONFIG_PREEMPT 1006#ifndef CONFIG_PREEMPT
1168 call xen_maybe_preempt_hcall 1007 call xen_maybe_preempt_hcall
1169#endif 1008#endif
1170 jmp error_exit 1009 jmp error_exit
1171 CFI_ENDPROC
1172END(xen_do_hypervisor_callback) 1010END(xen_do_hypervisor_callback)
1173 1011
1174/* 1012/*
@@ -1185,51 +1023,35 @@ END(xen_do_hypervisor_callback)
1185 * with its current contents: any discrepancy means we are in category 1. 1023 * with its current contents: any discrepancy means we are in category 1.
1186 */ 1024 */
1187ENTRY(xen_failsafe_callback) 1025ENTRY(xen_failsafe_callback)
1188 INTR_FRAME 1 (6*8) 1026 movl %ds, %ecx
1189 /*CFI_REL_OFFSET gs,GS*/ 1027 cmpw %cx, 0x10(%rsp)
1190 /*CFI_REL_OFFSET fs,FS*/ 1028 jne 1f
1191 /*CFI_REL_OFFSET es,ES*/ 1029 movl %es, %ecx
1192 /*CFI_REL_OFFSET ds,DS*/ 1030 cmpw %cx, 0x18(%rsp)
1193 CFI_REL_OFFSET r11,8 1031 jne 1f
1194 CFI_REL_OFFSET rcx,0 1032 movl %fs, %ecx
1195 movl %ds,%ecx 1033 cmpw %cx, 0x20(%rsp)
1196 cmpw %cx,0x10(%rsp) 1034 jne 1f
1197 CFI_REMEMBER_STATE 1035 movl %gs, %ecx
1198 jne 1f 1036 cmpw %cx, 0x28(%rsp)
1199 movl %es,%ecx 1037 jne 1f
1200 cmpw %cx,0x18(%rsp)
1201 jne 1f
1202 movl %fs,%ecx
1203 cmpw %cx,0x20(%rsp)
1204 jne 1f
1205 movl %gs,%ecx
1206 cmpw %cx,0x28(%rsp)
1207 jne 1f
1208 /* All segments match their saved values => Category 2 (Bad IRET). */ 1038 /* All segments match their saved values => Category 2 (Bad IRET). */
1209 movq (%rsp),%rcx 1039 movq (%rsp), %rcx
1210 CFI_RESTORE rcx 1040 movq 8(%rsp), %r11
1211 movq 8(%rsp),%r11 1041 addq $0x30, %rsp
1212 CFI_RESTORE r11 1042 pushq $0 /* RIP */
1213 addq $0x30,%rsp 1043 pushq %r11
1214 CFI_ADJUST_CFA_OFFSET -0x30 1044 pushq %rcx
1215 pushq_cfi $0 /* RIP */ 1045 jmp general_protection
1216 pushq_cfi %r11
1217 pushq_cfi %rcx
1218 jmp general_protection
1219 CFI_RESTORE_STATE
12201: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ 10461: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1221 movq (%rsp),%rcx 1047 movq (%rsp), %rcx
1222 CFI_RESTORE rcx 1048 movq 8(%rsp), %r11
1223 movq 8(%rsp),%r11 1049 addq $0x30, %rsp
1224 CFI_RESTORE r11 1050 pushq $-1 /* orig_ax = -1 => not a system call */
1225 addq $0x30,%rsp
1226 CFI_ADJUST_CFA_OFFSET -0x30
1227 pushq_cfi $-1 /* orig_ax = -1 => not a system call */
1228 ALLOC_PT_GPREGS_ON_STACK 1051 ALLOC_PT_GPREGS_ON_STACK
1229 SAVE_C_REGS 1052 SAVE_C_REGS
1230 SAVE_EXTRA_REGS 1053 SAVE_EXTRA_REGS
1231 jmp error_exit 1054 jmp error_exit
1232 CFI_ENDPROC
1233END(xen_failsafe_callback) 1055END(xen_failsafe_callback)
1234 1056
1235apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ 1057apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
@@ -1242,21 +1064,25 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
1242 hyperv_callback_vector hyperv_vector_handler 1064 hyperv_callback_vector hyperv_vector_handler
1243#endif /* CONFIG_HYPERV */ 1065#endif /* CONFIG_HYPERV */
1244 1066
1245idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1067idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1246idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1068idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1247idtentry stack_segment do_stack_segment has_error_code=1 1069idtentry stack_segment do_stack_segment has_error_code=1
1070
1248#ifdef CONFIG_XEN 1071#ifdef CONFIG_XEN
1249idtentry xen_debug do_debug has_error_code=0 1072idtentry xen_debug do_debug has_error_code=0
1250idtentry xen_int3 do_int3 has_error_code=0 1073idtentry xen_int3 do_int3 has_error_code=0
1251idtentry xen_stack_segment do_stack_segment has_error_code=1 1074idtentry xen_stack_segment do_stack_segment has_error_code=1
1252#endif 1075#endif
1253idtentry general_protection do_general_protection has_error_code=1 1076
1254trace_idtentry page_fault do_page_fault has_error_code=1 1077idtentry general_protection do_general_protection has_error_code=1
1078trace_idtentry page_fault do_page_fault has_error_code=1
1079
1255#ifdef CONFIG_KVM_GUEST 1080#ifdef CONFIG_KVM_GUEST
1256idtentry async_page_fault do_async_page_fault has_error_code=1 1081idtentry async_page_fault do_async_page_fault has_error_code=1
1257#endif 1082#endif
1083
1258#ifdef CONFIG_X86_MCE 1084#ifdef CONFIG_X86_MCE
1259idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) 1085idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
1260#endif 1086#endif
1261 1087
1262/* 1088/*
@@ -1265,19 +1091,17 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(
1265 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise 1091 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
1266 */ 1092 */
1267ENTRY(paranoid_entry) 1093ENTRY(paranoid_entry)
1268 XCPT_FRAME 1 15*8
1269 cld 1094 cld
1270 SAVE_C_REGS 8 1095 SAVE_C_REGS 8
1271 SAVE_EXTRA_REGS 8 1096 SAVE_EXTRA_REGS 8
1272 movl $1,%ebx 1097 movl $1, %ebx
1273 movl $MSR_GS_BASE,%ecx 1098 movl $MSR_GS_BASE, %ecx
1274 rdmsr 1099 rdmsr
1275 testl %edx,%edx 1100 testl %edx, %edx
1276 js 1f /* negative -> in kernel */ 1101 js 1f /* negative -> in kernel */
1277 SWAPGS 1102 SWAPGS
1278 xorl %ebx,%ebx 1103 xorl %ebx, %ebx
12791: ret 11041: ret
1280 CFI_ENDPROC
1281END(paranoid_entry) 1105END(paranoid_entry)
1282 1106
1283/* 1107/*
@@ -1289,17 +1113,17 @@ END(paranoid_entry)
1289 * in syscall entry), so checking for preemption here would 1113 * in syscall entry), so checking for preemption here would
 1290 * be complicated. Fortunately, there's no good reason 1114 * be complicated. Fortunately, there's no good reason
1291 * to try to handle preemption here. 1115 * to try to handle preemption here.
1116 *
1117 * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
1292 */ 1118 */
1293/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
1294ENTRY(paranoid_exit) 1119ENTRY(paranoid_exit)
1295 DEFAULT_FRAME
1296 DISABLE_INTERRUPTS(CLBR_NONE) 1120 DISABLE_INTERRUPTS(CLBR_NONE)
1297 TRACE_IRQS_OFF_DEBUG 1121 TRACE_IRQS_OFF_DEBUG
1298 testl %ebx,%ebx /* swapgs needed? */ 1122 testl %ebx, %ebx /* swapgs needed? */
1299 jnz paranoid_exit_no_swapgs 1123 jnz paranoid_exit_no_swapgs
1300 TRACE_IRQS_IRETQ 1124 TRACE_IRQS_IRETQ
1301 SWAPGS_UNSAFE_STACK 1125 SWAPGS_UNSAFE_STACK
1302 jmp paranoid_exit_restore 1126 jmp paranoid_exit_restore
1303paranoid_exit_no_swapgs: 1127paranoid_exit_no_swapgs:
1304 TRACE_IRQS_IRETQ_DEBUG 1128 TRACE_IRQS_IRETQ_DEBUG
1305paranoid_exit_restore: 1129paranoid_exit_restore:
@@ -1307,24 +1131,24 @@ paranoid_exit_restore:
1307 RESTORE_C_REGS 1131 RESTORE_C_REGS
1308 REMOVE_PT_GPREGS_FROM_STACK 8 1132 REMOVE_PT_GPREGS_FROM_STACK 8
1309 INTERRUPT_RETURN 1133 INTERRUPT_RETURN
1310 CFI_ENDPROC
1311END(paranoid_exit) 1134END(paranoid_exit)
1312 1135
1313/* 1136/*
1314 * Save all registers in pt_regs, and switch gs if needed. 1137 * Save all registers in pt_regs, and switch gs if needed.
1315 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise 1138 * Return: EBX=0: came from user mode; EBX=1: otherwise
1316 */ 1139 */
1317ENTRY(error_entry) 1140ENTRY(error_entry)
1318 XCPT_FRAME 1 15*8
1319 cld 1141 cld
1320 SAVE_C_REGS 8 1142 SAVE_C_REGS 8
1321 SAVE_EXTRA_REGS 8 1143 SAVE_EXTRA_REGS 8
1322 xorl %ebx,%ebx 1144 xorl %ebx, %ebx
1323 testb $3, CS+8(%rsp) 1145 testb $3, CS+8(%rsp)
1324 jz error_kernelspace 1146 jz error_kernelspace
1325error_swapgs: 1147
1148 /* We entered from user mode */
1326 SWAPGS 1149 SWAPGS
1327error_sti: 1150
1151error_entry_done:
1328 TRACE_IRQS_OFF 1152 TRACE_IRQS_OFF
1329 ret 1153 ret
1330 1154
@@ -1335,56 +1159,66 @@ error_sti:
1335 * for these here too. 1159 * for these here too.
1336 */ 1160 */
1337error_kernelspace: 1161error_kernelspace:
1338 CFI_REL_OFFSET rcx, RCX+8 1162 incl %ebx
1339 incl %ebx 1163 leaq native_irq_return_iret(%rip), %rcx
1340 leaq native_irq_return_iret(%rip),%rcx 1164 cmpq %rcx, RIP+8(%rsp)
1341 cmpq %rcx,RIP+8(%rsp) 1165 je error_bad_iret
1342 je error_bad_iret 1166 movl %ecx, %eax /* zero extend */
1343 movl %ecx,%eax /* zero extend */ 1167 cmpq %rax, RIP+8(%rsp)
1344 cmpq %rax,RIP+8(%rsp) 1168 je bstep_iret
1345 je bstep_iret 1169 cmpq $gs_change, RIP+8(%rsp)
1346 cmpq $gs_change,RIP+8(%rsp) 1170 jne error_entry_done
1347 je error_swapgs 1171
1348 jmp error_sti 1172 /*
1173 * hack: gs_change can fail with user gsbase. If this happens, fix up
1174 * gsbase and proceed. We'll fix up the exception and land in
1175 * gs_change's error handler with kernel gsbase.
1176 */
1177 SWAPGS
1178 jmp error_entry_done
1349 1179
1350bstep_iret: 1180bstep_iret:
1351 /* Fix truncated RIP */ 1181 /* Fix truncated RIP */
1352 movq %rcx,RIP+8(%rsp) 1182 movq %rcx, RIP+8(%rsp)
1353 /* fall through */ 1183 /* fall through */
1354 1184
1355error_bad_iret: 1185error_bad_iret:
1186 /*
1187 * We came from an IRET to user mode, so we have user gsbase.
1188 * Switch to kernel gsbase:
1189 */
1356 SWAPGS 1190 SWAPGS
1357 mov %rsp,%rdi 1191
1358 call fixup_bad_iret 1192 /*
1359 mov %rax,%rsp 1193 * Pretend that the exception came from user mode: set up pt_regs
1360 decl %ebx /* Return to usergs */ 1194 * as if we faulted immediately after IRET and clear EBX so that
1361 jmp error_sti 1195 * error_exit knows that we will be returning to user mode.
1362 CFI_ENDPROC 1196 */
1197 mov %rsp, %rdi
1198 call fixup_bad_iret
1199 mov %rax, %rsp
1200 decl %ebx
1201 jmp error_entry_done
1363END(error_entry) 1202END(error_entry)
1364 1203
1365 1204
1366/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ 1205/*
 1206 * On entry, EBX is a "return to kernel mode" flag:
1207 * 1: already in kernel mode, don't need SWAPGS
1208 * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
1209 */
1367ENTRY(error_exit) 1210ENTRY(error_exit)
1368 DEFAULT_FRAME 1211 movl %ebx, %eax
1369 movl %ebx,%eax
1370 RESTORE_EXTRA_REGS 1212 RESTORE_EXTRA_REGS
1371 DISABLE_INTERRUPTS(CLBR_NONE) 1213 DISABLE_INTERRUPTS(CLBR_NONE)
1372 TRACE_IRQS_OFF 1214 TRACE_IRQS_OFF
1373 GET_THREAD_INFO(%rcx) 1215 testl %eax, %eax
1374 testl %eax,%eax 1216 jnz retint_kernel
1375 jnz retint_kernel 1217 jmp retint_user
1376 LOCKDEP_SYS_EXIT_IRQ
1377 movl TI_flags(%rcx),%edx
1378 movl $_TIF_WORK_MASK,%edi
1379 andl %edi,%edx
1380 jnz retint_careful
1381 jmp retint_swapgs
1382 CFI_ENDPROC
1383END(error_exit) 1218END(error_exit)
1384 1219
1385/* Runs on exception stack */ 1220/* Runs on exception stack */
1386ENTRY(nmi) 1221ENTRY(nmi)
1387 INTR_FRAME
1388 PARAVIRT_ADJUST_EXCEPTION_FRAME 1222 PARAVIRT_ADJUST_EXCEPTION_FRAME
1389 /* 1223 /*
1390 * We allow breakpoints in NMIs. If a breakpoint occurs, then 1224 * We allow breakpoints in NMIs. If a breakpoint occurs, then
@@ -1419,22 +1253,21 @@ ENTRY(nmi)
1419 */ 1253 */
1420 1254
1421 /* Use %rdx as our temp variable throughout */ 1255 /* Use %rdx as our temp variable throughout */
1422 pushq_cfi %rdx 1256 pushq %rdx
1423 CFI_REL_OFFSET rdx, 0
1424 1257
1425 /* 1258 /*
1426 * If %cs was not the kernel segment, then the NMI triggered in user 1259 * If %cs was not the kernel segment, then the NMI triggered in user
1427 * space, which means it is definitely not nested. 1260 * space, which means it is definitely not nested.
1428 */ 1261 */
1429 cmpl $__KERNEL_CS, 16(%rsp) 1262 cmpl $__KERNEL_CS, 16(%rsp)
1430 jne first_nmi 1263 jne first_nmi
1431 1264
1432 /* 1265 /*
1433 * Check the special variable on the stack to see if NMIs are 1266 * Check the special variable on the stack to see if NMIs are
1434 * executing. 1267 * executing.
1435 */ 1268 */
1436 cmpl $1, -8(%rsp) 1269 cmpl $1, -8(%rsp)
1437 je nested_nmi 1270 je nested_nmi
1438 1271
1439 /* 1272 /*
1440 * Now test if the previous stack was an NMI stack. 1273 * Now test if the previous stack was an NMI stack.
@@ -1448,51 +1281,46 @@ ENTRY(nmi)
1448 cmpq %rdx, 4*8(%rsp) 1281 cmpq %rdx, 4*8(%rsp)
1449 /* If the stack pointer is above the NMI stack, this is a normal NMI */ 1282 /* If the stack pointer is above the NMI stack, this is a normal NMI */
1450 ja first_nmi 1283 ja first_nmi
1284
1451 subq $EXCEPTION_STKSZ, %rdx 1285 subq $EXCEPTION_STKSZ, %rdx
1452 cmpq %rdx, 4*8(%rsp) 1286 cmpq %rdx, 4*8(%rsp)
1453 /* If it is below the NMI stack, it is a normal NMI */ 1287 /* If it is below the NMI stack, it is a normal NMI */
1454 jb first_nmi 1288 jb first_nmi
1455 /* Ah, it is within the NMI stack, treat it as nested */ 1289 /* Ah, it is within the NMI stack, treat it as nested */
1456 1290
1457 CFI_REMEMBER_STATE
1458
1459nested_nmi: 1291nested_nmi:
1460 /* 1292 /*
1461 * Do nothing if we interrupted the fixup in repeat_nmi. 1293 * Do nothing if we interrupted the fixup in repeat_nmi.
1462 * It's about to repeat the NMI handler, so we are fine 1294 * It's about to repeat the NMI handler, so we are fine
1463 * with ignoring this one. 1295 * with ignoring this one.
1464 */ 1296 */
1465 movq $repeat_nmi, %rdx 1297 movq $repeat_nmi, %rdx
1466 cmpq 8(%rsp), %rdx 1298 cmpq 8(%rsp), %rdx
1467 ja 1f 1299 ja 1f
1468 movq $end_repeat_nmi, %rdx 1300 movq $end_repeat_nmi, %rdx
1469 cmpq 8(%rsp), %rdx 1301 cmpq 8(%rsp), %rdx
1470 ja nested_nmi_out 1302 ja nested_nmi_out
1471 1303
14721: 13041:
1473 /* Set up the interrupted NMIs stack to jump to repeat_nmi */ 1305 /* Set up the interrupted NMIs stack to jump to repeat_nmi */
1474 leaq -1*8(%rsp), %rdx 1306 leaq -1*8(%rsp), %rdx
1475 movq %rdx, %rsp 1307 movq %rdx, %rsp
1476 CFI_ADJUST_CFA_OFFSET 1*8 1308 leaq -10*8(%rsp), %rdx
1477 leaq -10*8(%rsp), %rdx 1309 pushq $__KERNEL_DS
1478 pushq_cfi $__KERNEL_DS 1310 pushq %rdx
1479 pushq_cfi %rdx 1311 pushfq
1480 pushfq_cfi 1312 pushq $__KERNEL_CS
1481 pushq_cfi $__KERNEL_CS 1313 pushq $repeat_nmi
1482 pushq_cfi $repeat_nmi
1483 1314
1484 /* Put stack back */ 1315 /* Put stack back */
1485 addq $(6*8), %rsp 1316 addq $(6*8), %rsp
1486 CFI_ADJUST_CFA_OFFSET -6*8
1487 1317
1488nested_nmi_out: 1318nested_nmi_out:
1489 popq_cfi %rdx 1319 popq %rdx
1490 CFI_RESTORE rdx
1491 1320
1492 /* No need to check faults here */ 1321 /* No need to check faults here */
1493 INTERRUPT_RETURN 1322 INTERRUPT_RETURN
1494 1323
1495 CFI_RESTORE_STATE
1496first_nmi: 1324first_nmi:
1497 /* 1325 /*
1498 * Because nested NMIs will use the pushed location that we 1326 * Because nested NMIs will use the pushed location that we
@@ -1530,23 +1358,18 @@ first_nmi:
1530 * is also used by nested NMIs and can not be trusted on exit. 1358 * is also used by nested NMIs and can not be trusted on exit.
1531 */ 1359 */
1532 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ 1360 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
1533 movq (%rsp), %rdx 1361 movq (%rsp), %rdx
1534 CFI_RESTORE rdx
1535 1362
1536 /* Set the NMI executing variable on the stack. */ 1363 /* Set the NMI executing variable on the stack. */
1537 pushq_cfi $1 1364 pushq $1
1538 1365
1539 /* 1366 /* Leave room for the "copied" frame */
1540 * Leave room for the "copied" frame 1367 subq $(5*8), %rsp
1541 */
1542 subq $(5*8), %rsp
1543 CFI_ADJUST_CFA_OFFSET 5*8
1544 1368
1545 /* Copy the stack frame to the Saved frame */ 1369 /* Copy the stack frame to the Saved frame */
1546 .rept 5 1370 .rept 5
1547 pushq_cfi 11*8(%rsp) 1371 pushq 11*8(%rsp)
1548 .endr 1372 .endr
1549 CFI_DEF_CFA_OFFSET 5*8
1550 1373
1551 /* Everything up to here is safe from nested NMIs */ 1374 /* Everything up to here is safe from nested NMIs */
1552 1375
@@ -1565,16 +1388,14 @@ repeat_nmi:
1565 * is benign for the non-repeat case, where 1 was pushed just above 1388 * is benign for the non-repeat case, where 1 was pushed just above
1566 * to this very stack slot). 1389 * to this very stack slot).
1567 */ 1390 */
1568 movq $1, 10*8(%rsp) 1391 movq $1, 10*8(%rsp)
1569 1392
1570 /* Make another copy, this one may be modified by nested NMIs */ 1393 /* Make another copy, this one may be modified by nested NMIs */
1571 addq $(10*8), %rsp 1394 addq $(10*8), %rsp
1572 CFI_ADJUST_CFA_OFFSET -10*8
1573 .rept 5 1395 .rept 5
1574 pushq_cfi -6*8(%rsp) 1396 pushq -6*8(%rsp)
1575 .endr 1397 .endr
1576 subq $(5*8), %rsp 1398 subq $(5*8), %rsp
1577 CFI_DEF_CFA_OFFSET 5*8
1578end_repeat_nmi: 1399end_repeat_nmi:
1579 1400
1580 /* 1401 /*
@@ -1582,7 +1403,7 @@ end_repeat_nmi:
1582 * NMI if the first NMI took an exception and reset our iret stack 1403 * NMI if the first NMI took an exception and reset our iret stack
1583 * so that we repeat another NMI. 1404 * so that we repeat another NMI.
1584 */ 1405 */
1585 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1406 pushq $-1 /* ORIG_RAX: no syscall to restart */
1586 ALLOC_PT_GPREGS_ON_STACK 1407 ALLOC_PT_GPREGS_ON_STACK
1587 1408
1588 /* 1409 /*
@@ -1592,8 +1413,7 @@ end_repeat_nmi:
1592 * setting NEED_RESCHED or anything that normal interrupts and 1413 * setting NEED_RESCHED or anything that normal interrupts and
1593 * exceptions might do. 1414 * exceptions might do.
1594 */ 1415 */
1595 call paranoid_entry 1416 call paranoid_entry
1596 DEFAULT_FRAME 0
1597 1417
1598 /* 1418 /*
1599 * Save off the CR2 register. If we take a page fault in the NMI then 1419 * Save off the CR2 register. If we take a page fault in the NMI then
@@ -1604,21 +1424,21 @@ end_repeat_nmi:
1604 * origin fault. Save it off and restore it if it changes. 1424 * origin fault. Save it off and restore it if it changes.
1605 * Use the r12 callee-saved register. 1425 * Use the r12 callee-saved register.
1606 */ 1426 */
1607 movq %cr2, %r12 1427 movq %cr2, %r12
1608 1428
1609 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1429 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1610 movq %rsp,%rdi 1430 movq %rsp, %rdi
1611 movq $-1,%rsi 1431 movq $-1, %rsi
1612 call do_nmi 1432 call do_nmi
1613 1433
1614 /* Did the NMI take a page fault? Restore cr2 if it did */ 1434 /* Did the NMI take a page fault? Restore cr2 if it did */
1615 movq %cr2, %rcx 1435 movq %cr2, %rcx
1616 cmpq %rcx, %r12 1436 cmpq %rcx, %r12
1617 je 1f 1437 je 1f
1618 movq %r12, %cr2 1438 movq %r12, %cr2
16191: 14391:
1620 testl %ebx,%ebx /* swapgs needed? */ 1440 testl %ebx, %ebx /* swapgs needed? */
1621 jnz nmi_restore 1441 jnz nmi_restore
1622nmi_swapgs: 1442nmi_swapgs:
1623 SWAPGS_UNSAFE_STACK 1443 SWAPGS_UNSAFE_STACK
1624nmi_restore: 1444nmi_restore:
@@ -1628,15 +1448,11 @@ nmi_restore:
1628 REMOVE_PT_GPREGS_FROM_STACK 6*8 1448 REMOVE_PT_GPREGS_FROM_STACK 6*8
1629 1449
1630 /* Clear the NMI executing stack variable */ 1450 /* Clear the NMI executing stack variable */
1631 movq $0, 5*8(%rsp) 1451 movq $0, 5*8(%rsp)
1632 jmp irq_return 1452 INTERRUPT_RETURN
1633 CFI_ENDPROC
1634END(nmi) 1453END(nmi)
1635 1454
1636ENTRY(ignore_sysret) 1455ENTRY(ignore_sysret)
1637 CFI_STARTPROC 1456 mov $-ENOSYS, %eax
1638 mov $-ENOSYS,%eax
1639 sysret 1457 sysret
1640 CFI_ENDPROC
1641END(ignore_sysret) 1458END(ignore_sysret)
1642
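
Editor's note: in the NMI entry code above, the nested-NMI decision boils down to a range test on the interrupted RSP against the current CPU's NMI stack. Below is a minimal illustrative C sketch of that test (not kernel code); "stack_top" stands for the value the entry code loads into %rdx just before the comparison (set up outside the quoted hunk), and the EXCEPTION_STKSZ value is an assumption made only for the sketch.

/*
 * Illustrative-only C rendering of the nested-NMI stack check:
 * the NMI is treated as nested only if the interrupted RSP lies
 * within [stack_top - EXCEPTION_STKSZ, stack_top].
 */
#include <stdbool.h>

#define EXCEPTION_STKSZ 4096UL          /* assumption for this sketch only */

static bool nmi_was_nested(unsigned long interrupted_rsp,
                           unsigned long stack_top)
{
        if (interrupted_rsp > stack_top)                    /* "ja first_nmi"  */
                return false;
        if (interrupted_rsp < stack_top - EXCEPTION_STKSZ)  /* "jb first_nmi"  */
                return false;
        return true;                                        /* within the NMI stack */
}
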
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
new file mode 100644
index 000000000000..bb187a6a877c
--- /dev/null
+++ b/arch/x86/entry/entry_64_compat.S
@@ -0,0 +1,556 @@
1/*
2 * Compatibility mode system call entry point for x86-64.
3 *
4 * Copyright 2000-2002 Andi Kleen, SuSE Labs.
5 */
6#include "calling.h"
7#include <asm/asm-offsets.h>
8#include <asm/current.h>
9#include <asm/errno.h>
10#include <asm/ia32_unistd.h>
11#include <asm/thread_info.h>
12#include <asm/segment.h>
13#include <asm/irqflags.h>
14#include <asm/asm.h>
15#include <asm/smap.h>
16#include <linux/linkage.h>
17#include <linux/err.h>
18
19/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
20#include <linux/elf-em.h>
21#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
22#define __AUDIT_ARCH_LE 0x40000000
23
24#ifndef CONFIG_AUDITSYSCALL
25# define sysexit_audit ia32_ret_from_sys_call
26# define sysretl_audit ia32_ret_from_sys_call
27#endif
28
29 .section .entry.text, "ax"
30
31#ifdef CONFIG_PARAVIRT
32ENTRY(native_usergs_sysret32)
33 swapgs
34 sysretl
35ENDPROC(native_usergs_sysret32)
36#endif
37
38/*
39 * 32-bit SYSENTER instruction entry.
40 *
41 * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
42 * IF and VM in rflags are cleared (IOW: interrupts are off).
43 * SYSENTER does not save anything on the stack,
44 * and does not save old rip (!!!) and rflags.
45 *
46 * Arguments:
47 * eax system call number
48 * ebx arg1
49 * ecx arg2
50 * edx arg3
51 * esi arg4
52 * edi arg5
53 * ebp user stack
54 * 0(%ebp) arg6
55 *
56 * This is purely a fast path. For anything complicated we use the int 0x80
57 * path below. We set up a complete hardware stack frame to share code
58 * with the int 0x80 path.
59 */
60ENTRY(entry_SYSENTER_compat)
61 /*
62 * Interrupts are off on entry.
63 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
64 * it is too small to ever cause noticeable irq latency.
65 */
66 SWAPGS_UNSAFE_STACK
67 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
68 ENABLE_INTERRUPTS(CLBR_NONE)
69
70 /* Zero-extending 32-bit regs, do not remove */
71 movl %ebp, %ebp
72 movl %eax, %eax
73
74 movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
75
76 /* Construct struct pt_regs on stack */
77 pushq $__USER32_DS /* pt_regs->ss */
78 pushq %rbp /* pt_regs->sp */
79 pushfq /* pt_regs->flags */
80 pushq $__USER32_CS /* pt_regs->cs */
81 pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */
82 pushq %rax /* pt_regs->orig_ax */
83 pushq %rdi /* pt_regs->di */
84 pushq %rsi /* pt_regs->si */
85 pushq %rdx /* pt_regs->dx */
86 pushq %rcx /* pt_regs->cx */
87 pushq $-ENOSYS /* pt_regs->ax */
88 cld
89 sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
90
91 /*
92 * no need to do an access_ok check here because rbp has been
93 * 32-bit zero extended
94 */
95 ASM_STAC
961: movl (%rbp), %ebp
97 _ASM_EXTABLE(1b, ia32_badarg)
98 ASM_CLAC
99
100 /*
101 * Sysenter doesn't filter flags, so we need to clear NT
102 * ourselves. To save a few cycles, we can check whether
103 * NT was set instead of doing an unconditional popfq.
104 */
105 testl $X86_EFLAGS_NT, EFLAGS(%rsp)
106 jnz sysenter_fix_flags
107sysenter_flags_fixed:
108
109 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
110 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
111 jnz sysenter_tracesys
112
113sysenter_do_call:
114 /* 32-bit syscall -> 64-bit C ABI argument conversion */
115 movl %edi, %r8d /* arg5 */
116 movl %ebp, %r9d /* arg6 */
117 xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
118 movl %ebx, %edi /* arg1 */
119 movl %edx, %edx /* arg3 (zero extension) */
120sysenter_dispatch:
121 cmpq $(IA32_NR_syscalls-1), %rax
122 ja 1f
123 call *ia32_sys_call_table(, %rax, 8)
124 movq %rax, RAX(%rsp)
1251:
126 DISABLE_INTERRUPTS(CLBR_NONE)
127 TRACE_IRQS_OFF
128 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
129 jnz sysexit_audit
130sysexit_from_sys_call:
131 /*
132 * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
133 * NMI between STI and SYSEXIT has poorly specified behavior,
 134 * and an NMI followed by an IRQ with usergs is fatal. So
135 * we just pretend we're using SYSEXIT but we really use
136 * SYSRETL instead.
137 *
138 * This code path is still called 'sysexit' because it pairs
139 * with 'sysenter' and it uses the SYSENTER calling convention.
140 */
141 andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
142 movl RIP(%rsp), %ecx /* User %eip */
143 RESTORE_RSI_RDI
144 xorl %edx, %edx /* Do not leak kernel information */
145 xorq %r8, %r8
146 xorq %r9, %r9
147 xorq %r10, %r10
148 movl EFLAGS(%rsp), %r11d /* User eflags */
149 TRACE_IRQS_ON
150
151 /*
152 * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
153 * since it avoids a dicey window with interrupts enabled.
154 */
155 movl RSP(%rsp), %esp
156
157 /*
158 * USERGS_SYSRET32 does:
159 * gsbase = user's gs base
160 * eip = ecx
161 * rflags = r11
162 * cs = __USER32_CS
163 * ss = __USER_DS
164 *
165 * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
166 *
167 * pop %ebp
168 * pop %edx
169 * pop %ecx
170 *
171 * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
172 * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
173 * address (already known to user code), and R12-R15 are
174 * callee-saved and therefore don't contain any interesting
175 * kernel data.
176 */
177 USERGS_SYSRET32
178
179#ifdef CONFIG_AUDITSYSCALL
180 .macro auditsys_entry_common
181 /*
182 * At this point, registers hold syscall args in the 32-bit syscall ABI:
183 * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,EBP.
184 *
185 * We want to pass them to __audit_syscall_entry(), which is a 64-bit
186 * C function with 5 parameters, so shuffle them to match what
187 * the function expects: RDI,RSI,RDX,RCX,R8.
188 */
189 movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */
190 xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */
191 /* arg3 (RDX) <= 2nd syscall arg (ECX) */
192 movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */
193 movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */
194 call __audit_syscall_entry
195
196 /*
197 * We are going to jump back to the syscall dispatch code.
198 * Prepare syscall args as required by the 64-bit C ABI.
199 * Registers clobbered by __audit_syscall_entry() are
200 * loaded from pt_regs on stack:
201 */
202 movl ORIG_RAX(%rsp), %eax /* syscall number */
203 movl %ebx, %edi /* arg1 */
204 movl RCX(%rsp), %esi /* arg2 */
205 movl RDX(%rsp), %edx /* arg3 */
206 movl RSI(%rsp), %ecx /* arg4 */
207 movl RDI(%rsp), %r8d /* arg5 */
208 movl %ebp, %r9d /* arg6 */
209 .endm
210
211 .macro auditsys_exit exit
212 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
213 jnz ia32_ret_from_sys_call
214 TRACE_IRQS_ON
215 ENABLE_INTERRUPTS(CLBR_NONE)
216 movl %eax, %esi /* second arg, syscall return value */
217 cmpl $-MAX_ERRNO, %eax /* is it an error ? */
218 jbe 1f
219 movslq %eax, %rsi /* if error sign extend to 64 bits */
2201: setbe %al /* 1 if error, 0 if not */
221 movzbl %al, %edi /* zero-extend that into %edi */
222 call __audit_syscall_exit
223 movq RAX(%rsp), %rax /* reload syscall return value */
224 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
225 DISABLE_INTERRUPTS(CLBR_NONE)
226 TRACE_IRQS_OFF
227 testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
228 jz \exit
229 xorl %eax, %eax /* Do not leak kernel information */
230 movq %rax, R11(%rsp)
231 movq %rax, R10(%rsp)
232 movq %rax, R9(%rsp)
233 movq %rax, R8(%rsp)
234 jmp int_with_check
235 .endm
236
237sysenter_auditsys:
238 auditsys_entry_common
239 jmp sysenter_dispatch
240
241sysexit_audit:
242 auditsys_exit sysexit_from_sys_call
243#endif
244
245sysenter_fix_flags:
246 pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
247 popfq
248 jmp sysenter_flags_fixed
249
250sysenter_tracesys:
251#ifdef CONFIG_AUDITSYSCALL
252 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
253 jz sysenter_auditsys
254#endif
255 SAVE_EXTRA_REGS
256 xorl %eax, %eax /* Do not leak kernel information */
257 movq %rax, R11(%rsp)
258 movq %rax, R10(%rsp)
259 movq %rax, R9(%rsp)
260 movq %rax, R8(%rsp)
261 movq %rsp, %rdi /* &pt_regs -> arg1 */
262 call syscall_trace_enter
263
264 /* Reload arg registers from stack. (see sysenter_tracesys) */
265 movl RCX(%rsp), %ecx
266 movl RDX(%rsp), %edx
267 movl RSI(%rsp), %esi
268 movl RDI(%rsp), %edi
269 movl %eax, %eax /* zero extension */
270
271 RESTORE_EXTRA_REGS
272 jmp sysenter_do_call
273ENDPROC(entry_SYSENTER_compat)
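
Editor's note: the sysenter_do_call/sysenter_dispatch sequence above is the same argument shuffle reused by the SYSCALL and int 0x80 paths further down: the six i386 syscall arguments arrive in EBX, ECX, EDX, ESI, EDI, EBP and must be presented in x86-64 C calling-convention order (RDI, RSI, RDX, RCX, R8, R9) before the table call. The following is a rough, hedged C rendering with invented names; note that in the real entry code pt_regs->ax was pre-set to -ENOSYS, so out-of-range numbers simply skip the call, whereas the sketch returns the error directly.

/*
 * Rough illustrative C equivalent of sysenter_dispatch (names invented
 * for the sketch).  The single unsigned comparison also rejects
 * "negative" syscall numbers in EAX.
 */
typedef long (*compat_sys_call_t)(unsigned long, unsigned long, unsigned long,
                                  unsigned long, unsigned long, unsigned long);

#define SKETCH_ENOSYS 38        /* value of ENOSYS on Linux */

static long compat_dispatch(unsigned long nr,                   /* zero-extended EAX */
                            unsigned int ebx, unsigned int ecx, /* arg1, arg2 */
                            unsigned int edx, unsigned int esi, /* arg3, arg4 */
                            unsigned int edi, unsigned int ebp, /* arg5, arg6 */
                            const compat_sys_call_t *table,
                            unsigned long nr_syscalls)
{
        if (nr > nr_syscalls - 1)       /* cmpq $(IA32_NR_syscalls-1), %rax; ja 1f */
                return -SKETCH_ENOSYS;

        /* arg1..arg6 handed over in RDI, RSI, RDX, RCX, R8, R9 order */
        return table[nr](ebx, ecx, edx, esi, edi, ebp);
}
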
274
275/*
276 * 32-bit SYSCALL instruction entry.
277 *
278 * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
279 * then loads new ss, cs, and rip from previously programmed MSRs.
280 * rflags gets masked by a value from another MSR (so CLD and CLAC
281 * are not needed). SYSCALL does not save anything on the stack
282 * and does not change rsp.
283 *
284 * Note: rflags saving+masking-with-MSR happens only in Long mode
285 * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
286 * Don't get confused: rflags saving+masking depends on Long Mode Active bit
287 * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
288 * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
289 *
290 * Arguments:
291 * eax system call number
292 * ecx return address
293 * ebx arg1
294 * ebp arg2 (note: not saved in the stack frame, should not be touched)
295 * edx arg3
296 * esi arg4
297 * edi arg5
298 * esp user stack
299 * 0(%esp) arg6
300 *
301 * This is purely a fast path. For anything complicated we use the int 0x80
302 * path below. We set up a complete hardware stack frame to share code
303 * with the int 0x80 path.
304 */
305ENTRY(entry_SYSCALL_compat)
306 /*
307 * Interrupts are off on entry.
308 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
309 * it is too small to ever cause noticeable irq latency.
310 */
311 SWAPGS_UNSAFE_STACK
312 movl %esp, %r8d
313 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
314 ENABLE_INTERRUPTS(CLBR_NONE)
315
316 /* Zero-extending 32-bit regs, do not remove */
317 movl %eax, %eax
318
319 /* Construct struct pt_regs on stack */
320 pushq $__USER32_DS /* pt_regs->ss */
321 pushq %r8 /* pt_regs->sp */
322 pushq %r11 /* pt_regs->flags */
323 pushq $__USER32_CS /* pt_regs->cs */
324 pushq %rcx /* pt_regs->ip */
325 pushq %rax /* pt_regs->orig_ax */
326 pushq %rdi /* pt_regs->di */
327 pushq %rsi /* pt_regs->si */
328 pushq %rdx /* pt_regs->dx */
329 pushq %rbp /* pt_regs->cx */
330 movl %ebp, %ecx
331 pushq $-ENOSYS /* pt_regs->ax */
332 sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
333
334 /*
335 * No need to do an access_ok check here because r8 has been
336 * 32-bit zero extended:
337 */
338 ASM_STAC
3391: movl (%r8), %ebp
340 _ASM_EXTABLE(1b, ia32_badarg)
341 ASM_CLAC
342 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
343 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
344 jnz cstar_tracesys
345
346cstar_do_call:
347 /* 32-bit syscall -> 64-bit C ABI argument conversion */
348 movl %edi, %r8d /* arg5 */
349 movl %ebp, %r9d /* arg6 */
350 xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
351 movl %ebx, %edi /* arg1 */
352 movl %edx, %edx /* arg3 (zero extension) */
353
354cstar_dispatch:
355 cmpq $(IA32_NR_syscalls-1), %rax
356 ja 1f
357
358 call *ia32_sys_call_table(, %rax, 8)
359 movq %rax, RAX(%rsp)
3601:
361 movl RCX(%rsp), %ebp
362 DISABLE_INTERRUPTS(CLBR_NONE)
363 TRACE_IRQS_OFF
364 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
365 jnz sysretl_audit
366
367sysretl_from_sys_call:
368 andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
369 RESTORE_RSI_RDI_RDX
370 movl RIP(%rsp), %ecx
371 movl EFLAGS(%rsp), %r11d
372 xorq %r10, %r10
373 xorq %r9, %r9
374 xorq %r8, %r8
375 TRACE_IRQS_ON
376 movl RSP(%rsp), %esp
377 /*
378 * 64-bit->32-bit SYSRET restores eip from ecx,
379 * eflags from r11 (but RF and VM bits are forced to 0),
380 * cs and ss are loaded from MSRs.
381 * (Note: 32-bit->32-bit SYSRET is different: since r11
382 * does not exist, it merely sets eflags.IF=1).
383 *
384 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
385 * descriptor is not reinitialized. This means that we must
386 * avoid SYSRET with SS == NULL, which could happen if we schedule,
387 * exit the kernel, and re-enter using an interrupt vector. (All
388 * interrupt entries on x86_64 set SS to NULL.) We prevent that
389 * from happening by reloading SS in __switch_to.
390 */
391 USERGS_SYSRET32
392
393#ifdef CONFIG_AUDITSYSCALL
394cstar_auditsys:
395 auditsys_entry_common
396 jmp cstar_dispatch
397
398sysretl_audit:
399 auditsys_exit sysretl_from_sys_call
400#endif
401
402cstar_tracesys:
403#ifdef CONFIG_AUDITSYSCALL
404 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
405 jz cstar_auditsys
406#endif
407 SAVE_EXTRA_REGS
408 xorl %eax, %eax /* Do not leak kernel information */
409 movq %rax, R11(%rsp)
410 movq %rax, R10(%rsp)
411 movq %rax, R9(%rsp)
412 movq %rax, R8(%rsp)
413 movq %rsp, %rdi /* &pt_regs -> arg1 */
414 call syscall_trace_enter
415
416 /* Reload arg registers from stack. (see sysenter_tracesys) */
417 movl RCX(%rsp), %ecx
418 movl RDX(%rsp), %edx
419 movl RSI(%rsp), %esi
420 movl RDI(%rsp), %edi
421 movl %eax, %eax /* zero extension */
422
423 RESTORE_EXTRA_REGS
424 jmp cstar_do_call
425END(entry_SYSCALL_compat)
426
427ia32_badarg:
428 ASM_CLAC
429 movq $-EFAULT, RAX(%rsp)
430ia32_ret_from_sys_call:
431 xorl %eax, %eax /* Do not leak kernel information */
432 movq %rax, R11(%rsp)
433 movq %rax, R10(%rsp)
434 movq %rax, R9(%rsp)
435 movq %rax, R8(%rsp)
436 jmp int_ret_from_sys_call
437
438/*
439 * Emulated IA32 system calls via int 0x80.
440 *
441 * Arguments:
442 * eax system call number
443 * ebx arg1
444 * ecx arg2
445 * edx arg3
446 * esi arg4
447 * edi arg5
448 * ebp arg6 (note: not saved in the stack frame, should not be touched)
449 *
450 * Notes:
451 * Uses the same stack frame as the x86-64 version.
452 * All registers except eax must be saved (but ptrace may violate that).
453 * Arguments are zero extended. For system calls that want sign extension and
454 * take long arguments a wrapper is needed. Most calls can just be called
455 * directly.
456 * Assumes it is only called from user space and entered with interrupts off.
457 */
458
459ENTRY(entry_INT80_compat)
460 /*
461 * Interrupts are off on entry.
462 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
463 * it is too small to ever cause noticeable irq latency.
464 */
465 PARAVIRT_ADJUST_EXCEPTION_FRAME
466 SWAPGS
467 ENABLE_INTERRUPTS(CLBR_NONE)
468
469 /* Zero-extending 32-bit regs, do not remove */
470 movl %eax, %eax
471
472 /* Construct struct pt_regs on stack (iret frame is already on stack) */
473 pushq %rax /* pt_regs->orig_ax */
474 pushq %rdi /* pt_regs->di */
475 pushq %rsi /* pt_regs->si */
476 pushq %rdx /* pt_regs->dx */
477 pushq %rcx /* pt_regs->cx */
478 pushq $-ENOSYS /* pt_regs->ax */
479 pushq $0 /* pt_regs->r8 */
480 pushq $0 /* pt_regs->r9 */
481 pushq $0 /* pt_regs->r10 */
482 pushq $0 /* pt_regs->r11 */
483 cld
484 sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
485
486 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
487 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
488 jnz ia32_tracesys
489
490ia32_do_call:
491 /* 32-bit syscall -> 64-bit C ABI argument conversion */
492 movl %edi, %r8d /* arg5 */
493 movl %ebp, %r9d /* arg6 */
494 xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
495 movl %ebx, %edi /* arg1 */
496 movl %edx, %edx /* arg3 (zero extension) */
497 cmpq $(IA32_NR_syscalls-1), %rax
498 ja 1f
499
500 call *ia32_sys_call_table(, %rax, 8)
501 movq %rax, RAX(%rsp)
5021:
503 jmp int_ret_from_sys_call
504
505ia32_tracesys:
506 SAVE_EXTRA_REGS
507 movq %rsp, %rdi /* &pt_regs -> arg1 */
508 call syscall_trace_enter
509 /*
510 * Reload arg registers from stack in case ptrace changed them.
511 * Don't reload %eax because syscall_trace_enter() returned
512 * the %rax value we should see. But do truncate it to 32 bits.
513 * If it's -1 to make us punt the syscall, then (u32)-1 is still
514 * an appropriately invalid value.
515 */
516 movl RCX(%rsp), %ecx
517 movl RDX(%rsp), %edx
518 movl RSI(%rsp), %esi
519 movl RDI(%rsp), %edi
520 movl %eax, %eax /* zero extension */
521 RESTORE_EXTRA_REGS
522 jmp ia32_do_call
523END(entry_INT80_compat)
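
Editor's note: the header comment for entry_INT80_compat points out that arguments arrive zero-extended and that syscalls taking signed long arguments need a wrapper. A hypothetical example of such a wrapper follows; the names are invented and it is not one of the real sys32_* helpers.

/*
 * Hypothetical wrapper of the kind the notes above refer to (invented
 * names): the low 32 bits arrive zero-extended, so a signed 32-bit
 * argument must be sign-extended by hand before calling the native
 * 64-bit implementation.
 */
long example_native_seek(unsigned int fd, long offset, unsigned int whence);

long example_compat_seek(unsigned int fd, unsigned int offset_lo32,
                         unsigned int whence)
{
        /* reinterpret the zero-extended value as signed, then widen */
        return example_native_seek(fd, (long)(int)offset_lo32, whence);
}
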
524
525 .macro PTREGSCALL label, func
526 ALIGN
527GLOBAL(\label)
528 leaq \func(%rip), %rax
529 jmp ia32_ptregs_common
530 .endm
531
532 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
533 PTREGSCALL stub32_sigreturn, sys32_sigreturn
534 PTREGSCALL stub32_fork, sys_fork
535 PTREGSCALL stub32_vfork, sys_vfork
536
537 ALIGN
538GLOBAL(stub32_clone)
539 leaq sys_clone(%rip), %rax
540 /*
541 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
542 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
543 *
544 * The native 64-bit kernel's sys_clone() implements the latter,
545 * so we need to swap arguments here before calling it:
546 */
547 xchg %r8, %rcx
548 jmp ia32_ptregs_common
549
550 ALIGN
551ia32_ptregs_common:
552 SAVE_EXTRA_REGS 8
553 call *%rax
554 RESTORE_EXTRA_REGS 8
555 ret
556END(ia32_ptregs_common)
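
Editor's note: stub32_clone's "xchg %r8, %rcx" exists purely because of the argument-order difference described in its comment: in the 64-bit C calling convention arg4 travels in RCX and arg5 in R8, so swapping those two registers swaps the last two clone() arguments. A small illustrative C shim showing the same reordering; all names are invented, and the leading parameters (elided with "..." in the source comment) are only placeholders.

/*
 * Illustrative shim (invented names) equivalent to what the
 * "xchg %r8, %rcx" achieves: the 32-bit ABI passes
 * (..., tls_val, child_tidptr) while the native 64-bit clone expects
 * (..., child_tidptr, tls_val), so the last two arguments are swapped
 * before calling the native implementation.
 */
long example_sys_clone64(unsigned long arg1, unsigned long arg2,
                         void *arg3, void *child_tidptr,
                         unsigned long tls_val);

long example_clone_compat(unsigned long arg1, unsigned long arg2,
                          void *arg3, unsigned long tls_val,
                          void *child_tidptr)
{
        /* last two arguments swapped, everything else passed straight through */
        return example_sys_clone64(arg1, arg2, arg3, child_tidptr, tls_val);
}
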
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/entry/syscall_32.c
index 3777189c4a19..8ea34f94e973 100644
--- a/arch/x86/kernel/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -10,7 +10,7 @@
10#else 10#else
11#define SYM(sym, compat) sym 11#define SYM(sym, compat) sym
12#define ia32_sys_call_table sys_call_table 12#define ia32_sys_call_table sys_call_table
13#define __NR_ia32_syscall_max __NR_syscall_max 13#define __NR_syscall_compat_max __NR_syscall_max
14#endif 14#endif
15 15
16#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ; 16#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
@@ -23,11 +23,11 @@ typedef asmlinkage void (*sys_call_ptr_t)(void);
23 23
24extern asmlinkage void sys_ni_syscall(void); 24extern asmlinkage void sys_ni_syscall(void);
25 25
26__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { 26__visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = {
27 /* 27 /*
28 * Smells like a compiler bug -- it doesn't work 28 * Smells like a compiler bug -- it doesn't work
29 * when the & below is removed. 29 * when the & below is removed.
30 */ 30 */
31 [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall, 31 [0 ... __NR_syscall_compat_max] = &sys_ni_syscall,
32#include <asm/syscalls_32.h> 32#include <asm/syscalls_32.h>
33}; 33};
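
Editor's note: the table definition above relies on GCC's range designated initializers plus the rule that a later designator overrides an earlier one, so every slot starts out as sys_ni_syscall and the generated asm/syscalls_32.h entries then fill in the real handlers. A standalone sketch of the same idiom, with toy names in ordinary userspace C:

/*
 * Toy, self-contained demonstration of the initializer idiom used by
 * ia32_sys_call_table: a GNU C range designator fills every slot with
 * a default, and later designators override individual slots.
 */
#include <stdio.h>

typedef long (*handler_t)(void);

static long not_implemented(void) { return -1; }   /* plays sys_ni_syscall */
static long do_something(void)    { return 42; }

#define TABLE_MAX 15

static const handler_t table[TABLE_MAX + 1] = {
        [0 ... TABLE_MAX] = &not_implemented,   /* default for every slot */
        [3]               = &do_something,      /* later entry overrides slot 3 */
};

int main(void)
{
        printf("%ld %ld\n", table[2](), table[3]());   /* prints: -1 42 */
        return 0;
}
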
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/entry/syscall_64.c
index 4ac730b37f0b..4ac730b37f0b 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index a55abb9f6c5e..57aa59fd140c 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -1,5 +1,5 @@
1out := $(obj)/../include/generated/asm 1out := $(obj)/../../include/generated/asm
2uapi := $(obj)/../include/generated/uapi/asm 2uapi := $(obj)/../../include/generated/uapi/asm
3 3
4# Create output directory if not already present 4# Create output directory if not already present
5_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ 5_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ef8187f9d28d..ef8187f9d28d 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 9ef32d5f1b19..9ef32d5f1b19 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
diff --git a/arch/x86/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh
index 31fd5f1f38f7..31fd5f1f38f7 100644
--- a/arch/x86/syscalls/syscallhdr.sh
+++ b/arch/x86/entry/syscalls/syscallhdr.sh
diff --git a/arch/x86/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 0e7f8ec071e7..0e7f8ec071e7 100644
--- a/arch/x86/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/entry/thunk_32.S
index 5eb715087b80..e9acf5f4fc92 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -6,16 +6,14 @@
6 */ 6 */
7 #include <linux/linkage.h> 7 #include <linux/linkage.h>
8 #include <asm/asm.h> 8 #include <asm/asm.h>
9 #include <asm/dwarf2.h>
10 9
11 /* put return address in eax (arg1) */ 10 /* put return address in eax (arg1) */
12 .macro THUNK name, func, put_ret_addr_in_eax=0 11 .macro THUNK name, func, put_ret_addr_in_eax=0
13 .globl \name 12 .globl \name
14\name: 13\name:
15 CFI_STARTPROC 14 pushl %eax
16 pushl_cfi_reg eax 15 pushl %ecx
17 pushl_cfi_reg ecx 16 pushl %edx
18 pushl_cfi_reg edx
19 17
20 .if \put_ret_addr_in_eax 18 .if \put_ret_addr_in_eax
21 /* Place EIP in the arg1 */ 19 /* Place EIP in the arg1 */
@@ -23,11 +21,10 @@
23 .endif 21 .endif
24 22
25 call \func 23 call \func
26 popl_cfi_reg edx 24 popl %edx
27 popl_cfi_reg ecx 25 popl %ecx
28 popl_cfi_reg eax 26 popl %eax
29 ret 27 ret
30 CFI_ENDPROC
31 _ASM_NOKPROBE(\name) 28 _ASM_NOKPROBE(\name)
32 .endm 29 .endm
33 30
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/entry/thunk_64.S
index f89ba4e93025..3e95681b4e2d 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -6,35 +6,32 @@
6 * Subject to the GNU public license, v.2. No warranty of any kind. 6 * Subject to the GNU public license, v.2. No warranty of any kind.
7 */ 7 */
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/dwarf2.h> 9#include "calling.h"
10#include <asm/calling.h>
11#include <asm/asm.h> 10#include <asm/asm.h>
12 11
13 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ 12 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
14 .macro THUNK name, func, put_ret_addr_in_rdi=0 13 .macro THUNK name, func, put_ret_addr_in_rdi=0
15 .globl \name 14 .globl \name
16\name: 15\name:
17 CFI_STARTPROC
18 16
19 /* this one pushes 9 elems, the next one would be %rIP */ 17 /* this one pushes 9 elems, the next one would be %rIP */
20 pushq_cfi_reg rdi 18 pushq %rdi
21 pushq_cfi_reg rsi 19 pushq %rsi
22 pushq_cfi_reg rdx 20 pushq %rdx
23 pushq_cfi_reg rcx 21 pushq %rcx
24 pushq_cfi_reg rax 22 pushq %rax
25 pushq_cfi_reg r8 23 pushq %r8
26 pushq_cfi_reg r9 24 pushq %r9
27 pushq_cfi_reg r10 25 pushq %r10
28 pushq_cfi_reg r11 26 pushq %r11
29 27
30 .if \put_ret_addr_in_rdi 28 .if \put_ret_addr_in_rdi
31 /* 9*8(%rsp) is return addr on stack */ 29 /* 9*8(%rsp) is return addr on stack */
32 movq_cfi_restore 9*8, rdi 30 movq 9*8(%rsp), %rdi
33 .endif 31 .endif
34 32
35 call \func 33 call \func
36 jmp restore 34 jmp restore
37 CFI_ENDPROC
38 _ASM_NOKPROBE(\name) 35 _ASM_NOKPROBE(\name)
39 .endm 36 .endm
40 37
@@ -57,19 +54,16 @@
57#if defined(CONFIG_TRACE_IRQFLAGS) \ 54#if defined(CONFIG_TRACE_IRQFLAGS) \
58 || defined(CONFIG_DEBUG_LOCK_ALLOC) \ 55 || defined(CONFIG_DEBUG_LOCK_ALLOC) \
59 || defined(CONFIG_PREEMPT) 56 || defined(CONFIG_PREEMPT)
60 CFI_STARTPROC
61 CFI_ADJUST_CFA_OFFSET 9*8
62restore: 57restore:
63 popq_cfi_reg r11 58 popq %r11
64 popq_cfi_reg r10 59 popq %r10
65 popq_cfi_reg r9 60 popq %r9
66 popq_cfi_reg r8 61 popq %r8
67 popq_cfi_reg rax 62 popq %rax
68 popq_cfi_reg rcx 63 popq %rcx
69 popq_cfi_reg rdx 64 popq %rdx
70 popq_cfi_reg rsi 65 popq %rsi
71 popq_cfi_reg rdi 66 popq %rdi
72 ret 67 ret
73 CFI_ENDPROC
74 _ASM_NOKPROBE(restore) 68 _ASM_NOKPROBE(restore)
75#endif 69#endif
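
Editor's note: the put_ret_addr_in_rdi variant above copies the thunk's own return address (9*8(%rsp), just past the nine saved registers) into the first argument register, so the wrapped C function learns which instruction invoked the thunk. A loose userspace C analogue of that idea, using GCC's __builtin_return_address() instead of stack arithmetic; the names are invented.

/*
 * Loose analogue (invented names, plain userspace C) of the
 * put_ret_addr_in_rdi thunks: the wrapper hands the wrapped function
 * the address its caller will return to, so a tracing hook can record
 * who called it.
 */
#include <stdio.h>

static void trace_hook(void *caller_ip)
{
        printf("invoked from %p\n", caller_ip);
}

static void traced_thunk(void)
{
        /* the asm thunk reads 9*8(%rsp); GCC offers a builtin for the same */
        trace_hook(__builtin_return_address(0));
}

int main(void)
{
        traced_thunk();
        return 0;
}
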
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore
index aae8ffdd5880..aae8ffdd5880 100644
--- a/arch/x86/vdso/.gitignore
+++ b/arch/x86/entry/vdso/.gitignore
diff --git a/arch/x86/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index e97032069f88..e97032069f88 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
diff --git a/arch/x86/vdso/checkundef.sh b/arch/x86/entry/vdso/checkundef.sh
index 7ee90a9b549d..7ee90a9b549d 100755
--- a/arch/x86/vdso/checkundef.sh
+++ b/arch/x86/entry/vdso/checkundef.sh
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 9793322751e0..9793322751e0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index de2c921025f5..de2c921025f5 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
diff --git a/arch/x86/vdso/vdso-note.S b/arch/x86/entry/vdso/vdso-note.S
index 79a071e4357e..79a071e4357e 100644
--- a/arch/x86/vdso/vdso-note.S
+++ b/arch/x86/entry/vdso/vdso-note.S
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index 6807932643c2..6807932643c2 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 8627db24a7f6..8627db24a7f6 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 0224987556ce..0224987556ce 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index e904c270573b..e904c270573b 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
diff --git a/arch/x86/vdso/vdso32/.gitignore b/arch/x86/entry/vdso/vdso32/.gitignore
index e45fba9d0ced..e45fba9d0ced 100644
--- a/arch/x86/vdso/vdso32/.gitignore
+++ b/arch/x86/entry/vdso/vdso32/.gitignore
diff --git a/arch/x86/vdso/vdso32/int80.S b/arch/x86/entry/vdso/vdso32/int80.S
index b15b7c01aedb..b15b7c01aedb 100644
--- a/arch/x86/vdso/vdso32/int80.S
+++ b/arch/x86/entry/vdso/vdso32/int80.S
diff --git a/arch/x86/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S
index c83f25734696..c83f25734696 100644
--- a/arch/x86/vdso/vdso32/note.S
+++ b/arch/x86/entry/vdso/vdso32/note.S
diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S
index d7ec4e251c0a..d7ec4e251c0a 100644
--- a/arch/x86/vdso/vdso32/sigreturn.S
+++ b/arch/x86/entry/vdso/vdso32/sigreturn.S
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/entry/vdso/vdso32/syscall.S
index 6b286bb5251c..6b286bb5251c 100644
--- a/arch/x86/vdso/vdso32/syscall.S
+++ b/arch/x86/entry/vdso/vdso32/syscall.S
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/entry/vdso/vdso32/sysenter.S
index e354bceee0e0..e354bceee0e0 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/entry/vdso/vdso32/sysenter.S
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 175cc72c0f68..175cc72c0f68 100644
--- a/arch/x86/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
diff --git a/arch/x86/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
index 541468e25265..541468e25265 100644
--- a/arch/x86/vdso/vdso32/vdso-fakesections.c
+++ b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 31056cf294bf..31056cf294bf 100644
--- a/arch/x86/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 697c11ece90c..697c11ece90c 100644
--- a/arch/x86/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index 8ec3d1f4ce9a..8ec3d1f4ce9a 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
diff --git a/arch/x86/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 1c9f750c3859..1c9f750c3859 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile
new file mode 100644
index 000000000000..a9f4856f622a
--- /dev/null
+++ b/arch/x86/entry/vsyscall/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the x86 low level vsyscall code
3#
4obj-y := vsyscall_gtod.o
5
6obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
7
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 2dcc6ff6fdcc..2dcc6ff6fdcc 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
index c9596a9af159..c9596a9af159 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 51e330416995..51e330416995 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/entry/vsyscall/vsyscall_trace.h
index a8b2edec54fe..9dd7359a38a8 100644
--- a/arch/x86/kernel/vsyscall_trace.h
+++ b/arch/x86/entry/vsyscall/vsyscall_trace.h
@@ -24,6 +24,6 @@ TRACE_EVENT(emulate_vsyscall,
24#endif 24#endif
25 25
26#undef TRACE_INCLUDE_PATH 26#undef TRACE_INCLUDE_PATH
27#define TRACE_INCLUDE_PATH ../../arch/x86/kernel 27#define TRACE_INCLUDE_PATH ../../arch/x86/entry/vsyscall/
28#define TRACE_INCLUDE_FILE vsyscall_trace 28#define TRACE_INCLUDE_FILE vsyscall_trace
29#include <trace/define_trace.h> 29#include <trace/define_trace.h>
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index bb635c641869..cd4339bae066 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the ia32 kernel emulation subsystem. 2# Makefile for the ia32 kernel emulation subsystem.
3# 3#
4 4
5obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o 5obj-$(CONFIG_IA32_EMULATION) := sys_ia32.o ia32_signal.o
6 6
7obj-$(CONFIG_IA32_AOUT) += ia32_aout.o 7obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
8 8
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
deleted file mode 100644
index 63450a596800..000000000000
--- a/arch/x86/ia32/ia32entry.S
+++ /dev/null
@@ -1,591 +0,0 @@
1/*
2 * Compatibility mode system call entry point for x86-64.
3 *
4 * Copyright 2000-2002 Andi Kleen, SuSE Labs.
5 */
6
7#include <asm/dwarf2.h>
8#include <asm/calling.h>
9#include <asm/asm-offsets.h>
10#include <asm/current.h>
11#include <asm/errno.h>
12#include <asm/ia32_unistd.h>
13#include <asm/thread_info.h>
14#include <asm/segment.h>
15#include <asm/irqflags.h>
16#include <asm/asm.h>
17#include <asm/smap.h>
18#include <linux/linkage.h>
19#include <linux/err.h>
20
21/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
22#include <linux/elf-em.h>
23#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
24#define __AUDIT_ARCH_LE 0x40000000
25
26#ifndef CONFIG_AUDITSYSCALL
27#define sysexit_audit ia32_ret_from_sys_call
28#define sysretl_audit ia32_ret_from_sys_call
29#endif
30
31 .section .entry.text, "ax"
32
33 /* clobbers %rax */
34 .macro CLEAR_RREGS _r9=rax
35 xorl %eax,%eax
36 movq %rax,R11(%rsp)
37 movq %rax,R10(%rsp)
38 movq %\_r9,R9(%rsp)
39 movq %rax,R8(%rsp)
40 .endm
41
42 /*
43 * Reload arg registers from stack in case ptrace changed them.
44 * We don't reload %eax because syscall_trace_enter() returned
45 * the %rax value we should see. Instead, we just truncate that
46 * value to 32 bits again as we did on entry from user mode.
47 * If it's a new value set by user_regset during entry tracing,
48 * this matches the normal truncation of the user-mode value.
49 * If it's -1 to make us punt the syscall, then (u32)-1 is still
50 * an appropriately invalid value.
51 */
52 .macro LOAD_ARGS32 _r9=0
53 .if \_r9
54 movl R9(%rsp),%r9d
55 .endif
56 movl RCX(%rsp),%ecx
57 movl RDX(%rsp),%edx
58 movl RSI(%rsp),%esi
59 movl RDI(%rsp),%edi
60 movl %eax,%eax /* zero extension */
61 .endm
62
63 .macro CFI_STARTPROC32 simple
64 CFI_STARTPROC \simple
65 CFI_UNDEFINED r8
66 CFI_UNDEFINED r9
67 CFI_UNDEFINED r10
68 CFI_UNDEFINED r11
69 CFI_UNDEFINED r12
70 CFI_UNDEFINED r13
71 CFI_UNDEFINED r14
72 CFI_UNDEFINED r15
73 .endm
74
75#ifdef CONFIG_PARAVIRT
76ENTRY(native_usergs_sysret32)
77 swapgs
78 sysretl
79ENDPROC(native_usergs_sysret32)
80#endif
81
82/*
83 * 32bit SYSENTER instruction entry.
84 *
85 * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
86 * IF and VM in rflags are cleared (IOW: interrupts are off).
87 * SYSENTER does not save anything on the stack,
88 * and does not save old rip (!!!) and rflags.
89 *
90 * Arguments:
91 * eax system call number
92 * ebx arg1
93 * ecx arg2
94 * edx arg3
95 * esi arg4
96 * edi arg5
97 * ebp user stack
98 * 0(%ebp) arg6
99 *
100 * This is purely a fast path. For anything complicated we use the int 0x80
101 * path below. We set up a complete hardware stack frame to share code
102 * with the int 0x80 path.
103 */
104ENTRY(ia32_sysenter_target)
105 CFI_STARTPROC32 simple
106 CFI_SIGNAL_FRAME
107 CFI_DEF_CFA rsp,0
108 CFI_REGISTER rsp,rbp
109
110 /*
111 * Interrupts are off on entry.
112 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
113 * it is too small to ever cause noticeable irq latency.
114 */
115 SWAPGS_UNSAFE_STACK
116 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
117 ENABLE_INTERRUPTS(CLBR_NONE)
118
119 /* Zero-extending 32-bit regs, do not remove */
120 movl %ebp, %ebp
121 movl %eax, %eax
122
123 movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
124 CFI_REGISTER rip,r10
125
126 /* Construct struct pt_regs on stack */
127 pushq_cfi $__USER32_DS /* pt_regs->ss */
128 pushq_cfi %rbp /* pt_regs->sp */
129 CFI_REL_OFFSET rsp,0
130 pushfq_cfi /* pt_regs->flags */
131 pushq_cfi $__USER32_CS /* pt_regs->cs */
132 pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */
133 CFI_REL_OFFSET rip,0
134 pushq_cfi_reg rax /* pt_regs->orig_ax */
135 pushq_cfi_reg rdi /* pt_regs->di */
136 pushq_cfi_reg rsi /* pt_regs->si */
137 pushq_cfi_reg rdx /* pt_regs->dx */
138 pushq_cfi_reg rcx /* pt_regs->cx */
139 pushq_cfi $-ENOSYS /* pt_regs->ax */
140 cld
141 sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
142 CFI_ADJUST_CFA_OFFSET 10*8
143
144 /*
145 * no need to do an access_ok check here because rbp has been
146 * 32bit zero extended
147 */
148 ASM_STAC
1491: movl (%rbp),%ebp
150 _ASM_EXTABLE(1b,ia32_badarg)
151 ASM_CLAC
152
153 /*
154 * Sysenter doesn't filter flags, so we need to clear NT
155 * ourselves. To save a few cycles, we can check whether
156 * NT was set instead of doing an unconditional popfq.
157 */
158 testl $X86_EFLAGS_NT,EFLAGS(%rsp)
159 jnz sysenter_fix_flags
160sysenter_flags_fixed:
161
162 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
163 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
164 CFI_REMEMBER_STATE
165 jnz sysenter_tracesys
166sysenter_do_call:
167 /* 32bit syscall -> 64bit C ABI argument conversion */
168 movl %edi,%r8d /* arg5 */
169 movl %ebp,%r9d /* arg6 */
170 xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
171 movl %ebx,%edi /* arg1 */
172 movl %edx,%edx /* arg3 (zero extension) */
173sysenter_dispatch:
174 cmpq $(IA32_NR_syscalls-1),%rax
175 ja 1f
176 call *ia32_sys_call_table(,%rax,8)
177 movq %rax,RAX(%rsp)
1781:
179 DISABLE_INTERRUPTS(CLBR_NONE)
180 TRACE_IRQS_OFF
181 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
182 jnz sysexit_audit
183sysexit_from_sys_call:
184 /*
185 * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
186 * NMI between STI and SYSEXIT has poorly specified behavior,
 187 * and an NMI followed by an IRQ with usergs is fatal. So
188 * we just pretend we're using SYSEXIT but we really use
189 * SYSRETL instead.
190 *
191 * This code path is still called 'sysexit' because it pairs
192 * with 'sysenter' and it uses the SYSENTER calling convention.
193 */
194 andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
195 movl RIP(%rsp),%ecx /* User %eip */
196 CFI_REGISTER rip,rcx
197 RESTORE_RSI_RDI
198 xorl %edx,%edx /* avoid info leaks */
199 xorq %r8,%r8
200 xorq %r9,%r9
201 xorq %r10,%r10
202 movl EFLAGS(%rsp),%r11d /* User eflags */
203 /*CFI_RESTORE rflags*/
204 TRACE_IRQS_ON
205
206 /*
207 * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
208 * since it avoids a dicey window with interrupts enabled.
209 */
210 movl RSP(%rsp),%esp
211
212 /*
213 * USERGS_SYSRET32 does:
214 * gsbase = user's gs base
215 * eip = ecx
216 * rflags = r11
217 * cs = __USER32_CS
218 * ss = __USER_DS
219 *
220 * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
221 *
222 * pop %ebp
223 * pop %edx
224 * pop %ecx
225 *
226 * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
227 * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
228 * address (already known to user code), and R12-R15 are
229 * callee-saved and therefore don't contain any interesting
230 * kernel data.
231 */
232 USERGS_SYSRET32
233
234 CFI_RESTORE_STATE
235
236#ifdef CONFIG_AUDITSYSCALL
237 .macro auditsys_entry_common
238 movl %esi,%r8d /* 5th arg: 4th syscall arg */
239 movl %ecx,%r9d /*swap with edx*/
240 movl %edx,%ecx /* 4th arg: 3rd syscall arg */
241 movl %r9d,%edx /* 3rd arg: 2nd syscall arg */
242 movl %ebx,%esi /* 2nd arg: 1st syscall arg */
243 movl %eax,%edi /* 1st arg: syscall number */
244 call __audit_syscall_entry
245 movl ORIG_RAX(%rsp),%eax /* reload syscall number */
246 movl %ebx,%edi /* reload 1st syscall arg */
247 movl RCX(%rsp),%esi /* reload 2nd syscall arg */
248 movl RDX(%rsp),%edx /* reload 3rd syscall arg */
249 movl RSI(%rsp),%ecx /* reload 4th syscall arg */
250 movl RDI(%rsp),%r8d /* reload 5th syscall arg */
251 .endm
252
253 .macro auditsys_exit exit
254 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
255 jnz ia32_ret_from_sys_call
256 TRACE_IRQS_ON
257 ENABLE_INTERRUPTS(CLBR_NONE)
258 movl %eax,%esi /* second arg, syscall return value */
259 cmpl $-MAX_ERRNO,%eax /* is it an error ? */
260 jbe 1f
261 movslq %eax, %rsi /* if error sign extend to 64 bits */
2621: setbe %al /* 1 if error, 0 if not */
263 movzbl %al,%edi /* zero-extend that into %edi */
264 call __audit_syscall_exit
265 movq RAX(%rsp),%rax /* reload syscall return value */
266 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
267 DISABLE_INTERRUPTS(CLBR_NONE)
268 TRACE_IRQS_OFF
269 testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
270 jz \exit
271 CLEAR_RREGS
272 jmp int_with_check
273 .endm
274
275sysenter_auditsys:
276 auditsys_entry_common
277 movl %ebp,%r9d /* reload 6th syscall arg */
278 jmp sysenter_dispatch
279
280sysexit_audit:
281 auditsys_exit sysexit_from_sys_call
282#endif
283
284sysenter_fix_flags:
285 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
286 popfq_cfi
287 jmp sysenter_flags_fixed
288
289sysenter_tracesys:
290#ifdef CONFIG_AUDITSYSCALL
291 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
292 jz sysenter_auditsys
293#endif
294 SAVE_EXTRA_REGS
295 CLEAR_RREGS
296 movq %rsp,%rdi /* &pt_regs -> arg1 */
297 call syscall_trace_enter
298 LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
299 RESTORE_EXTRA_REGS
300 jmp sysenter_do_call
301 CFI_ENDPROC
302ENDPROC(ia32_sysenter_target)
303
304/*
305 * 32bit SYSCALL instruction entry.
306 *
307 * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
308 * then loads new ss, cs, and rip from previously programmed MSRs.
309 * rflags gets masked by a value from another MSR (so CLD and CLAC
310 * are not needed). SYSCALL does not save anything on the stack
311 * and does not change rsp.
312 *
313 * Note: rflags saving+masking-with-MSR happens only in Long mode
314 * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
315 * Don't get confused: rflags saving+masking depends on Long Mode Active bit
316 * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
317 * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
318 *
319 * Arguments:
320 * eax system call number
321 * ecx return address
322 * ebx arg1
323 * ebp arg2 (note: not saved in the stack frame, should not be touched)
324 * edx arg3
325 * esi arg4
326 * edi arg5
327 * esp user stack
328 * 0(%esp) arg6
329 *
330 * This is purely a fast path. For anything complicated we use the int 0x80
331 * path below. We set up a complete hardware stack frame to share code
332 * with the int 0x80 path.
333 */
334ENTRY(ia32_cstar_target)
335 CFI_STARTPROC32 simple
336 CFI_SIGNAL_FRAME
337 CFI_DEF_CFA rsp,0
338 CFI_REGISTER rip,rcx
339 /*CFI_REGISTER rflags,r11*/
340
341 /*
342 * Interrupts are off on entry.
343 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
344 * it is too small to ever cause noticeable irq latency.
345 */
346 SWAPGS_UNSAFE_STACK
347 movl %esp,%r8d
348 CFI_REGISTER rsp,r8
349 movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp
350 ENABLE_INTERRUPTS(CLBR_NONE)
351
352 /* Zero-extending 32-bit regs, do not remove */
353 movl %eax,%eax
354
355 /* Construct struct pt_regs on stack */
356 pushq_cfi $__USER32_DS /* pt_regs->ss */
357 pushq_cfi %r8 /* pt_regs->sp */
358 CFI_REL_OFFSET rsp,0
359 pushq_cfi %r11 /* pt_regs->flags */
360 pushq_cfi $__USER32_CS /* pt_regs->cs */
361 pushq_cfi %rcx /* pt_regs->ip */
362 CFI_REL_OFFSET rip,0
363 pushq_cfi_reg rax /* pt_regs->orig_ax */
364 pushq_cfi_reg rdi /* pt_regs->di */
365 pushq_cfi_reg rsi /* pt_regs->si */
366 pushq_cfi_reg rdx /* pt_regs->dx */
367 pushq_cfi_reg rbp /* pt_regs->cx */
368 movl %ebp,%ecx
369 pushq_cfi $-ENOSYS /* pt_regs->ax */
370 sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
371 CFI_ADJUST_CFA_OFFSET 10*8
372
373 /*
374 * no need to do an access_ok check here because r8 has been
375 * 32bit zero extended
376 */
377 ASM_STAC
3781: movl (%r8),%r9d
379 _ASM_EXTABLE(1b,ia32_badarg)
380 ASM_CLAC
381 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
382 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
383 CFI_REMEMBER_STATE
384 jnz cstar_tracesys
385cstar_do_call:
386 /* 32bit syscall -> 64bit C ABI argument conversion */
387 movl %edi,%r8d /* arg5 */
388 /* r9 already loaded */ /* arg6 */
389 xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
390 movl %ebx,%edi /* arg1 */
391 movl %edx,%edx /* arg3 (zero extension) */
392cstar_dispatch:
393 cmpq $(IA32_NR_syscalls-1),%rax
394 ja 1f
395 call *ia32_sys_call_table(,%rax,8)
396 movq %rax,RAX(%rsp)
3971:
398 DISABLE_INTERRUPTS(CLBR_NONE)
399 TRACE_IRQS_OFF
400 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
401 jnz sysretl_audit
402sysretl_from_sys_call:
403 andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
404 RESTORE_RSI_RDI_RDX
405 movl RIP(%rsp),%ecx
406 CFI_REGISTER rip,rcx
407 movl EFLAGS(%rsp),%r11d
408 /*CFI_REGISTER rflags,r11*/
409 xorq %r10,%r10
410 xorq %r9,%r9
411 xorq %r8,%r8
412 TRACE_IRQS_ON
413 movl RSP(%rsp),%esp
414 CFI_RESTORE rsp
415 /*
416 * 64bit->32bit SYSRET restores eip from ecx,
417 * eflags from r11 (but RF and VM bits are forced to 0),
418 * cs and ss are loaded from MSRs.
419 * (Note: 32bit->32bit SYSRET is different: since r11
420 * does not exist, it merely sets eflags.IF=1).
421 *
422 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
423 * descriptor is not reinitialized. This means that we must
424 * avoid SYSRET with SS == NULL, which could happen if we schedule,
425 * exit the kernel, and re-enter using an interrupt vector. (All
426 * interrupt entries on x86_64 set SS to NULL.) We prevent that
427 * from happening by reloading SS in __switch_to.
428 */
429 USERGS_SYSRET32
430
431#ifdef CONFIG_AUDITSYSCALL
432cstar_auditsys:
433 CFI_RESTORE_STATE
434 movl %r9d,R9(%rsp) /* register to be clobbered by call */
435 auditsys_entry_common
436 movl R9(%rsp),%r9d /* reload 6th syscall arg */
437 jmp cstar_dispatch
438
439sysretl_audit:
440 auditsys_exit sysretl_from_sys_call
441#endif
442
443cstar_tracesys:
444#ifdef CONFIG_AUDITSYSCALL
445 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
446 jz cstar_auditsys
447#endif
448 xchgl %r9d,%ebp
449 SAVE_EXTRA_REGS
450 CLEAR_RREGS r9
451 movq %rsp,%rdi /* &pt_regs -> arg1 */
452 call syscall_trace_enter
453 LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
454 RESTORE_EXTRA_REGS
455 xchgl %ebp,%r9d
456 jmp cstar_do_call
457END(ia32_cstar_target)
458
459ia32_badarg:
460 ASM_CLAC
461 movq $-EFAULT,%rax
462 jmp ia32_sysret
463 CFI_ENDPROC
464
465/*
466 * Emulated IA32 system calls via int 0x80.
467 *
468 * Arguments:
469 * eax system call number
470 * ebx arg1
471 * ecx arg2
472 * edx arg3
473 * esi arg4
474 * edi arg5
475 * ebp arg6 (note: not saved in the stack frame, should not be touched)
476 *
477 * Notes:
478 * Uses the same stack frame as the x86-64 version.
479 * All registers except eax must be saved (but ptrace may violate that).
480 * Arguments are zero extended. For system calls that want sign extension and
481 * take long arguments a wrapper is needed. Most calls can just be called
482 * directly.
483 * Assumes it is only called from user space and entered with interrupts off.
484 */
485
486ENTRY(ia32_syscall)
487 CFI_STARTPROC32 simple
488 CFI_SIGNAL_FRAME
489 CFI_DEF_CFA rsp,5*8
490 /*CFI_REL_OFFSET ss,4*8 */
491 CFI_REL_OFFSET rsp,3*8
492 /*CFI_REL_OFFSET rflags,2*8 */
493 /*CFI_REL_OFFSET cs,1*8 */
494 CFI_REL_OFFSET rip,0*8
495
496 /*
497 * Interrupts are off on entry.
498 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
499 * it is too small to ever cause noticeable irq latency.
500 */
501 PARAVIRT_ADJUST_EXCEPTION_FRAME
502 SWAPGS
503 ENABLE_INTERRUPTS(CLBR_NONE)
504
505 /* Zero-extending 32-bit regs, do not remove */
506 movl %eax,%eax
507
508 /* Construct struct pt_regs on stack (iret frame is already on stack) */
509 pushq_cfi_reg rax /* pt_regs->orig_ax */
510 pushq_cfi_reg rdi /* pt_regs->di */
511 pushq_cfi_reg rsi /* pt_regs->si */
512 pushq_cfi_reg rdx /* pt_regs->dx */
513 pushq_cfi_reg rcx /* pt_regs->cx */
514 pushq_cfi $-ENOSYS /* pt_regs->ax */
515 cld
516 sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
517 CFI_ADJUST_CFA_OFFSET 10*8
518
519 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
520 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
521 jnz ia32_tracesys
522ia32_do_call:
523 /* 32bit syscall -> 64bit C ABI argument conversion */
524 movl %edi,%r8d /* arg5 */
525 movl %ebp,%r9d /* arg6 */
526 xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
527 movl %ebx,%edi /* arg1 */
528 movl %edx,%edx /* arg3 (zero extension) */
529 cmpq $(IA32_NR_syscalls-1),%rax
530 ja 1f
531 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
532ia32_sysret:
533 movq %rax,RAX(%rsp)
5341:
535ia32_ret_from_sys_call:
536 CLEAR_RREGS
537 jmp int_ret_from_sys_call
538
539ia32_tracesys:
540 SAVE_EXTRA_REGS
541 CLEAR_RREGS
542 movq %rsp,%rdi /* &pt_regs -> arg1 */
543 call syscall_trace_enter
544 LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
545 RESTORE_EXTRA_REGS
546 jmp ia32_do_call
547 CFI_ENDPROC
548END(ia32_syscall)
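The argument shuffle in cstar_do_call and ia32_do_call above maps the 32-bit syscall registers onto the 64-bit C calling convention before indexing ia32_sys_call_table. A minimal C sketch of that mapping for the int 0x80 path follows (the helper and struct are illustrative, not kernel code; the SYSCALL path differs in that arg2 arrives in ebp and arg6 is fetched from 0(%esp) on the user stack):

#include <asm/ptrace.h>

struct compat_syscall_args {
	unsigned long di, si, dx, cx, r8, r9;	/* 64-bit C ABI argument slots */
};

static void ia32_regs_to_c_abi(const struct pt_regs *regs,
			       struct compat_syscall_args *a)
{
	a->di = regs->bx;	/* arg1: ebx -> rdi */
	a->si = regs->cx;	/* arg2: ecx -> rsi */
	a->dx = regs->dx;	/* arg3: edx -> rdx (zero-extended by the 32-bit move) */
	a->cx = regs->si;	/* arg4: esi -> rcx */
	a->r8 = regs->di;	/* arg5: edi -> r8  */
	a->r9 = regs->bp;	/* arg6: ebp -> r9  (int 0x80 only) */
}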
549
550 .macro PTREGSCALL label, func
551 ALIGN
552GLOBAL(\label)
553 leaq \func(%rip),%rax
554 jmp ia32_ptregs_common
555 .endm
556
557 CFI_STARTPROC32
558
559 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
560 PTREGSCALL stub32_sigreturn, sys32_sigreturn
561 PTREGSCALL stub32_fork, sys_fork
562 PTREGSCALL stub32_vfork, sys_vfork
563
564 ALIGN
565GLOBAL(stub32_clone)
566 leaq sys_clone(%rip),%rax
567 mov %r8, %rcx
568 jmp ia32_ptregs_common
569
570 ALIGN
571ia32_ptregs_common:
572 CFI_ENDPROC
573 CFI_STARTPROC32 simple
574 CFI_SIGNAL_FRAME
575 CFI_DEF_CFA rsp,SIZEOF_PTREGS
576 CFI_REL_OFFSET rax,RAX
577 CFI_REL_OFFSET rcx,RCX
578 CFI_REL_OFFSET rdx,RDX
579 CFI_REL_OFFSET rsi,RSI
580 CFI_REL_OFFSET rdi,RDI
581 CFI_REL_OFFSET rip,RIP
582/* CFI_REL_OFFSET cs,CS*/
583/* CFI_REL_OFFSET rflags,EFLAGS*/
584 CFI_REL_OFFSET rsp,RSP
585/* CFI_REL_OFFSET ss,SS*/
586 SAVE_EXTRA_REGS 8
587 call *%rax
588 RESTORE_EXTRA_REGS 8
589 ret
590 CFI_ENDPROC
591END(ia32_ptregs_common)
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 959e45b81fe2..e51a8f803f55 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -35,12 +35,12 @@
35#define smp_mb() mb() 35#define smp_mb() mb()
36#define smp_rmb() dma_rmb() 36#define smp_rmb() dma_rmb()
37#define smp_wmb() barrier() 37#define smp_wmb() barrier()
38#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) 38#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
39#else /* !SMP */ 39#else /* !SMP */
40#define smp_mb() barrier() 40#define smp_mb() barrier()
41#define smp_rmb() barrier() 41#define smp_rmb() barrier()
42#define smp_wmb() barrier() 42#define smp_wmb() barrier()
43#define set_mb(var, value) do { var = value; barrier(); } while (0) 43#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
44#endif /* SMP */ 44#endif /* SMP */
45 45
46#define read_barrier_depends() do { } while (0) 46#define read_barrier_depends() do { } while (0)
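The rename from set_mb() to smp_store_mb() keeps the same semantics: a store followed by a full memory barrier (an xchg on SMP, a compiler barrier on UP). A hedged sketch of the classic store-then-check-flag pattern it is used for, with illustrative variable names and kernel context assumed:

#include <linux/compiler.h>
#include <asm/barrier.h>

static int wakeup_pending;
static int prepared;

static void example_prepare_then_check(void)
{
	/* store 'prepared', then a full barrier */
	smp_store_mb(prepared, 1);

	/* this load cannot be observed before the store above */
	if (READ_ONCE(wakeup_pending))
		prepared = 0;	/* a concurrent wakeup raced in; stay runnable */
}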
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 47c8e32f621a..b6f7457d12e4 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -8,7 +8,7 @@
8/* 8/*
9 * The set_memory_* API can be used to change various attributes of a virtual 9 * The set_memory_* API can be used to change various attributes of a virtual
10 * address range. The attributes include: 10 * address range. The attributes include:
11 * Cachability : UnCached, WriteCombining, WriteBack 11 * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack
12 * Executability : eXeutable, NoteXecutable 12 * Executability : eXeutable, NoteXecutable
13 * Read/Write : ReadOnly, ReadWrite 13 * Read/Write : ReadOnly, ReadWrite
14 * Presence : NotPresent 14 * Presence : NotPresent
@@ -35,9 +35,11 @@
35 35
36int _set_memory_uc(unsigned long addr, int numpages); 36int _set_memory_uc(unsigned long addr, int numpages);
37int _set_memory_wc(unsigned long addr, int numpages); 37int _set_memory_wc(unsigned long addr, int numpages);
38int _set_memory_wt(unsigned long addr, int numpages);
38int _set_memory_wb(unsigned long addr, int numpages); 39int _set_memory_wb(unsigned long addr, int numpages);
39int set_memory_uc(unsigned long addr, int numpages); 40int set_memory_uc(unsigned long addr, int numpages);
40int set_memory_wc(unsigned long addr, int numpages); 41int set_memory_wc(unsigned long addr, int numpages);
42int set_memory_wt(unsigned long addr, int numpages);
41int set_memory_wb(unsigned long addr, int numpages); 43int set_memory_wb(unsigned long addr, int numpages);
42int set_memory_x(unsigned long addr, int numpages); 44int set_memory_x(unsigned long addr, int numpages);
43int set_memory_nx(unsigned long addr, int numpages); 45int set_memory_nx(unsigned long addr, int numpages);
@@ -48,10 +50,12 @@ int set_memory_4k(unsigned long addr, int numpages);
48 50
49int set_memory_array_uc(unsigned long *addr, int addrinarray); 51int set_memory_array_uc(unsigned long *addr, int addrinarray);
50int set_memory_array_wc(unsigned long *addr, int addrinarray); 52int set_memory_array_wc(unsigned long *addr, int addrinarray);
53int set_memory_array_wt(unsigned long *addr, int addrinarray);
51int set_memory_array_wb(unsigned long *addr, int addrinarray); 54int set_memory_array_wb(unsigned long *addr, int addrinarray);
52 55
53int set_pages_array_uc(struct page **pages, int addrinarray); 56int set_pages_array_uc(struct page **pages, int addrinarray);
54int set_pages_array_wc(struct page **pages, int addrinarray); 57int set_pages_array_wc(struct page **pages, int addrinarray);
58int set_pages_array_wt(struct page **pages, int addrinarray);
55int set_pages_array_wb(struct page **pages, int addrinarray); 59int set_pages_array_wb(struct page **pages, int addrinarray);
56 60
57/* 61/*
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 99c105d78b7e..ad19841eddfe 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -4,8 +4,6 @@
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <asm/alternative.h> /* Provides LOCK_PREFIX */ 5#include <asm/alternative.h> /* Provides LOCK_PREFIX */
6 6
7#define __HAVE_ARCH_CMPXCHG 1
8
9/* 7/*
10 * Non-existant functions to indicate usage errors at link time 8 * Non-existant functions to indicate usage errors at link time
11 * (or compile-time if the compiler implements __compiletime_error(). 9 * (or compile-time if the compiler implements __compiletime_error().
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
deleted file mode 100644
index de1cdaf4d743..000000000000
--- a/arch/x86/include/asm/dwarf2.h
+++ /dev/null
@@ -1,170 +0,0 @@
1#ifndef _ASM_X86_DWARF2_H
2#define _ASM_X86_DWARF2_H
3
4#ifndef __ASSEMBLY__
5#warning "asm/dwarf2.h should be only included in pure assembly files"
6#endif
7
8/*
9 * Macros for dwarf2 CFI unwind table entries.
10 * See "as.info" for details on these pseudo ops. Unfortunately
11 * they are only supported in very new binutils, so define them
12 * away for older version.
13 */
14
15#ifdef CONFIG_AS_CFI
16
17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined
30#define CFI_ESCAPE .cfi_escape
31
32#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
33#define CFI_SIGNAL_FRAME .cfi_signal_frame
34#else
35#define CFI_SIGNAL_FRAME
36#endif
37
38#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__)
39 /*
40 * Emit CFI data in .debug_frame sections, not .eh_frame sections.
41 * The latter we currently just discard since we don't do DWARF
42 * unwinding at runtime. So only the offline DWARF information is
43 * useful to anyone. Note we should not use this directive if this
44 * file is used in the vDSO assembly, or if vmlinux.lds.S gets
45 * changed so it doesn't discard .eh_frame.
46 */
47 .cfi_sections .debug_frame
48#endif
49
50#else
51
52/*
53 * Due to the structure of pre-exisiting code, don't use assembler line
54 * comment character # to ignore the arguments. Instead, use a dummy macro.
55 */
56.macro cfi_ignore a=0, b=0, c=0, d=0
57.endm
58
59#define CFI_STARTPROC cfi_ignore
60#define CFI_ENDPROC cfi_ignore
61#define CFI_DEF_CFA cfi_ignore
62#define CFI_DEF_CFA_REGISTER cfi_ignore
63#define CFI_DEF_CFA_OFFSET cfi_ignore
64#define CFI_ADJUST_CFA_OFFSET cfi_ignore
65#define CFI_OFFSET cfi_ignore
66#define CFI_REL_OFFSET cfi_ignore
67#define CFI_REGISTER cfi_ignore
68#define CFI_RESTORE cfi_ignore
69#define CFI_REMEMBER_STATE cfi_ignore
70#define CFI_RESTORE_STATE cfi_ignore
71#define CFI_UNDEFINED cfi_ignore
72#define CFI_ESCAPE cfi_ignore
73#define CFI_SIGNAL_FRAME cfi_ignore
74
75#endif
76
77/*
78 * An attempt to make CFI annotations more or less
79 * correct and shorter. It is implied that you know
80 * what you're doing if you use them.
81 */
82#ifdef __ASSEMBLY__
83#ifdef CONFIG_X86_64
84 .macro pushq_cfi reg
85 pushq \reg
86 CFI_ADJUST_CFA_OFFSET 8
87 .endm
88
89 .macro pushq_cfi_reg reg
90 pushq %\reg
91 CFI_ADJUST_CFA_OFFSET 8
92 CFI_REL_OFFSET \reg, 0
93 .endm
94
95 .macro popq_cfi reg
96 popq \reg
97 CFI_ADJUST_CFA_OFFSET -8
98 .endm
99
100 .macro popq_cfi_reg reg
101 popq %\reg
102 CFI_ADJUST_CFA_OFFSET -8
103 CFI_RESTORE \reg
104 .endm
105
106 .macro pushfq_cfi
107 pushfq
108 CFI_ADJUST_CFA_OFFSET 8
109 .endm
110
111 .macro popfq_cfi
112 popfq
113 CFI_ADJUST_CFA_OFFSET -8
114 .endm
115
116 .macro movq_cfi reg offset=0
117 movq %\reg, \offset(%rsp)
118 CFI_REL_OFFSET \reg, \offset
119 .endm
120
121 .macro movq_cfi_restore offset reg
122 movq \offset(%rsp), %\reg
123 CFI_RESTORE \reg
124 .endm
125#else /*!CONFIG_X86_64*/
126 .macro pushl_cfi reg
127 pushl \reg
128 CFI_ADJUST_CFA_OFFSET 4
129 .endm
130
131 .macro pushl_cfi_reg reg
132 pushl %\reg
133 CFI_ADJUST_CFA_OFFSET 4
134 CFI_REL_OFFSET \reg, 0
135 .endm
136
137 .macro popl_cfi reg
138 popl \reg
139 CFI_ADJUST_CFA_OFFSET -4
140 .endm
141
142 .macro popl_cfi_reg reg
143 popl %\reg
144 CFI_ADJUST_CFA_OFFSET -4
145 CFI_RESTORE \reg
146 .endm
147
148 .macro pushfl_cfi
149 pushfl
150 CFI_ADJUST_CFA_OFFSET 4
151 .endm
152
153 .macro popfl_cfi
154 popfl
155 CFI_ADJUST_CFA_OFFSET -4
156 .endm
157
158 .macro movl_cfi reg offset=0
159 movl %\reg, \offset(%esp)
160 CFI_REL_OFFSET \reg, \offset
161 .endm
162
163 .macro movl_cfi_restore offset reg
164 movl \offset(%esp), %\reg
165 CFI_RESTORE \reg
166 .endm
167#endif /*!CONFIG_X86_64*/
168#endif /*__ASSEMBLY__*/
169
170#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 27ca0afcccd7..df002992d8fd 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -52,4 +52,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
52BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) 52BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
53#endif 53#endif
54 54
55#ifdef CONFIG_X86_MCE_AMD
56BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
57#endif
55#endif 58#endif
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 3b629f47eb65..793179cf8e21 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -1,20 +1,17 @@
1#ifdef __ASSEMBLY__ 1#ifdef __ASSEMBLY__
2 2
3#include <asm/asm.h> 3#include <asm/asm.h>
4#include <asm/dwarf2.h>
5 4
6/* The annotation hides the frame from the unwinder and makes it look 5/* The annotation hides the frame from the unwinder and makes it look
7 like a ordinary ebp save/restore. This avoids some special cases for 6 like a ordinary ebp save/restore. This avoids some special cases for
8 frame pointer later */ 7 frame pointer later */
9#ifdef CONFIG_FRAME_POINTER 8#ifdef CONFIG_FRAME_POINTER
10 .macro FRAME 9 .macro FRAME
11 __ASM_SIZE(push,_cfi) %__ASM_REG(bp) 10 __ASM_SIZE(push,) %__ASM_REG(bp)
12 CFI_REL_OFFSET __ASM_REG(bp), 0
13 __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) 11 __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
14 .endm 12 .endm
15 .macro ENDFRAME 13 .macro ENDFRAME
16 __ASM_SIZE(pop,_cfi) %__ASM_REG(bp) 14 __ASM_SIZE(pop,) %__ASM_REG(bp)
17 CFI_RESTORE __ASM_REG(bp)
18 .endm 15 .endm
19#else 16#else
20 .macro FRAME 17 .macro FRAME
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 986606539395..7178043b0e1d 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -34,6 +34,9 @@ typedef struct {
34#ifdef CONFIG_X86_MCE_THRESHOLD 34#ifdef CONFIG_X86_MCE_THRESHOLD
35 unsigned int irq_threshold_count; 35 unsigned int irq_threshold_count;
36#endif 36#endif
37#ifdef CONFIG_X86_MCE_AMD
38 unsigned int irq_deferred_error_count;
39#endif
37#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) 40#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
38 unsigned int irq_hv_callback_count; 41 unsigned int irq_hv_callback_count;
39#endif 42#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 10c80d4f8386..6615032e19c8 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -40,6 +40,7 @@ extern asmlinkage void reschedule_interrupt(void);
40extern asmlinkage void irq_move_cleanup_interrupt(void); 40extern asmlinkage void irq_move_cleanup_interrupt(void);
41extern asmlinkage void reboot_interrupt(void); 41extern asmlinkage void reboot_interrupt(void);
42extern asmlinkage void threshold_interrupt(void); 42extern asmlinkage void threshold_interrupt(void);
43extern asmlinkage void deferred_error_interrupt(void);
43 44
44extern asmlinkage void call_function_interrupt(void); 45extern asmlinkage void call_function_interrupt(void);
45extern asmlinkage void call_function_single_interrupt(void); 46extern asmlinkage void call_function_single_interrupt(void);
@@ -54,6 +55,7 @@ extern void trace_spurious_interrupt(void);
54extern void trace_thermal_interrupt(void); 55extern void trace_thermal_interrupt(void);
55extern void trace_reschedule_interrupt(void); 56extern void trace_reschedule_interrupt(void);
56extern void trace_threshold_interrupt(void); 57extern void trace_threshold_interrupt(void);
58extern void trace_deferred_error_interrupt(void);
57extern void trace_call_function_interrupt(void); 59extern void trace_call_function_interrupt(void);
58extern void trace_call_function_single_interrupt(void); 60extern void trace_call_function_single_interrupt(void);
59#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt 61#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 34a5b93704d3..83ec9b1d77cc 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -35,11 +35,13 @@
35 */ 35 */
36 36
37#define ARCH_HAS_IOREMAP_WC 37#define ARCH_HAS_IOREMAP_WC
38#define ARCH_HAS_IOREMAP_WT
38 39
39#include <linux/string.h> 40#include <linux/string.h>
40#include <linux/compiler.h> 41#include <linux/compiler.h>
41#include <asm/page.h> 42#include <asm/page.h>
42#include <asm/early_ioremap.h> 43#include <asm/early_ioremap.h>
44#include <asm/pgtable_types.h>
43 45
44#define build_mmio_read(name, size, type, reg, barrier) \ 46#define build_mmio_read(name, size, type, reg, barrier) \
45static inline type name(const volatile void __iomem *addr) \ 47static inline type name(const volatile void __iomem *addr) \
@@ -177,6 +179,7 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
177 * look at pci_iomap(). 179 * look at pci_iomap().
178 */ 180 */
179extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); 181extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
182extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
180extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); 183extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
181extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, 184extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
182 unsigned long prot_val); 185 unsigned long prot_val);
@@ -197,8 +200,6 @@ extern void set_iounmap_nonlazy(void);
197 200
198#include <asm-generic/iomap.h> 201#include <asm-generic/iomap.h>
199 202
200#include <linux/vmalloc.h>
201
202/* 203/*
203 * Convert a virtual cached pointer to an uncached pointer 204 * Convert a virtual cached pointer to an uncached pointer
204 */ 205 */
@@ -320,6 +321,7 @@ extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
320extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, 321extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
321 enum page_cache_mode pcm); 322 enum page_cache_mode pcm);
322extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); 323extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
324extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size);
323 325
324extern bool is_early_ioremap_ptep(pte_t *ptep); 326extern bool is_early_ioremap_ptep(pte_t *ptep);
325 327
@@ -338,6 +340,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
338#define IO_SPACE_LIMIT 0xffff 340#define IO_SPACE_LIMIT 0xffff
339 341
340#ifdef CONFIG_MTRR 342#ifdef CONFIG_MTRR
343extern int __must_check arch_phys_wc_index(int handle);
344#define arch_phys_wc_index arch_phys_wc_index
345
341extern int __must_check arch_phys_wc_add(unsigned long base, 346extern int __must_check arch_phys_wc_add(unsigned long base,
342 unsigned long size); 347 unsigned long size);
343extern void arch_phys_wc_del(int handle); 348extern void arch_phys_wc_del(int handle);
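With ARCH_HAS_IOREMAP_WT and ioremap_wt() declared above, a driver can request write-through mappings for device memory. A hedged sketch (the helper name, use case, and error handling are illustrative):

#include <linux/io.h>

static void __iomem *example_map_wt(resource_size_t base, unsigned long len)
{
	void __iomem *p = ioremap_wt(base, len);

	if (!p)
		return NULL;
	/* reads may be served from cache; writes go straight through to the device */
	return p;
}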
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0ed29ac13a9d..4c2d2eb2060a 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -83,22 +83,23 @@
83 */ 83 */
84#define X86_PLATFORM_IPI_VECTOR 0xf7 84#define X86_PLATFORM_IPI_VECTOR 0xf7
85 85
86/* Vector for KVM to deliver posted interrupt IPI */
87#ifdef CONFIG_HAVE_KVM
88#define POSTED_INTR_VECTOR 0xf2
89#define POSTED_INTR_WAKEUP_VECTOR 0xf1 86#define POSTED_INTR_WAKEUP_VECTOR 0xf1
90#endif
91
92/* 87/*
93 * IRQ work vector: 88 * IRQ work vector:
94 */ 89 */
95#define IRQ_WORK_VECTOR 0xf6 90#define IRQ_WORK_VECTOR 0xf6
96 91
97#define UV_BAU_MESSAGE 0xf5 92#define UV_BAU_MESSAGE 0xf5
93#define DEFERRED_ERROR_VECTOR 0xf4
98 94
99/* Vector on which hypervisor callbacks will be delivered */ 95/* Vector on which hypervisor callbacks will be delivered */
100#define HYPERVISOR_CALLBACK_VECTOR 0xf3 96#define HYPERVISOR_CALLBACK_VECTOR 0xf3
101 97
98/* Vector for KVM to deliver posted interrupt IPI */
99#ifdef CONFIG_HAVE_KVM
100#define POSTED_INTR_VECTOR 0xf2
101#endif
102
102/* 103/*
103 * Local APIC timer IRQ vector is on a different priority level, 104 * Local APIC timer IRQ vector is on a different priority level,
104 * to work around the 'lost local interrupt if more than 2 IRQ 105 * to work around the 'lost local interrupt if more than 2 IRQ
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dea2e7e962e3..f4a555beef19 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -207,6 +207,7 @@ union kvm_mmu_page_role {
207 unsigned nxe:1; 207 unsigned nxe:1;
208 unsigned cr0_wp:1; 208 unsigned cr0_wp:1;
209 unsigned smep_andnot_wp:1; 209 unsigned smep_andnot_wp:1;
210 unsigned smap_andnot_wp:1;
210 }; 211 };
211}; 212};
212 213
@@ -400,6 +401,7 @@ struct kvm_vcpu_arch {
400 struct kvm_mmu_memory_cache mmu_page_header_cache; 401 struct kvm_mmu_memory_cache mmu_page_header_cache;
401 402
402 struct fpu guest_fpu; 403 struct fpu guest_fpu;
404 bool eager_fpu;
403 u64 xcr0; 405 u64 xcr0;
404 u64 guest_supported_xcr0; 406 u64 guest_supported_xcr0;
405 u32 guest_xstate_size; 407 u32 guest_xstate_size;
@@ -743,6 +745,7 @@ struct kvm_x86_ops {
743 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 745 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
744 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 746 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
745 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 747 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
748 void (*fpu_activate)(struct kvm_vcpu *vcpu);
746 void (*fpu_deactivate)(struct kvm_vcpu *vcpu); 749 void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
747 750
748 void (*tlb_flush)(struct kvm_vcpu *vcpu); 751 void (*tlb_flush)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 1f5a86d518db..982dfc3679ad 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -17,11 +17,16 @@
17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) 17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
18#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ 18#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
19#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */ 19#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */
20#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */
20 21
21/* MCG_STATUS register defines */ 22/* MCG_STATUS register defines */
22#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ 23#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
23#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ 24#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
24#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ 25#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
26#define MCG_STATUS_LMCES (1ULL<<3) /* LMCE signaled */
27
28/* MCG_EXT_CTL register defines */
29#define MCG_EXT_CTL_LMCE_EN (1ULL<<0) /* Enable LMCE */
25 30
26/* MCi_STATUS register defines */ 31/* MCi_STATUS register defines */
27#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ 32#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
@@ -104,6 +109,7 @@ struct mce_log {
104struct mca_config { 109struct mca_config {
105 bool dont_log_ce; 110 bool dont_log_ce;
106 bool cmci_disabled; 111 bool cmci_disabled;
112 bool lmce_disabled;
107 bool ignore_ce; 113 bool ignore_ce;
108 bool disabled; 114 bool disabled;
109 bool ser; 115 bool ser;
@@ -117,8 +123,19 @@ struct mca_config {
117}; 123};
118 124
119struct mce_vendor_flags { 125struct mce_vendor_flags {
120 __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */ 126 /*
121 __reserved_0 : 63; 127 * overflow recovery cpuid bit indicates that overflow
128 * conditions are not fatal
129 */
130 __u64 overflow_recov : 1,
131
132 /*
133 * SUCCOR stands for S/W UnCorrectable error COntainment
134 * and Recovery. It indicates support for data poisoning
135 * in HW and deferred error interrupts.
136 */
137 succor : 1,
138 __reserved_0 : 62;
122}; 139};
123extern struct mce_vendor_flags mce_flags; 140extern struct mce_vendor_flags mce_flags;
124 141
@@ -168,12 +185,16 @@ void cmci_clear(void);
168void cmci_reenable(void); 185void cmci_reenable(void);
169void cmci_rediscover(void); 186void cmci_rediscover(void);
170void cmci_recheck(void); 187void cmci_recheck(void);
188void lmce_clear(void);
189void lmce_enable(void);
171#else 190#else
172static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } 191static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
173static inline void cmci_clear(void) {} 192static inline void cmci_clear(void) {}
174static inline void cmci_reenable(void) {} 193static inline void cmci_reenable(void) {}
175static inline void cmci_rediscover(void) {} 194static inline void cmci_rediscover(void) {}
176static inline void cmci_recheck(void) {} 195static inline void cmci_recheck(void) {}
196static inline void lmce_clear(void) {}
197static inline void lmce_enable(void) {}
177#endif 198#endif
178 199
179#ifdef CONFIG_X86_MCE_AMD 200#ifdef CONFIG_X86_MCE_AMD
@@ -223,6 +244,9 @@ void do_machine_check(struct pt_regs *, long);
223extern void (*mce_threshold_vector)(void); 244extern void (*mce_threshold_vector)(void);
224extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 245extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
225 246
247/* Deferred error interrupt handler */
248extern void (*deferred_error_int_vector)(void);
249
226/* 250/*
227 * Thermal handler 251 * Thermal handler
228 */ 252 */
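The new succor vendor flag and the deferred_error_int_vector hook added above are what the AMD-specific code uses to wire up deferred error interrupts. A hedged sketch of that setup, with a placeholder handler name (the real handler lives in the vendor MCE code):

#include <asm/mce.h>

/* hypothetical vendor handler, for illustration only */
static void example_amd_deferred_error_interrupt(void) { }

static void example_setup_deferred_error(void)
{
	/* SUCCOR advertises deferred error interrupts; without it, leave the vector alone */
	if (!mce_flags.succor)
		return;

	deferred_error_int_vector = example_amd_deferred_error_interrupt;
}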
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index c469490db4a8..9ebc3d009373 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,7 @@
56#define MSR_IA32_MCG_CAP 0x00000179 56#define MSR_IA32_MCG_CAP 0x00000179
57#define MSR_IA32_MCG_STATUS 0x0000017a 57#define MSR_IA32_MCG_STATUS 0x0000017a
58#define MSR_IA32_MCG_CTL 0x0000017b 58#define MSR_IA32_MCG_CTL 0x0000017b
59#define MSR_IA32_MCG_EXT_CTL 0x000004d0
59 60
60#define MSR_OFFCORE_RSP_0 0x000001a6 61#define MSR_OFFCORE_RSP_0 0x000001a6
61#define MSR_OFFCORE_RSP_1 0x000001a7 62#define MSR_OFFCORE_RSP_1 0x000001a7
@@ -140,6 +141,7 @@
140#define MSR_CORE_C3_RESIDENCY 0x000003fc 141#define MSR_CORE_C3_RESIDENCY 0x000003fc
141#define MSR_CORE_C6_RESIDENCY 0x000003fd 142#define MSR_CORE_C6_RESIDENCY 0x000003fd
142#define MSR_CORE_C7_RESIDENCY 0x000003fe 143#define MSR_CORE_C7_RESIDENCY 0x000003fe
144#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
143#define MSR_PKG_C2_RESIDENCY 0x0000060d 145#define MSR_PKG_C2_RESIDENCY 0x0000060d
144#define MSR_PKG_C8_RESIDENCY 0x00000630 146#define MSR_PKG_C8_RESIDENCY 0x00000630
145#define MSR_PKG_C9_RESIDENCY 0x00000631 147#define MSR_PKG_C9_RESIDENCY 0x00000631
@@ -379,6 +381,7 @@
379#define FEATURE_CONTROL_LOCKED (1<<0) 381#define FEATURE_CONTROL_LOCKED (1<<0)
380#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) 382#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
381#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) 383#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
384#define FEATURE_CONTROL_LMCE (1<<20)
382 385
383#define MSR_IA32_APICBASE 0x0000001b 386#define MSR_IA32_APICBASE 0x0000001b
384#define MSR_IA32_APICBASE_BSP (1<<8) 387#define MSR_IA32_APICBASE_BSP (1<<8)
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index de36f22eb0b9..e6a707eb5081 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -1,13 +1,14 @@
1#ifndef _ASM_X86_MSR_H 1#ifndef _ASM_X86_MSR_H
2#define _ASM_X86_MSR_H 2#define _ASM_X86_MSR_H
3 3
4#include <uapi/asm/msr.h> 4#include "msr-index.h"
5 5
6#ifndef __ASSEMBLY__ 6#ifndef __ASSEMBLY__
7 7
8#include <asm/asm.h> 8#include <asm/asm.h>
9#include <asm/errno.h> 9#include <asm/errno.h>
10#include <asm/cpumask.h> 10#include <asm/cpumask.h>
11#include <uapi/asm/msr.h>
11 12
12struct msr { 13struct msr {
13 union { 14 union {
@@ -205,8 +206,13 @@ do { \
205 206
206#endif /* !CONFIG_PARAVIRT */ 207#endif /* !CONFIG_PARAVIRT */
207 208
208#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \ 209/*
209 (u32)((val) >> 32)) 210 * 64-bit version of wrmsr_safe():
211 */
212static inline int wrmsrl_safe(u32 msr, u64 val)
213{
214 return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
215}
210 216
211#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high)) 217#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high))
212 218
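Turning wrmsrl_safe() from a macro into an inline function means the value argument is evaluated once and type-checked as u64. A hedged usage sketch, reusing the MCG_EXT_CTL definitions added earlier in this series (the function name is illustrative):

#include <linux/printk.h>
#include <asm/msr.h>

static void example_enable_lmce(void)
{
	u64 ctl;

	if (rdmsrl_safe(MSR_IA32_MCG_EXT_CTL, &ctl))
		return;					/* MSR not implemented */

	if (wrmsrl_safe(MSR_IA32_MCG_EXT_CTL, ctl | MCG_EXT_CTL_LMCE_EN))
		pr_warn("MCG_EXT_CTL write faulted\n");
}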
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index f768f6298419..b94f6f64e23d 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -31,7 +31,7 @@
31 * arch_phys_wc_add and arch_phys_wc_del. 31 * arch_phys_wc_add and arch_phys_wc_del.
32 */ 32 */
33# ifdef CONFIG_MTRR 33# ifdef CONFIG_MTRR
34extern u8 mtrr_type_lookup(u64 addr, u64 end); 34extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
35extern void mtrr_save_fixed_ranges(void *); 35extern void mtrr_save_fixed_ranges(void *);
36extern void mtrr_save_state(void); 36extern void mtrr_save_state(void);
37extern int mtrr_add(unsigned long base, unsigned long size, 37extern int mtrr_add(unsigned long base, unsigned long size,
@@ -48,14 +48,13 @@ extern void mtrr_aps_init(void);
48extern void mtrr_bp_restore(void); 48extern void mtrr_bp_restore(void);
49extern int mtrr_trim_uncached_memory(unsigned long end_pfn); 49extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
50extern int amd_special_default_mtrr(void); 50extern int amd_special_default_mtrr(void);
51extern int phys_wc_to_mtrr_index(int handle);
52# else 51# else
53static inline u8 mtrr_type_lookup(u64 addr, u64 end) 52static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform)
54{ 53{
55 /* 54 /*
56 * Return no-MTRRs: 55 * Return no-MTRRs:
57 */ 56 */
58 return 0xff; 57 return MTRR_TYPE_INVALID;
59} 58}
60#define mtrr_save_fixed_ranges(arg) do {} while (0) 59#define mtrr_save_fixed_ranges(arg) do {} while (0)
61#define mtrr_save_state() do {} while (0) 60#define mtrr_save_state() do {} while (0)
@@ -84,10 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
84static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) 83static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
85{ 84{
86} 85}
87static inline int phys_wc_to_mtrr_index(int handle)
88{
89 return -1;
90}
91 86
92#define mtrr_ap_init() do {} while (0) 87#define mtrr_ap_init() do {} while (0)
93#define mtrr_bp_init() do {} while (0) 88#define mtrr_bp_init() do {} while (0)
@@ -127,4 +122,8 @@ struct mtrr_gentry32 {
127 _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32) 122 _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32)
128#endif /* CONFIG_COMPAT */ 123#endif /* CONFIG_COMPAT */
129 124
125/* Bit fields for enabled in struct mtrr_state_type */
126#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01
127#define MTRR_STATE_MTRR_ENABLED 0x02
128
130#endif /* _ASM_X86_MTRR_H */ 129#endif /* _ASM_X86_MTRR_H */
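mtrr_type_lookup() now reports, via the extra 'uniform' output, whether the whole range is covered by a single MTRR type, and returns MTRR_TYPE_INVALID instead of a bare 0xff when MTRRs are disabled. A hedged caller sketch (names and the debug message are illustrative):

#include <linux/printk.h>
#include <asm/mtrr.h>

static void example_check_range(u64 start, u64 end)
{
	u8 uniform;
	u8 type = mtrr_type_lookup(start, end, &uniform);

	if (type == MTRR_TYPE_INVALID)
		return;			/* MTRRs disabled, nothing to honour */
	if (!uniform)
		pr_debug("0x%llx-0x%llx spans multiple MTRR types\n", start, end);
}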
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8957810ad7d1..d143bfad45d7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
712 712
713#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) 713#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
714 714
715#ifdef CONFIG_QUEUED_SPINLOCKS
716
717static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
718 u32 val)
719{
720 PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
721}
722
723static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
724{
725 PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
726}
727
728static __always_inline void pv_wait(u8 *ptr, u8 val)
729{
730 PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
731}
732
733static __always_inline void pv_kick(int cpu)
734{
735 PVOP_VCALL1(pv_lock_ops.kick, cpu);
736}
737
738#else /* !CONFIG_QUEUED_SPINLOCKS */
739
715static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, 740static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
716 __ticket_t ticket) 741 __ticket_t ticket)
717{ 742{
@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
724 PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); 749 PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
725} 750}
726 751
727#endif 752#endif /* CONFIG_QUEUED_SPINLOCKS */
753
754#endif /* SMP && PARAVIRT_SPINLOCKS */
728 755
729#ifdef CONFIG_X86_32 756#ifdef CONFIG_X86_32
730#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" 757#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 344c646e7f06..a6b8f9fadb06 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -334,9 +334,19 @@ struct arch_spinlock;
334typedef u16 __ticket_t; 334typedef u16 __ticket_t;
335#endif 335#endif
336 336
337struct qspinlock;
338
337struct pv_lock_ops { 339struct pv_lock_ops {
340#ifdef CONFIG_QUEUED_SPINLOCKS
341 void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
342 struct paravirt_callee_save queued_spin_unlock;
343
344 void (*wait)(u8 *ptr, u8 val);
345 void (*kick)(int cpu);
346#else /* !CONFIG_QUEUED_SPINLOCKS */
338 struct paravirt_callee_save lock_spinning; 347 struct paravirt_callee_save lock_spinning;
339 void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); 348 void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
349#endif /* !CONFIG_QUEUED_SPINLOCKS */
340}; 350};
341 351
342/* This contains all the paravirt structures: we get a convenient 352/* This contains all the paravirt structures: we get a convenient
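The queued-spinlock variant of pv_lock_ops replaces the ticket-lock slowpath hooks with wait/kick primitives: a contended vCPU asks the hypervisor to halt it, and the lock holder kicks it awake on unlock. A hedged sketch of a guest-side waiter built on those hooks (the spin budget and function name are illustrative, not the in-tree slow path):

#include <linux/compiler.h>
#include <asm/processor.h>
#include <asm/paravirt.h>

static void example_pv_wait_for_byte(u8 *byte, u8 locked_val)
{
	int spin_budget = 1024;		/* arbitrary; not the real heuristic */

	while (spin_budget--) {
		if (READ_ONCE(*byte) != locked_val)
			return;
		cpu_relax();
	}
	pv_wait(byte, locked_val);	/* halt this vCPU until pv_kick() from the unlocker */
}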
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 91bc4ba95f91..ca6c228d5e62 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -4,14 +4,9 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <asm/pgtable_types.h> 5#include <asm/pgtable_types.h>
6 6
7#ifdef CONFIG_X86_PAT 7bool pat_enabled(void);
8extern int pat_enabled;
9#else
10static const int pat_enabled;
11#endif
12
13extern void pat_init(void); 8extern void pat_init(void);
14void pat_init_cache_modes(void); 9void pat_init_cache_modes(u64);
15 10
16extern int reserve_memtype(u64 start, u64 end, 11extern int reserve_memtype(u64 start, u64 end,
17 enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); 12 enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
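pat_enabled changes from an exported variable to an accessor function, so callers query it the same way whether or not CONFIG_X86_PAT is built in. A hedged caller sketch using the reserve_memtype() prototype shown above (the helper name and fallback policy are illustrative):

#include <linux/errno.h>
#include <asm/pat.h>

static int example_reserve_wc(u64 start, u64 end, enum page_cache_mode *ret)
{
	if (!pat_enabled())
		return -ENODEV;		/* no PAT: caller must fall back, e.g. to UC- */

	return reserve_memtype(start, end, _PAGE_CACHE_MODE_WC, ret);
}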
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index fe57e7a98839..2562e303405b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -398,11 +398,17 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
398 * requested memtype: 398 * requested memtype:
399 * - request is uncached, return cannot be write-back 399 * - request is uncached, return cannot be write-back
400 * - request is write-combine, return cannot be write-back 400 * - request is write-combine, return cannot be write-back
401 * - request is write-through, return cannot be write-back
402 * - request is write-through, return cannot be write-combine
401 */ 403 */
402 if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && 404 if ((pcm == _PAGE_CACHE_MODE_UC_MINUS &&
403 new_pcm == _PAGE_CACHE_MODE_WB) || 405 new_pcm == _PAGE_CACHE_MODE_WB) ||
404 (pcm == _PAGE_CACHE_MODE_WC && 406 (pcm == _PAGE_CACHE_MODE_WC &&
405 new_pcm == _PAGE_CACHE_MODE_WB)) { 407 new_pcm == _PAGE_CACHE_MODE_WB) ||
408 (pcm == _PAGE_CACHE_MODE_WT &&
409 new_pcm == _PAGE_CACHE_MODE_WB) ||
410 (pcm == _PAGE_CACHE_MODE_WT &&
411 new_pcm == _PAGE_CACHE_MODE_WC)) {
406 return 0; 412 return 0;
407 } 413 }
408 414
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 78f0c8cbe316..13f310bfc09a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -367,6 +367,9 @@ extern int nx_enabled;
367#define pgprot_writecombine pgprot_writecombine 367#define pgprot_writecombine pgprot_writecombine
368extern pgprot_t pgprot_writecombine(pgprot_t prot); 368extern pgprot_t pgprot_writecombine(pgprot_t prot);
369 369
370#define pgprot_writethrough pgprot_writethrough
371extern pgprot_t pgprot_writethrough(pgprot_t prot);
372
370/* Indicate that x86 has its own track and untrack pfn vma functions */ 373/* Indicate that x86 has its own track and untrack pfn vma functions */
371#define __HAVE_PFNMAP_TRACKING 374#define __HAVE_PFNMAP_TRACKING
372 375
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index a90f8972dad5..a4a77286cb1d 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -5,12 +5,14 @@
5 5
6/* misc architecture specific prototypes */ 6/* misc architecture specific prototypes */
7 7
8void system_call(void);
9void syscall_init(void); 8void syscall_init(void);
10 9
11void ia32_syscall(void); 10void entry_SYSCALL_64(void);
12void ia32_cstar_target(void); 11void entry_SYSCALL_compat(void);
13void ia32_sysenter_target(void); 12void entry_INT80_32(void);
13void entry_INT80_compat(void);
14void entry_SYSENTER_32(void);
15void entry_SYSENTER_compat(void);
14 16
15void x86_configure_nx(void); 17void x86_configure_nx(void);
16void x86_report_nx(void); 18void x86_report_nx(void);
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
new file mode 100644
index 000000000000..9d51fae1cba3
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock.h
@@ -0,0 +1,57 @@
1#ifndef _ASM_X86_QSPINLOCK_H
2#define _ASM_X86_QSPINLOCK_H
3
4#include <asm/cpufeature.h>
5#include <asm-generic/qspinlock_types.h>
6#include <asm/paravirt.h>
7
8#define queued_spin_unlock queued_spin_unlock
9/**
10 * queued_spin_unlock - release a queued spinlock
11 * @lock : Pointer to queued spinlock structure
12 *
13 * A smp_store_release() on the least-significant byte.
14 */
15static inline void native_queued_spin_unlock(struct qspinlock *lock)
16{
17 smp_store_release((u8 *)lock, 0);
18}
19
20#ifdef CONFIG_PARAVIRT_SPINLOCKS
21extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
22extern void __pv_init_lock_hash(void);
23extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
24extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
25
26static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
27{
28 pv_queued_spin_lock_slowpath(lock, val);
29}
30
31static inline void queued_spin_unlock(struct qspinlock *lock)
32{
33 pv_queued_spin_unlock(lock);
34}
35#else
36static inline void queued_spin_unlock(struct qspinlock *lock)
37{
38 native_queued_spin_unlock(lock);
39}
40#endif
41
42#define virt_queued_spin_lock virt_queued_spin_lock
43
44static inline bool virt_queued_spin_lock(struct qspinlock *lock)
45{
46 if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
47 return false;
48
49 while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
50 cpu_relax();
51
52 return true;
53}
54
55#include <asm-generic/qspinlock.h>
56
57#endif /* _ASM_X86_QSPINLOCK_H */
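A hedged sketch of how the pieces in this header fit into a lock acquisition: hypervisor guests degrade to a plain test-and-set via virt_queued_spin_lock(), everyone else tries the uncontended cmpxchg fast path and only then enters the slow path. This is simplified; the real fast path and the exact placement of the hypervisor check live in asm-generic/qspinlock.h and kernel/locking/qspinlock.c.

#include <asm/qspinlock.h>

static __always_inline void example_queued_spin_lock(struct qspinlock *lock)
{
	u32 val;

	if (virt_queued_spin_lock(lock))	/* hypervisor guest: plain test-and-set */
		return;

	val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
	if (val == 0)
		return;				/* uncontended fast path */

	queued_spin_lock_slowpath(lock, val);	/* queue behind existing waiters */
}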
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
new file mode 100644
index 000000000000..b002e711ba88
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,6 @@
1#ifndef __ASM_QSPINLOCK_PARAVIRT_H
2#define __ASM_QSPINLOCK_PARAVIRT_H
3
4PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
5
6#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 5a9856eb12ba..7d5a1929d76b 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -231,11 +231,21 @@
231#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) 231#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
232 232
233#ifdef __KERNEL__ 233#ifdef __KERNEL__
234
235/*
236 * early_idt_handler_array is an array of entry points referenced in the
237 * early IDT. For simplicity, it's a real array with one entry point
238 * every nine bytes. That leaves room for an optional 'push $0' if the
239 * vector has no error code (two bytes), a 'push $vector_number' (two
240 * bytes), and a jump to the common entry code (up to five bytes).
241 */
242#define EARLY_IDT_HANDLER_SIZE 9
243
234#ifndef __ASSEMBLY__ 244#ifndef __ASSEMBLY__
235 245
236extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; 246extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
237#ifdef CONFIG_TRACING 247#ifdef CONFIG_TRACING
238# define trace_early_idt_handlers early_idt_handlers 248# define trace_early_idt_handler_array early_idt_handler_array
239#endif 249#endif
240 250
241/* 251/*
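The 9-byte EARLY_IDT_HANDLER_SIZE budget breaks down as 2 bytes for an optional "push $0" fake error code, 2 bytes for "push $vector_number", and up to 5 bytes for the near jump to the common handler. A hypothetical build-time check of that arithmetic (not part of the patch):

#include <linux/bug.h>
#include <asm/segment.h>

static inline void example_early_idt_stub_size_check(void)
{
	/* push $0 (2) + push $vector (2) + jmp rel32 (5) must fit exactly */
	BUILD_BUG_ON(2 + 2 + 5 != EARLY_IDT_HANDLER_SIZE);
}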
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index aeb4666e0c0a..2270e41b32fd 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -215,6 +215,44 @@ static inline void clwb(volatile void *__p)
215 : [pax] "a" (p)); 215 : [pax] "a" (p));
216} 216}
217 217
218/**
219 * pcommit_sfence() - persistent commit and fence
220 *
221 * The PCOMMIT instruction ensures that data that has been flushed from the
222 * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
223 * memory and is durable on the DIMM. The primary use case for this is
224 * persistent memory.
225 *
226 * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
227 * with appropriate fencing.
228 *
229 * Example:
230 * void flush_and_commit_buffer(void *vaddr, unsigned int size)
231 * {
232 * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
233 * void *vend = vaddr + size;
234 * void *p;
235 *
236 * for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
237 * p < vend; p += boot_cpu_data.x86_clflush_size)
238 * clwb(p);
239 *
240 * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
241 * // MFENCE via mb() also works
242 * wmb();
243 *
244 * // PCOMMIT and the required SFENCE for ordering
245 * pcommit_sfence();
246 * }
247 *
248 * After this function completes the data pointed to by 'vaddr' has been
249 * accepted to memory and will be durable if the 'vaddr' points to persistent
250 * memory.
251 *
252 * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
253 * things we include both the PCOMMIT and the required SFENCE in the
254 * alternatives generated by pcommit_sfence().
255 */
218static inline void pcommit_sfence(void) 256static inline void pcommit_sfence(void)
219{ 257{
220 alternative(ASM_NOP7, 258 alternative(ASM_NOP7,
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 64b611782ef0..be0a05913b91 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -42,6 +42,10 @@
42extern struct static_key paravirt_ticketlocks_enabled; 42extern struct static_key paravirt_ticketlocks_enabled;
43static __always_inline bool static_key_false(struct static_key *key); 43static __always_inline bool static_key_false(struct static_key *key);
44 44
45#ifdef CONFIG_QUEUED_SPINLOCKS
46#include <asm/qspinlock.h>
47#else
48
45#ifdef CONFIG_PARAVIRT_SPINLOCKS 49#ifdef CONFIG_PARAVIRT_SPINLOCKS
46 50
47static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) 51static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
@@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
196 cpu_relax(); 200 cpu_relax();
197 } 201 }
198} 202}
203#endif /* CONFIG_QUEUED_SPINLOCKS */
199 204
200/* 205/*
201 * Read-write spinlocks, allowing multiple readers 206 * Read-write spinlocks, allowing multiple readers
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 5f9d7572d82b..65c3e37f879a 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -23,6 +23,9 @@ typedef u32 __ticketpair_t;
23 23
24#define TICKET_SHIFT (sizeof(__ticket_t) * 8) 24#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
25 25
26#ifdef CONFIG_QUEUED_SPINLOCKS
27#include <asm-generic/qspinlock_types.h>
28#else
26typedef struct arch_spinlock { 29typedef struct arch_spinlock {
27 union { 30 union {
28 __ticketpair_t head_tail; 31 __ticketpair_t head_tail;
@@ -33,6 +36,7 @@ typedef struct arch_spinlock {
33} arch_spinlock_t; 36} arch_spinlock_t;
34 37
35#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } 38#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
39#endif /* CONFIG_QUEUED_SPINLOCKS */
36 40
37#include <asm-generic/qrwlock_types.h> 41#include <asm-generic/qrwlock_types.h>
38 42
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 0e8f04f2c26f..8d717faeed22 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -26,7 +26,7 @@
26#define _ASM_X86_TOPOLOGY_H 26#define _ASM_X86_TOPOLOGY_H
27 27
28#ifdef CONFIG_X86_32 28#ifdef CONFIG_X86_32
29# ifdef CONFIG_X86_HT 29# ifdef CONFIG_SMP
30# define ENABLE_TOPO_DEFINES 30# define ENABLE_TOPO_DEFINES
31# endif 31# endif
32#else 32#else
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 4cab890007a7..38a09a13a9bc 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -101,6 +101,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single);
101DEFINE_IRQ_VECTOR_EVENT(threshold_apic); 101DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
102 102
103/* 103/*
104 * deferred_error_apic - called when entering/exiting a deferred apic interrupt
105 * vector handler
106 */
107DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
108
109/*
104 * thermal_apic - called when entering/exiting a thermal apic interrupt 110 * thermal_apic - called when entering/exiting a thermal apic interrupt
105 * vector handler 111 * vector handler
106 */ 112 */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 4e49d7dff78e..c5380bea2a36 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi;
108void math_emulate(struct math_emu_info *); 108void math_emulate(struct math_emu_info *);
109#ifndef CONFIG_X86_32 109#ifndef CONFIG_X86_32
110asmlinkage void smp_thermal_interrupt(void); 110asmlinkage void smp_thermal_interrupt(void);
111asmlinkage void mce_threshold_interrupt(void); 111asmlinkage void smp_threshold_interrupt(void);
112asmlinkage void smp_deferred_error_interrupt(void);
112#endif 113#endif
113 114
114extern enum ctx_state ist_enter(struct pt_regs *regs); 115extern enum ctx_state ist_enter(struct pt_regs *regs);
diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h
index 155e51048fa4..c41f4fe25483 100644
--- a/arch/x86/include/uapi/asm/msr.h
+++ b/arch/x86/include/uapi/asm/msr.h
@@ -1,8 +1,6 @@
1#ifndef _UAPI_ASM_X86_MSR_H 1#ifndef _UAPI_ASM_X86_MSR_H
2#define _UAPI_ASM_X86_MSR_H 2#define _UAPI_ASM_X86_MSR_H
3 3
4#include <asm/msr-index.h>
5
6#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
7 5
8#include <linux/types.h> 6#include <linux/types.h>
diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h
index d0acb658c8f4..7528dcf59691 100644
--- a/arch/x86/include/uapi/asm/mtrr.h
+++ b/arch/x86/include/uapi/asm/mtrr.h
@@ -103,7 +103,7 @@ struct mtrr_state_type {
103#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry) 103#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
104#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry) 104#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry)
105 105
106/* These are the region types */ 106/* MTRR memory types, which are defined in SDM */
107#define MTRR_TYPE_UNCACHABLE 0 107#define MTRR_TYPE_UNCACHABLE 0
108#define MTRR_TYPE_WRCOMB 1 108#define MTRR_TYPE_WRCOMB 1
109/*#define MTRR_TYPE_ 2*/ 109/*#define MTRR_TYPE_ 2*/
@@ -113,5 +113,11 @@ struct mtrr_state_type {
113#define MTRR_TYPE_WRBACK 6 113#define MTRR_TYPE_WRBACK 6
114#define MTRR_NUM_TYPES 7 114#define MTRR_NUM_TYPES 7
115 115
116/*
117 * Invalid MTRR memory type. mtrr_type_lookup() returns this value when
118 * MTRRs are disabled. Note, this value is allocated from the reserved
119 * values (0x7-0xff) of the MTRR memory types.
120 */
121#define MTRR_TYPE_INVALID 0xff
116 122
117#endif /* _UAPI_ASM_X86_MTRR_H */ 123#endif /* _UAPI_ASM_X86_MTRR_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9bcd0b56ca17..01663ee5f1b7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,7 +22,7 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
22 22
23CFLAGS_irq.o := -I$(src)/../include/asm/trace 23CFLAGS_irq.o := -I$(src)/../include/asm/trace
24 24
25obj-y := process_$(BITS).o signal.o entry_$(BITS).o 25obj-y := process_$(BITS).o signal.o
26obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 26obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
27obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o 27obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
28obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o 28obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
@@ -31,9 +31,6 @@ obj-y += probe_roms.o
31obj-$(CONFIG_X86_32) += i386_ksyms_32.o 31obj-$(CONFIG_X86_32) += i386_ksyms_32.o
32obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 32obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
33obj-$(CONFIG_X86_64) += mcount_64.o 33obj-$(CONFIG_X86_64) += mcount_64.o
34obj-y += syscall_$(BITS).o vsyscall_gtod.o
35obj-$(CONFIG_IA32_EMULATION) += syscall_32.o
36obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
37obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o 34obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
38obj-$(CONFIG_SYSFS) += ksysfs.o 35obj-$(CONFIG_SYSFS) += ksysfs.o
39obj-y += bootflag.o e820.o 36obj-y += bootflag.o e820.o
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index dcaab87da629..d8f42f902a0f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -66,7 +66,7 @@ int main(void)
66 DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); 66 DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
67 DEFINE(NR_syscalls, sizeof(syscalls_64)); 67 DEFINE(NR_syscalls, sizeof(syscalls_64));
68 68
69 DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1); 69 DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
70 DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); 70 DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
71 71
72 return 0; 72 return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e4cf63301ff4..eb4f01269b5d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -288,7 +288,7 @@ static int nearby_node(int apicid)
288 * Assumption: Number of cores in each internal node is the same. 288 * Assumption: Number of cores in each internal node is the same.
289 * (2) AMD processors supporting compute units 289 * (2) AMD processors supporting compute units
290 */ 290 */
291#ifdef CONFIG_X86_HT 291#ifdef CONFIG_SMP
292static void amd_get_topology(struct cpuinfo_x86 *c) 292static void amd_get_topology(struct cpuinfo_x86 *c)
293{ 293{
294 u32 nodes, cores_per_cu = 1; 294 u32 nodes, cores_per_cu = 1;
@@ -341,7 +341,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
341 */ 341 */
342static void amd_detect_cmp(struct cpuinfo_x86 *c) 342static void amd_detect_cmp(struct cpuinfo_x86 *c)
343{ 343{
344#ifdef CONFIG_X86_HT 344#ifdef CONFIG_SMP
345 unsigned bits; 345 unsigned bits;
346 int cpu = smp_processor_id(); 346 int cpu = smp_processor_id();
347 347
@@ -420,7 +420,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
420 420
421static void early_init_amd_mc(struct cpuinfo_x86 *c) 421static void early_init_amd_mc(struct cpuinfo_x86 *c)
422{ 422{
423#ifdef CONFIG_X86_HT 423#ifdef CONFIG_SMP
424 unsigned bits, ecx; 424 unsigned bits, ecx;
425 425
426 /* Multi core CPU? */ 426 /* Multi core CPU? */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6bec0b55863e..cc7f753e571d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -508,7 +508,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
508 508
509void detect_ht(struct cpuinfo_x86 *c) 509void detect_ht(struct cpuinfo_x86 *c)
510{ 510{
511#ifdef CONFIG_X86_HT 511#ifdef CONFIG_SMP
512 u32 eax, ebx, ecx, edx; 512 u32 eax, ebx, ecx, edx;
513 int index_msb, core_bits; 513 int index_msb, core_bits;
514 static bool printed; 514 static bool printed;
@@ -844,7 +844,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
844 if (c->cpuid_level >= 0x00000001) { 844 if (c->cpuid_level >= 0x00000001) {
845 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; 845 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
846#ifdef CONFIG_X86_32 846#ifdef CONFIG_X86_32
847# ifdef CONFIG_X86_HT 847# ifdef CONFIG_SMP
848 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); 848 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
849# else 849# else
850 c->apicid = c->initial_apicid; 850 c->apicid = c->initial_apicid;
@@ -1026,7 +1026,7 @@ void enable_sep_cpu(void)
1026 (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), 1026 (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
1027 0); 1027 0);
1028 1028
1029 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0); 1029 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
1030 1030
1031out: 1031out:
1032 put_cpu(); 1032 put_cpu();
@@ -1204,10 +1204,10 @@ void syscall_init(void)
1204 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. 1204 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
1205 */ 1205 */
1206 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); 1206 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
1207 wrmsrl(MSR_LSTAR, system_call); 1207 wrmsrl(MSR_LSTAR, entry_SYSCALL_64);
1208 1208
1209#ifdef CONFIG_IA32_EMULATION 1209#ifdef CONFIG_IA32_EMULATION
1210 wrmsrl(MSR_CSTAR, ia32_cstar_target); 1210 wrmsrl(MSR_CSTAR, entry_SYSCALL_compat);
1211 /* 1211 /*
1212 * This only works on Intel CPUs. 1212 * This only works on Intel CPUs.
1213 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. 1213 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
@@ -1216,7 +1216,7 @@ void syscall_init(void)
1216 */ 1216 */
1217 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 1217 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
1218 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); 1218 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
1219 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); 1219 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
1220#else 1220#else
1221 wrmsrl(MSR_CSTAR, ignore_sysret); 1221 wrmsrl(MSR_CSTAR, ignore_sysret);
1222 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); 1222 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index edcb0e28c336..be4febc58b94 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -654,7 +654,7 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
654 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ 654 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
655 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ 655 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
656 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; 656 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
657#ifdef CONFIG_X86_HT 657#ifdef CONFIG_SMP
658 unsigned int cpu = c->cpu_index; 658 unsigned int cpu = c->cpu_index;
659#endif 659#endif
660 660
@@ -773,19 +773,19 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
773 773
774 if (new_l2) { 774 if (new_l2) {
775 l2 = new_l2; 775 l2 = new_l2;
776#ifdef CONFIG_X86_HT 776#ifdef CONFIG_SMP
777 per_cpu(cpu_llc_id, cpu) = l2_id; 777 per_cpu(cpu_llc_id, cpu) = l2_id;
778#endif 778#endif
779 } 779 }
780 780
781 if (new_l3) { 781 if (new_l3) {
782 l3 = new_l3; 782 l3 = new_l3;
783#ifdef CONFIG_X86_HT 783#ifdef CONFIG_SMP
784 per_cpu(cpu_llc_id, cpu) = l3_id; 784 per_cpu(cpu_llc_id, cpu) = l3_id;
785#endif 785#endif
786 } 786 }
787 787
788#ifdef CONFIG_X86_HT 788#ifdef CONFIG_SMP
789 /* 789 /*
790 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in 790 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
791 * turns means that the only possibility is SMT (as indicated in 791 * turns means that the only possibility is SMT (as indicated in
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e535533d5ab8..5b974c97e31e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
708 struct pt_regs *regs) 708 struct pt_regs *regs)
709{ 709{
710 int i, ret = 0; 710 int i, ret = 0;
711 char *tmp;
711 712
712 for (i = 0; i < mca_cfg.banks; i++) { 713 for (i = 0; i < mca_cfg.banks; i++) {
713 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 714 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
@@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
716 if (quirk_no_way_out) 717 if (quirk_no_way_out)
717 quirk_no_way_out(i, m, regs); 718 quirk_no_way_out(i, m, regs);
718 } 719 }
719 if (mce_severity(m, mca_cfg.tolerant, msg, true) >= 720
720 MCE_PANIC_SEVERITY) 721 if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
722 *msg = tmp;
721 ret = 1; 723 ret = 1;
724 }
722 } 725 }
723 return ret; 726 return ret;
724} 727}
@@ -1047,6 +1050,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1047 char *msg = "Unknown"; 1050 char *msg = "Unknown";
1048 u64 recover_paddr = ~0ull; 1051 u64 recover_paddr = ~0ull;
1049 int flags = MF_ACTION_REQUIRED; 1052 int flags = MF_ACTION_REQUIRED;
1053 int lmce = 0;
1050 1054
1051 prev_state = ist_enter(regs); 1055 prev_state = ist_enter(regs);
1052 1056
@@ -1074,11 +1078,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1074 kill_it = 1; 1078 kill_it = 1;
1075 1079
1076 /* 1080 /*
1077 * Go through all the banks in exclusion of the other CPUs. 1081 * Check if this MCE is signaled to only this logical processor
1078 * This way we don't report duplicated events on shared banks
1079 * because the first one to see it will clear it.
1080 */ 1082 */
1081 order = mce_start(&no_way_out); 1083 if (m.mcgstatus & MCG_STATUS_LMCES)
1084 lmce = 1;
1085 else {
1086 /*
1087 * Go through all the banks in exclusion of the other CPUs.
1088 * This way we don't report duplicated events on shared banks
1089 * because the first one to see it will clear it.
1090 * If this is a Local MCE, then no need to perform rendezvous.
1091 */
1092 order = mce_start(&no_way_out);
1093 }
1094
1082 for (i = 0; i < cfg->banks; i++) { 1095 for (i = 0; i < cfg->banks; i++) {
1083 __clear_bit(i, toclear); 1096 __clear_bit(i, toclear);
1084 if (!test_bit(i, valid_banks)) 1097 if (!test_bit(i, valid_banks))
@@ -1155,8 +1168,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1155 * Do most of the synchronization with other CPUs. 1168 * Do most of the synchronization with other CPUs.
1156 * When there's any problem use only local no_way_out state. 1169 * When there's any problem use only local no_way_out state.
1157 */ 1170 */
1158 if (mce_end(order) < 0) 1171 if (!lmce) {
1159 no_way_out = worst >= MCE_PANIC_SEVERITY; 1172 if (mce_end(order) < 0)
1173 no_way_out = worst >= MCE_PANIC_SEVERITY;
1174 } else {
1175 /*
1176 * Local MCE skipped calling mce_reign()
1177 * If we found a fatal error, we need to panic here.
1178 */
1179 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
1180 mce_panic("Machine check from unknown source",
1181 NULL, NULL);
1182 }
1160 1183
1161 /* 1184 /*
1162 * At insane "tolerant" levels we take no action. Otherwise 1185 * At insane "tolerant" levels we take no action. Otherwise
@@ -1637,10 +1660,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1637 mce_intel_feature_init(c); 1660 mce_intel_feature_init(c);
1638 mce_adjust_timer = cmci_intel_adjust_timer; 1661 mce_adjust_timer = cmci_intel_adjust_timer;
1639 break; 1662 break;
1640 case X86_VENDOR_AMD: 1663
1664 case X86_VENDOR_AMD: {
1665 u32 ebx = cpuid_ebx(0x80000007);
1666
1641 mce_amd_feature_init(c); 1667 mce_amd_feature_init(c);
1642 mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1; 1668 mce_flags.overflow_recov = !!(ebx & BIT(0));
1669 mce_flags.succor = !!(ebx & BIT(1));
1643 break; 1670 break;
1671 }
1672
1644 default: 1673 default:
1645 break; 1674 break;
1646 } 1675 }
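
The vendor-init hunk above reads CPUID leaf 0x80000007 once into EBX and derives the two new mce_flags from its low bits. A small sketch of the same decode, under the assumption that bit 0 is MCA overflow recovery and bit 1 is SUCCOR on AMD parts (the function name is illustrative):

	#include <linux/kernel.h>
	#include <linux/bitops.h>	/* BIT() */
	#include <asm/processor.h>	/* cpuid_ebx() */

	static void report_amd_mce_features(void)
	{
		u32 ebx = cpuid_ebx(0x80000007);

		pr_info("overflow_recov=%d succor=%d\n",
			!!(ebx & BIT(0)), !!(ebx & BIT(1)));
	}
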
@@ -1976,6 +2005,7 @@ void mce_disable_bank(int bank)
1976/* 2005/*
1977 * mce=off Disables machine check 2006 * mce=off Disables machine check
1978 * mce=no_cmci Disables CMCI 2007 * mce=no_cmci Disables CMCI
2008 * mce=no_lmce Disables LMCE
1979 * mce=dont_log_ce Clears corrected events silently, no log created for CEs. 2009 * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
1980 * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. 2010 * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
1981 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) 2011 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@@ -1999,6 +2029,8 @@ static int __init mcheck_enable(char *str)
1999 cfg->disabled = true; 2029 cfg->disabled = true;
2000 else if (!strcmp(str, "no_cmci")) 2030 else if (!strcmp(str, "no_cmci"))
2001 cfg->cmci_disabled = true; 2031 cfg->cmci_disabled = true;
2032 else if (!strcmp(str, "no_lmce"))
2033 cfg->lmce_disabled = true;
2002 else if (!strcmp(str, "dont_log_ce")) 2034 else if (!strcmp(str, "dont_log_ce"))
2003 cfg->dont_log_ce = true; 2035 cfg->dont_log_ce = true;
2004 else if (!strcmp(str, "ignore_ce")) 2036 else if (!strcmp(str, "ignore_ce"))
@@ -2008,11 +2040,8 @@ static int __init mcheck_enable(char *str)
2008 else if (!strcmp(str, "bios_cmci_threshold")) 2040 else if (!strcmp(str, "bios_cmci_threshold"))
2009 cfg->bios_cmci_threshold = true; 2041 cfg->bios_cmci_threshold = true;
2010 else if (isdigit(str[0])) { 2042 else if (isdigit(str[0])) {
2011 get_option(&str, &(cfg->tolerant)); 2043 if (get_option(&str, &cfg->tolerant) == 2)
2012 if (*str == ',') {
2013 ++str;
2014 get_option(&str, &(cfg->monarch_timeout)); 2044 get_option(&str, &(cfg->monarch_timeout));
2015 }
2016 } else { 2045 } else {
2017 pr_info("mce argument %s ignored. Please use /sys\n", str); 2046 pr_info("mce argument %s ignored. Please use /sys\n", str);
2018 return 0; 2047 return 0;
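
The mcheck_enable() change above works because get_option() returns 2 when the parsed integer is followed by a comma and leaves the cursor just past that comma, so the optional monarch timeout can be picked up with one more call. The same pattern in isolation (the function name and the example string are illustrative):

	#include <linux/kernel.h>	/* get_option() */

	/* Parses "tolerant[,monarch_timeout]", e.g. the "1,500" in mce=1,500. */
	static void parse_mce_numbers(char *str, int *tolerant, int *timeout)
	{
		if (get_option(&str, tolerant) == 2)
			get_option(&str, timeout);
	}
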
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 55ad9b37cae8..e99b15077e94 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,19 +1,13 @@
1/* 1/*
2 * (c) 2005-2012 Advanced Micro Devices, Inc. 2 * (c) 2005-2015 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the 3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or 4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html 5 * http://www.gnu.org/licenses/gpl.html
6 * 6 *
7 * Written by Jacob Shin - AMD, Inc. 7 * Written by Jacob Shin - AMD, Inc.
8 *
9 * Maintained by: Borislav Petkov <bp@alien8.de> 8 * Maintained by: Borislav Petkov <bp@alien8.de>
10 * 9 *
11 * April 2006 10 * All MC4_MISCi registers are shared between cores on a node.
12 * - added support for AMD Family 0x10 processors
13 * May 2012
14 * - major scrubbing
15 *
16 * All MC4_MISCi registers are shared between multi-cores
17 */ 11 */
18#include <linux/interrupt.h> 12#include <linux/interrupt.h>
19#include <linux/notifier.h> 13#include <linux/notifier.h>
@@ -32,6 +26,7 @@
32#include <asm/idle.h> 26#include <asm/idle.h>
33#include <asm/mce.h> 27#include <asm/mce.h>
34#include <asm/msr.h> 28#include <asm/msr.h>
29#include <asm/trace/irq_vectors.h>
35 30
36#define NR_BLOCKS 9 31#define NR_BLOCKS 9
37#define THRESHOLD_MAX 0xFFF 32#define THRESHOLD_MAX 0xFFF
@@ -47,6 +42,13 @@
47#define MASK_BLKPTR_LO 0xFF000000 42#define MASK_BLKPTR_LO 0xFF000000
48#define MCG_XBLK_ADDR 0xC0000400 43#define MCG_XBLK_ADDR 0xC0000400
49 44
45/* Deferred error settings */
46#define MSR_CU_DEF_ERR 0xC0000410
47#define MASK_DEF_LVTOFF 0x000000F0
48#define MASK_DEF_INT_TYPE 0x00000006
49#define DEF_LVT_OFF 0x2
50#define DEF_INT_TYPE_APIC 0x2
51
50static const char * const th_names[] = { 52static const char * const th_names[] = {
51 "load_store", 53 "load_store",
52 "insn_fetch", 54 "insn_fetch",
@@ -60,6 +62,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
60static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 62static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
61 63
62static void amd_threshold_interrupt(void); 64static void amd_threshold_interrupt(void);
65static void amd_deferred_error_interrupt(void);
66
67static void default_deferred_error_interrupt(void)
68{
69 pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
70}
71void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
63 72
64/* 73/*
65 * CPU Initialization 74 * CPU Initialization
@@ -196,7 +205,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
196 threshold_restart_bank(&tr); 205 threshold_restart_bank(&tr);
197}; 206};
198 207
199static int setup_APIC_mce(int reserved, int new) 208static int setup_APIC_mce_threshold(int reserved, int new)
200{ 209{
201 if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, 210 if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
202 APIC_EILVT_MSG_FIX, 0)) 211 APIC_EILVT_MSG_FIX, 0))
@@ -205,6 +214,39 @@ static int setup_APIC_mce(int reserved, int new)
205 return reserved; 214 return reserved;
206} 215}
207 216
217static int setup_APIC_deferred_error(int reserved, int new)
218{
219 if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
220 APIC_EILVT_MSG_FIX, 0))
221 return new;
222
223 return reserved;
224}
225
226static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
227{
228 u32 low = 0, high = 0;
229 int def_offset = -1, def_new;
230
231 if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
232 return;
233
234 def_new = (low & MASK_DEF_LVTOFF) >> 4;
235 if (!(low & MASK_DEF_LVTOFF)) {
236 pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
237 def_new = DEF_LVT_OFF;
238 low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
239 }
240
241 def_offset = setup_APIC_deferred_error(def_offset, def_new);
242 if ((def_offset == def_new) &&
243 (deferred_error_int_vector != amd_deferred_error_interrupt))
244 deferred_error_int_vector = amd_deferred_error_interrupt;
245
246 low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
247 wrmsr(MSR_CU_DEF_ERR, low, high);
248}
249
208/* cpu init entry point, called from mce.c with preempt off */ 250/* cpu init entry point, called from mce.c with preempt off */
209void mce_amd_feature_init(struct cpuinfo_x86 *c) 251void mce_amd_feature_init(struct cpuinfo_x86 *c)
210{ 252{
@@ -252,7 +294,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
252 294
253 b.interrupt_enable = 1; 295 b.interrupt_enable = 1;
254 new = (high & MASK_LVTOFF_HI) >> 20; 296 new = (high & MASK_LVTOFF_HI) >> 20;
255 offset = setup_APIC_mce(offset, new); 297 offset = setup_APIC_mce_threshold(offset, new);
256 298
257 if ((offset == new) && 299 if ((offset == new) &&
258 (mce_threshold_vector != amd_threshold_interrupt)) 300 (mce_threshold_vector != amd_threshold_interrupt))
@@ -262,6 +304,73 @@ init:
262 mce_threshold_block_init(&b, offset); 304 mce_threshold_block_init(&b, offset);
263 } 305 }
264 } 306 }
307
308 if (mce_flags.succor)
309 deferred_error_interrupt_enable(c);
310}
311
312static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
313{
314 struct mce m;
315 u64 status;
316
317 rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
318 if (!(status & MCI_STATUS_VAL))
319 return;
320
321 mce_setup(&m);
322
323 m.status = status;
324 m.bank = bank;
325
326 if (threshold_err)
327 m.misc = misc;
328
329 if (m.status & MCI_STATUS_ADDRV)
330 rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
331
332 mce_log(&m);
333 wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
334}
335
336static inline void __smp_deferred_error_interrupt(void)
337{
338 inc_irq_stat(irq_deferred_error_count);
339 deferred_error_int_vector();
340}
341
342asmlinkage __visible void smp_deferred_error_interrupt(void)
343{
344 entering_irq();
345 __smp_deferred_error_interrupt();
346 exiting_ack_irq();
347}
348
349asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
350{
351 entering_irq();
352 trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
353 __smp_deferred_error_interrupt();
354 trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
355 exiting_ack_irq();
356}
357
358/* APIC interrupt handler for deferred errors */
359static void amd_deferred_error_interrupt(void)
360{
361 u64 status;
362 unsigned int bank;
363
364 for (bank = 0; bank < mca_cfg.banks; ++bank) {
365 rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
366
367 if (!(status & MCI_STATUS_VAL) ||
368 !(status & MCI_STATUS_DEFERRED))
369 continue;
370
371 __log_error(bank, false, 0);
372 break;
373 }
265} 374}
266 375
267/* 376/*
@@ -273,12 +382,12 @@ init:
273 * the interrupt goes off when error_count reaches threshold_limit. 382 * the interrupt goes off when error_count reaches threshold_limit.
274 * the handler will simply log mcelog w/ software defined bank number. 383 * the handler will simply log mcelog w/ software defined bank number.
275 */ 384 */
385
276static void amd_threshold_interrupt(void) 386static void amd_threshold_interrupt(void)
277{ 387{
278 u32 low = 0, high = 0, address = 0; 388 u32 low = 0, high = 0, address = 0;
279 int cpu = smp_processor_id(); 389 int cpu = smp_processor_id();
280 unsigned int bank, block; 390 unsigned int bank, block;
281 struct mce m;
282 391
283 /* assume first bank caused it */ 392 /* assume first bank caused it */
284 for (bank = 0; bank < mca_cfg.banks; ++bank) { 393 for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -321,15 +430,7 @@ static void amd_threshold_interrupt(void)
321 return; 430 return;
322 431
323log: 432log:
324 mce_setup(&m); 433 __log_error(bank, true, ((u64)high << 32) | low);
325 rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
326 if (!(m.status & MCI_STATUS_VAL))
327 return;
328 m.misc = ((u64)high << 32) | low;
329 m.bank = bank;
330 mce_log(&m);
331
332 wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
333} 434}
334 435
335/* 436/*
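
The deferred-error setup added to this file reads MSR_CU_DEF_ERR and decodes two small fields from its low word using the MASK_DEF_* macros defined near the top of the patch. A worked decode with a made-up register value, just to make the shifts concrete (the function exists only for the example and assumes those file-local macros are visible):

	#include <linux/kernel.h>
	#include <linux/types.h>

	static void decode_cu_def_err_example(void)
	{
		u32 low = 0x22;	/* made-up low word of MSR_CU_DEF_ERR */
		u32 lvt_off = (low & MASK_DEF_LVTOFF) >> 4;	/* (0x22 & 0xF0) >> 4 = 0x2 = DEF_LVT_OFF */
		u32 type    = low & MASK_DEF_INT_TYPE;		/* 0x22 & 0x06 = 0x2 = DEF_INT_TYPE_APIC */

		pr_info("deferred error LVT offset %#x, delivery type %#x\n", lvt_off, type);
	}
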
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b4a41cf030ed..844f56c5616d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -91,6 +91,36 @@ static int cmci_supported(int *banks)
91 return !!(cap & MCG_CMCI_P); 91 return !!(cap & MCG_CMCI_P);
92} 92}
93 93
94static bool lmce_supported(void)
95{
96 u64 tmp;
97
98 if (mca_cfg.lmce_disabled)
99 return false;
100
101 rdmsrl(MSR_IA32_MCG_CAP, tmp);
102
103 /*
104 * LMCE depends on recovery support in the processor. Hence both
105 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
106 */
107 if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
108 (MCG_SER_P | MCG_LMCE_P))
109 return false;
110
111 /*
112 * BIOS should indicate support for LMCE by setting bit 20 in
113 * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
114 * generate a #GP fault.
115 */
116 rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
117 if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
118 (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
119 return true;
120
121 return false;
122}
123
94bool mce_intel_cmci_poll(void) 124bool mce_intel_cmci_poll(void)
95{ 125{
96 if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) 126 if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@@ -405,8 +435,22 @@ static void intel_init_cmci(void)
405 cmci_recheck(); 435 cmci_recheck();
406} 436}
407 437
438void intel_init_lmce(void)
439{
440 u64 val;
441
442 if (!lmce_supported())
443 return;
444
445 rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
446
447 if (!(val & MCG_EXT_CTL_LMCE_EN))
448 wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
449}
450
408void mce_intel_feature_init(struct cpuinfo_x86 *c) 451void mce_intel_feature_init(struct cpuinfo_x86 *c)
409{ 452{
410 intel_init_thermal(c); 453 intel_init_thermal(c);
411 intel_init_cmci(); 454 intel_init_cmci();
455 intel_init_lmce();
412} 456}
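
lmce_supported() above insists on two pairs of bits being set together: MCG_SER_P plus MCG_LMCE_P in MCG_CAP, and FEATURE_CONTROL_LOCKED plus FEATURE_CONTROL_LMCE in IA32_FEATURE_CONTROL. The mask-and-compare idiom it uses is simply "all of these bits must be present"; a tiny illustrative helper expressing the same test:

	#include <linux/types.h>

	/* True only when every bit of @required is set in @val. */
	static bool all_bits_set(u64 val, u64 required)
	{
		return (val & required) == required;
	}

With that, the MCG_CAP check reads as all_bits_set(cap, MCG_SER_P | MCG_LMCE_P).
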
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 5f90b85ff22e..70d7c93f4550 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -98,7 +98,8 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
98 continue; 98 continue;
99 base = range_state[i].base_pfn; 99 base = range_state[i].base_pfn;
100 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && 100 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
101 (mtrr_state.enabled & 1)) { 101 (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
102 (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
102 /* Var MTRR contains UC entry below 1M? Skip it: */ 103 /* Var MTRR contains UC entry below 1M? Skip it: */
103 printk(BIOS_BUG_MSG, i); 104 printk(BIOS_BUG_MSG, i);
104 if (base + size <= (1<<(20-PAGE_SHIFT))) 105 if (base + size <= (1<<(20-PAGE_SHIFT)))
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7d74f7b3c6ba..3b533cf37c74 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -102,59 +102,76 @@ static int check_type_overlap(u8 *prev, u8 *curr)
102 return 0; 102 return 0;
103} 103}
104 104
105/* 105/**
106 * Error/Semi-error returns: 106 * mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries
107 * 0xFF - when MTRR is not enabled 107 *
108 * *repeat == 1 implies [start:end] spanned across MTRR range and type returned 108 * Return the MTRR fixed memory type of 'start'.
109 * corresponds only to [start:*partial_end]. 109 *
110 * Caller has to lookup again for [*partial_end:end]. 110 * MTRR fixed entries are divided into the following ways:
111 * 0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges
112 * 0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges
113 * 0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges
114 *
115 * Return Values:
116 * MTRR_TYPE_(type) - Matched memory type
117 * MTRR_TYPE_INVALID - Unmatched
118 */
119static u8 mtrr_type_lookup_fixed(u64 start, u64 end)
120{
121 int idx;
122
123 if (start >= 0x100000)
124 return MTRR_TYPE_INVALID;
125
126 /* 0x0 - 0x7FFFF */
127 if (start < 0x80000) {
128 idx = 0;
129 idx += (start >> 16);
130 return mtrr_state.fixed_ranges[idx];
131 /* 0x80000 - 0xBFFFF */
132 } else if (start < 0xC0000) {
133 idx = 1 * 8;
134 idx += ((start - 0x80000) >> 14);
135 return mtrr_state.fixed_ranges[idx];
136 }
137
138 /* 0xC0000 - 0xFFFFF */
139 idx = 3 * 8;
140 idx += ((start - 0xC0000) >> 12);
141 return mtrr_state.fixed_ranges[idx];
142}
143
144/**
145 * mtrr_type_lookup_variable - look up memory type in MTRR variable entries
146 *
147 * Return Value:
148 * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched)
149 *
150 * Output Arguments:
151 * repeat - Set to 1 when [start:end] spanned across MTRR range and type
152 * returned corresponds only to [start:*partial_end]. Caller has
153 * to lookup again for [*partial_end:end].
154 *
155 * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
156 * region is fully covered by a single MTRR entry or the default
157 * type.
111 */ 158 */
112static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) 159static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
160 int *repeat, u8 *uniform)
113{ 161{
114 int i; 162 int i;
115 u64 base, mask; 163 u64 base, mask;
116 u8 prev_match, curr_match; 164 u8 prev_match, curr_match;
117 165
118 *repeat = 0; 166 *repeat = 0;
119 if (!mtrr_state_set) 167 *uniform = 1;
120 return 0xFF;
121
122 if (!mtrr_state.enabled)
123 return 0xFF;
124 168
125 /* Make end inclusive end, instead of exclusive */ 169 /* Make end inclusive instead of exclusive */
126 end--; 170 end--;
127 171
128 /* Look in fixed ranges. Just return the type as per start */ 172 prev_match = MTRR_TYPE_INVALID;
129 if (mtrr_state.have_fixed && (start < 0x100000)) {
130 int idx;
131
132 if (start < 0x80000) {
133 idx = 0;
134 idx += (start >> 16);
135 return mtrr_state.fixed_ranges[idx];
136 } else if (start < 0xC0000) {
137 idx = 1 * 8;
138 idx += ((start - 0x80000) >> 14);
139 return mtrr_state.fixed_ranges[idx];
140 } else if (start < 0x1000000) {
141 idx = 3 * 8;
142 idx += ((start - 0xC0000) >> 12);
143 return mtrr_state.fixed_ranges[idx];
144 }
145 }
146
147 /*
148 * Look in variable ranges
149 * Look of multiple ranges matching this address and pick type
150 * as per MTRR precedence
151 */
152 if (!(mtrr_state.enabled & 2))
153 return mtrr_state.def_type;
154
155 prev_match = 0xFF;
156 for (i = 0; i < num_var_ranges; ++i) { 173 for (i = 0; i < num_var_ranges; ++i) {
157 unsigned short start_state, end_state; 174 unsigned short start_state, end_state, inclusive;
158 175
159 if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11))) 176 if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
160 continue; 177 continue;
@@ -166,20 +183,29 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
166 183
167 start_state = ((start & mask) == (base & mask)); 184 start_state = ((start & mask) == (base & mask));
168 end_state = ((end & mask) == (base & mask)); 185 end_state = ((end & mask) == (base & mask));
186 inclusive = ((start < base) && (end > base));
169 187
170 if (start_state != end_state) { 188 if ((start_state != end_state) || inclusive) {
171 /* 189 /*
172 * We have start:end spanning across an MTRR. 190 * We have start:end spanning across an MTRR.
173 * We split the region into 191 * We split the region into either
174 * either 192 *
175 * (start:mtrr_end) (mtrr_end:end) 193 * - start_state:1
176 * or 194 * (start:mtrr_end)(mtrr_end:end)
177 * (start:mtrr_start) (mtrr_start:end) 195 * - end_state:1
196 * (start:mtrr_start)(mtrr_start:end)
197 * - inclusive:1
198 * (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end)
199 *
178 * depending on kind of overlap. 200 * depending on kind of overlap.
179 * Return the type for first region and a pointer to 201 *
180 * the start of second region so that caller will 202 * Return the type of the first region and a pointer
181 * lookup again on the second region. 203 * to the start of next region so that caller will be
182 * Note: This way we handle multiple overlaps as well. 204 * advised to lookup again after having adjusted start
205 * and end.
206 *
207 * Note: This way we handle overlaps with multiple
208 * entries and the default type properly.
183 */ 209 */
184 if (start_state) 210 if (start_state)
185 *partial_end = base + get_mtrr_size(mask); 211 *partial_end = base + get_mtrr_size(mask);
@@ -193,59 +219,94 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
193 219
194 end = *partial_end - 1; /* end is inclusive */ 220 end = *partial_end - 1; /* end is inclusive */
195 *repeat = 1; 221 *repeat = 1;
222 *uniform = 0;
196 } 223 }
197 224
198 if ((start & mask) != (base & mask)) 225 if ((start & mask) != (base & mask))
199 continue; 226 continue;
200 227
201 curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; 228 curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
202 if (prev_match == 0xFF) { 229 if (prev_match == MTRR_TYPE_INVALID) {
203 prev_match = curr_match; 230 prev_match = curr_match;
204 continue; 231 continue;
205 } 232 }
206 233
234 *uniform = 0;
207 if (check_type_overlap(&prev_match, &curr_match)) 235 if (check_type_overlap(&prev_match, &curr_match))
208 return curr_match; 236 return curr_match;
209 } 237 }
210 238
211 if (mtrr_tom2) { 239 if (prev_match != MTRR_TYPE_INVALID)
212 if (start >= (1ULL<<32) && (end < mtrr_tom2))
213 return MTRR_TYPE_WRBACK;
214 }
215
216 if (prev_match != 0xFF)
217 return prev_match; 240 return prev_match;
218 241
219 return mtrr_state.def_type; 242 return mtrr_state.def_type;
220} 243}
221 244
222/* 245/**
223 * Returns the effective MTRR type for the region 246 * mtrr_type_lookup - look up memory type in MTRR
224 * Error return: 247 *
225 * 0xFF - when MTRR is not enabled 248 * Return Values:
249 * MTRR_TYPE_(type) - The effective MTRR type for the region
250 * MTRR_TYPE_INVALID - MTRR is disabled
251 *
252 * Output Argument:
253 * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
254 * region is fully covered by a single MTRR entry or the default
255 * type.
226 */ 256 */
227u8 mtrr_type_lookup(u64 start, u64 end) 257u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
228{ 258{
229 u8 type, prev_type; 259 u8 type, prev_type, is_uniform = 1, dummy;
230 int repeat; 260 int repeat;
231 u64 partial_end; 261 u64 partial_end;
232 262
233 type = __mtrr_type_lookup(start, end, &partial_end, &repeat); 263 if (!mtrr_state_set)
264 return MTRR_TYPE_INVALID;
265
266 if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED))
267 return MTRR_TYPE_INVALID;
268
269 /*
270 * Look up the fixed ranges first, which take priority over
271 * the variable ranges.
272 */
273 if ((start < 0x100000) &&
274 (mtrr_state.have_fixed) &&
275 (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
276 is_uniform = 0;
277 type = mtrr_type_lookup_fixed(start, end);
278 goto out;
279 }
280
281 /*
 282 * Look up the variable ranges. Look for multiple ranges matching
283 * this address and pick type as per MTRR precedence.
284 */
285 type = mtrr_type_lookup_variable(start, end, &partial_end,
286 &repeat, &is_uniform);
234 287
235 /* 288 /*
236 * Common path is with repeat = 0. 289 * Common path is with repeat = 0.
237 * However, we can have cases where [start:end] spans across some 290 * However, we can have cases where [start:end] spans across some
238 * MTRR range. Do repeated lookups for that case here. 291 * MTRR ranges and/or the default type. Do repeated lookups for
292 * that case here.
239 */ 293 */
240 while (repeat) { 294 while (repeat) {
241 prev_type = type; 295 prev_type = type;
242 start = partial_end; 296 start = partial_end;
243 type = __mtrr_type_lookup(start, end, &partial_end, &repeat); 297 is_uniform = 0;
298 type = mtrr_type_lookup_variable(start, end, &partial_end,
299 &repeat, &dummy);
244 300
245 if (check_type_overlap(&prev_type, &type)) 301 if (check_type_overlap(&prev_type, &type))
246 return type; 302 goto out;
247 } 303 }
248 304
305 if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2))
306 type = MTRR_TYPE_WRBACK;
307
308out:
309 *uniform = is_uniform;
249 return type; 310 return type;
250} 311}
251 312
@@ -347,7 +408,9 @@ static void __init print_mtrr_state(void)
347 mtrr_attrib_to_str(mtrr_state.def_type)); 408 mtrr_attrib_to_str(mtrr_state.def_type));
348 if (mtrr_state.have_fixed) { 409 if (mtrr_state.have_fixed) {
349 pr_debug("MTRR fixed ranges %sabled:\n", 410 pr_debug("MTRR fixed ranges %sabled:\n",
350 mtrr_state.enabled & 1 ? "en" : "dis"); 411 ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
412 (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ?
413 "en" : "dis");
351 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); 414 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
352 for (i = 0; i < 2; ++i) 415 for (i = 0; i < 2; ++i)
353 print_fixed(0x80000 + i * 0x20000, 0x04000, 416 print_fixed(0x80000 + i * 0x20000, 0x04000,
@@ -360,7 +423,7 @@ static void __init print_mtrr_state(void)
360 print_fixed_last(); 423 print_fixed_last();
361 } 424 }
362 pr_debug("MTRR variable ranges %sabled:\n", 425 pr_debug("MTRR variable ranges %sabled:\n",
363 mtrr_state.enabled & 2 ? "en" : "dis"); 426 mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis");
364 high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; 427 high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
365 428
366 for (i = 0; i < num_var_ranges; ++i) { 429 for (i = 0; i < num_var_ranges; ++i) {
@@ -382,7 +445,7 @@ static void __init print_mtrr_state(void)
382} 445}
383 446
384/* Grab all of the MTRR state for this CPU into *state */ 447/* Grab all of the MTRR state for this CPU into *state */
385void __init get_mtrr_state(void) 448bool __init get_mtrr_state(void)
386{ 449{
387 struct mtrr_var_range *vrs; 450 struct mtrr_var_range *vrs;
388 unsigned long flags; 451 unsigned long flags;
@@ -426,6 +489,8 @@ void __init get_mtrr_state(void)
426 489
427 post_set(); 490 post_set();
428 local_irq_restore(flags); 491 local_irq_restore(flags);
492
493 return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
429} 494}
430 495
431/* Some BIOS's are messed up and don't set all MTRRs the same! */ 496/* Some BIOS's are messed up and don't set all MTRRs the same! */
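
The layout comment on mtrr_type_lookup_fixed() translates directly into its index arithmetic: eight 64KB entries below 0x80000, sixteen 16KB entries below 0xC0000, then sixty-four 4KB entries below 0x100000. A few worked values, following the same arithmetic as the function:

	/*
	 * start = 0x05000: idx = 0x05000 >> 16                     = 0       (first 64KB entry)
	 * start = 0xA0000: idx = 1*8 + ((0xA0000 - 0x80000) >> 14) = 8 + 8   = 16
	 * start = 0xC8000: idx = 3*8 + ((0xC8000 - 0xC0000) >> 12) = 24 + 8  = 32
	 */
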
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ea5f363a1948..e7ed0d8ebacb 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,6 +59,12 @@
59#define MTRR_TO_PHYS_WC_OFFSET 1000 59#define MTRR_TO_PHYS_WC_OFFSET 1000
60 60
61u32 num_var_ranges; 61u32 num_var_ranges;
62static bool __mtrr_enabled;
63
64static bool mtrr_enabled(void)
65{
66 return __mtrr_enabled;
67}
62 68
63unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; 69unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
64static DEFINE_MUTEX(mtrr_mutex); 70static DEFINE_MUTEX(mtrr_mutex);
@@ -286,7 +292,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
286 int i, replace, error; 292 int i, replace, error;
287 mtrr_type ltype; 293 mtrr_type ltype;
288 294
289 if (!mtrr_if) 295 if (!mtrr_enabled())
290 return -ENXIO; 296 return -ENXIO;
291 297
292 error = mtrr_if->validate_add_page(base, size, type); 298 error = mtrr_if->validate_add_page(base, size, type);
@@ -435,6 +441,8 @@ static int mtrr_check(unsigned long base, unsigned long size)
435int mtrr_add(unsigned long base, unsigned long size, unsigned int type, 441int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
436 bool increment) 442 bool increment)
437{ 443{
444 if (!mtrr_enabled())
445 return -ENODEV;
438 if (mtrr_check(base, size)) 446 if (mtrr_check(base, size))
439 return -EINVAL; 447 return -EINVAL;
440 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, 448 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
@@ -463,8 +471,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
463 unsigned long lbase, lsize; 471 unsigned long lbase, lsize;
464 int error = -EINVAL; 472 int error = -EINVAL;
465 473
466 if (!mtrr_if) 474 if (!mtrr_enabled())
467 return -ENXIO; 475 return -ENODEV;
468 476
469 max = num_var_ranges; 477 max = num_var_ranges;
470 /* No CPU hotplug when we change MTRR entries */ 478 /* No CPU hotplug when we change MTRR entries */
@@ -523,6 +531,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
523 */ 531 */
524int mtrr_del(int reg, unsigned long base, unsigned long size) 532int mtrr_del(int reg, unsigned long base, unsigned long size)
525{ 533{
534 if (!mtrr_enabled())
535 return -ENODEV;
526 if (mtrr_check(base, size)) 536 if (mtrr_check(base, size))
527 return -EINVAL; 537 return -EINVAL;
528 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); 538 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
@@ -538,6 +548,9 @@ EXPORT_SYMBOL(mtrr_del);
538 * attempts to add a WC MTRR covering size bytes starting at base and 548 * attempts to add a WC MTRR covering size bytes starting at base and
539 * logs an error if this fails. 549 * logs an error if this fails.
540 * 550 *
 551 * The caller should provide a power of two size on an equivalent
552 * power of two boundary.
553 *
541 * Drivers must store the return value to pass to mtrr_del_wc_if_needed, 554 * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
542 * but drivers should not try to interpret that return value. 555 * but drivers should not try to interpret that return value.
543 */ 556 */
@@ -545,7 +558,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size)
545{ 558{
546 int ret; 559 int ret;
547 560
548 if (pat_enabled) 561 if (pat_enabled() || !mtrr_enabled())
549 return 0; /* Success! (We don't need to do anything.) */ 562 return 0; /* Success! (We don't need to do anything.) */
550 563
551 ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true); 564 ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
@@ -577,7 +590,7 @@ void arch_phys_wc_del(int handle)
577EXPORT_SYMBOL(arch_phys_wc_del); 590EXPORT_SYMBOL(arch_phys_wc_del);
578 591
579/* 592/*
580 * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value 593 * arch_phys_wc_index - translates arch_phys_wc_add's return value
581 * @handle: Return value from arch_phys_wc_add 594 * @handle: Return value from arch_phys_wc_add
582 * 595 *
583 * This will turn the return value from arch_phys_wc_add into an mtrr 596 * This will turn the return value from arch_phys_wc_add into an mtrr
@@ -587,14 +600,14 @@ EXPORT_SYMBOL(arch_phys_wc_del);
587 * in printk line. Alas there is an illegitimate use in some ancient 600 * in printk line. Alas there is an illegitimate use in some ancient
588 * drm ioctls. 601 * drm ioctls.
589 */ 602 */
590int phys_wc_to_mtrr_index(int handle) 603int arch_phys_wc_index(int handle)
591{ 604{
592 if (handle < MTRR_TO_PHYS_WC_OFFSET) 605 if (handle < MTRR_TO_PHYS_WC_OFFSET)
593 return -1; 606 return -1;
594 else 607 else
595 return handle - MTRR_TO_PHYS_WC_OFFSET; 608 return handle - MTRR_TO_PHYS_WC_OFFSET;
596} 609}
597EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index); 610EXPORT_SYMBOL_GPL(arch_phys_wc_index);
598 611
599/* 612/*
600 * HACK ALERT! 613 * HACK ALERT!
@@ -734,10 +747,12 @@ void __init mtrr_bp_init(void)
734 } 747 }
735 748
736 if (mtrr_if) { 749 if (mtrr_if) {
750 __mtrr_enabled = true;
737 set_num_var_ranges(); 751 set_num_var_ranges();
738 init_table(); 752 init_table();
739 if (use_intel()) { 753 if (use_intel()) {
740 get_mtrr_state(); 754 /* BIOS may override */
755 __mtrr_enabled = get_mtrr_state();
741 756
742 if (mtrr_cleanup(phys_addr)) { 757 if (mtrr_cleanup(phys_addr)) {
743 changed_by_mtrr_cleanup = 1; 758 changed_by_mtrr_cleanup = 1;
@@ -745,10 +760,16 @@ void __init mtrr_bp_init(void)
745 } 760 }
746 } 761 }
747 } 762 }
763
764 if (!mtrr_enabled())
765 pr_info("MTRR: Disabled\n");
748} 766}
749 767
750void mtrr_ap_init(void) 768void mtrr_ap_init(void)
751{ 769{
770 if (!mtrr_enabled())
771 return;
772
752 if (!use_intel() || mtrr_aps_delayed_init) 773 if (!use_intel() || mtrr_aps_delayed_init)
753 return; 774 return;
754 /* 775 /*
@@ -774,6 +795,9 @@ void mtrr_save_state(void)
774{ 795{
775 int first_cpu; 796 int first_cpu;
776 797
798 if (!mtrr_enabled())
799 return;
800
777 get_online_cpus(); 801 get_online_cpus();
778 first_cpu = cpumask_first(cpu_online_mask); 802 first_cpu = cpumask_first(cpu_online_mask);
779 smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1); 803 smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
@@ -782,6 +806,8 @@ void mtrr_save_state(void)
782 806
783void set_mtrr_aps_delayed_init(void) 807void set_mtrr_aps_delayed_init(void)
784{ 808{
809 if (!mtrr_enabled())
810 return;
785 if (!use_intel()) 811 if (!use_intel())
786 return; 812 return;
787 813
@@ -793,7 +819,7 @@ void set_mtrr_aps_delayed_init(void)
793 */ 819 */
794void mtrr_aps_init(void) 820void mtrr_aps_init(void)
795{ 821{
796 if (!use_intel()) 822 if (!use_intel() || !mtrr_enabled())
797 return; 823 return;
798 824
799 /* 825 /*
@@ -810,7 +836,7 @@ void mtrr_aps_init(void)
810 836
811void mtrr_bp_restore(void) 837void mtrr_bp_restore(void)
812{ 838{
813 if (!use_intel()) 839 if (!use_intel() || !mtrr_enabled())
814 return; 840 return;
815 841
816 mtrr_if->set_all(); 842 mtrr_if->set_all();
@@ -818,7 +844,7 @@ void mtrr_bp_restore(void)
818 844
819static int __init mtrr_init_finialize(void) 845static int __init mtrr_init_finialize(void)
820{ 846{
821 if (!mtrr_if) 847 if (!mtrr_enabled())
822 return 0; 848 return 0;
823 849
824 if (use_intel()) { 850 if (use_intel()) {
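
The arch_phys_wc_add()/arch_phys_wc_index() comments above spell out the driver contract: pass a power-of-two size on a matching boundary, store the returned cookie without interpreting it, and only hand it back to arch_phys_wc_del() (or arch_phys_wc_index() in the legacy drm ioctl case). A sketch of that pattern, with made-up driver names:

	#include <linux/io.h>	/* arch_phys_wc_add(), arch_phys_wc_del() */

	static int fb_wc_cookie;	/* opaque cookie, never interpreted */

	static void fb_enable_wc(unsigned long base, unsigned long size)
	{
		fb_wc_cookie = arch_phys_wc_add(base, size);
	}

	static void fb_disable_wc(void)
	{
		arch_phys_wc_del(fb_wc_cookie);
	}
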
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index df5e41f31a27..951884dcc433 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -51,7 +51,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
51 51
52void fill_mtrr_var_range(unsigned int index, 52void fill_mtrr_var_range(unsigned int index,
53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); 53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
54void get_mtrr_state(void); 54bool get_mtrr_state(void);
55 55
56extern void set_mtrr_ops(const struct mtrr_ops *ops); 56extern void set_mtrr_ops(const struct mtrr_ops *ops);
57 57
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c76d3e37c6e1..e068d6683dba 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -22,6 +22,7 @@
22#include <linux/elfcore.h> 22#include <linux/elfcore.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/vmalloc.h>
25 26
26#include <asm/processor.h> 27#include <asm/processor.h>
27#include <asm/hardirq.h> 28#include <asm/hardirq.h>
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index fe9f0b79a18b..5cb9a4d6f623 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -627,8 +627,12 @@ static struct chipset early_qrk[] __initdata = {
627 { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, 627 { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
628 QFLAG_APPLY_ONCE, intel_graphics_stolen }, 628 QFLAG_APPLY_ONCE, intel_graphics_stolen },
629 /* 629 /*
630 * HPET on current version of Baytrail platform has accuracy 630 * HPET on the current version of the Baytrail platform has accuracy
631 * problems, disable it for now: 631 * problems: it will halt in deep idle state - so we disable it.
632 *
633 * More details can be found in section 18.10.1.3 of the datasheet:
634 *
635 * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/atom-z8000-datasheet-vol-1.pdf
632 */ 636 */
633 { PCI_VENDOR_ID_INTEL, 0x0f00, 637 { PCI_VENDOR_ID_INTEL, 0x0f00,
634 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, 638 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
deleted file mode 100644
index 1c309763e321..000000000000
--- a/arch/x86/kernel/entry_32.S
+++ /dev/null
@@ -1,1401 +0,0 @@
1/*
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 */
5
6/*
7 * entry.S contains the system-call and fault low-level handling routines.
8 * This also contains the timer-interrupt handler, as well as all interrupts
9 * and faults that can result in a task-switch.
10 *
11 * NOTE: This code handles signal-recognition, which happens every time
12 * after a timer-interrupt and after each system call.
13 *
14 * I changed all the .align's to 4 (16 byte alignment), as that's faster
15 * on a 486.
16 *
17 * Stack layout in 'syscall_exit':
18 * ptrace needs to have all regs on the stack.
19 * if the order here is changed, it needs to be
20 * updated in fork.c:copy_process, signal.c:do_signal,
21 * ptrace.c and ptrace.h
22 *
23 * 0(%esp) - %ebx
24 * 4(%esp) - %ecx
25 * 8(%esp) - %edx
26 * C(%esp) - %esi
27 * 10(%esp) - %edi
28 * 14(%esp) - %ebp
29 * 18(%esp) - %eax
30 * 1C(%esp) - %ds
31 * 20(%esp) - %es
32 * 24(%esp) - %fs
33 * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
34 * 2C(%esp) - orig_eax
35 * 30(%esp) - %eip
36 * 34(%esp) - %cs
37 * 38(%esp) - %eflags
38 * 3C(%esp) - %oldesp
39 * 40(%esp) - %oldss
40 *
41 * "current" is in register %ebx during any slow entries.
42 */
43
44#include <linux/linkage.h>
45#include <linux/err.h>
46#include <asm/thread_info.h>
47#include <asm/irqflags.h>
48#include <asm/errno.h>
49#include <asm/segment.h>
50#include <asm/smp.h>
51#include <asm/page_types.h>
52#include <asm/percpu.h>
53#include <asm/dwarf2.h>
54#include <asm/processor-flags.h>
55#include <asm/ftrace.h>
56#include <asm/irq_vectors.h>
57#include <asm/cpufeature.h>
58#include <asm/alternative-asm.h>
59#include <asm/asm.h>
60#include <asm/smap.h>
61
62/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
63#include <linux/elf-em.h>
64#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
65#define __AUDIT_ARCH_LE 0x40000000
66
67#ifndef CONFIG_AUDITSYSCALL
68#define sysenter_audit syscall_trace_entry
69#define sysexit_audit syscall_exit_work
70#endif
71
72 .section .entry.text, "ax"
73
74/*
75 * We use macros for low-level operations which need to be overridden
76 * for paravirtualization. The following will never clobber any registers:
77 * INTERRUPT_RETURN (aka. "iret")
78 * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
79 * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
80 *
81 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
82 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
83 * Allowing a register to be clobbered can shrink the paravirt replacement
84 * enough to patch inline, increasing performance.
85 */
86
87#ifdef CONFIG_PREEMPT
88#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
89#else
90#define preempt_stop(clobbers)
91#define resume_kernel restore_all
92#endif
93
94.macro TRACE_IRQS_IRET
95#ifdef CONFIG_TRACE_IRQFLAGS
96 testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off?
97 jz 1f
98 TRACE_IRQS_ON
991:
100#endif
101.endm
102
103/*
104 * User gs save/restore
105 *
106 * %gs is used for userland TLS and kernel only uses it for stack
107 * canary which is required to be at %gs:20 by gcc. Read the comment
108 * at the top of stackprotector.h for more info.
109 *
110 * Local labels 98 and 99 are used.
111 */
112#ifdef CONFIG_X86_32_LAZY_GS
113
114 /* unfortunately push/pop can't be no-op */
115.macro PUSH_GS
116 pushl_cfi $0
117.endm
118.macro POP_GS pop=0
119 addl $(4 + \pop), %esp
120 CFI_ADJUST_CFA_OFFSET -(4 + \pop)
121.endm
122.macro POP_GS_EX
123.endm
124
125 /* all the rest are no-op */
126.macro PTGS_TO_GS
127.endm
128.macro PTGS_TO_GS_EX
129.endm
130.macro GS_TO_REG reg
131.endm
132.macro REG_TO_PTGS reg
133.endm
134.macro SET_KERNEL_GS reg
135.endm
136
137#else /* CONFIG_X86_32_LAZY_GS */
138
139.macro PUSH_GS
140 pushl_cfi %gs
141 /*CFI_REL_OFFSET gs, 0*/
142.endm
143
144.macro POP_GS pop=0
14598: popl_cfi %gs
146 /*CFI_RESTORE gs*/
147 .if \pop <> 0
148 add $\pop, %esp
149 CFI_ADJUST_CFA_OFFSET -\pop
150 .endif
151.endm
152.macro POP_GS_EX
153.pushsection .fixup, "ax"
15499: movl $0, (%esp)
155 jmp 98b
156.popsection
157 _ASM_EXTABLE(98b,99b)
158.endm
159
160.macro PTGS_TO_GS
16198: mov PT_GS(%esp), %gs
162.endm
163.macro PTGS_TO_GS_EX
164.pushsection .fixup, "ax"
16599: movl $0, PT_GS(%esp)
166 jmp 98b
167.popsection
168 _ASM_EXTABLE(98b,99b)
169.endm
170
171.macro GS_TO_REG reg
172 movl %gs, \reg
173 /*CFI_REGISTER gs, \reg*/
174.endm
175.macro REG_TO_PTGS reg
176 movl \reg, PT_GS(%esp)
177 /*CFI_REL_OFFSET gs, PT_GS*/
178.endm
179.macro SET_KERNEL_GS reg
180 movl $(__KERNEL_STACK_CANARY), \reg
181 movl \reg, %gs
182.endm
183
184#endif /* CONFIG_X86_32_LAZY_GS */
185
186.macro SAVE_ALL
187 cld
188 PUSH_GS
189 pushl_cfi %fs
190 /*CFI_REL_OFFSET fs, 0;*/
191 pushl_cfi %es
192 /*CFI_REL_OFFSET es, 0;*/
193 pushl_cfi %ds
194 /*CFI_REL_OFFSET ds, 0;*/
195 pushl_cfi %eax
196 CFI_REL_OFFSET eax, 0
197 pushl_cfi %ebp
198 CFI_REL_OFFSET ebp, 0
199 pushl_cfi %edi
200 CFI_REL_OFFSET edi, 0
201 pushl_cfi %esi
202 CFI_REL_OFFSET esi, 0
203 pushl_cfi %edx
204 CFI_REL_OFFSET edx, 0
205 pushl_cfi %ecx
206 CFI_REL_OFFSET ecx, 0
207 pushl_cfi %ebx
208 CFI_REL_OFFSET ebx, 0
209 movl $(__USER_DS), %edx
210 movl %edx, %ds
211 movl %edx, %es
212 movl $(__KERNEL_PERCPU), %edx
213 movl %edx, %fs
214 SET_KERNEL_GS %edx
215.endm
216
217.macro RESTORE_INT_REGS
218 popl_cfi %ebx
219 CFI_RESTORE ebx
220 popl_cfi %ecx
221 CFI_RESTORE ecx
222 popl_cfi %edx
223 CFI_RESTORE edx
224 popl_cfi %esi
225 CFI_RESTORE esi
226 popl_cfi %edi
227 CFI_RESTORE edi
228 popl_cfi %ebp
229 CFI_RESTORE ebp
230 popl_cfi %eax
231 CFI_RESTORE eax
232.endm
233
234.macro RESTORE_REGS pop=0
235 RESTORE_INT_REGS
2361: popl_cfi %ds
237 /*CFI_RESTORE ds;*/
2382: popl_cfi %es
239 /*CFI_RESTORE es;*/
2403: popl_cfi %fs
241 /*CFI_RESTORE fs;*/
242 POP_GS \pop
243.pushsection .fixup, "ax"
2444: movl $0, (%esp)
245 jmp 1b
2465: movl $0, (%esp)
247 jmp 2b
2486: movl $0, (%esp)
249 jmp 3b
250.popsection
251 _ASM_EXTABLE(1b,4b)
252 _ASM_EXTABLE(2b,5b)
253 _ASM_EXTABLE(3b,6b)
254 POP_GS_EX
255.endm
256
257.macro RING0_INT_FRAME
258 CFI_STARTPROC simple
259 CFI_SIGNAL_FRAME
260 CFI_DEF_CFA esp, 3*4
261 /*CFI_OFFSET cs, -2*4;*/
262 CFI_OFFSET eip, -3*4
263.endm
264
265.macro RING0_EC_FRAME
266 CFI_STARTPROC simple
267 CFI_SIGNAL_FRAME
268 CFI_DEF_CFA esp, 4*4
269 /*CFI_OFFSET cs, -2*4;*/
270 CFI_OFFSET eip, -3*4
271.endm
272
273.macro RING0_PTREGS_FRAME
274 CFI_STARTPROC simple
275 CFI_SIGNAL_FRAME
276 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
277 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
278 CFI_OFFSET eip, PT_EIP-PT_OLDESP
279 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
280 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
281 CFI_OFFSET eax, PT_EAX-PT_OLDESP
282 CFI_OFFSET ebp, PT_EBP-PT_OLDESP
283 CFI_OFFSET edi, PT_EDI-PT_OLDESP
284 CFI_OFFSET esi, PT_ESI-PT_OLDESP
285 CFI_OFFSET edx, PT_EDX-PT_OLDESP
286 CFI_OFFSET ecx, PT_ECX-PT_OLDESP
287 CFI_OFFSET ebx, PT_EBX-PT_OLDESP
288.endm
289
290ENTRY(ret_from_fork)
291 CFI_STARTPROC
292 pushl_cfi %eax
293 call schedule_tail
294 GET_THREAD_INFO(%ebp)
295 popl_cfi %eax
296 pushl_cfi $0x0202 # Reset kernel eflags
297 popfl_cfi
298 jmp syscall_exit
299 CFI_ENDPROC
300END(ret_from_fork)
301
302ENTRY(ret_from_kernel_thread)
303 CFI_STARTPROC
304 pushl_cfi %eax
305 call schedule_tail
306 GET_THREAD_INFO(%ebp)
307 popl_cfi %eax
308 pushl_cfi $0x0202 # Reset kernel eflags
309 popfl_cfi
310 movl PT_EBP(%esp),%eax
311 call *PT_EBX(%esp)
312 movl $0,PT_EAX(%esp)
313 jmp syscall_exit
314 CFI_ENDPROC
315ENDPROC(ret_from_kernel_thread)
316
317/*
318 * Return to user mode is not as complex as all this looks,
319 * but we want the default path for a system call return to
320 * go as quickly as possible which is why some of this is
321 * less clear than it otherwise should be.
322 */
323
324 # userspace resumption stub bypassing syscall exit tracing
325 ALIGN
326 RING0_PTREGS_FRAME
327ret_from_exception:
328 preempt_stop(CLBR_ANY)
329ret_from_intr:
330 GET_THREAD_INFO(%ebp)
331#ifdef CONFIG_VM86
332 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
333 movb PT_CS(%esp), %al
334 andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
335#else
336 /*
337 * We can be coming here from child spawned by kernel_thread().
338 */
339 movl PT_CS(%esp), %eax
340 andl $SEGMENT_RPL_MASK, %eax
341#endif
342 cmpl $USER_RPL, %eax
343 jb resume_kernel # not returning to v8086 or userspace
344
345ENTRY(resume_userspace)
346 LOCKDEP_SYS_EXIT
347 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
348 # setting need_resched or sigpending
349 # between sampling and the iret
350 TRACE_IRQS_OFF
351 movl TI_flags(%ebp), %ecx
352 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
353 # int/exception return?
354 jne work_pending
355 jmp restore_all
356END(ret_from_exception)
357
358#ifdef CONFIG_PREEMPT
359ENTRY(resume_kernel)
360 DISABLE_INTERRUPTS(CLBR_ANY)
361need_resched:
362 cmpl $0,PER_CPU_VAR(__preempt_count)
363 jnz restore_all
364 testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
365 jz restore_all
366 call preempt_schedule_irq
367 jmp need_resched
368END(resume_kernel)
369#endif
370 CFI_ENDPROC
371
372/* SYSENTER_RETURN points to after the "sysenter" instruction in
373 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
374
375 # sysenter call handler stub
376ENTRY(ia32_sysenter_target)
377 CFI_STARTPROC simple
378 CFI_SIGNAL_FRAME
379 CFI_DEF_CFA esp, 0
380 CFI_REGISTER esp, ebp
381 movl TSS_sysenter_sp0(%esp),%esp
382sysenter_past_esp:
383 /*
384 * Interrupts are disabled here, but we can't trace it until
385 * enough kernel state to call TRACE_IRQS_OFF can be called - but
386 * we immediately enable interrupts at that point anyway.
387 */
388 pushl_cfi $__USER_DS
389 /*CFI_REL_OFFSET ss, 0*/
390 pushl_cfi %ebp
391 CFI_REL_OFFSET esp, 0
392 pushfl_cfi
393 orl $X86_EFLAGS_IF, (%esp)
394 pushl_cfi $__USER_CS
395 /*CFI_REL_OFFSET cs, 0*/
396 /*
397 * Push current_thread_info()->sysenter_return to the stack.
398 * A tiny bit of offset fixup is necessary: TI_sysenter_return
399 * is relative to thread_info, which is at the bottom of the
400 * kernel stack page. 4*4 means the 4 words pushed above;
401 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
402 * and THREAD_SIZE takes us to the bottom.
403 */
404 pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
405 CFI_REL_OFFSET eip, 0
406
407 pushl_cfi %eax
408 SAVE_ALL
409 ENABLE_INTERRUPTS(CLBR_NONE)
410
411/*
412 * Load the potential sixth argument from user stack.
413 * Careful about security.
414 */
415 cmpl $__PAGE_OFFSET-3,%ebp
416 jae syscall_fault
417 ASM_STAC
4181: movl (%ebp),%ebp
419 ASM_CLAC
420 movl %ebp,PT_EBP(%esp)
421 _ASM_EXTABLE(1b,syscall_fault)
422
423 GET_THREAD_INFO(%ebp)
424
425 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
426 jnz sysenter_audit
427sysenter_do_call:
428 cmpl $(NR_syscalls), %eax
429 jae sysenter_badsys
430 call *sys_call_table(,%eax,4)
431sysenter_after_call:
432 movl %eax,PT_EAX(%esp)
433 LOCKDEP_SYS_EXIT
434 DISABLE_INTERRUPTS(CLBR_ANY)
435 TRACE_IRQS_OFF
436 movl TI_flags(%ebp), %ecx
437 testl $_TIF_ALLWORK_MASK, %ecx
438 jnz sysexit_audit
439sysenter_exit:
440/* if something modifies registers it must also disable sysexit */
441 movl PT_EIP(%esp), %edx
442 movl PT_OLDESP(%esp), %ecx
443 xorl %ebp,%ebp
444 TRACE_IRQS_ON
4451: mov PT_FS(%esp), %fs
446 PTGS_TO_GS
447 ENABLE_INTERRUPTS_SYSEXIT
448
449#ifdef CONFIG_AUDITSYSCALL
450sysenter_audit:
451 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
452 jnz syscall_trace_entry
453 /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
454 movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
455 /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
456 pushl_cfi PT_ESI(%esp) /* a3: 5th arg */
457 pushl_cfi PT_EDX+4(%esp) /* a2: 4th arg */
458 call __audit_syscall_entry
459 popl_cfi %ecx /* get that remapped edx off the stack */
460 popl_cfi %ecx /* get that remapped esi off the stack */
461 movl PT_EAX(%esp),%eax /* reload syscall number */
462 jmp sysenter_do_call
463
464sysexit_audit:
465 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
466 jnz syscall_exit_work
467 TRACE_IRQS_ON
468 ENABLE_INTERRUPTS(CLBR_ANY)
469 movl %eax,%edx /* second arg, syscall return value */
470 cmpl $-MAX_ERRNO,%eax /* is it an error ? */
471 setbe %al /* 1 if so, 0 if not */
472 movzbl %al,%eax /* zero-extend that */
473 call __audit_syscall_exit
474 DISABLE_INTERRUPTS(CLBR_ANY)
475 TRACE_IRQS_OFF
476 movl TI_flags(%ebp), %ecx
477 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
478 jnz syscall_exit_work
479 movl PT_EAX(%esp),%eax /* reload syscall return value */
480 jmp sysenter_exit
481#endif
482
483 CFI_ENDPROC
484.pushsection .fixup,"ax"
4852: movl $0,PT_FS(%esp)
486 jmp 1b
487.popsection
488 _ASM_EXTABLE(1b,2b)
489 PTGS_TO_GS_EX
490ENDPROC(ia32_sysenter_target)
491
492 # system call handler stub
493ENTRY(system_call)
494 RING0_INT_FRAME # can't unwind into user space anyway
495 ASM_CLAC
496 pushl_cfi %eax # save orig_eax
497 SAVE_ALL
498 GET_THREAD_INFO(%ebp)
499 # system call tracing in operation / emulation
500 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
501 jnz syscall_trace_entry
502 cmpl $(NR_syscalls), %eax
503 jae syscall_badsys
504syscall_call:
505 call *sys_call_table(,%eax,4)
506syscall_after_call:
507 movl %eax,PT_EAX(%esp) # store the return value
508syscall_exit:
509 LOCKDEP_SYS_EXIT
510 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
511 # setting need_resched or sigpending
512 # between sampling and the iret
513 TRACE_IRQS_OFF
514 movl TI_flags(%ebp), %ecx
515 testl $_TIF_ALLWORK_MASK, %ecx # current->work
516 jnz syscall_exit_work
517
518restore_all:
519 TRACE_IRQS_IRET
520restore_all_notrace:
521#ifdef CONFIG_X86_ESPFIX32
522 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
523 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
524 # are returning to the kernel.
525 # See comments in process.c:copy_thread() for details.
526 movb PT_OLDSS(%esp), %ah
527 movb PT_CS(%esp), %al
528 andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
529 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
530 CFI_REMEMBER_STATE
531 je ldt_ss # returning to user-space with LDT SS
532#endif
533restore_nocheck:
534 RESTORE_REGS 4 # skip orig_eax/error_code
535irq_return:
536 INTERRUPT_RETURN
537.section .fixup,"ax"
538ENTRY(iret_exc)
539 pushl $0 # no error code
540 pushl $do_iret_error
541 jmp error_code
542.previous
543 _ASM_EXTABLE(irq_return,iret_exc)
544
545#ifdef CONFIG_X86_ESPFIX32
546 CFI_RESTORE_STATE
547ldt_ss:
548#ifdef CONFIG_PARAVIRT
549 /*
550 * The kernel can't run on a non-flat stack if paravirt mode
551	 * is active. Rather than try to fix up the high bits of
552	 * ESP, bypass this code entirely. This may break DOSemu
553	 * and/or Wine support in a paravirt VM, although the option
554	 * of setting the high 16 bits in the INTERRUPT_RETURN
555	 * paravirt-op is still available.
556 */
557 cmpl $0, pv_info+PARAVIRT_enabled
558 jne restore_nocheck
559#endif
560
561/*
562 * Setup and switch to ESPFIX stack
563 *
564 * We're returning to userspace with a 16 bit stack. The CPU will not
565 * restore the high word of ESP for us on executing iret... This is an
566 * "official" bug of all the x86-compatible CPUs, which we can work
567 * around to make dosemu and wine happy. We do this by preloading the
568 * high word of ESP with the high word of the userspace ESP while
569 * compensating for the offset by changing to the ESPFIX segment with
570	 * a base address that makes up for the difference.
571 */
572#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
573 mov %esp, %edx /* load kernel esp */
574 mov PT_OLDESP(%esp), %eax /* load userspace esp */
575 mov %dx, %ax /* eax: new kernel esp */
576 sub %eax, %edx /* offset (low word is 0) */
577 shr $16, %edx
578 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
579 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
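	/*
	 * In other words: after the lss below, SS:ESP still resolves to the
	 * same kernel-stack linear address (the ESPFIX base cancels the
	 * changed high word), but ESP's high word now already holds the
	 * userspace value that iret will leave untouched.
	 */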
580 pushl_cfi $__ESPFIX_SS
581 pushl_cfi %eax /* new kernel esp */
582 /* Disable interrupts, but do not irqtrace this section: we
583	 * will soon execute iret and the tracer has already been set
584	 * to the irq state that will hold after the iret */
585 DISABLE_INTERRUPTS(CLBR_EAX)
586 lss (%esp), %esp /* switch to espfix segment */
587 CFI_ADJUST_CFA_OFFSET -8
588 jmp restore_nocheck
589#endif
590 CFI_ENDPROC
591ENDPROC(system_call)
592
593 # perform work that needs to be done immediately before resumption
594 ALIGN
595 RING0_PTREGS_FRAME # can't unwind into user space anyway
596work_pending:
597 testb $_TIF_NEED_RESCHED, %cl
598 jz work_notifysig
599work_resched:
600 call schedule
601 LOCKDEP_SYS_EXIT
602 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
603 # setting need_resched or sigpending
604 # between sampling and the iret
605 TRACE_IRQS_OFF
606 movl TI_flags(%ebp), %ecx
607 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
608 # than syscall tracing?
609 jz restore_all
610 testb $_TIF_NEED_RESCHED, %cl
611 jnz work_resched
612
613work_notifysig: # deal with pending signals and
614 # notify-resume requests
615#ifdef CONFIG_VM86
616 testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
617 movl %esp, %eax
618 jnz work_notifysig_v86 # returning to kernel-space or
619 # vm86-space
6201:
621#else
622 movl %esp, %eax
623#endif
624 TRACE_IRQS_ON
625 ENABLE_INTERRUPTS(CLBR_NONE)
626 movb PT_CS(%esp), %bl
627 andb $SEGMENT_RPL_MASK, %bl
628 cmpb $USER_RPL, %bl
629 jb resume_kernel
630 xorl %edx, %edx
631 call do_notify_resume
632 jmp resume_userspace
633
634#ifdef CONFIG_VM86
635 ALIGN
636work_notifysig_v86:
637 pushl_cfi %ecx # save ti_flags for do_notify_resume
638 call save_v86_state # %eax contains pt_regs pointer
639 popl_cfi %ecx
640 movl %eax, %esp
641 jmp 1b
642#endif
643END(work_pending)
644
645	# perform syscall entry tracing
646 ALIGN
647syscall_trace_entry:
648 movl $-ENOSYS,PT_EAX(%esp)
649 movl %esp, %eax
650 call syscall_trace_enter
651 /* What it returned is what we'll actually use. */
652 cmpl $(NR_syscalls), %eax
653 jnae syscall_call
654 jmp syscall_exit
655END(syscall_trace_entry)
656
657 # perform syscall exit tracing
658 ALIGN
659syscall_exit_work:
660 testl $_TIF_WORK_SYSCALL_EXIT, %ecx
661 jz work_pending
662 TRACE_IRQS_ON
663 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
664 # schedule() instead
665 movl %esp, %eax
666 call syscall_trace_leave
667 jmp resume_userspace
668END(syscall_exit_work)
669 CFI_ENDPROC
670
671 RING0_INT_FRAME # can't unwind into user space anyway
672syscall_fault:
673 ASM_CLAC
674 GET_THREAD_INFO(%ebp)
675 movl $-EFAULT,PT_EAX(%esp)
676 jmp resume_userspace
677END(syscall_fault)
678
679syscall_badsys:
680 movl $-ENOSYS,%eax
681 jmp syscall_after_call
682END(syscall_badsys)
683
684sysenter_badsys:
685 movl $-ENOSYS,%eax
686 jmp sysenter_after_call
687END(sysenter_badsys)
688 CFI_ENDPROC
689
690.macro FIXUP_ESPFIX_STACK
691/*
692 * Switch back from the ESPFIX stack to the normal zero-based stack
693 *
694 * We can't call C functions using the ESPFIX stack. This code reads
695 * the high word of the segment base from the GDT and switches to the
696 * normal stack, then adjusts ESP by the matching offset.
697 */
698#ifdef CONFIG_X86_ESPFIX32
699 /* fixup the stack */
700 mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
701 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
702 shl $16, %eax
703 addl %esp, %eax /* the adjusted stack pointer */
704 pushl_cfi $__KERNEL_DS
705 pushl_cfi %eax
706 lss (%esp), %esp /* switch to the normal stack segment */
707 CFI_ADJUST_CFA_OFFSET -8
708#endif
709.endm
710.macro UNWIND_ESPFIX_STACK
711#ifdef CONFIG_X86_ESPFIX32
712 movl %ss, %eax
713 /* see if on espfix stack */
714 cmpw $__ESPFIX_SS, %ax
715 jne 27f
716 movl $__KERNEL_DS, %eax
717 movl %eax, %ds
718 movl %eax, %es
719 /* switch to normal stack */
720 FIXUP_ESPFIX_STACK
72127:
722#endif
723.endm
724
725/*
726 * Build the entry stubs with some assembler magic.
727 * We pack 1 stub into every 8-byte block.
728 */
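/*
 * Each stub pushes its vector encoded as (~vector + 0x80) so that the
 * push immediate fits in a signed byte and the stub stays inside its
 * 8-byte slot; common_interrupt below subtracts the 0x80 again, leaving
 * ~vector (a value in the [-256,-1] range) in orig_eax for do_IRQ().
 */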
729 .align 8
730ENTRY(irq_entries_start)
731 RING0_INT_FRAME
732 vector=FIRST_EXTERNAL_VECTOR
733 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
734 pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
735 vector=vector+1
736 jmp common_interrupt
737 CFI_ADJUST_CFA_OFFSET -4
738 .align 8
739 .endr
740END(irq_entries_start)
741
742/*
743 * the CPU automatically disables interrupts when executing an IRQ vector,
744 * so IRQ-flags tracing has to follow that:
745 */
746 .p2align CONFIG_X86_L1_CACHE_SHIFT
747common_interrupt:
748 ASM_CLAC
749 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
750 SAVE_ALL
751 TRACE_IRQS_OFF
752 movl %esp,%eax
753 call do_IRQ
754 jmp ret_from_intr
755ENDPROC(common_interrupt)
756 CFI_ENDPROC
757
758#define BUILD_INTERRUPT3(name, nr, fn) \
759ENTRY(name) \
760 RING0_INT_FRAME; \
761 ASM_CLAC; \
762 pushl_cfi $~(nr); \
763 SAVE_ALL; \
764 TRACE_IRQS_OFF \
765 movl %esp,%eax; \
766 call fn; \
767 jmp ret_from_intr; \
768 CFI_ENDPROC; \
769ENDPROC(name)
770
771
772#ifdef CONFIG_TRACING
773#define TRACE_BUILD_INTERRUPT(name, nr) \
774 BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
775#else
776#define TRACE_BUILD_INTERRUPT(name, nr)
777#endif
778
779#define BUILD_INTERRUPT(name, nr) \
780 BUILD_INTERRUPT3(name, nr, smp_##name); \
781 TRACE_BUILD_INTERRUPT(name, nr)
782
783/* The include is where all of the SMP etc. interrupts come from */
784#include <asm/entry_arch.h>
785
786ENTRY(coprocessor_error)
787 RING0_INT_FRAME
788 ASM_CLAC
789 pushl_cfi $0
790 pushl_cfi $do_coprocessor_error
791 jmp error_code
792 CFI_ENDPROC
793END(coprocessor_error)
794
795ENTRY(simd_coprocessor_error)
796 RING0_INT_FRAME
797 ASM_CLAC
798 pushl_cfi $0
799#ifdef CONFIG_X86_INVD_BUG
800 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
801 ALTERNATIVE "pushl_cfi $do_general_protection", \
802 "pushl $do_simd_coprocessor_error", \
803 X86_FEATURE_XMM
804#else
805 pushl_cfi $do_simd_coprocessor_error
806#endif
807 jmp error_code
808 CFI_ENDPROC
809END(simd_coprocessor_error)
810
811ENTRY(device_not_available)
812 RING0_INT_FRAME
813 ASM_CLAC
814 pushl_cfi $-1 # mark this as an int
815 pushl_cfi $do_device_not_available
816 jmp error_code
817 CFI_ENDPROC
818END(device_not_available)
819
820#ifdef CONFIG_PARAVIRT
821ENTRY(native_iret)
822 iret
823 _ASM_EXTABLE(native_iret, iret_exc)
824END(native_iret)
825
826ENTRY(native_irq_enable_sysexit)
827 sti
828 sysexit
829END(native_irq_enable_sysexit)
830#endif
831
832ENTRY(overflow)
833 RING0_INT_FRAME
834 ASM_CLAC
835 pushl_cfi $0
836 pushl_cfi $do_overflow
837 jmp error_code
838 CFI_ENDPROC
839END(overflow)
840
841ENTRY(bounds)
842 RING0_INT_FRAME
843 ASM_CLAC
844 pushl_cfi $0
845 pushl_cfi $do_bounds
846 jmp error_code
847 CFI_ENDPROC
848END(bounds)
849
850ENTRY(invalid_op)
851 RING0_INT_FRAME
852 ASM_CLAC
853 pushl_cfi $0
854 pushl_cfi $do_invalid_op
855 jmp error_code
856 CFI_ENDPROC
857END(invalid_op)
858
859ENTRY(coprocessor_segment_overrun)
860 RING0_INT_FRAME
861 ASM_CLAC
862 pushl_cfi $0
863 pushl_cfi $do_coprocessor_segment_overrun
864 jmp error_code
865 CFI_ENDPROC
866END(coprocessor_segment_overrun)
867
868ENTRY(invalid_TSS)
869 RING0_EC_FRAME
870 ASM_CLAC
871 pushl_cfi $do_invalid_TSS
872 jmp error_code
873 CFI_ENDPROC
874END(invalid_TSS)
875
876ENTRY(segment_not_present)
877 RING0_EC_FRAME
878 ASM_CLAC
879 pushl_cfi $do_segment_not_present
880 jmp error_code
881 CFI_ENDPROC
882END(segment_not_present)
883
884ENTRY(stack_segment)
885 RING0_EC_FRAME
886 ASM_CLAC
887 pushl_cfi $do_stack_segment
888 jmp error_code
889 CFI_ENDPROC
890END(stack_segment)
891
892ENTRY(alignment_check)
893 RING0_EC_FRAME
894 ASM_CLAC
895 pushl_cfi $do_alignment_check
896 jmp error_code
897 CFI_ENDPROC
898END(alignment_check)
899
900ENTRY(divide_error)
901 RING0_INT_FRAME
902 ASM_CLAC
903 pushl_cfi $0 # no error code
904 pushl_cfi $do_divide_error
905 jmp error_code
906 CFI_ENDPROC
907END(divide_error)
908
909#ifdef CONFIG_X86_MCE
910ENTRY(machine_check)
911 RING0_INT_FRAME
912 ASM_CLAC
913 pushl_cfi $0
914 pushl_cfi machine_check_vector
915 jmp error_code
916 CFI_ENDPROC
917END(machine_check)
918#endif
919
920ENTRY(spurious_interrupt_bug)
921 RING0_INT_FRAME
922 ASM_CLAC
923 pushl_cfi $0
924 pushl_cfi $do_spurious_interrupt_bug
925 jmp error_code
926 CFI_ENDPROC
927END(spurious_interrupt_bug)
928
929#ifdef CONFIG_XEN
930/* Xen doesn't set %esp to be precisely what the normal sysenter
931 entrypoint expects, so fix it up before using the normal path. */
932ENTRY(xen_sysenter_target)
933 RING0_INT_FRAME
934 addl $5*4, %esp /* remove xen-provided frame */
935 CFI_ADJUST_CFA_OFFSET -5*4
936 jmp sysenter_past_esp
937 CFI_ENDPROC
938
939ENTRY(xen_hypervisor_callback)
940 CFI_STARTPROC
941 pushl_cfi $-1 /* orig_ax = -1 => not a system call */
942 SAVE_ALL
943 TRACE_IRQS_OFF
944
945 /* Check to see if we got the event in the critical
946 region in xen_iret_direct, after we've reenabled
947	   events and checked for pending events. This simulates the
948 iret instruction's behaviour where it delivers a
949 pending interrupt when enabling interrupts. */
950 movl PT_EIP(%esp),%eax
951 cmpl $xen_iret_start_crit,%eax
952 jb 1f
953 cmpl $xen_iret_end_crit,%eax
954 jae 1f
955
956 jmp xen_iret_crit_fixup
957
958ENTRY(xen_do_upcall)
9591: mov %esp, %eax
960 call xen_evtchn_do_upcall
961#ifndef CONFIG_PREEMPT
962 call xen_maybe_preempt_hcall
963#endif
964 jmp ret_from_intr
965 CFI_ENDPROC
966ENDPROC(xen_hypervisor_callback)
967
968# Hypervisor uses this for application faults while it executes.
969# We get here for two reasons:
970# 1. Fault while reloading DS, ES, FS or GS
971# 2. Fault while executing IRET
972# Category 1 we fix up by reattempting the load, and zeroing the segment
973# register if the load fails.
974# Category 2 we fix up by jumping to do_iret_error. We cannot use the
975# normal Linux return path in this case because if we use the IRET hypercall
976# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
977# We distinguish between categories by maintaining a status value in EAX.
978ENTRY(xen_failsafe_callback)
979 CFI_STARTPROC
980 pushl_cfi %eax
981 movl $1,%eax
9821: mov 4(%esp),%ds
9832: mov 8(%esp),%es
9843: mov 12(%esp),%fs
9854: mov 16(%esp),%gs
986 /* EAX == 0 => Category 1 (Bad segment)
987 EAX != 0 => Category 2 (Bad IRET) */
988 testl %eax,%eax
989 popl_cfi %eax
990 lea 16(%esp),%esp
991 CFI_ADJUST_CFA_OFFSET -16
992 jz 5f
993 jmp iret_exc
9945: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
995 SAVE_ALL
996 jmp ret_from_exception
997 CFI_ENDPROC
998
999.section .fixup,"ax"
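/*
 * If one of the segment loads above faults, zero the saved selector on
 * the stack and retry: the reload then picks up the null selector, and
 * %eax is left at 0 so the exit path above treats this as Category 1.
 */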
10006: xorl %eax,%eax
1001 movl %eax,4(%esp)
1002 jmp 1b
10037: xorl %eax,%eax
1004 movl %eax,8(%esp)
1005 jmp 2b
10068: xorl %eax,%eax
1007 movl %eax,12(%esp)
1008 jmp 3b
10099: xorl %eax,%eax
1010 movl %eax,16(%esp)
1011 jmp 4b
1012.previous
1013 _ASM_EXTABLE(1b,6b)
1014 _ASM_EXTABLE(2b,7b)
1015 _ASM_EXTABLE(3b,8b)
1016 _ASM_EXTABLE(4b,9b)
1017ENDPROC(xen_failsafe_callback)
1018
1019BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1020 xen_evtchn_do_upcall)
1021
1022#endif /* CONFIG_XEN */
1023
1024#if IS_ENABLED(CONFIG_HYPERV)
1025
1026BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1027 hyperv_vector_handler)
1028
1029#endif /* CONFIG_HYPERV */
1030
1031#ifdef CONFIG_FUNCTION_TRACER
1032#ifdef CONFIG_DYNAMIC_FTRACE
1033
1034ENTRY(mcount)
1035 ret
1036END(mcount)
1037
1038ENTRY(ftrace_caller)
1039 pushl %eax
1040 pushl %ecx
1041 pushl %edx
1042 pushl $0 /* Pass NULL as regs pointer */
1043 movl 4*4(%esp), %eax
1044 movl 0x4(%ebp), %edx
1045 movl function_trace_op, %ecx
1046 subl $MCOUNT_INSN_SIZE, %eax
1047
1048.globl ftrace_call
1049ftrace_call:
1050 call ftrace_stub
1051
1052 addl $4,%esp /* skip NULL pointer */
1053 popl %edx
1054 popl %ecx
1055 popl %eax
1056ftrace_ret:
1057#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1058.globl ftrace_graph_call
1059ftrace_graph_call:
1060 jmp ftrace_stub
1061#endif
1062
1063.globl ftrace_stub
1064ftrace_stub:
1065 ret
1066END(ftrace_caller)
1067
1068ENTRY(ftrace_regs_caller)
1069 pushf /* push flags before compare (in cs location) */
1070
1071 /*
1072	 * i386 does not save SS and ESP when coming from the kernel.
1073 * Instead, to get sp, &regs->sp is used (see ptrace.h).
1074 * Unfortunately, that means eflags must be at the same location
1075	 * as the current return ip. We move the return ip into the
1076 * ip location, and move flags into the return ip location.
1077 */
1078 pushl 4(%esp) /* save return ip into ip slot */
1079
1080 pushl $0 /* Load 0 into orig_ax */
1081 pushl %gs
1082 pushl %fs
1083 pushl %es
1084 pushl %ds
1085 pushl %eax
1086 pushl %ebp
1087 pushl %edi
1088 pushl %esi
1089 pushl %edx
1090 pushl %ecx
1091 pushl %ebx
1092
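	/*
	 * The pushes above build a pt_regs frame: %ebx at 0(%esp) up through
	 * %gs at 10*4 and orig_ax at 11*4. Slot 12*4 already holds the
	 * return ip; slot 13*4 (cs) still holds the saved flags and slot
	 * 14*4 (flags) still holds the original return address, which the
	 * moves below shuffle into place.
	 */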
1093 movl 13*4(%esp), %eax /* Get the saved flags */
1094 movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
1095 /* clobbering return ip */
1096 movl $__KERNEL_CS,13*4(%esp)
1097
1098 movl 12*4(%esp), %eax /* Load ip (1st parameter) */
1099 subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
1100 movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
1101 movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
1102 pushl %esp /* Save pt_regs as 4th parameter */
1103
1104GLOBAL(ftrace_regs_call)
1105 call ftrace_stub
1106
1107 addl $4, %esp /* Skip pt_regs */
1108 movl 14*4(%esp), %eax /* Move flags back into cs */
1109 movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
1110 movl 12*4(%esp), %eax /* Get return ip from regs->ip */
1111 movl %eax, 14*4(%esp) /* Put return ip back for ret */
1112
1113 popl %ebx
1114 popl %ecx
1115 popl %edx
1116 popl %esi
1117 popl %edi
1118 popl %ebp
1119 popl %eax
1120 popl %ds
1121 popl %es
1122 popl %fs
1123 popl %gs
1124 addl $8, %esp /* Skip orig_ax and ip */
1125 popf /* Pop flags at end (no addl to corrupt flags) */
1126 jmp ftrace_ret
1127
1128 popf
1129 jmp ftrace_stub
1130#else /* ! CONFIG_DYNAMIC_FTRACE */
1131
1132ENTRY(mcount)
1133 cmpl $__PAGE_OFFSET, %esp
1134 jb ftrace_stub /* Paging not enabled yet? */
1135
1136 cmpl $ftrace_stub, ftrace_trace_function
1137 jnz trace
1138#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1139 cmpl $ftrace_stub, ftrace_graph_return
1140 jnz ftrace_graph_caller
1141
1142 cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
1143 jnz ftrace_graph_caller
1144#endif
1145.globl ftrace_stub
1146ftrace_stub:
1147 ret
1148
1149 /* taken from glibc */
1150trace:
1151 pushl %eax
1152 pushl %ecx
1153 pushl %edx
1154 movl 0xc(%esp), %eax
1155 movl 0x4(%ebp), %edx
1156 subl $MCOUNT_INSN_SIZE, %eax
1157
1158 call *ftrace_trace_function
1159
1160 popl %edx
1161 popl %ecx
1162 popl %eax
1163 jmp ftrace_stub
1164END(mcount)
1165#endif /* CONFIG_DYNAMIC_FTRACE */
1166#endif /* CONFIG_FUNCTION_TRACER */
1167
1168#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1169ENTRY(ftrace_graph_caller)
1170 pushl %eax
1171 pushl %ecx
1172 pushl %edx
1173 movl 0xc(%esp), %eax
1174 lea 0x4(%ebp), %edx
1175 movl (%ebp), %ecx
1176 subl $MCOUNT_INSN_SIZE, %eax
1177 call prepare_ftrace_return
1178 popl %edx
1179 popl %ecx
1180 popl %eax
1181 ret
1182END(ftrace_graph_caller)
1183
1184.globl return_to_handler
1185return_to_handler:
1186 pushl %eax
1187 pushl %edx
1188 movl %ebp, %eax
1189 call ftrace_return_to_handler
1190 movl %eax, %ecx
1191 popl %edx
1192 popl %eax
1193 jmp *%ecx
1194#endif
1195
1196#ifdef CONFIG_TRACING
1197ENTRY(trace_page_fault)
1198 RING0_EC_FRAME
1199 ASM_CLAC
1200 pushl_cfi $trace_do_page_fault
1201 jmp error_code
1202 CFI_ENDPROC
1203END(trace_page_fault)
1204#endif
1205
1206ENTRY(page_fault)
1207 RING0_EC_FRAME
1208 ASM_CLAC
1209 pushl_cfi $do_page_fault
1210 ALIGN
1211error_code:
1212 /* the function address is in %gs's slot on the stack */
1213 pushl_cfi %fs
1214 /*CFI_REL_OFFSET fs, 0*/
1215 pushl_cfi %es
1216 /*CFI_REL_OFFSET es, 0*/
1217 pushl_cfi %ds
1218 /*CFI_REL_OFFSET ds, 0*/
1219 pushl_cfi_reg eax
1220 pushl_cfi_reg ebp
1221 pushl_cfi_reg edi
1222 pushl_cfi_reg esi
1223 pushl_cfi_reg edx
1224 pushl_cfi_reg ecx
1225 pushl_cfi_reg ebx
1226 cld
1227 movl $(__KERNEL_PERCPU), %ecx
1228 movl %ecx, %fs
1229 UNWIND_ESPFIX_STACK
1230 GS_TO_REG %ecx
1231 movl PT_GS(%esp), %edi # get the function address
1232 movl PT_ORIG_EAX(%esp), %edx # get the error code
1233 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1234 REG_TO_PTGS %ecx
1235 SET_KERNEL_GS %ecx
1236 movl $(__USER_DS), %ecx
1237 movl %ecx, %ds
1238 movl %ecx, %es
1239 TRACE_IRQS_OFF
1240 movl %esp,%eax # pt_regs pointer
1241 call *%edi
1242 jmp ret_from_exception
1243 CFI_ENDPROC
1244END(page_fault)
1245
1246/*
1247 * Debug traps and NMI can happen at the one SYSENTER instruction
1248 * that sets up the real kernel stack. Check here, since we can't
1249 * allow the wrong stack to be used.
1250 *
1251 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1252 * already pushed 3 words if it hits on the sysenter instruction:
1253 * eflags, cs and eip.
1254 *
1255 * We just load the right stack, and push the three (known) values
1256 * by hand onto the new stack - while updating the return eip past
1257 * the instruction that would have done it for sysenter.
1258 */
1259.macro FIX_STACK offset ok label
1260 cmpw $__KERNEL_CS, 4(%esp)
1261 jne \ok
1262\label:
1263 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1264 CFI_DEF_CFA esp, 0
1265 CFI_UNDEFINED eip
1266 pushfl_cfi
1267 pushl_cfi $__KERNEL_CS
1268 pushl_cfi $sysenter_past_esp
1269 CFI_REL_OFFSET eip, 0
1270.endm
1271
1272ENTRY(debug)
1273 RING0_INT_FRAME
1274 ASM_CLAC
1275 cmpl $ia32_sysenter_target,(%esp)
1276 jne debug_stack_correct
1277 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1278debug_stack_correct:
1279 pushl_cfi $-1 # mark this as an int
1280 SAVE_ALL
1281 TRACE_IRQS_OFF
1282 xorl %edx,%edx # error code 0
1283 movl %esp,%eax # pt_regs pointer
1284 call do_debug
1285 jmp ret_from_exception
1286 CFI_ENDPROC
1287END(debug)
1288
1289/*
1290 * NMI is doubly nasty. It can happen _while_ we're handling
1291 * a debug fault, and the debug fault hasn't yet been able to
1292 * clear up the stack. So we first check whether we got an
1293 * NMI on the sysenter entry path, but after that we need to
1294 * check whether we got an NMI on the debug path where the debug
1295 * fault happened on the sysenter path.
1296 */
1297ENTRY(nmi)
1298 RING0_INT_FRAME
1299 ASM_CLAC
1300#ifdef CONFIG_X86_ESPFIX32
1301 pushl_cfi %eax
1302 movl %ss, %eax
1303 cmpw $__ESPFIX_SS, %ax
1304 popl_cfi %eax
1305 je nmi_espfix_stack
1306#endif
1307 cmpl $ia32_sysenter_target,(%esp)
1308 je nmi_stack_fixup
1309 pushl_cfi %eax
1310 movl %esp,%eax
1311 /* Do not access memory above the end of our stack page,
1312 * it might not exist.
1313 */
1314 andl $(THREAD_SIZE-1),%eax
1315 cmpl $(THREAD_SIZE-20),%eax
1316 popl_cfi %eax
1317 jae nmi_stack_correct
1318 cmpl $ia32_sysenter_target,12(%esp)
1319 je nmi_debug_stack_check
1320nmi_stack_correct:
1321 /* We have a RING0_INT_FRAME here */
1322 pushl_cfi %eax
1323 SAVE_ALL
1324 xorl %edx,%edx # zero error code
1325 movl %esp,%eax # pt_regs pointer
1326 call do_nmi
1327 jmp restore_all_notrace
1328 CFI_ENDPROC
1329
1330nmi_stack_fixup:
1331 RING0_INT_FRAME
1332 FIX_STACK 12, nmi_stack_correct, 1
1333 jmp nmi_stack_correct
1334
1335nmi_debug_stack_check:
1336 /* We have a RING0_INT_FRAME here */
1337 cmpw $__KERNEL_CS,16(%esp)
1338 jne nmi_stack_correct
1339 cmpl $debug,(%esp)
1340 jb nmi_stack_correct
1341 cmpl $debug_esp_fix_insn,(%esp)
1342 ja nmi_stack_correct
1343 FIX_STACK 24, nmi_stack_correct, 1
1344 jmp nmi_stack_correct
1345
1346#ifdef CONFIG_X86_ESPFIX32
1347nmi_espfix_stack:
1348 /* We have a RING0_INT_FRAME here.
1349 *
1350	 * create the ss:esp far pointer for the lss back to the espfix stack
1351 */
1352 pushl_cfi %ss
1353 pushl_cfi %esp
1354 addl $4, (%esp)
1355 /* copy the iret frame of 12 bytes */
1356 .rept 3
1357 pushl_cfi 16(%esp)
1358 .endr
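	/*
	 * The stack now holds a copy of the iret frame (eip, cs, eflags) for
	 * SAVE_ALL/do_nmi to see, with the esp/ss far pointer built above
	 * sitting just beyond it; the "lss 12+4(%esp), %esp" below uses that
	 * pointer to switch back to the original frame on the espfix stack
	 * before the final iret.
	 */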
1359 pushl_cfi %eax
1360 SAVE_ALL
1361 FIXUP_ESPFIX_STACK # %eax == %esp
1362 xorl %edx,%edx # zero error code
1363 call do_nmi
1364 RESTORE_REGS
1365 lss 12+4(%esp), %esp # back to espfix stack
1366 CFI_ADJUST_CFA_OFFSET -24
1367 jmp irq_return
1368#endif
1369 CFI_ENDPROC
1370END(nmi)
1371
1372ENTRY(int3)
1373 RING0_INT_FRAME
1374 ASM_CLAC
1375 pushl_cfi $-1 # mark this as an int
1376 SAVE_ALL
1377 TRACE_IRQS_OFF
1378 xorl %edx,%edx # zero error code
1379 movl %esp,%eax # pt_regs pointer
1380 call do_int3
1381 jmp ret_from_exception
1382 CFI_ENDPROC
1383END(int3)
1384
1385ENTRY(general_protection)
1386 RING0_EC_FRAME
1387 pushl_cfi $do_general_protection
1388 jmp error_code
1389 CFI_ENDPROC
1390END(general_protection)
1391
1392#ifdef CONFIG_KVM_GUEST
1393ENTRY(async_page_fault)
1394 RING0_EC_FRAME
1395 ASM_CLAC
1396 pushl_cfi $do_async_page_fault
1397 jmp error_code
1398 CFI_ENDPROC
1399END(async_page_fault)
1400#endif
1401
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2b55ee6db053..5a4668136e98 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
167 clear_bss(); 167 clear_bss();
168 168
169 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) 169 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
170 set_intr_gate(i, early_idt_handlers[i]); 170 set_intr_gate(i, early_idt_handler_array[i]);
171 load_idt((const struct desc_ptr *)&idt_descr); 171 load_idt((const struct desc_ptr *)&idt_descr);
172 172
173 copy_bootdata(__va(real_mode_data)); 173 copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 02d257256200..544dec4cc605 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -478,21 +478,22 @@ is486:
478__INIT 478__INIT
479setup_once: 479setup_once:
480 /* 480 /*
481 * Set up a idt with 256 entries pointing to ignore_int, 481 * Set up a idt with 256 interrupt gates that push zero if there
482 * interrupt gates. It doesn't actually load idt - that needs 482 * is no error code and then jump to early_idt_handler_common.
483 * to be done on each CPU. Interrupts are enabled elsewhere, 483 * It doesn't actually load the idt - that needs to be done on
484 * when we can be relatively sure everything is ok. 484 * each CPU. Interrupts are enabled elsewhere, when we can be
485 * relatively sure everything is ok.
485 */ 486 */
486 487
487 movl $idt_table,%edi 488 movl $idt_table,%edi
488 movl $early_idt_handlers,%eax 489 movl $early_idt_handler_array,%eax
489 movl $NUM_EXCEPTION_VECTORS,%ecx 490 movl $NUM_EXCEPTION_VECTORS,%ecx
4901: 4911:
491 movl %eax,(%edi) 492 movl %eax,(%edi)
492 movl %eax,4(%edi) 493 movl %eax,4(%edi)
493 /* interrupt gate, dpl=0, present */ 494 /* interrupt gate, dpl=0, present */
494 movl $(0x8E000000 + __KERNEL_CS),2(%edi) 495 movl $(0x8E000000 + __KERNEL_CS),2(%edi)
495 addl $9,%eax 496 addl $EARLY_IDT_HANDLER_SIZE,%eax
496 addl $8,%edi 497 addl $8,%edi
497 loop 1b 498 loop 1b
498 499
@@ -524,26 +525,28 @@ setup_once:
524 andl $0,setup_once_ref /* Once is enough, thanks */ 525 andl $0,setup_once_ref /* Once is enough, thanks */
525 ret 526 ret
526 527
527ENTRY(early_idt_handlers) 528ENTRY(early_idt_handler_array)
528 # 36(%esp) %eflags 529 # 36(%esp) %eflags
529 # 32(%esp) %cs 530 # 32(%esp) %cs
530 # 28(%esp) %eip 531 # 28(%esp) %eip
531 # 24(%rsp) error code 532 # 24(%rsp) error code
532 i = 0 533 i = 0
533 .rept NUM_EXCEPTION_VECTORS 534 .rept NUM_EXCEPTION_VECTORS
534 .if (EXCEPTION_ERRCODE_MASK >> i) & 1 535 .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
535 ASM_NOP2
536 .else
537 pushl $0 # Dummy error code, to make stack frame uniform 536 pushl $0 # Dummy error code, to make stack frame uniform
538 .endif 537 .endif
539 pushl $i # 20(%esp) Vector number 538 pushl $i # 20(%esp) Vector number
540 jmp early_idt_handler 539 jmp early_idt_handler_common
541 i = i + 1 540 i = i + 1
541 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
542 .endr 542 .endr
543ENDPROC(early_idt_handlers) 543ENDPROC(early_idt_handler_array)
544 544
545 /* This is global to keep gas from relaxing the jumps */ 545early_idt_handler_common:
546ENTRY(early_idt_handler) 546 /*
547 * The stack is the hardware frame, an error code or zero, and the
548 * vector number.
549 */
547 cld 550 cld
548 551
549 cmpl $2,(%esp) # X86_TRAP_NMI 552 cmpl $2,(%esp) # X86_TRAP_NMI
@@ -603,7 +606,7 @@ ex_entry:
603.Lis_nmi: 606.Lis_nmi:
604 addl $8,%esp /* drop vector number and error code */ 607 addl $8,%esp /* drop vector number and error code */
605 iret 608 iret
606ENDPROC(early_idt_handler) 609ENDPROC(early_idt_handler_common)
607 610
608/* This is the default interrupt "handler" :-) */ 611/* This is the default interrupt "handler" :-) */
609 ALIGN 612 ALIGN
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 43eafc8afb69..e5c27f729a38 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -321,26 +321,28 @@ bad_address:
321 jmp bad_address 321 jmp bad_address
322 322
323 __INIT 323 __INIT
324 .globl early_idt_handlers 324ENTRY(early_idt_handler_array)
325early_idt_handlers:
326 # 104(%rsp) %rflags 325 # 104(%rsp) %rflags
327 # 96(%rsp) %cs 326 # 96(%rsp) %cs
328 # 88(%rsp) %rip 327 # 88(%rsp) %rip
329 # 80(%rsp) error code 328 # 80(%rsp) error code
330 i = 0 329 i = 0
331 .rept NUM_EXCEPTION_VECTORS 330 .rept NUM_EXCEPTION_VECTORS
332 .if (EXCEPTION_ERRCODE_MASK >> i) & 1 331 .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
333 ASM_NOP2
334 .else
335 pushq $0 # Dummy error code, to make stack frame uniform 332 pushq $0 # Dummy error code, to make stack frame uniform
336 .endif 333 .endif
337 pushq $i # 72(%rsp) Vector number 334 pushq $i # 72(%rsp) Vector number
338 jmp early_idt_handler 335 jmp early_idt_handler_common
339 i = i + 1 336 i = i + 1
337 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
340 .endr 338 .endr
339ENDPROC(early_idt_handler_array)
341 340
342/* This is global to keep gas from relaxing the jumps */ 341early_idt_handler_common:
343ENTRY(early_idt_handler) 342 /*
343 * The stack is the hardware frame, an error code or zero, and the
344 * vector number.
345 */
344 cld 346 cld
345 347
346 cmpl $2,(%rsp) # X86_TRAP_NMI 348 cmpl $2,(%rsp) # X86_TRAP_NMI
@@ -412,7 +414,7 @@ ENTRY(early_idt_handler)
412.Lis_nmi: 414.Lis_nmi:
413 addq $16,%rsp # drop vector number and error code 415 addq $16,%rsp # drop vector number and error code
414 INTERRUPT_RETURN 416 INTERRUPT_RETURN
415ENDPROC(early_idt_handler) 417ENDPROC(early_idt_handler_common)
416 418
417 __INITDATA 419 __INITDATA
418 420
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 009183276bb7..6185d3141219 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -173,6 +173,21 @@ static void init_thread_xstate(void)
173 xstate_size = sizeof(struct i387_fxsave_struct); 173 xstate_size = sizeof(struct i387_fxsave_struct);
174 else 174 else
175 xstate_size = sizeof(struct i387_fsave_struct); 175 xstate_size = sizeof(struct i387_fsave_struct);
176
177 /*
178 * Quirk: we don't yet handle the XSAVES* instructions
179 * correctly, as we don't correctly convert between
180 * standard and compacted format when interfacing
181 * with user-space - so disable it for now.
182 *
183 * The difference is small: with recent CPUs the
184 * compacted format is only marginally smaller than
185 * the standard FPU state format.
186 *
187 * ( This is easy to backport while we are fixing
188 * XSAVES* support. )
189 */
190 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
176} 191}
177 192
178/* 193/*
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7e10c8b4b318..88b366487b0e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -122,6 +122,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
122 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); 122 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
123 seq_puts(p, " Threshold APIC interrupts\n"); 123 seq_puts(p, " Threshold APIC interrupts\n");
124#endif 124#endif
125#ifdef CONFIG_X86_MCE_AMD
126 seq_printf(p, "%*s: ", prec, "DFR");
127 for_each_online_cpu(j)
128 seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
129 seq_puts(p, " Deferred Error APIC interrupts\n");
130#endif
125#ifdef CONFIG_X86_MCE 131#ifdef CONFIG_X86_MCE
126 seq_printf(p, "%*s: ", prec, "MCE"); 132 seq_printf(p, "%*s: ", prec, "MCE");
127 for_each_online_cpu(j) 133 for_each_online_cpu(j)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 680723a8e4b6..a3a5e158ed69 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -135,6 +135,10 @@ static void __init apic_intr_init(void)
135 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 135 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
136#endif 136#endif
137 137
138#ifdef CONFIG_X86_MCE_AMD
139 alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
140#endif
141
138#ifdef CONFIG_X86_LOCAL_APIC 142#ifdef CONFIG_X86_LOCAL_APIC
139 /* self generated IPI for local APIC timer */ 143 /* self generated IPI for local APIC timer */
140 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); 144 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9435620062df..1681504e44a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
584 kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); 584 kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
585} 585}
586 586
587
588#ifdef CONFIG_QUEUED_SPINLOCKS
589
590#include <asm/qspinlock.h>
591
592static void kvm_wait(u8 *ptr, u8 val)
593{
594 unsigned long flags;
595
596 if (in_nmi())
597 return;
598
599 local_irq_save(flags);
600
601 if (READ_ONCE(*ptr) != val)
602 goto out;
603
604 /*
605	 * halt until it's our turn and we are kicked. Note that we do a safe halt
606	 * in the irq-enabled case to avoid hanging when the lock info is overwritten
607	 * in the irq spinlock slowpath and no spurious interrupt occurs to save us.
608 */
609 if (arch_irqs_disabled_flags(flags))
610 halt();
611 else
612 safe_halt();
613
614out:
615 local_irq_restore(flags);
616}
617
618#else /* !CONFIG_QUEUED_SPINLOCKS */
619
587enum kvm_contention_stat { 620enum kvm_contention_stat {
588 TAKEN_SLOW, 621 TAKEN_SLOW,
589 TAKEN_SLOW_PICKUP, 622 TAKEN_SLOW_PICKUP,
@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
817 } 850 }
818} 851}
819 852
853#endif /* !CONFIG_QUEUED_SPINLOCKS */
854
820/* 855/*
821 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. 856 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
822 */ 857 */
@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
828 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) 863 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
829 return; 864 return;
830 865
866#ifdef CONFIG_QUEUED_SPINLOCKS
867 __pv_init_lock_hash();
868 pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
869 pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
870 pv_lock_ops.wait = kvm_wait;
871 pv_lock_ops.kick = kvm_kick_cpu;
872#else /* !CONFIG_QUEUED_SPINLOCKS */
831 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); 873 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
832 pv_lock_ops.unlock_kick = kvm_unlock_kick; 874 pv_lock_ops.unlock_kick = kvm_unlock_kick;
875#endif
833} 876}
834 877
835static __init int kvm_spinlock_init_jump(void) 878static __init int kvm_spinlock_init_jump(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 415480d3ea84..11546b462fa6 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -17,6 +17,7 @@
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/suspend.h> 19#include <linux/suspend.h>
20#include <linux/vmalloc.h>
20 21
21#include <asm/init.h> 22#include <asm/init.h>
22#include <asm/pgtable.h> 23#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index bbb6c7316341..33ee3e0efd65 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,11 +8,33 @@
8 8
9#include <asm/paravirt.h> 9#include <asm/paravirt.h>
10 10
11#ifdef CONFIG_QUEUED_SPINLOCKS
12__visible void __native_queued_spin_unlock(struct qspinlock *lock)
13{
14 native_queued_spin_unlock(lock);
15}
16
17PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
18
19bool pv_is_native_spin_unlock(void)
20{
21 return pv_lock_ops.queued_spin_unlock.func ==
22 __raw_callee_save___native_queued_spin_unlock;
23}
24#endif
25
11struct pv_lock_ops pv_lock_ops = { 26struct pv_lock_ops pv_lock_ops = {
12#ifdef CONFIG_SMP 27#ifdef CONFIG_SMP
28#ifdef CONFIG_QUEUED_SPINLOCKS
29 .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
30 .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
31 .wait = paravirt_nop,
32 .kick = paravirt_nop,
33#else /* !CONFIG_QUEUED_SPINLOCKS */
13 .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), 34 .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
14 .unlock_kick = paravirt_nop, 35 .unlock_kick = paravirt_nop,
15#endif 36#endif /* !CONFIG_QUEUED_SPINLOCKS */
37#endif /* SMP */
16}; 38};
17EXPORT_SYMBOL(pv_lock_ops); 39EXPORT_SYMBOL(pv_lock_ops);
18 40
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab6..e1b013696dde 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
12DEF_NATIVE(pv_cpu_ops, clts, "clts"); 12DEF_NATIVE(pv_cpu_ops, clts, "clts");
13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); 13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
14 14
15#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
16DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
17#endif
18
15unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) 19unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
16{ 20{
17 /* arg in %eax, return in %eax */ 21 /* arg in %eax, return in %eax */
@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
24 return 0; 28 return 0;
25} 29}
26 30
31extern bool pv_is_native_spin_unlock(void);
32
27unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 33unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
28 unsigned long addr, unsigned len) 34 unsigned long addr, unsigned len)
29{ 35{
@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
47 PATCH_SITE(pv_mmu_ops, write_cr3); 53 PATCH_SITE(pv_mmu_ops, write_cr3);
48 PATCH_SITE(pv_cpu_ops, clts); 54 PATCH_SITE(pv_cpu_ops, clts);
49 PATCH_SITE(pv_cpu_ops, read_tsc); 55 PATCH_SITE(pv_cpu_ops, read_tsc);
50 56#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
51 patch_site: 57 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
52 ret = paravirt_patch_insns(ibuf, len, start, end); 58 if (pv_is_native_spin_unlock()) {
53 break; 59 start = start_pv_lock_ops_queued_spin_unlock;
60 end = end_pv_lock_ops_queued_spin_unlock;
61 goto patch_site;
62 }
63#endif
54 64
55 default: 65 default:
56 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); 66 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
57 break; 67 break;
68
69patch_site:
70 ret = paravirt_patch_insns(ibuf, len, start, end);
71 break;
58 } 72 }
59#undef PATCH_SITE 73#undef PATCH_SITE
60 return ret; 74 return ret;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 0de21c62c348..8aa05583bc42 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
21DEF_NATIVE(, mov32, "mov %edi, %eax"); 21DEF_NATIVE(, mov32, "mov %edi, %eax");
22DEF_NATIVE(, mov64, "mov %rdi, %rax"); 22DEF_NATIVE(, mov64, "mov %rdi, %rax");
23 23
24#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
25DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
26#endif
27
24unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) 28unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
25{ 29{
26 return paravirt_patch_insns(insnbuf, len, 30 return paravirt_patch_insns(insnbuf, len,
@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
33 start__mov64, end__mov64); 37 start__mov64, end__mov64);
34} 38}
35 39
40extern bool pv_is_native_spin_unlock(void);
41
36unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 42unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
37 unsigned long addr, unsigned len) 43 unsigned long addr, unsigned len)
38{ 44{
@@ -58,14 +64,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
58 PATCH_SITE(pv_cpu_ops, clts); 64 PATCH_SITE(pv_cpu_ops, clts);
59 PATCH_SITE(pv_mmu_ops, flush_tlb_single); 65 PATCH_SITE(pv_mmu_ops, flush_tlb_single);
60 PATCH_SITE(pv_cpu_ops, wbinvd); 66 PATCH_SITE(pv_cpu_ops, wbinvd);
61 67#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
62 patch_site: 68 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
63 ret = paravirt_patch_insns(ibuf, len, start, end); 69 if (pv_is_native_spin_unlock()) {
64 break; 70 start = start_pv_lock_ops_queued_spin_unlock;
71 end = end_pv_lock_ops_queued_spin_unlock;
72 goto patch_site;
73 }
74#endif
65 75
66 default: 76 default:
67 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); 77 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
68 break; 78 break;
79
80patch_site:
81 ret = paravirt_patch_insns(ibuf, len, start, end);
82 break;
69 } 83 }
70#undef PATCH_SITE 84#undef PATCH_SITE
71 return ret; 85 return ret;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5e0791f9d3dc..de379366f6d1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,8 +72,7 @@ gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
72#else 72#else
73#include <asm/processor-flags.h> 73#include <asm/processor-flags.h>
74#include <asm/setup.h> 74#include <asm/setup.h>
75 75#include <asm/proto.h>
76asmlinkage int system_call(void);
77#endif 76#endif
78 77
79/* Must be page-aligned because the real IDT is used in a fixmap. */ 78/* Must be page-aligned because the real IDT is used in a fixmap. */
@@ -813,18 +812,6 @@ dotraplinkage void
813do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) 812do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
814{ 813{
815 conditional_sti(regs); 814 conditional_sti(regs);
816#if 0
817 /* No need to warn about this any longer. */
818 pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
819#endif
820}
821
822asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void)
823{
824}
825
826asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
827{
828} 815}
829 816
830/* 817/*
@@ -992,12 +979,12 @@ void __init trap_init(void)
992 set_bit(i, used_vectors); 979 set_bit(i, used_vectors);
993 980
994#ifdef CONFIG_IA32_EMULATION 981#ifdef CONFIG_IA32_EMULATION
995 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 982 set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
996 set_bit(IA32_SYSCALL_VECTOR, used_vectors); 983 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
997#endif 984#endif
998 985
999#ifdef CONFIG_X86_32 986#ifdef CONFIG_X86_32
1000 set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call); 987 set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
1001 set_bit(IA32_SYSCALL_VECTOR, used_vectors); 988 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
1002#endif 989#endif
1003 990
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 59b69f6a2844..1d08ad3582d0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,6 +16,8 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/vmalloc.h> 17#include <linux/vmalloc.h>
18#include <linux/uaccess.h> 18#include <linux/uaccess.h>
19#include <asm/i387.h> /* For use_eager_fpu. Ugh! */
20#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */
19#include <asm/user.h> 21#include <asm/user.h>
20#include <asm/xsave.h> 22#include <asm/xsave.h>
21#include "cpuid.h" 23#include "cpuid.h"
@@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
95 if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) 97 if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
96 best->ebx = xstate_required_size(vcpu->arch.xcr0, true); 98 best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
97 99
100 vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
101
98 /* 102 /*
99 * The existing code assumes virtual address is 48-bit in the canonical 103 * The existing code assumes virtual address is 48-bit in the canonical
100 * address checks; exit if it is ever changed. 104 * address checks; exit if it is ever changed.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c3b1ad9fca81..496b3695d3d3 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
117 best = kvm_find_cpuid_entry(vcpu, 7, 0); 117 best = kvm_find_cpuid_entry(vcpu, 7, 0);
118 return best && (best->ebx & bit(X86_FEATURE_RTM)); 118 return best && (best->ebx & bit(X86_FEATURE_RTM));
119} 119}
120
121static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
122{
123 struct kvm_cpuid_entry2 *best;
124
125 best = kvm_find_cpuid_entry(vcpu, 7, 0);
126 return best && (best->ebx & bit(X86_FEATURE_MPX));
127}
120#endif 128#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d43867c33bc4..44a7d2515497 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
3736 } 3736 }
3737} 3737}
3738 3738
3739void update_permission_bitmask(struct kvm_vcpu *vcpu, 3739static void update_permission_bitmask(struct kvm_vcpu *vcpu,
3740 struct kvm_mmu *mmu, bool ept) 3740 struct kvm_mmu *mmu, bool ept)
3741{ 3741{
3742 unsigned bit, byte, pfec; 3742 unsigned bit, byte, pfec;
3743 u8 map; 3743 u8 map;
@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3918void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) 3918void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
3919{ 3919{
3920 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); 3920 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
3921 bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
3921 struct kvm_mmu *context = &vcpu->arch.mmu; 3922 struct kvm_mmu *context = &vcpu->arch.mmu;
3922 3923
3923 MMU_WARN_ON(VALID_PAGE(context->root_hpa)); 3924 MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
3936 context->base_role.cr0_wp = is_write_protection(vcpu); 3937 context->base_role.cr0_wp = is_write_protection(vcpu);
3937 context->base_role.smep_andnot_wp 3938 context->base_role.smep_andnot_wp
3938 = smep && !is_write_protection(vcpu); 3939 = smep && !is_write_protection(vcpu);
3940 context->base_role.smap_andnot_wp
3941 = smap && !is_write_protection(vcpu);
3939} 3942}
3940EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); 3943EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
3941 3944
@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4207 const u8 *new, int bytes) 4210 const u8 *new, int bytes)
4208{ 4211{
4209 gfn_t gfn = gpa >> PAGE_SHIFT; 4212 gfn_t gfn = gpa >> PAGE_SHIFT;
4210 union kvm_mmu_page_role mask = { .word = 0 };
4211 struct kvm_mmu_page *sp; 4213 struct kvm_mmu_page *sp;
4212 LIST_HEAD(invalid_list); 4214 LIST_HEAD(invalid_list);
4213 u64 entry, gentry, *spte; 4215 u64 entry, gentry, *spte;
4214 int npte; 4216 int npte;
4215 bool remote_flush, local_flush, zap_page; 4217 bool remote_flush, local_flush, zap_page;
4218 union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
4219 .cr0_wp = 1,
4220 .cr4_pae = 1,
4221 .nxe = 1,
4222 .smep_andnot_wp = 1,
4223 .smap_andnot_wp = 1,
4224 };
4216 4225
4217 /* 4226 /*
4218 * If we don't have indirect shadow pages, it means no page is 4227 * If we don't have indirect shadow pages, it means no page is
@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4238 ++vcpu->kvm->stat.mmu_pte_write; 4247 ++vcpu->kvm->stat.mmu_pte_write;
4239 kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); 4248 kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
4240 4249
4241 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
4242 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { 4250 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
4243 if (detect_write_misaligned(sp, gpa, bytes) || 4251 if (detect_write_misaligned(sp, gpa, bytes) ||
4244 detect_write_flooding(sp)) { 4252 detect_write_flooding(sp)) {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index c7d65637c851..0ada65ecddcf 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -71,8 +71,6 @@ enum {
71int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); 71int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
72void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); 72void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
73void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); 73void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
74void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
75 bool ept);
76 74
77static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) 75static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
78{ 76{
@@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
166 int index = (pfec >> 1) + 164 int index = (pfec >> 1) +
167 (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); 165 (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
168 166
167 WARN_ON(pfec & PFERR_RSVD_MASK);
168
169 return (mmu->permissions[index] >> pte_access) & 1; 169 return (mmu->permissions[index] >> pte_access) & 1;
170} 170}
171 171
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index fd49c867b25a..6e6d115fe9b5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
718 mmu_is_nested(vcpu)); 718 mmu_is_nested(vcpu));
719 if (likely(r != RET_MMIO_PF_INVALID)) 719 if (likely(r != RET_MMIO_PF_INVALID))
720 return r; 720 return r;
721
722 /*
723 * page fault with PFEC.RSVD = 1 is caused by shadow
724 * page fault, should not be used to walk guest page
725 * table.
726 */
727 error_code &= ~PFERR_RSVD_MASK;
721 }; 728 };
722 729
723 r = mmu_topup_memory_caches(vcpu); 730 r = mmu_topup_memory_caches(vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ce741b8650f6..9afa233b5482 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = {
4381 .cache_reg = svm_cache_reg, 4381 .cache_reg = svm_cache_reg,
4382 .get_rflags = svm_get_rflags, 4382 .get_rflags = svm_get_rflags,
4383 .set_rflags = svm_set_rflags, 4383 .set_rflags = svm_set_rflags,
4384 .fpu_activate = svm_fpu_activate,
4384 .fpu_deactivate = svm_fpu_deactivate, 4385 .fpu_deactivate = svm_fpu_deactivate,
4385 4386
4386 .tlb_flush = svm_flush_tlb, 4387 .tlb_flush = svm_flush_tlb,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b61687bd79..2d73807f0d31 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
10185 .cache_reg = vmx_cache_reg, 10185 .cache_reg = vmx_cache_reg,
10186 .get_rflags = vmx_get_rflags, 10186 .get_rflags = vmx_get_rflags,
10187 .set_rflags = vmx_set_rflags, 10187 .set_rflags = vmx_set_rflags,
10188 .fpu_activate = vmx_fpu_activate,
10188 .fpu_deactivate = vmx_fpu_deactivate, 10189 .fpu_deactivate = vmx_fpu_deactivate,
10189 10190
10190 .tlb_flush = vmx_flush_tlb, 10191 .tlb_flush = vmx_flush_tlb,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c73efcd03e29..ea306adbbc13 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
702int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 702int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
703{ 703{
704 unsigned long old_cr4 = kvm_read_cr4(vcpu); 704 unsigned long old_cr4 = kvm_read_cr4(vcpu);
705 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | 705 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
706 X86_CR4_PAE | X86_CR4_SMEP; 706 X86_CR4_SMEP | X86_CR4_SMAP;
707
707 if (cr4 & CR4_RESERVED_BITS) 708 if (cr4 & CR4_RESERVED_BITS)
708 return 1; 709 return 1;
709 710
@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
744 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) 745 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
745 kvm_mmu_reset_context(vcpu); 746 kvm_mmu_reset_context(vcpu);
746 747
747 if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
748 update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
749
750 if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) 748 if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
751 kvm_update_cpuid(vcpu); 749 kvm_update_cpuid(vcpu);
752 750
@@ -6197,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
6197 return; 6195 return;
6198 6196
6199 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); 6197 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
6198 if (is_error_page(page))
6199 return;
6200 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); 6200 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
6201 6201
6202 /* 6202 /*
@@ -7060,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
7060 fpu_save_init(&vcpu->arch.guest_fpu); 7060 fpu_save_init(&vcpu->arch.guest_fpu);
7061 __kernel_fpu_end(); 7061 __kernel_fpu_end();
7062 ++vcpu->stat.fpu_reload; 7062 ++vcpu->stat.fpu_reload;
7063 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); 7063 if (!vcpu->arch.eager_fpu)
7064 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
7065
7064 trace_kvm_fpu(0); 7066 trace_kvm_fpu(0);
7065} 7067}
7066 7068
@@ -7076,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
7076struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 7078struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
7077 unsigned int id) 7079 unsigned int id)
7078{ 7080{
7081 struct kvm_vcpu *vcpu;
7082
7079 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) 7083 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
7080 printk_once(KERN_WARNING 7084 printk_once(KERN_WARNING
7081 "kvm: SMP vm created on host with unstable TSC; " 7085 "kvm: SMP vm created on host with unstable TSC; "
7082 "guest TSC will not be reliable\n"); 7086 "guest TSC will not be reliable\n");
7083 return kvm_x86_ops->vcpu_create(kvm, id); 7087
7088 vcpu = kvm_x86_ops->vcpu_create(kvm, id);
7089
7090 /*
7091 * Activate fpu unconditionally in case the guest needs eager FPU. It will be
7092 * deactivated soon if it doesn't.
7093 */
7094 kvm_x86_ops->fpu_activate(vcpu);
7095 return vcpu;
7084} 7096}
7085 7097
7086int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 7098int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 982989d282ff..f2587888d987 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -17,7 +17,6 @@ clean-files := inat-tables.c
17obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o 17obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
18 18
19lib-y := delay.o misc.o cmdline.o 19lib-y := delay.o misc.o cmdline.o
20lib-y += thunk_$(BITS).o
21lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o 20lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
22lib-y += memcpy_$(BITS).o 21lib-y += memcpy_$(BITS).o
23lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o 22lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 00933d5e992f..9b0ca8fe80fc 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -11,26 +11,23 @@
11 11
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <asm/alternative-asm.h> 13#include <asm/alternative-asm.h>
14#include <asm/dwarf2.h>
15 14
16/* if you want SMP support, implement these with real spinlocks */ 15/* if you want SMP support, implement these with real spinlocks */
17.macro LOCK reg 16.macro LOCK reg
18 pushfl_cfi 17 pushfl
19 cli 18 cli
20.endm 19.endm
21 20
22.macro UNLOCK reg 21.macro UNLOCK reg
23 popfl_cfi 22 popfl
24.endm 23.endm
25 24
26#define BEGIN(op) \ 25#define BEGIN(op) \
27.macro endp; \ 26.macro endp; \
28 CFI_ENDPROC; \
29ENDPROC(atomic64_##op##_386); \ 27ENDPROC(atomic64_##op##_386); \
30.purgem endp; \ 28.purgem endp; \
31.endm; \ 29.endm; \
32ENTRY(atomic64_##op##_386); \ 30ENTRY(atomic64_##op##_386); \
33 CFI_STARTPROC; \
34 LOCK v; 31 LOCK v;
35 32
36#define ENDP endp 33#define ENDP endp
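As the comment notes, the LOCK/UNLOCK macros above only make the atomic64 routines safe on uniprocessor kernels: pushfl/cli saves EFLAGS and disables interrupts, and popfl restores them. A hedged C equivalent of that pattern, where local_irq_save()/local_irq_restore() are the usual kernel helpers and up_atomic64_add_return() is an illustrative name:

	#include <linux/irqflags.h>

	/* UP-only sketch: the critical section is protected by disabling
	 * local interrupts, exactly what pushfl/cli ... popfl does above. */
	static long long up_atomic64_add_return(long long i, long long *v)
	{
		unsigned long flags;
		long long ret;

		local_irq_save(flags);		/* LOCK: pushfl; cli */
		*v += i;
		ret = *v;
		local_irq_restore(flags);	/* UNLOCK: popfl */
		return ret;
	}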
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 082a85167a5b..db3ae85440ff 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -11,7 +11,6 @@
11 11
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <asm/alternative-asm.h> 13#include <asm/alternative-asm.h>
14#include <asm/dwarf2.h>
15 14
16.macro read64 reg 15.macro read64 reg
17 movl %ebx, %eax 16 movl %ebx, %eax
@@ -22,16 +21,11 @@
22.endm 21.endm
23 22
24ENTRY(atomic64_read_cx8) 23ENTRY(atomic64_read_cx8)
25 CFI_STARTPROC
26
27 read64 %ecx 24 read64 %ecx
28 ret 25 ret
29 CFI_ENDPROC
30ENDPROC(atomic64_read_cx8) 26ENDPROC(atomic64_read_cx8)
31 27
32ENTRY(atomic64_set_cx8) 28ENTRY(atomic64_set_cx8)
33 CFI_STARTPROC
34
351: 291:
36/* we don't need LOCK_PREFIX since aligned 64-bit writes 30/* we don't need LOCK_PREFIX since aligned 64-bit writes
37 * are atomic on 586 and newer */ 31 * are atomic on 586 and newer */
@@ -39,28 +33,23 @@ ENTRY(atomic64_set_cx8)
39 jne 1b 33 jne 1b
40 34
41 ret 35 ret
42 CFI_ENDPROC
43ENDPROC(atomic64_set_cx8) 36ENDPROC(atomic64_set_cx8)
44 37
45ENTRY(atomic64_xchg_cx8) 38ENTRY(atomic64_xchg_cx8)
46 CFI_STARTPROC
47
481: 391:
49 LOCK_PREFIX 40 LOCK_PREFIX
50 cmpxchg8b (%esi) 41 cmpxchg8b (%esi)
51 jne 1b 42 jne 1b
52 43
53 ret 44 ret
54 CFI_ENDPROC
55ENDPROC(atomic64_xchg_cx8) 45ENDPROC(atomic64_xchg_cx8)
56 46
57.macro addsub_return func ins insc 47.macro addsub_return func ins insc
58ENTRY(atomic64_\func\()_return_cx8) 48ENTRY(atomic64_\func\()_return_cx8)
59 CFI_STARTPROC 49 pushl %ebp
60 pushl_cfi_reg ebp 50 pushl %ebx
61 pushl_cfi_reg ebx 51 pushl %esi
62 pushl_cfi_reg esi 52 pushl %edi
63 pushl_cfi_reg edi
64 53
65 movl %eax, %esi 54 movl %eax, %esi
66 movl %edx, %edi 55 movl %edx, %edi
@@ -79,12 +68,11 @@ ENTRY(atomic64_\func\()_return_cx8)
7910: 6810:
80 movl %ebx, %eax 69 movl %ebx, %eax
81 movl %ecx, %edx 70 movl %ecx, %edx
82 popl_cfi_reg edi 71 popl %edi
83 popl_cfi_reg esi 72 popl %esi
84 popl_cfi_reg ebx 73 popl %ebx
85 popl_cfi_reg ebp 74 popl %ebp
86 ret 75 ret
87 CFI_ENDPROC
88ENDPROC(atomic64_\func\()_return_cx8) 76ENDPROC(atomic64_\func\()_return_cx8)
89.endm 77.endm
90 78
@@ -93,8 +81,7 @@ addsub_return sub sub sbb
93 81
94.macro incdec_return func ins insc 82.macro incdec_return func ins insc
95ENTRY(atomic64_\func\()_return_cx8) 83ENTRY(atomic64_\func\()_return_cx8)
96 CFI_STARTPROC 84 pushl %ebx
97 pushl_cfi_reg ebx
98 85
99 read64 %esi 86 read64 %esi
1001: 871:
@@ -109,9 +96,8 @@ ENTRY(atomic64_\func\()_return_cx8)
10910: 9610:
110 movl %ebx, %eax 97 movl %ebx, %eax
111 movl %ecx, %edx 98 movl %ecx, %edx
112 popl_cfi_reg ebx 99 popl %ebx
113 ret 100 ret
114 CFI_ENDPROC
115ENDPROC(atomic64_\func\()_return_cx8) 101ENDPROC(atomic64_\func\()_return_cx8)
116.endm 102.endm
117 103
@@ -119,8 +105,7 @@ incdec_return inc add adc
119incdec_return dec sub sbb 105incdec_return dec sub sbb
120 106
121ENTRY(atomic64_dec_if_positive_cx8) 107ENTRY(atomic64_dec_if_positive_cx8)
122 CFI_STARTPROC 108 pushl %ebx
123 pushl_cfi_reg ebx
124 109
125 read64 %esi 110 read64 %esi
1261: 1111:
@@ -136,18 +121,16 @@ ENTRY(atomic64_dec_if_positive_cx8)
1362: 1212:
137 movl %ebx, %eax 122 movl %ebx, %eax
138 movl %ecx, %edx 123 movl %ecx, %edx
139 popl_cfi_reg ebx 124 popl %ebx
140 ret 125 ret
141 CFI_ENDPROC
142ENDPROC(atomic64_dec_if_positive_cx8) 126ENDPROC(atomic64_dec_if_positive_cx8)
143 127
144ENTRY(atomic64_add_unless_cx8) 128ENTRY(atomic64_add_unless_cx8)
145 CFI_STARTPROC 129 pushl %ebp
146 pushl_cfi_reg ebp 130 pushl %ebx
147 pushl_cfi_reg ebx
148/* these just push these two parameters on the stack */ 131/* these just push these two parameters on the stack */
149 pushl_cfi_reg edi 132 pushl %edi
150 pushl_cfi_reg ecx 133 pushl %ecx
151 134
152 movl %eax, %ebp 135 movl %eax, %ebp
153 movl %edx, %edi 136 movl %edx, %edi
@@ -168,21 +151,18 @@ ENTRY(atomic64_add_unless_cx8)
168 movl $1, %eax 151 movl $1, %eax
1693: 1523:
170 addl $8, %esp 153 addl $8, %esp
171 CFI_ADJUST_CFA_OFFSET -8 154 popl %ebx
172 popl_cfi_reg ebx 155 popl %ebp
173 popl_cfi_reg ebp
174 ret 156 ret
1754: 1574:
176 cmpl %edx, 4(%esp) 158 cmpl %edx, 4(%esp)
177 jne 2b 159 jne 2b
178 xorl %eax, %eax 160 xorl %eax, %eax
179 jmp 3b 161 jmp 3b
180 CFI_ENDPROC
181ENDPROC(atomic64_add_unless_cx8) 162ENDPROC(atomic64_add_unless_cx8)
182 163
183ENTRY(atomic64_inc_not_zero_cx8) 164ENTRY(atomic64_inc_not_zero_cx8)
184 CFI_STARTPROC 165 pushl %ebx
185 pushl_cfi_reg ebx
186 166
187 read64 %esi 167 read64 %esi
1881: 1681:
@@ -199,7 +179,6 @@ ENTRY(atomic64_inc_not_zero_cx8)
199 179
200 movl $1, %eax 180 movl $1, %eax
2013: 1813:
202 popl_cfi_reg ebx 182 popl %ebx
203 ret 183 ret
204 CFI_ENDPROC
205ENDPROC(atomic64_inc_not_zero_cx8) 184ENDPROC(atomic64_inc_not_zero_cx8)
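The cx8 routines above all follow the same shape: read the 64-bit value into edx:eax, compute the new value in ecx:ebx, and retry cmpxchg8b until no other CPU changed the memory in between. A C sketch of that retry loop; cmpxchg64() is the kernel's 64-bit compare-and-exchange, and the function name is illustrative:

	/* Sketch of the lock cmpxchg8b retry loop used by the cx8 variants. */
	static long long atomic64_add_return_sketch(long long i, long long *v)
	{
		long long old, new;

		do {
			old = READ_ONCE(*v);	/* read64: edx:eax = *v */
			new = old + i;		/* add/adc into ecx:ebx */
		} while (cmpxchg64(v, old, new) != old);	/* jne 1b */

		return new;
	}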
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 9bc944a91274..c1e623209853 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -26,7 +26,6 @@
26 */ 26 */
27 27
28#include <linux/linkage.h> 28#include <linux/linkage.h>
29#include <asm/dwarf2.h>
30#include <asm/errno.h> 29#include <asm/errno.h>
31#include <asm/asm.h> 30#include <asm/asm.h>
32 31
@@ -50,9 +49,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
50 * alignment for the unrolled loop. 49 * alignment for the unrolled loop.
51 */ 50 */
52ENTRY(csum_partial) 51ENTRY(csum_partial)
53 CFI_STARTPROC 52 pushl %esi
54 pushl_cfi_reg esi 53 pushl %ebx
55 pushl_cfi_reg ebx
56 movl 20(%esp),%eax # Function arg: unsigned int sum 54 movl 20(%esp),%eax # Function arg: unsigned int sum
57 movl 16(%esp),%ecx # Function arg: int len 55 movl 16(%esp),%ecx # Function arg: int len
58 movl 12(%esp),%esi # Function arg: unsigned char *buff 56 movl 12(%esp),%esi # Function arg: unsigned char *buff
@@ -129,10 +127,9 @@ ENTRY(csum_partial)
129 jz 8f 127 jz 8f
130 roll $8, %eax 128 roll $8, %eax
1318: 1298:
132 popl_cfi_reg ebx 130 popl %ebx
133 popl_cfi_reg esi 131 popl %esi
134 ret 132 ret
135 CFI_ENDPROC
136ENDPROC(csum_partial) 133ENDPROC(csum_partial)
137 134
138#else 135#else
@@ -140,9 +137,8 @@ ENDPROC(csum_partial)
140/* Version for PentiumII/PPro */ 137/* Version for PentiumII/PPro */
141 138
142ENTRY(csum_partial) 139ENTRY(csum_partial)
143 CFI_STARTPROC 140 pushl %esi
144 pushl_cfi_reg esi 141 pushl %ebx
145 pushl_cfi_reg ebx
146 movl 20(%esp),%eax # Function arg: unsigned int sum 142 movl 20(%esp),%eax # Function arg: unsigned int sum
147 movl 16(%esp),%ecx # Function arg: int len 143 movl 16(%esp),%ecx # Function arg: int len
148 movl 12(%esp),%esi # Function arg: const unsigned char *buf 144 movl 12(%esp),%esi # Function arg: const unsigned char *buf
@@ -249,10 +245,9 @@ ENTRY(csum_partial)
249 jz 90f 245 jz 90f
250 roll $8, %eax 246 roll $8, %eax
25190: 24790:
252 popl_cfi_reg ebx 248 popl %ebx
253 popl_cfi_reg esi 249 popl %esi
254 ret 250 ret
255 CFI_ENDPROC
256ENDPROC(csum_partial) 251ENDPROC(csum_partial)
257 252
258#endif 253#endif
@@ -287,12 +282,10 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
287#define FP 12 282#define FP 12
288 283
289ENTRY(csum_partial_copy_generic) 284ENTRY(csum_partial_copy_generic)
290 CFI_STARTPROC
291 subl $4,%esp 285 subl $4,%esp
292 CFI_ADJUST_CFA_OFFSET 4 286 pushl %edi
293 pushl_cfi_reg edi 287 pushl %esi
294 pushl_cfi_reg esi 288 pushl %ebx
295 pushl_cfi_reg ebx
296 movl ARGBASE+16(%esp),%eax # sum 289 movl ARGBASE+16(%esp),%eax # sum
297 movl ARGBASE+12(%esp),%ecx # len 290 movl ARGBASE+12(%esp),%ecx # len
298 movl ARGBASE+4(%esp),%esi # src 291 movl ARGBASE+4(%esp),%esi # src
@@ -401,12 +394,11 @@ DST( movb %cl, (%edi) )
401 394
402.previous 395.previous
403 396
404 popl_cfi_reg ebx 397 popl %ebx
405 popl_cfi_reg esi 398 popl %esi
406 popl_cfi_reg edi 399 popl %edi
407 popl_cfi %ecx # equivalent to addl $4,%esp 400 popl %ecx # equivalent to addl $4,%esp
408 ret 401 ret
409 CFI_ENDPROC
410ENDPROC(csum_partial_copy_generic) 402ENDPROC(csum_partial_copy_generic)
411 403
412#else 404#else
@@ -426,10 +418,9 @@ ENDPROC(csum_partial_copy_generic)
426#define ARGBASE 12 418#define ARGBASE 12
427 419
428ENTRY(csum_partial_copy_generic) 420ENTRY(csum_partial_copy_generic)
429 CFI_STARTPROC 421 pushl %ebx
430 pushl_cfi_reg ebx 422 pushl %edi
431 pushl_cfi_reg edi 423 pushl %esi
432 pushl_cfi_reg esi
433 movl ARGBASE+4(%esp),%esi #src 424 movl ARGBASE+4(%esp),%esi #src
434 movl ARGBASE+8(%esp),%edi #dst 425 movl ARGBASE+8(%esp),%edi #dst
435 movl ARGBASE+12(%esp),%ecx #len 426 movl ARGBASE+12(%esp),%ecx #len
@@ -489,11 +480,10 @@ DST( movb %dl, (%edi) )
489 jmp 7b 480 jmp 7b
490.previous 481.previous
491 482
492 popl_cfi_reg esi 483 popl %esi
493 popl_cfi_reg edi 484 popl %edi
494 popl_cfi_reg ebx 485 popl %ebx
495 ret 486 ret
496 CFI_ENDPROC
497ENDPROC(csum_partial_copy_generic) 487ENDPROC(csum_partial_copy_generic)
498 488
499#undef ROUND 489#undef ROUND
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index e67e579c93bd..a2fe51b00cce 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,4 @@
1#include <linux/linkage.h> 1#include <linux/linkage.h>
2#include <asm/dwarf2.h>
3#include <asm/cpufeature.h> 2#include <asm/cpufeature.h>
4#include <asm/alternative-asm.h> 3#include <asm/alternative-asm.h>
5 4
@@ -15,7 +14,6 @@
15 * %rdi - page 14 * %rdi - page
16 */ 15 */
17ENTRY(clear_page) 16ENTRY(clear_page)
18 CFI_STARTPROC
19 17
20 ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \ 18 ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
21 "jmp clear_page_c_e", X86_FEATURE_ERMS 19 "jmp clear_page_c_e", X86_FEATURE_ERMS
@@ -24,11 +22,9 @@ ENTRY(clear_page)
24 xorl %eax,%eax 22 xorl %eax,%eax
25 rep stosq 23 rep stosq
26 ret 24 ret
27 CFI_ENDPROC
28ENDPROC(clear_page) 25ENDPROC(clear_page)
29 26
30ENTRY(clear_page_orig) 27ENTRY(clear_page_orig)
31 CFI_STARTPROC
32 28
33 xorl %eax,%eax 29 xorl %eax,%eax
34 movl $4096/64,%ecx 30 movl $4096/64,%ecx
@@ -48,14 +44,11 @@ ENTRY(clear_page_orig)
48 jnz .Lloop 44 jnz .Lloop
49 nop 45 nop
50 ret 46 ret
51 CFI_ENDPROC
52ENDPROC(clear_page_orig) 47ENDPROC(clear_page_orig)
53 48
54ENTRY(clear_page_c_e) 49ENTRY(clear_page_c_e)
55 CFI_STARTPROC
56 movl $4096,%ecx 50 movl $4096,%ecx
57 xorl %eax,%eax 51 xorl %eax,%eax
58 rep stosb 52 rep stosb
59 ret 53 ret
60 CFI_ENDPROC
61ENDPROC(clear_page_c_e) 54ENDPROC(clear_page_c_e)
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 40a172541ee2..9b330242e740 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -6,7 +6,6 @@
6 * 6 *
7 */ 7 */
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/dwarf2.h>
10#include <asm/percpu.h> 9#include <asm/percpu.h>
11 10
12.text 11.text
@@ -21,7 +20,6 @@
21 * %al : Operation successful 20 * %al : Operation successful
22 */ 21 */
23ENTRY(this_cpu_cmpxchg16b_emu) 22ENTRY(this_cpu_cmpxchg16b_emu)
24CFI_STARTPROC
25 23
26# 24#
27# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not 25# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
@@ -32,7 +30,7 @@ CFI_STARTPROC
32# *atomic* on a single cpu (as provided by the this_cpu_xx class of 30# *atomic* on a single cpu (as provided by the this_cpu_xx class of
33# macros). 31# macros).
34# 32#
35 pushfq_cfi 33 pushfq
36 cli 34 cli
37 35
38 cmpq PER_CPU_VAR((%rsi)), %rax 36 cmpq PER_CPU_VAR((%rsi)), %rax
@@ -43,17 +41,13 @@ CFI_STARTPROC
43 movq %rbx, PER_CPU_VAR((%rsi)) 41 movq %rbx, PER_CPU_VAR((%rsi))
44 movq %rcx, PER_CPU_VAR(8(%rsi)) 42 movq %rcx, PER_CPU_VAR(8(%rsi))
45 43
46 CFI_REMEMBER_STATE 44 popfq
47 popfq_cfi
48 mov $1, %al 45 mov $1, %al
49 ret 46 ret
50 47
51 CFI_RESTORE_STATE
52.Lnot_same: 48.Lnot_same:
53 popfq_cfi 49 popfq
54 xor %al,%al 50 xor %al,%al
55 ret 51 ret
56 52
57CFI_ENDPROC
58
59ENDPROC(this_cpu_cmpxchg16b_emu) 53ENDPROC(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index b4807fce5177..ad5349778490 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -7,7 +7,6 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/dwarf2.h>
11 10
12.text 11.text
13 12
@@ -20,14 +19,13 @@
20 * %ecx : high 32 bits of new value 19 * %ecx : high 32 bits of new value
21 */ 20 */
22ENTRY(cmpxchg8b_emu) 21ENTRY(cmpxchg8b_emu)
23CFI_STARTPROC
24 22
25# 23#
26# Emulate 'cmpxchg8b (%esi)' on UP except we don't 24# Emulate 'cmpxchg8b (%esi)' on UP except we don't
27# set the whole ZF thing (caller will just compare 25# set the whole ZF thing (caller will just compare
28# eax:edx with the expected value) 26# eax:edx with the expected value)
29# 27#
30 pushfl_cfi 28 pushfl
31 cli 29 cli
32 30
33 cmpl (%esi), %eax 31 cmpl (%esi), %eax
@@ -38,18 +36,15 @@ CFI_STARTPROC
38 movl %ebx, (%esi) 36 movl %ebx, (%esi)
39 movl %ecx, 4(%esi) 37 movl %ecx, 4(%esi)
40 38
41 CFI_REMEMBER_STATE 39 popfl
42 popfl_cfi
43 ret 40 ret
44 41
45 CFI_RESTORE_STATE
46.Lnot_same: 42.Lnot_same:
47 movl (%esi), %eax 43 movl (%esi), %eax
48.Lhalf_same: 44.Lhalf_same:
49 movl 4(%esi), %edx 45 movl 4(%esi), %edx
50 46
51 popfl_cfi 47 popfl
52 ret 48 ret
53 49
54CFI_ENDPROC
55ENDPROC(cmpxchg8b_emu) 50ENDPROC(cmpxchg8b_emu)
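For reference, a C-level sketch of what cmpxchg8b_emu does on processors lacking the instruction; it is illustrative only, since the real routine works on edx:eax / ecx:ebx and, as the comment says, leaves the caller to compare the returned value rather than setting ZF:

	#include <linux/types.h>
	#include <linux/irqflags.h>

	/* UP emulation sketch: safe because interrupts are disabled around
	 * the compare-and-store, mirroring pushfl/cli ... popfl above.    */
	static u64 cmpxchg8b_emu_sketch(u64 *ptr, u64 old, u64 new)
	{
		unsigned long flags;
		u64 cur;

		local_irq_save(flags);
		cur = *ptr;			/* current 64-bit value      */
		if (cur == old)
			*ptr = new;		/* store ecx:ebx on a match  */
		local_irq_restore(flags);
		return cur;			/* caller compares with old  */
	}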
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 8239dbcbf984..009f98216b7e 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,7 +1,6 @@
1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ 1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h> 4#include <asm/cpufeature.h>
6#include <asm/alternative-asm.h> 5#include <asm/alternative-asm.h>
7 6
@@ -13,22 +12,16 @@
13 */ 12 */
14 ALIGN 13 ALIGN
15ENTRY(copy_page) 14ENTRY(copy_page)
16 CFI_STARTPROC
17 ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD 15 ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
18 movl $4096/8, %ecx 16 movl $4096/8, %ecx
19 rep movsq 17 rep movsq
20 ret 18 ret
21 CFI_ENDPROC
22ENDPROC(copy_page) 19ENDPROC(copy_page)
23 20
24ENTRY(copy_page_regs) 21ENTRY(copy_page_regs)
25 CFI_STARTPROC
26 subq $2*8, %rsp 22 subq $2*8, %rsp
27 CFI_ADJUST_CFA_OFFSET 2*8
28 movq %rbx, (%rsp) 23 movq %rbx, (%rsp)
29 CFI_REL_OFFSET rbx, 0
30 movq %r12, 1*8(%rsp) 24 movq %r12, 1*8(%rsp)
31 CFI_REL_OFFSET r12, 1*8
32 25
33 movl $(4096/64)-5, %ecx 26 movl $(4096/64)-5, %ecx
34 .p2align 4 27 .p2align 4
@@ -87,11 +80,7 @@ ENTRY(copy_page_regs)
87 jnz .Loop2 80 jnz .Loop2
88 81
89 movq (%rsp), %rbx 82 movq (%rsp), %rbx
90 CFI_RESTORE rbx
91 movq 1*8(%rsp), %r12 83 movq 1*8(%rsp), %r12
92 CFI_RESTORE r12
93 addq $2*8, %rsp 84 addq $2*8, %rsp
94 CFI_ADJUST_CFA_OFFSET -2*8
95 ret 85 ret
96 CFI_ENDPROC
97ENDPROC(copy_page_regs) 86ENDPROC(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index e4b3beee83bd..982ce34f4a9b 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -7,7 +7,6 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/dwarf2.h>
11#include <asm/current.h> 10#include <asm/current.h>
12#include <asm/asm-offsets.h> 11#include <asm/asm-offsets.h>
13#include <asm/thread_info.h> 12#include <asm/thread_info.h>
@@ -18,7 +17,6 @@
18 17
19/* Standard copy_to_user with segment limit checking */ 18/* Standard copy_to_user with segment limit checking */
20ENTRY(_copy_to_user) 19ENTRY(_copy_to_user)
21 CFI_STARTPROC
22 GET_THREAD_INFO(%rax) 20 GET_THREAD_INFO(%rax)
23 movq %rdi,%rcx 21 movq %rdi,%rcx
24 addq %rdx,%rcx 22 addq %rdx,%rcx
@@ -30,12 +28,10 @@ ENTRY(_copy_to_user)
30 X86_FEATURE_REP_GOOD, \ 28 X86_FEATURE_REP_GOOD, \
31 "jmp copy_user_enhanced_fast_string", \ 29 "jmp copy_user_enhanced_fast_string", \
32 X86_FEATURE_ERMS 30 X86_FEATURE_ERMS
33 CFI_ENDPROC
34ENDPROC(_copy_to_user) 31ENDPROC(_copy_to_user)
35 32
36/* Standard copy_from_user with segment limit checking */ 33/* Standard copy_from_user with segment limit checking */
37ENTRY(_copy_from_user) 34ENTRY(_copy_from_user)
38 CFI_STARTPROC
39 GET_THREAD_INFO(%rax) 35 GET_THREAD_INFO(%rax)
40 movq %rsi,%rcx 36 movq %rsi,%rcx
41 addq %rdx,%rcx 37 addq %rdx,%rcx
@@ -47,14 +43,12 @@ ENTRY(_copy_from_user)
47 X86_FEATURE_REP_GOOD, \ 43 X86_FEATURE_REP_GOOD, \
48 "jmp copy_user_enhanced_fast_string", \ 44 "jmp copy_user_enhanced_fast_string", \
49 X86_FEATURE_ERMS 45 X86_FEATURE_ERMS
50 CFI_ENDPROC
51ENDPROC(_copy_from_user) 46ENDPROC(_copy_from_user)
52 47
53 .section .fixup,"ax" 48 .section .fixup,"ax"
54 /* must zero dest */ 49 /* must zero dest */
55ENTRY(bad_from_user) 50ENTRY(bad_from_user)
56bad_from_user: 51bad_from_user:
57 CFI_STARTPROC
58 movl %edx,%ecx 52 movl %edx,%ecx
59 xorl %eax,%eax 53 xorl %eax,%eax
60 rep 54 rep
@@ -62,7 +56,6 @@ bad_from_user:
62bad_to_user: 56bad_to_user:
63 movl %edx,%eax 57 movl %edx,%eax
64 ret 58 ret
65 CFI_ENDPROC
66ENDPROC(bad_from_user) 59ENDPROC(bad_from_user)
67 .previous 60 .previous
68 61
@@ -80,7 +73,6 @@ ENDPROC(bad_from_user)
80 * eax uncopied bytes or 0 if successful. 73 * eax uncopied bytes or 0 if successful.
81 */ 74 */
82ENTRY(copy_user_generic_unrolled) 75ENTRY(copy_user_generic_unrolled)
83 CFI_STARTPROC
84 ASM_STAC 76 ASM_STAC
85 cmpl $8,%edx 77 cmpl $8,%edx
86 jb 20f /* less then 8 bytes, go to byte copy loop */ 78 jb 20f /* less then 8 bytes, go to byte copy loop */
@@ -162,7 +154,6 @@ ENTRY(copy_user_generic_unrolled)
162 _ASM_EXTABLE(19b,40b) 154 _ASM_EXTABLE(19b,40b)
163 _ASM_EXTABLE(21b,50b) 155 _ASM_EXTABLE(21b,50b)
164 _ASM_EXTABLE(22b,50b) 156 _ASM_EXTABLE(22b,50b)
165 CFI_ENDPROC
166ENDPROC(copy_user_generic_unrolled) 157ENDPROC(copy_user_generic_unrolled)
167 158
168/* Some CPUs run faster using the string copy instructions. 159/* Some CPUs run faster using the string copy instructions.
@@ -184,7 +175,6 @@ ENDPROC(copy_user_generic_unrolled)
184 * eax uncopied bytes or 0 if successful. 175 * eax uncopied bytes or 0 if successful.
185 */ 176 */
186ENTRY(copy_user_generic_string) 177ENTRY(copy_user_generic_string)
187 CFI_STARTPROC
188 ASM_STAC 178 ASM_STAC
189 cmpl $8,%edx 179 cmpl $8,%edx
190 jb 2f /* less than 8 bytes, go to byte copy loop */ 180 jb 2f /* less than 8 bytes, go to byte copy loop */
@@ -209,7 +199,6 @@ ENTRY(copy_user_generic_string)
209 199
210 _ASM_EXTABLE(1b,11b) 200 _ASM_EXTABLE(1b,11b)
211 _ASM_EXTABLE(3b,12b) 201 _ASM_EXTABLE(3b,12b)
212 CFI_ENDPROC
213ENDPROC(copy_user_generic_string) 202ENDPROC(copy_user_generic_string)
214 203
215/* 204/*
@@ -225,7 +214,6 @@ ENDPROC(copy_user_generic_string)
225 * eax uncopied bytes or 0 if successful. 214 * eax uncopied bytes or 0 if successful.
226 */ 215 */
227ENTRY(copy_user_enhanced_fast_string) 216ENTRY(copy_user_enhanced_fast_string)
228 CFI_STARTPROC
229 ASM_STAC 217 ASM_STAC
230 movl %edx,%ecx 218 movl %edx,%ecx
2311: rep 2191: rep
@@ -240,7 +228,6 @@ ENTRY(copy_user_enhanced_fast_string)
240 .previous 228 .previous
241 229
242 _ASM_EXTABLE(1b,12b) 230 _ASM_EXTABLE(1b,12b)
243 CFI_ENDPROC
244ENDPROC(copy_user_enhanced_fast_string) 231ENDPROC(copy_user_enhanced_fast_string)
245 232
246/* 233/*
@@ -248,7 +235,6 @@ ENDPROC(copy_user_enhanced_fast_string)
248 * This will force destination/source out of cache for more performance. 235 * This will force destination/source out of cache for more performance.
249 */ 236 */
250ENTRY(__copy_user_nocache) 237ENTRY(__copy_user_nocache)
251 CFI_STARTPROC
252 ASM_STAC 238 ASM_STAC
253 cmpl $8,%edx 239 cmpl $8,%edx
254 jb 20f /* less then 8 bytes, go to byte copy loop */ 240 jb 20f /* less then 8 bytes, go to byte copy loop */
@@ -332,5 +318,4 @@ ENTRY(__copy_user_nocache)
332 _ASM_EXTABLE(19b,40b) 318 _ASM_EXTABLE(19b,40b)
333 _ASM_EXTABLE(21b,50b) 319 _ASM_EXTABLE(21b,50b)
334 _ASM_EXTABLE(22b,50b) 320 _ASM_EXTABLE(22b,50b)
335 CFI_ENDPROC
336ENDPROC(__copy_user_nocache) 321ENDPROC(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 9734182966f3..7e48807b2fa1 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -6,7 +6,6 @@
6 * for more details. No warranty for anything given at all. 6 * for more details. No warranty for anything given at all.
7 */ 7 */
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/dwarf2.h>
10#include <asm/errno.h> 9#include <asm/errno.h>
11#include <asm/asm.h> 10#include <asm/asm.h>
12 11
@@ -47,23 +46,16 @@
47 46
48 47
49ENTRY(csum_partial_copy_generic) 48ENTRY(csum_partial_copy_generic)
50 CFI_STARTPROC
51 cmpl $3*64, %edx 49 cmpl $3*64, %edx
52 jle .Lignore 50 jle .Lignore
53 51
54.Lignore: 52.Lignore:
55 subq $7*8, %rsp 53 subq $7*8, %rsp
56 CFI_ADJUST_CFA_OFFSET 7*8
57 movq %rbx, 2*8(%rsp) 54 movq %rbx, 2*8(%rsp)
58 CFI_REL_OFFSET rbx, 2*8
59 movq %r12, 3*8(%rsp) 55 movq %r12, 3*8(%rsp)
60 CFI_REL_OFFSET r12, 3*8
61 movq %r14, 4*8(%rsp) 56 movq %r14, 4*8(%rsp)
62 CFI_REL_OFFSET r14, 4*8
63 movq %r13, 5*8(%rsp) 57 movq %r13, 5*8(%rsp)
64 CFI_REL_OFFSET r13, 5*8
65 movq %rbp, 6*8(%rsp) 58 movq %rbp, 6*8(%rsp)
66 CFI_REL_OFFSET rbp, 6*8
67 59
68 movq %r8, (%rsp) 60 movq %r8, (%rsp)
69 movq %r9, 1*8(%rsp) 61 movq %r9, 1*8(%rsp)
@@ -206,22 +198,14 @@ ENTRY(csum_partial_copy_generic)
206 addl %ebx, %eax 198 addl %ebx, %eax
207 adcl %r9d, %eax /* carry */ 199 adcl %r9d, %eax /* carry */
208 200
209 CFI_REMEMBER_STATE
210.Lende: 201.Lende:
211 movq 2*8(%rsp), %rbx 202 movq 2*8(%rsp), %rbx
212 CFI_RESTORE rbx
213 movq 3*8(%rsp), %r12 203 movq 3*8(%rsp), %r12
214 CFI_RESTORE r12
215 movq 4*8(%rsp), %r14 204 movq 4*8(%rsp), %r14
216 CFI_RESTORE r14
217 movq 5*8(%rsp), %r13 205 movq 5*8(%rsp), %r13
218 CFI_RESTORE r13
219 movq 6*8(%rsp), %rbp 206 movq 6*8(%rsp), %rbp
220 CFI_RESTORE rbp
221 addq $7*8, %rsp 207 addq $7*8, %rsp
222 CFI_ADJUST_CFA_OFFSET -7*8
223 ret 208 ret
224 CFI_RESTORE_STATE
225 209
226 /* Exception handlers. Very simple, zeroing is done in the wrappers */ 210 /* Exception handlers. Very simple, zeroing is done in the wrappers */
227.Lbad_source: 211.Lbad_source:
@@ -237,5 +221,4 @@ ENTRY(csum_partial_copy_generic)
237 jz .Lende 221 jz .Lende
238 movl $-EFAULT, (%rax) 222 movl $-EFAULT, (%rax)
239 jmp .Lende 223 jmp .Lende
240 CFI_ENDPROC
241ENDPROC(csum_partial_copy_generic) 224ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index a4512359656a..46668cda4ffd 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -26,7 +26,6 @@
26 */ 26 */
27 27
28#include <linux/linkage.h> 28#include <linux/linkage.h>
29#include <asm/dwarf2.h>
30#include <asm/page_types.h> 29#include <asm/page_types.h>
31#include <asm/errno.h> 30#include <asm/errno.h>
32#include <asm/asm-offsets.h> 31#include <asm/asm-offsets.h>
@@ -36,7 +35,6 @@
36 35
37 .text 36 .text
38ENTRY(__get_user_1) 37ENTRY(__get_user_1)
39 CFI_STARTPROC
40 GET_THREAD_INFO(%_ASM_DX) 38 GET_THREAD_INFO(%_ASM_DX)
41 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 39 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
42 jae bad_get_user 40 jae bad_get_user
@@ -45,11 +43,9 @@ ENTRY(__get_user_1)
45 xor %eax,%eax 43 xor %eax,%eax
46 ASM_CLAC 44 ASM_CLAC
47 ret 45 ret
48 CFI_ENDPROC
49ENDPROC(__get_user_1) 46ENDPROC(__get_user_1)
50 47
51ENTRY(__get_user_2) 48ENTRY(__get_user_2)
52 CFI_STARTPROC
53 add $1,%_ASM_AX 49 add $1,%_ASM_AX
54 jc bad_get_user 50 jc bad_get_user
55 GET_THREAD_INFO(%_ASM_DX) 51 GET_THREAD_INFO(%_ASM_DX)
@@ -60,11 +56,9 @@ ENTRY(__get_user_2)
60 xor %eax,%eax 56 xor %eax,%eax
61 ASM_CLAC 57 ASM_CLAC
62 ret 58 ret
63 CFI_ENDPROC
64ENDPROC(__get_user_2) 59ENDPROC(__get_user_2)
65 60
66ENTRY(__get_user_4) 61ENTRY(__get_user_4)
67 CFI_STARTPROC
68 add $3,%_ASM_AX 62 add $3,%_ASM_AX
69 jc bad_get_user 63 jc bad_get_user
70 GET_THREAD_INFO(%_ASM_DX) 64 GET_THREAD_INFO(%_ASM_DX)
@@ -75,11 +69,9 @@ ENTRY(__get_user_4)
75 xor %eax,%eax 69 xor %eax,%eax
76 ASM_CLAC 70 ASM_CLAC
77 ret 71 ret
78 CFI_ENDPROC
79ENDPROC(__get_user_4) 72ENDPROC(__get_user_4)
80 73
81ENTRY(__get_user_8) 74ENTRY(__get_user_8)
82 CFI_STARTPROC
83#ifdef CONFIG_X86_64 75#ifdef CONFIG_X86_64
84 add $7,%_ASM_AX 76 add $7,%_ASM_AX
85 jc bad_get_user 77 jc bad_get_user
@@ -104,28 +96,23 @@ ENTRY(__get_user_8)
104 ASM_CLAC 96 ASM_CLAC
105 ret 97 ret
106#endif 98#endif
107 CFI_ENDPROC
108ENDPROC(__get_user_8) 99ENDPROC(__get_user_8)
109 100
110 101
111bad_get_user: 102bad_get_user:
112 CFI_STARTPROC
113 xor %edx,%edx 103 xor %edx,%edx
114 mov $(-EFAULT),%_ASM_AX 104 mov $(-EFAULT),%_ASM_AX
115 ASM_CLAC 105 ASM_CLAC
116 ret 106 ret
117 CFI_ENDPROC
118END(bad_get_user) 107END(bad_get_user)
119 108
120#ifdef CONFIG_X86_32 109#ifdef CONFIG_X86_32
121bad_get_user_8: 110bad_get_user_8:
122 CFI_STARTPROC
123 xor %edx,%edx 111 xor %edx,%edx
124 xor %ecx,%ecx 112 xor %ecx,%ecx
125 mov $(-EFAULT),%_ASM_AX 113 mov $(-EFAULT),%_ASM_AX
126 ASM_CLAC 114 ASM_CLAC
127 ret 115 ret
128 CFI_ENDPROC
129END(bad_get_user_8) 116END(bad_get_user_8)
130#endif 117#endif
131 118
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index 05a95e713da8..33147fef3452 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -16,15 +16,12 @@
16 */ 16 */
17 17
18#include <linux/linkage.h> 18#include <linux/linkage.h>
19#include <asm/dwarf2.h>
20 19
21/* 20/*
22 * override generic version in lib/iomap_copy.c 21 * override generic version in lib/iomap_copy.c
23 */ 22 */
24ENTRY(__iowrite32_copy) 23ENTRY(__iowrite32_copy)
25 CFI_STARTPROC
26 movl %edx,%ecx 24 movl %edx,%ecx
27 rep movsd 25 rep movsd
28 ret 26 ret
29 CFI_ENDPROC
30ENDPROC(__iowrite32_copy) 27ENDPROC(__iowrite32_copy)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index b046664f5a1c..16698bba87de 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -2,7 +2,6 @@
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/cpufeature.h> 4#include <asm/cpufeature.h>
5#include <asm/dwarf2.h>
6#include <asm/alternative-asm.h> 5#include <asm/alternative-asm.h>
7 6
8/* 7/*
@@ -53,7 +52,6 @@ ENTRY(memcpy_erms)
53ENDPROC(memcpy_erms) 52ENDPROC(memcpy_erms)
54 53
55ENTRY(memcpy_orig) 54ENTRY(memcpy_orig)
56 CFI_STARTPROC
57 movq %rdi, %rax 55 movq %rdi, %rax
58 56
59 cmpq $0x20, %rdx 57 cmpq $0x20, %rdx
@@ -178,5 +176,4 @@ ENTRY(memcpy_orig)
178 176
179.Lend: 177.Lend:
180 retq 178 retq
181 CFI_ENDPROC
182ENDPROC(memcpy_orig) 179ENDPROC(memcpy_orig)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0f8a0d0331b9..ca2afdd6d98e 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -6,7 +6,6 @@
6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> 6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
7 */ 7 */
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/dwarf2.h>
10#include <asm/cpufeature.h> 9#include <asm/cpufeature.h>
11#include <asm/alternative-asm.h> 10#include <asm/alternative-asm.h>
12 11
@@ -27,7 +26,6 @@
27 26
28ENTRY(memmove) 27ENTRY(memmove)
29ENTRY(__memmove) 28ENTRY(__memmove)
30 CFI_STARTPROC
31 29
32 /* Handle more 32 bytes in loop */ 30 /* Handle more 32 bytes in loop */
33 mov %rdi, %rax 31 mov %rdi, %rax
@@ -207,6 +205,5 @@ ENTRY(__memmove)
207 movb %r11b, (%rdi) 205 movb %r11b, (%rdi)
20813: 20613:
209 retq 207 retq
210 CFI_ENDPROC
211ENDPROC(__memmove) 208ENDPROC(__memmove)
212ENDPROC(memmove) 209ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 93118fb23976..2661fad05827 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -1,7 +1,6 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs */ 1/* Copyright 2002 Andi Kleen, SuSE Labs */
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h> 4#include <asm/cpufeature.h>
6#include <asm/alternative-asm.h> 5#include <asm/alternative-asm.h>
7 6
@@ -66,7 +65,6 @@ ENTRY(memset_erms)
66ENDPROC(memset_erms) 65ENDPROC(memset_erms)
67 66
68ENTRY(memset_orig) 67ENTRY(memset_orig)
69 CFI_STARTPROC
70 movq %rdi,%r10 68 movq %rdi,%r10
71 69
72 /* expand byte value */ 70 /* expand byte value */
@@ -78,7 +76,6 @@ ENTRY(memset_orig)
78 movl %edi,%r9d 76 movl %edi,%r9d
79 andl $7,%r9d 77 andl $7,%r9d
80 jnz .Lbad_alignment 78 jnz .Lbad_alignment
81 CFI_REMEMBER_STATE
82.Lafter_bad_alignment: 79.Lafter_bad_alignment:
83 80
84 movq %rdx,%rcx 81 movq %rdx,%rcx
@@ -128,7 +125,6 @@ ENTRY(memset_orig)
128 movq %r10,%rax 125 movq %r10,%rax
129 ret 126 ret
130 127
131 CFI_RESTORE_STATE
132.Lbad_alignment: 128.Lbad_alignment:
133 cmpq $7,%rdx 129 cmpq $7,%rdx
134 jbe .Lhandle_7 130 jbe .Lhandle_7
@@ -139,5 +135,4 @@ ENTRY(memset_orig)
139 subq %r8,%rdx 135 subq %r8,%rdx
140 jmp .Lafter_bad_alignment 136 jmp .Lafter_bad_alignment
141.Lfinal: 137.Lfinal:
142 CFI_ENDPROC
143ENDPROC(memset_orig) 138ENDPROC(memset_orig)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index 3ca5218fbece..c81556409bbb 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -1,6 +1,5 @@
1#include <linux/linkage.h> 1#include <linux/linkage.h>
2#include <linux/errno.h> 2#include <linux/errno.h>
3#include <asm/dwarf2.h>
4#include <asm/asm.h> 3#include <asm/asm.h>
5#include <asm/msr.h> 4#include <asm/msr.h>
6 5
@@ -13,9 +12,8 @@
13 */ 12 */
14.macro op_safe_regs op 13.macro op_safe_regs op
15ENTRY(\op\()_safe_regs) 14ENTRY(\op\()_safe_regs)
16 CFI_STARTPROC 15 pushq %rbx
17 pushq_cfi_reg rbx 16 pushq %rbp
18 pushq_cfi_reg rbp
19 movq %rdi, %r10 /* Save pointer */ 17 movq %rdi, %r10 /* Save pointer */
20 xorl %r11d, %r11d /* Return value */ 18 xorl %r11d, %r11d /* Return value */
21 movl (%rdi), %eax 19 movl (%rdi), %eax
@@ -25,7 +23,6 @@ ENTRY(\op\()_safe_regs)
25 movl 20(%rdi), %ebp 23 movl 20(%rdi), %ebp
26 movl 24(%rdi), %esi 24 movl 24(%rdi), %esi
27 movl 28(%rdi), %edi 25 movl 28(%rdi), %edi
28 CFI_REMEMBER_STATE
291: \op 261: \op
302: movl %eax, (%r10) 272: movl %eax, (%r10)
31 movl %r11d, %eax /* Return value */ 28 movl %r11d, %eax /* Return value */
@@ -35,16 +32,14 @@ ENTRY(\op\()_safe_regs)
35 movl %ebp, 20(%r10) 32 movl %ebp, 20(%r10)
36 movl %esi, 24(%r10) 33 movl %esi, 24(%r10)
37 movl %edi, 28(%r10) 34 movl %edi, 28(%r10)
38 popq_cfi_reg rbp 35 popq %rbp
39 popq_cfi_reg rbx 36 popq %rbx
40 ret 37 ret
413: 383:
42 CFI_RESTORE_STATE
43 movl $-EIO, %r11d 39 movl $-EIO, %r11d
44 jmp 2b 40 jmp 2b
45 41
46 _ASM_EXTABLE(1b, 3b) 42 _ASM_EXTABLE(1b, 3b)
47 CFI_ENDPROC
48ENDPROC(\op\()_safe_regs) 43ENDPROC(\op\()_safe_regs)
49.endm 44.endm
50 45
@@ -52,13 +47,12 @@ ENDPROC(\op\()_safe_regs)
52 47
53.macro op_safe_regs op 48.macro op_safe_regs op
54ENTRY(\op\()_safe_regs) 49ENTRY(\op\()_safe_regs)
55 CFI_STARTPROC 50 pushl %ebx
56 pushl_cfi_reg ebx 51 pushl %ebp
57 pushl_cfi_reg ebp 52 pushl %esi
58 pushl_cfi_reg esi 53 pushl %edi
59 pushl_cfi_reg edi 54 pushl $0 /* Return value */
60 pushl_cfi $0 /* Return value */ 55 pushl %eax
61 pushl_cfi %eax
62 movl 4(%eax), %ecx 56 movl 4(%eax), %ecx
63 movl 8(%eax), %edx 57 movl 8(%eax), %edx
64 movl 12(%eax), %ebx 58 movl 12(%eax), %ebx
@@ -66,32 +60,28 @@ ENTRY(\op\()_safe_regs)
66 movl 24(%eax), %esi 60 movl 24(%eax), %esi
67 movl 28(%eax), %edi 61 movl 28(%eax), %edi
68 movl (%eax), %eax 62 movl (%eax), %eax
69 CFI_REMEMBER_STATE
701: \op 631: \op
712: pushl_cfi %eax 642: pushl %eax
72 movl 4(%esp), %eax 65 movl 4(%esp), %eax
73 popl_cfi (%eax) 66 popl (%eax)
74 addl $4, %esp 67 addl $4, %esp
75 CFI_ADJUST_CFA_OFFSET -4
76 movl %ecx, 4(%eax) 68 movl %ecx, 4(%eax)
77 movl %edx, 8(%eax) 69 movl %edx, 8(%eax)
78 movl %ebx, 12(%eax) 70 movl %ebx, 12(%eax)
79 movl %ebp, 20(%eax) 71 movl %ebp, 20(%eax)
80 movl %esi, 24(%eax) 72 movl %esi, 24(%eax)
81 movl %edi, 28(%eax) 73 movl %edi, 28(%eax)
82 popl_cfi %eax 74 popl %eax
83 popl_cfi_reg edi 75 popl %edi
84 popl_cfi_reg esi 76 popl %esi
85 popl_cfi_reg ebp 77 popl %ebp
86 popl_cfi_reg ebx 78 popl %ebx
87 ret 79 ret
883: 803:
89 CFI_RESTORE_STATE
90 movl $-EIO, 4(%esp) 81 movl $-EIO, 4(%esp)
91 jmp 2b 82 jmp 2b
92 83
93 _ASM_EXTABLE(1b, 3b) 84 _ASM_EXTABLE(1b, 3b)
94 CFI_ENDPROC
95ENDPROC(\op\()_safe_regs) 85ENDPROC(\op\()_safe_regs)
96.endm 86.endm
97 87
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index fc6ba17a7eec..e0817a12d323 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -11,7 +11,6 @@
11 * return value. 11 * return value.
12 */ 12 */
13#include <linux/linkage.h> 13#include <linux/linkage.h>
14#include <asm/dwarf2.h>
15#include <asm/thread_info.h> 14#include <asm/thread_info.h>
16#include <asm/errno.h> 15#include <asm/errno.h>
17#include <asm/asm.h> 16#include <asm/asm.h>
@@ -30,11 +29,9 @@
30 * as they get called from within inline assembly. 29 * as they get called from within inline assembly.
31 */ 30 */
32 31
33#define ENTER CFI_STARTPROC ; \ 32#define ENTER GET_THREAD_INFO(%_ASM_BX)
34 GET_THREAD_INFO(%_ASM_BX)
35#define EXIT ASM_CLAC ; \ 33#define EXIT ASM_CLAC ; \
36 ret ; \ 34 ret
37 CFI_ENDPROC
38 35
39.text 36.text
40ENTRY(__put_user_1) 37ENTRY(__put_user_1)
@@ -87,7 +84,6 @@ ENTRY(__put_user_8)
87ENDPROC(__put_user_8) 84ENDPROC(__put_user_8)
88 85
89bad_put_user: 86bad_put_user:
90 CFI_STARTPROC
91 movl $-EFAULT,%eax 87 movl $-EFAULT,%eax
92 EXIT 88 EXIT
93END(bad_put_user) 89END(bad_put_user)
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 2322abe4da3b..40027db99140 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -15,7 +15,6 @@
15 15
16#include <linux/linkage.h> 16#include <linux/linkage.h>
17#include <asm/alternative-asm.h> 17#include <asm/alternative-asm.h>
18#include <asm/dwarf2.h>
19 18
20#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg) 19#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg)
21#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l) 20#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l)
@@ -34,10 +33,10 @@
34 */ 33 */
35 34
36#define save_common_regs \ 35#define save_common_regs \
37 pushl_cfi_reg ecx 36 pushl %ecx
38 37
39#define restore_common_regs \ 38#define restore_common_regs \
40 popl_cfi_reg ecx 39 popl %ecx
41 40
42 /* Avoid uglifying the argument copying x86-64 needs to do. */ 41 /* Avoid uglifying the argument copying x86-64 needs to do. */
43 .macro movq src, dst 42 .macro movq src, dst
@@ -64,50 +63,45 @@
64 */ 63 */
65 64
66#define save_common_regs \ 65#define save_common_regs \
67 pushq_cfi_reg rdi; \ 66 pushq %rdi; \
68 pushq_cfi_reg rsi; \ 67 pushq %rsi; \
69 pushq_cfi_reg rcx; \ 68 pushq %rcx; \
70 pushq_cfi_reg r8; \ 69 pushq %r8; \
71 pushq_cfi_reg r9; \ 70 pushq %r9; \
72 pushq_cfi_reg r10; \ 71 pushq %r10; \
73 pushq_cfi_reg r11 72 pushq %r11
74 73
75#define restore_common_regs \ 74#define restore_common_regs \
76 popq_cfi_reg r11; \ 75 popq %r11; \
77 popq_cfi_reg r10; \ 76 popq %r10; \
78 popq_cfi_reg r9; \ 77 popq %r9; \
79 popq_cfi_reg r8; \ 78 popq %r8; \
80 popq_cfi_reg rcx; \ 79 popq %rcx; \
81 popq_cfi_reg rsi; \ 80 popq %rsi; \
82 popq_cfi_reg rdi 81 popq %rdi
83 82
84#endif 83#endif
85 84
86/* Fix up special calling conventions */ 85/* Fix up special calling conventions */
87ENTRY(call_rwsem_down_read_failed) 86ENTRY(call_rwsem_down_read_failed)
88 CFI_STARTPROC
89 save_common_regs 87 save_common_regs
90 __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) 88 __ASM_SIZE(push,) %__ASM_REG(dx)
91 movq %rax,%rdi 89 movq %rax,%rdi
92 call rwsem_down_read_failed 90 call rwsem_down_read_failed
93 __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) 91 __ASM_SIZE(pop,) %__ASM_REG(dx)
94 restore_common_regs 92 restore_common_regs
95 ret 93 ret
96 CFI_ENDPROC
97ENDPROC(call_rwsem_down_read_failed) 94ENDPROC(call_rwsem_down_read_failed)
98 95
99ENTRY(call_rwsem_down_write_failed) 96ENTRY(call_rwsem_down_write_failed)
100 CFI_STARTPROC
101 save_common_regs 97 save_common_regs
102 movq %rax,%rdi 98 movq %rax,%rdi
103 call rwsem_down_write_failed 99 call rwsem_down_write_failed
104 restore_common_regs 100 restore_common_regs
105 ret 101 ret
106 CFI_ENDPROC
107ENDPROC(call_rwsem_down_write_failed) 102ENDPROC(call_rwsem_down_write_failed)
108 103
109ENTRY(call_rwsem_wake) 104ENTRY(call_rwsem_wake)
110 CFI_STARTPROC
111 /* do nothing if still outstanding active readers */ 105 /* do nothing if still outstanding active readers */
112 __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx) 106 __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx)
113 jnz 1f 107 jnz 1f
@@ -116,17 +110,14 @@ ENTRY(call_rwsem_wake)
116 call rwsem_wake 110 call rwsem_wake
117 restore_common_regs 111 restore_common_regs
1181: ret 1121: ret
119 CFI_ENDPROC
120ENDPROC(call_rwsem_wake) 113ENDPROC(call_rwsem_wake)
121 114
122ENTRY(call_rwsem_downgrade_wake) 115ENTRY(call_rwsem_downgrade_wake)
123 CFI_STARTPROC
124 save_common_regs 116 save_common_regs
125 __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) 117 __ASM_SIZE(push,) %__ASM_REG(dx)
126 movq %rax,%rdi 118 movq %rax,%rdi
127 call rwsem_downgrade_wake 119 call rwsem_downgrade_wake
128 __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) 120 __ASM_SIZE(pop,) %__ASM_REG(dx)
129 restore_common_regs 121 restore_common_regs
130 ret 122 ret
131 CFI_ENDPROC
132ENDPROC(call_rwsem_downgrade_wake) 123ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1d553186c434..8533b46e6bee 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -40,7 +40,7 @@
40 */ 40 */
41uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { 41uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
42 [_PAGE_CACHE_MODE_WB ] = 0 | 0 , 42 [_PAGE_CACHE_MODE_WB ] = 0 | 0 ,
43 [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , 43 [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD,
44 [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, 44 [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD,
45 [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, 45 [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD,
46 [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, 46 [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD,
@@ -50,11 +50,11 @@ EXPORT_SYMBOL(__cachemode2pte_tbl);
50 50
51uint8_t __pte2cachemode_tbl[8] = { 51uint8_t __pte2cachemode_tbl[8] = {
52 [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, 52 [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB,
53 [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, 53 [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
54 [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, 54 [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS,
55 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, 55 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC,
56 [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, 56 [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
57 [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, 57 [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
58 [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, 58 [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
59 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, 59 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
60}; 60};
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 9ca35fc60cfe..a9dc7a37e6a2 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -77,13 +77,13 @@ void __iomem *
77iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) 77iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
78{ 78{
79 /* 79 /*
80 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. 80 * For non-PAT systems, translate non-WB request to UC- just in
81 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the 81 * case the caller set the PWT bit to prot directly without using
82 * MTRR is UC or WC. UC_MINUS gets the real intention, of the 82 * pgprot_writecombine(). UC- translates to uncached if the MTRR
83 * user, which is "WC if the MTRR is WC, UC if you can't do that." 83 * is UC or WC. UC- gets the real intention, of the user, which is
84 * "WC if the MTRR is WC, UC if you can't do that."
84 */ 85 */
85 if (!pat_enabled && pgprot_val(prot) == 86 if (!pat_enabled() && pgprot2cachemode(prot) != _PAGE_CACHE_MODE_WB)
86 (__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC)))
87 prot = __pgprot(__PAGE_KERNEL | 87 prot = __pgprot(__PAGE_KERNEL |
88 cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)); 88 cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
89 89
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 70e7444c6835..8405c0c6a535 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -42,6 +42,9 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
42 case _PAGE_CACHE_MODE_WC: 42 case _PAGE_CACHE_MODE_WC:
43 err = _set_memory_wc(vaddr, nrpages); 43 err = _set_memory_wc(vaddr, nrpages);
44 break; 44 break;
45 case _PAGE_CACHE_MODE_WT:
46 err = _set_memory_wt(vaddr, nrpages);
47 break;
45 case _PAGE_CACHE_MODE_WB: 48 case _PAGE_CACHE_MODE_WB:
46 err = _set_memory_wb(vaddr, nrpages); 49 err = _set_memory_wb(vaddr, nrpages);
47 break; 50 break;
@@ -172,6 +175,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
172 prot = __pgprot(pgprot_val(prot) | 175 prot = __pgprot(pgprot_val(prot) |
173 cachemode2protval(_PAGE_CACHE_MODE_WC)); 176 cachemode2protval(_PAGE_CACHE_MODE_WC));
174 break; 177 break;
178 case _PAGE_CACHE_MODE_WT:
179 prot = __pgprot(pgprot_val(prot) |
180 cachemode2protval(_PAGE_CACHE_MODE_WT));
181 break;
175 case _PAGE_CACHE_MODE_WB: 182 case _PAGE_CACHE_MODE_WB:
176 break; 183 break;
177 } 184 }
@@ -234,10 +241,11 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
234{ 241{
235 /* 242 /*
236 * Ideally, this should be: 243 * Ideally, this should be:
237 * pat_enabled ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; 244 * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
238 * 245 *
239 * Till we fix all X drivers to use ioremap_wc(), we will use 246 * Till we fix all X drivers to use ioremap_wc(), we will use
240 * UC MINUS. 247 * UC MINUS. Drivers that are certain they need or can already
248 * be converted over to strong UC can use ioremap_uc().
241 */ 249 */
242 enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS; 250 enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
243 251
@@ -247,6 +255,39 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
247EXPORT_SYMBOL(ioremap_nocache); 255EXPORT_SYMBOL(ioremap_nocache);
248 256
249/** 257/**
258 * ioremap_uc - map bus memory into CPU space as strongly uncachable
259 * @phys_addr: bus address of the memory
260 * @size: size of the resource to map
261 *
262 * ioremap_uc performs a platform specific sequence of operations to
263 * make bus memory CPU accessible via the readb/readw/readl/writeb/
264 * writew/writel functions and the other mmio helpers. The returned
265 * address is not guaranteed to be usable directly as a virtual
266 * address.
267 *
268 * This version of ioremap ensures that the memory is marked with a strong
269 * preference as completely uncachable on the CPU when possible. For non-PAT
270 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
271 * systems this will set the PAT entry for the pages as strong UC. This call
272 * will honor existing caching rules from things like the PCI bus. Note that
273 * there are other caches and buffers on many busses. In particular driver
274 * authors should read up on PCI writes.
275 *
276 * It's useful if some control registers are in such an area and
277 * write combining or read caching is not desirable:
278 *
279 * Must be freed with iounmap.
280 */
281void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
282{
283 enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
284
285 return __ioremap_caller(phys_addr, size, pcm,
286 __builtin_return_address(0));
287}
288EXPORT_SYMBOL_GPL(ioremap_uc);
289
290/**
250 * ioremap_wc - map memory into CPU space write combined 291 * ioremap_wc - map memory into CPU space write combined
251 * @phys_addr: bus address of the memory 292 * @phys_addr: bus address of the memory
252 * @size: size of the resource to map 293 * @size: size of the resource to map
@@ -258,14 +299,28 @@ EXPORT_SYMBOL(ioremap_nocache);
258 */ 299 */
259void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) 300void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
260{ 301{
261 if (pat_enabled) 302 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
262 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
263 __builtin_return_address(0)); 303 __builtin_return_address(0));
264 else
265 return ioremap_nocache(phys_addr, size);
266} 304}
267EXPORT_SYMBOL(ioremap_wc); 305EXPORT_SYMBOL(ioremap_wc);
268 306
307/**
308 * ioremap_wt - map memory into CPU space write through
309 * @phys_addr: bus address of the memory
310 * @size: size of the resource to map
311 *
312 * This version of ioremap ensures that the memory is marked write through.
313 * Write through stores data into memory while keeping the cache up-to-date.
314 *
315 * Must be freed with iounmap.
316 */
317void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
318{
319 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
320 __builtin_return_address(0));
321}
322EXPORT_SYMBOL(ioremap_wt);
323
269void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) 324void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
270{ 325{
271 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB, 326 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
@@ -331,7 +386,7 @@ void iounmap(volatile void __iomem *addr)
331} 386}
332EXPORT_SYMBOL(iounmap); 387EXPORT_SYMBOL(iounmap);
333 388
334int arch_ioremap_pud_supported(void) 389int __init arch_ioremap_pud_supported(void)
335{ 390{
336#ifdef CONFIG_X86_64 391#ifdef CONFIG_X86_64
337 return cpu_has_gbpages; 392 return cpu_has_gbpages;
@@ -340,7 +395,7 @@ int arch_ioremap_pud_supported(void)
340#endif 395#endif
341} 396}
342 397
343int arch_ioremap_pmd_supported(void) 398int __init arch_ioremap_pmd_supported(void)
344{ 399{
345 return cpu_has_pse; 400 return cpu_has_pse;
346} 401}
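A hedged usage sketch for the two mapping interfaces added above; the device, BAR index and framebuffer address are made up, and example_map_device() is not a real kernel function:

	#include <linux/errno.h>
	#include <linux/io.h>
	#include <linux/pci.h>

	static int example_map_device(struct pci_dev *pdev,
				      resource_size_t fb_phys, unsigned long fb_len)
	{
		void __iomem *regs, *fb;

		/* Control registers: strongly uncachable, no write combining. */
		regs = ioremap_uc(pci_resource_start(pdev, 0),
				  pci_resource_len(pdev, 0));
		if (!regs)
			return -ENOMEM;

		/* Frame buffer: write-through keeps reads cached while stores
		 * go straight to memory. */
		fb = ioremap_wt(fb_phys, fb_len);
		if (!fb) {
			iounmap(regs);
			return -ENOMEM;
		}

		writel(0x1, regs);	/* ... use the mappings ... */

		/* Both must be freed with iounmap(), as the kerneldoc notes. */
		iounmap(fb);
		iounmap(regs);
		return 0;
	}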
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 6629f397b467..8ff686aa7e8c 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -9,6 +9,7 @@
9#include <linux/random.h> 9#include <linux/random.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/vmalloc.h>
12 13
13#include <asm/cacheflush.h> 14#include <asm/cacheflush.h>
14#include <asm/pgtable.h> 15#include <asm/pgtable.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 89af288ec674..727158cb3b3c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -14,6 +14,7 @@
14#include <linux/percpu.h> 14#include <linux/percpu.h>
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/pci.h> 16#include <linux/pci.h>
17#include <linux/vmalloc.h>
17 18
18#include <asm/e820.h> 19#include <asm/e820.h>
19#include <asm/processor.h> 20#include <asm/processor.h>
@@ -129,16 +130,15 @@ within(unsigned long addr, unsigned long start, unsigned long end)
129 */ 130 */
130void clflush_cache_range(void *vaddr, unsigned int size) 131void clflush_cache_range(void *vaddr, unsigned int size)
131{ 132{
132 void *vend = vaddr + size - 1; 133 unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
134 void *vend = vaddr + size;
135 void *p;
133 136
134 mb(); 137 mb();
135 138
136 for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) 139 for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
137 clflushopt(vaddr); 140 p < vend; p += boot_cpu_data.x86_clflush_size)
138 /* 141 clflushopt(p);
139 * Flush any possible final partial cacheline:
140 */
141 clflushopt(vend);
142 142
143 mb(); 143 mb();
144} 144}
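To see why the rewritten loop covers the whole range, assume a 64-byte cache line (boot_cpu_data.x86_clflush_size == 64, so clflush_mask == 0x3f) and, say, vaddr = 0x1030 with size = 0x50: vend = 0x1080, the first p is 0x1030 & ~0x3f = 0x1000, and the loop flushes the lines at 0x1000 and 0x1040 before p reaches 0x1080 and stops. Every cache line overlapping [vaddr, vaddr + size) is therefore flushed exactly once, without the old special case for a trailing partial line.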
@@ -418,13 +418,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
418 phys_addr_t phys_addr; 418 phys_addr_t phys_addr;
419 unsigned long offset; 419 unsigned long offset;
420 enum pg_level level; 420 enum pg_level level;
421 unsigned long psize;
422 unsigned long pmask; 421 unsigned long pmask;
423 pte_t *pte; 422 pte_t *pte;
424 423
425 pte = lookup_address(virt_addr, &level); 424 pte = lookup_address(virt_addr, &level);
426 BUG_ON(!pte); 425 BUG_ON(!pte);
427 psize = page_level_size(level);
428 pmask = page_level_mask(level); 426 pmask = page_level_mask(level);
429 offset = virt_addr & ~pmask; 427 offset = virt_addr & ~pmask;
430 phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; 428 phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
@@ -1468,6 +1466,9 @@ int _set_memory_uc(unsigned long addr, int numpages)
1468{ 1466{
1469 /* 1467 /*
1470 * for now UC MINUS. see comments in ioremap_nocache() 1468 * for now UC MINUS. see comments in ioremap_nocache()
1469 * If you really need strong UC use ioremap_uc(), but note
1470 * that you cannot override IO areas with set_memory_*() as
1471 * these helpers cannot work with IO memory.
1471 */ 1472 */
1472 return change_page_attr_set(&addr, numpages, 1473 return change_page_attr_set(&addr, numpages,
1473 cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), 1474 cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
@@ -1502,12 +1503,10 @@ EXPORT_SYMBOL(set_memory_uc);
1502static int _set_memory_array(unsigned long *addr, int addrinarray, 1503static int _set_memory_array(unsigned long *addr, int addrinarray,
1503 enum page_cache_mode new_type) 1504 enum page_cache_mode new_type)
1504{ 1505{
1506 enum page_cache_mode set_type;
1505 int i, j; 1507 int i, j;
1506 int ret; 1508 int ret;
1507 1509
1508 /*
1509 * for now UC MINUS. see comments in ioremap_nocache()
1510 */
1511 for (i = 0; i < addrinarray; i++) { 1510 for (i = 0; i < addrinarray; i++) {
1512 ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, 1511 ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
1513 new_type, NULL); 1512 new_type, NULL);
@@ -1515,9 +1514,12 @@ static int _set_memory_array(unsigned long *addr, int addrinarray,
1515 goto out_free; 1514 goto out_free;
1516 } 1515 }
1517 1516
1517 /* If WC, set to UC- first and then WC */
1518 set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
1519 _PAGE_CACHE_MODE_UC_MINUS : new_type;
1520
1518 ret = change_page_attr_set(addr, addrinarray, 1521 ret = change_page_attr_set(addr, addrinarray,
1519 cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), 1522 cachemode2pgprot(set_type), 1);
1520 1);
1521 1523
1522 if (!ret && new_type == _PAGE_CACHE_MODE_WC) 1524 if (!ret && new_type == _PAGE_CACHE_MODE_WC)
1523 ret = change_page_attr_set_clr(addr, addrinarray, 1525 ret = change_page_attr_set_clr(addr, addrinarray,
@@ -1549,6 +1551,12 @@ int set_memory_array_wc(unsigned long *addr, int addrinarray)
1549} 1551}
1550EXPORT_SYMBOL(set_memory_array_wc); 1552EXPORT_SYMBOL(set_memory_array_wc);
1551 1553
1554int set_memory_array_wt(unsigned long *addr, int addrinarray)
1555{
1556 return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT);
1557}
1558EXPORT_SYMBOL_GPL(set_memory_array_wt);
1559
1552int _set_memory_wc(unsigned long addr, int numpages) 1560int _set_memory_wc(unsigned long addr, int numpages)
1553{ 1561{
1554 int ret; 1562 int ret;
@@ -1571,27 +1579,42 @@ int set_memory_wc(unsigned long addr, int numpages)
1571{ 1579{
1572 int ret; 1580 int ret;
1573 1581
1574 if (!pat_enabled)
1575 return set_memory_uc(addr, numpages);
1576
1577 ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, 1582 ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
1578 _PAGE_CACHE_MODE_WC, NULL); 1583 _PAGE_CACHE_MODE_WC, NULL);
1579 if (ret) 1584 if (ret)
1580 goto out_err; 1585 return ret;
1581 1586
1582 ret = _set_memory_wc(addr, numpages); 1587 ret = _set_memory_wc(addr, numpages);
1583 if (ret) 1588 if (ret)
1584 goto out_free; 1589 free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
1585
1586 return 0;
1587 1590
1588out_free:
1589 free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
1590out_err:
1591 return ret; 1591 return ret;
1592} 1592}
1593EXPORT_SYMBOL(set_memory_wc); 1593EXPORT_SYMBOL(set_memory_wc);
1594 1594
1595int _set_memory_wt(unsigned long addr, int numpages)
1596{
1597 return change_page_attr_set(&addr, numpages,
1598 cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
1599}
1600
1601int set_memory_wt(unsigned long addr, int numpages)
1602{
1603 int ret;
1604
1605 ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
1606 _PAGE_CACHE_MODE_WT, NULL);
1607 if (ret)
1608 return ret;
1609
1610 ret = _set_memory_wt(addr, numpages);
1611 if (ret)
1612 free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
1613
1614 return ret;
1615}
1616EXPORT_SYMBOL_GPL(set_memory_wt);
1617
1595int _set_memory_wb(unsigned long addr, int numpages) 1618int _set_memory_wb(unsigned long addr, int numpages)
1596{ 1619{
1597 /* WB cache mode is hard wired to all cache attribute bits being 0 */ 1620 /* WB cache mode is hard wired to all cache attribute bits being 0 */
@@ -1682,6 +1705,7 @@ static int _set_pages_array(struct page **pages, int addrinarray,
1682{ 1705{
1683 unsigned long start; 1706 unsigned long start;
1684 unsigned long end; 1707 unsigned long end;
1708 enum page_cache_mode set_type;
1685 int i; 1709 int i;
1686 int free_idx; 1710 int free_idx;
1687 int ret; 1711 int ret;
@@ -1695,8 +1719,12 @@ static int _set_pages_array(struct page **pages, int addrinarray,
1695 goto err_out; 1719 goto err_out;
1696 } 1720 }
1697 1721
1722 /* If WC, set to UC- first and then WC */
1723 set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
1724 _PAGE_CACHE_MODE_UC_MINUS : new_type;
1725
1698 ret = cpa_set_pages_array(pages, addrinarray, 1726 ret = cpa_set_pages_array(pages, addrinarray,
1699 cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS)); 1727 cachemode2pgprot(set_type));
1700 if (!ret && new_type == _PAGE_CACHE_MODE_WC) 1728 if (!ret && new_type == _PAGE_CACHE_MODE_WC)
1701 ret = change_page_attr_set_clr(NULL, addrinarray, 1729 ret = change_page_attr_set_clr(NULL, addrinarray,
1702 cachemode2pgprot( 1730 cachemode2pgprot(
@@ -1730,6 +1758,12 @@ int set_pages_array_wc(struct page **pages, int addrinarray)
1730} 1758}
1731EXPORT_SYMBOL(set_pages_array_wc); 1759EXPORT_SYMBOL(set_pages_array_wc);
1732 1760
1761int set_pages_array_wt(struct page **pages, int addrinarray)
1762{
1763 return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT);
1764}
1765EXPORT_SYMBOL_GPL(set_pages_array_wt);
1766
1733int set_pages_wb(struct page *page, int numpages) 1767int set_pages_wb(struct page *page, int numpages)
1734{ 1768{
1735 unsigned long addr = (unsigned long)page_address(page); 1769 unsigned long addr = (unsigned long)page_address(page);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 35af6771a95a..188e3e07eeeb 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -33,13 +33,17 @@
33#include "pat_internal.h" 33#include "pat_internal.h"
34#include "mm_internal.h" 34#include "mm_internal.h"
35 35
36#ifdef CONFIG_X86_PAT 36#undef pr_fmt
37int __read_mostly pat_enabled = 1; 37#define pr_fmt(fmt) "" fmt
38
39static bool boot_cpu_done;
40
41static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
38 42
39static inline void pat_disable(const char *reason) 43static inline void pat_disable(const char *reason)
40{ 44{
41 pat_enabled = 0; 45 __pat_enabled = 0;
42 printk(KERN_INFO "%s\n", reason); 46 pr_info("x86/PAT: %s\n", reason);
43} 47}
44 48
45static int __init nopat(char *str) 49static int __init nopat(char *str)
@@ -48,13 +52,12 @@ static int __init nopat(char *str)
48 return 0; 52 return 0;
49} 53}
50early_param("nopat", nopat); 54early_param("nopat", nopat);
51#else 55
52static inline void pat_disable(const char *reason) 56bool pat_enabled(void)
53{ 57{
54 (void)reason; 58 return !!__pat_enabled;
55} 59}
56#endif 60EXPORT_SYMBOL_GPL(pat_enabled);
57
58 61
59int pat_debug_enable; 62int pat_debug_enable;
60 63
@@ -65,22 +68,24 @@ static int __init pat_debug_setup(char *str)
65} 68}
66__setup("debugpat", pat_debug_setup); 69__setup("debugpat", pat_debug_setup);
67 70
68static u64 __read_mostly boot_pat_state;
69
70#ifdef CONFIG_X86_PAT 71#ifdef CONFIG_X86_PAT
71/* 72/*
72 * X86 PAT uses page flags WC and Uncached together to keep track of 73 * X86 PAT uses page flags arch_1 and uncached together to keep track of
73 * memory type of pages that have backing page struct. X86 PAT supports 3 74 * memory type of pages that have backing page struct.
74 * different memory types, _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC and 75 *
75 * _PAGE_CACHE_MODE_UC_MINUS and fourth state where page's memory type has not 76 * X86 PAT supports 4 different memory types:
76 * been changed from its default (value of -1 used to denote this). 77 * - _PAGE_CACHE_MODE_WB
77 * Note we do not support _PAGE_CACHE_MODE_UC here. 78 * - _PAGE_CACHE_MODE_WC
79 * - _PAGE_CACHE_MODE_UC_MINUS
80 * - _PAGE_CACHE_MODE_WT
81 *
82 * _PAGE_CACHE_MODE_WB is the default type.
78 */ 83 */
79 84
80#define _PGMT_DEFAULT 0 85#define _PGMT_WB 0
81#define _PGMT_WC (1UL << PG_arch_1) 86#define _PGMT_WC (1UL << PG_arch_1)
82#define _PGMT_UC_MINUS (1UL << PG_uncached) 87#define _PGMT_UC_MINUS (1UL << PG_uncached)
83#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) 88#define _PGMT_WT (1UL << PG_uncached | 1UL << PG_arch_1)
84#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) 89#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1)
85#define _PGMT_CLEAR_MASK (~_PGMT_MASK) 90#define _PGMT_CLEAR_MASK (~_PGMT_MASK)
86 91
@@ -88,14 +93,14 @@ static inline enum page_cache_mode get_page_memtype(struct page *pg)
88{ 93{
89 unsigned long pg_flags = pg->flags & _PGMT_MASK; 94 unsigned long pg_flags = pg->flags & _PGMT_MASK;
90 95
91 if (pg_flags == _PGMT_DEFAULT) 96 if (pg_flags == _PGMT_WB)
92 return -1; 97 return _PAGE_CACHE_MODE_WB;
93 else if (pg_flags == _PGMT_WC) 98 else if (pg_flags == _PGMT_WC)
94 return _PAGE_CACHE_MODE_WC; 99 return _PAGE_CACHE_MODE_WC;
95 else if (pg_flags == _PGMT_UC_MINUS) 100 else if (pg_flags == _PGMT_UC_MINUS)
96 return _PAGE_CACHE_MODE_UC_MINUS; 101 return _PAGE_CACHE_MODE_UC_MINUS;
97 else 102 else
98 return _PAGE_CACHE_MODE_WB; 103 return _PAGE_CACHE_MODE_WT;
99} 104}
100 105
101static inline void set_page_memtype(struct page *pg, 106static inline void set_page_memtype(struct page *pg,
@@ -112,11 +117,12 @@ static inline void set_page_memtype(struct page *pg,
112 case _PAGE_CACHE_MODE_UC_MINUS: 117 case _PAGE_CACHE_MODE_UC_MINUS:
113 memtype_flags = _PGMT_UC_MINUS; 118 memtype_flags = _PGMT_UC_MINUS;
114 break; 119 break;
115 case _PAGE_CACHE_MODE_WB: 120 case _PAGE_CACHE_MODE_WT:
116 memtype_flags = _PGMT_WB; 121 memtype_flags = _PGMT_WT;
117 break; 122 break;
123 case _PAGE_CACHE_MODE_WB:
118 default: 124 default:
119 memtype_flags = _PGMT_DEFAULT; 125 memtype_flags = _PGMT_WB;
120 break; 126 break;
121 } 127 }
122 128
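With the hunk above, the two page flags PG_uncached and PG_arch_1 encode all four trackable memory types, and the all-clear state now means WB rather than "unset". A standalone sketch of the same 2-bit encoding, using illustrative bit positions instead of the real PG_* values:

#include <stdio.h>

#define BIT_ARCH_1   (1UL << 0)   /* stands in for PG_arch_1 */
#define BIT_UNCACHED (1UL << 1)   /* stands in for PG_uncached */

enum memtype { WB, WC, UC_MINUS, WT };

static unsigned long encode(enum memtype t)
{
        switch (t) {
        case WC:       return BIT_ARCH_1;
        case UC_MINUS: return BIT_UNCACHED;
        case WT:       return BIT_UNCACHED | BIT_ARCH_1;
        default:       return 0;   /* WB: the default, both flags clear */
        }
}

int main(void)
{
        printf("WB=%lx WC=%lx UC-=%lx WT=%lx\n",
               encode(WB), encode(WC), encode(UC_MINUS), encode(WT));
        return 0;
}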
@@ -174,78 +180,154 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
174 * configuration. 180 * configuration.
175 * Using lower indices is preferred, so we start with highest index. 181 * Using lower indices is preferred, so we start with highest index.
176 */ 182 */
177void pat_init_cache_modes(void) 183void pat_init_cache_modes(u64 pat)
178{ 184{
179 int i;
180 enum page_cache_mode cache; 185 enum page_cache_mode cache;
181 char pat_msg[33]; 186 char pat_msg[33];
182 u64 pat; 187 int i;
183 188
184 rdmsrl(MSR_IA32_CR_PAT, pat);
185 pat_msg[32] = 0; 189 pat_msg[32] = 0;
186 for (i = 7; i >= 0; i--) { 190 for (i = 7; i >= 0; i--) {
187 cache = pat_get_cache_mode((pat >> (i * 8)) & 7, 191 cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
188 pat_msg + 4 * i); 192 pat_msg + 4 * i);
189 update_cache_mode_entry(i, cache); 193 update_cache_mode_entry(i, cache);
190 } 194 }
191 pr_info("PAT configuration [0-7]: %s\n", pat_msg); 195 pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
192} 196}
193 197
194#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) 198#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
195 199
196void pat_init(void) 200static void pat_bsp_init(u64 pat)
197{ 201{
198 u64 pat; 202 u64 tmp_pat;
199 bool boot_cpu = !boot_pat_state;
200 203
201 if (!pat_enabled) 204 if (!cpu_has_pat) {
205 pat_disable("PAT not supported by CPU.");
202 return; 206 return;
207 }
203 208
204 if (!cpu_has_pat) { 209 if (!pat_enabled())
205 if (!boot_pat_state) { 210 goto done;
206 pat_disable("PAT not supported by CPU."); 211
207 return; 212 rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
208 } else { 213 if (!tmp_pat) {
209 /* 214 pat_disable("PAT MSR is 0, disabled.");
210 * If this happens we are on a secondary CPU, but 215 return;
211 * switched to PAT on the boot CPU. We have no way to
212 * undo PAT.
213 */
214 printk(KERN_ERR "PAT enabled, "
215 "but not supported by secondary CPU\n");
216 BUG();
217 }
218 } 216 }
219 217
220 /* Set PWT to Write-Combining. All other bits stay the same */ 218 wrmsrl(MSR_IA32_CR_PAT, pat);
221 /* 219
222 * PTE encoding used in Linux: 220done:
223 * PAT 221 pat_init_cache_modes(pat);
224 * |PCD 222}
225 * ||PWT 223
226 * ||| 224static void pat_ap_init(u64 pat)
227 * 000 WB _PAGE_CACHE_WB 225{
228 * 001 WC _PAGE_CACHE_WC 226 if (!pat_enabled())
229 * 010 UC- _PAGE_CACHE_UC_MINUS 227 return;
230 * 011 UC _PAGE_CACHE_UC 228
231 * PAT bit unused 229 if (!cpu_has_pat) {
232 */ 230 /*
233 pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | 231 * If this happens we are on a secondary CPU, but switched to
234 PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); 232 * PAT on the boot CPU. We have no way to undo PAT.
235 233 */
236 /* Boot CPU check */ 234 panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
237 if (!boot_pat_state) {
238 rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
239 if (!boot_pat_state) {
240 pat_disable("PAT read returns always zero, disabled.");
241 return;
242 }
243 } 235 }
244 236
245 wrmsrl(MSR_IA32_CR_PAT, pat); 237 wrmsrl(MSR_IA32_CR_PAT, pat);
238}
239
240void pat_init(void)
241{
242 u64 pat;
243 struct cpuinfo_x86 *c = &boot_cpu_data;
244
245 if (!pat_enabled()) {
246 /*
247 * No PAT. Emulate the PAT table that corresponds to the two
248 * cache bits, PWT (Write Through) and PCD (Cache Disable). This
249 * setup is the same as the BIOS default setup when the system
250 * has PAT but the "nopat" boot option has been specified. This
251 * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
252 *
253 * PTE encoding:
254 *
255 * PCD
256 * |PWT PAT
257 * || slot
258 * 00 0 WB : _PAGE_CACHE_MODE_WB
259 * 01 1 WT : _PAGE_CACHE_MODE_WT
260 * 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
261 * 11 3 UC : _PAGE_CACHE_MODE_UC
262 *
263 * NOTE: When WC or WP is used, it is redirected to UC- per
264 * the default setup in __cachemode2pte_tbl[].
265 */
266 pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
267 PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
246 268
247 if (boot_cpu) 269 } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
248 pat_init_cache_modes(); 270 (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
271 ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
272 /*
273 * PAT support with the lower four entries. Intel Pentium 2,
274 * 3, M, and 4 are affected by PAT errata, which makes the
275 * upper four entries unusable. To be on the safe side, we don't
276 * use those.
277 *
278 * PTE encoding:
279 * PAT
280 * |PCD
281 * ||PWT PAT
282 * ||| slot
283 * 000 0 WB : _PAGE_CACHE_MODE_WB
284 * 001 1 WC : _PAGE_CACHE_MODE_WC
285 * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
286 * 011 3 UC : _PAGE_CACHE_MODE_UC
287 * PAT bit unused
288 *
289 * NOTE: When WT or WP is used, it is redirected to UC- per
290 * the default setup in __cachemode2pte_tbl[].
291 */
292 pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
293 PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
294 } else {
295 /*
296 * Full PAT support. We put WT in slot 7 to improve
297 * robustness in the presence of errata that might cause
298 * the high PAT bit to be ignored. This way, a buggy slot 7
299 * access will hit slot 3, and slot 3 is UC, so at worst
300 * we lose performance without causing a correctness issue.
301 * Pentium 4 erratum N46 is an example for such an erratum,
302 * although we try not to use PAT at all on affected CPUs.
303 *
304 * PTE encoding:
305 * PAT
306 * |PCD
307 * ||PWT PAT
308 * ||| slot
309 * 000 0 WB : _PAGE_CACHE_MODE_WB
310 * 001 1 WC : _PAGE_CACHE_MODE_WC
311 * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
312 * 011 3 UC : _PAGE_CACHE_MODE_UC
313 * 100 4 WB : Reserved
314 * 101 5 WC : Reserved
315 * 110 6 UC-: Reserved
316 * 111 7 WT : _PAGE_CACHE_MODE_WT
317 *
318 * The reserved slots are unused, but mapped to their
319 * corresponding types in the presence of PAT errata.
320 */
321 pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
322 PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);
323 }
324
325 if (!boot_cpu_done) {
326 pat_bsp_init(pat);
327 boot_cpu_done = true;
328 } else {
329 pat_ap_init(pat);
330 }
249} 331}
250 332
251#undef PAT 333#undef PAT
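The rewritten pat_init() picks one of three MSR layouts and hands it to pat_bsp_init()/pat_ap_init(). The PAT() macro packs one 3-bit architectural memory-type code per byte of the 64-bit MSR; a small standalone sketch of the full-PAT layout chosen above, with the PAT_* values being the architectural encodings and the printout purely illustrative:

#include <stdio.h>
#include <stdint.h>

/* Architectural PAT memory-type encodings. */
#define PAT_UC       0x0
#define PAT_WC       0x1
#define PAT_WT       0x4
#define PAT_WB       0x6
#define PAT_UC_MINUS 0x7

#define PAT(x, y) ((uint64_t)PAT_##y << ((x) * 8))

int main(void)
{
        /* Full-PAT layout from pat_init(): WT parked in slot 7. */
        uint64_t pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
                       PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);

        printf("MSR_IA32_CR_PAT = %#018llx\n", (unsigned long long)pat);
        return 0;
}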
@@ -267,9 +349,9 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end,
267 * request is for WB. 349 * request is for WB.
268 */ 350 */
269 if (req_type == _PAGE_CACHE_MODE_WB) { 351 if (req_type == _PAGE_CACHE_MODE_WB) {
270 u8 mtrr_type; 352 u8 mtrr_type, uniform;
271 353
272 mtrr_type = mtrr_type_lookup(start, end); 354 mtrr_type = mtrr_type_lookup(start, end, &uniform);
273 if (mtrr_type != MTRR_TYPE_WRBACK) 355 if (mtrr_type != MTRR_TYPE_WRBACK)
274 return _PAGE_CACHE_MODE_UC_MINUS; 356 return _PAGE_CACHE_MODE_UC_MINUS;
275 357
@@ -324,9 +406,14 @@ static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
324 406
325/* 407/*
326 * For RAM pages, we use page flags to mark the pages with appropriate type. 408 * For RAM pages, we use page flags to mark the pages with appropriate type.
327 * Here we do two pass: 409 * The page flags are limited to four types, WB (default), WC, WT and UC-.
328 * - Find the memtype of all the pages in the range, look for any conflicts 410 * WP request fails with -EINVAL, and UC gets redirected to UC-. Setting
329 * - In case of no conflicts, set the new memtype for pages in the range 411 * a new memory type is only allowed for a page mapped with the default WB
412 * type.
413 *
414 * Here we do two passes:
415 * - Find the memtype of all the pages in the range, look for any conflicts.
416 * - In case of no conflicts, set the new memtype for pages in the range.
330 */ 417 */
331static int reserve_ram_pages_type(u64 start, u64 end, 418static int reserve_ram_pages_type(u64 start, u64 end,
332 enum page_cache_mode req_type, 419 enum page_cache_mode req_type,
@@ -335,6 +422,12 @@ static int reserve_ram_pages_type(u64 start, u64 end,
335 struct page *page; 422 struct page *page;
336 u64 pfn; 423 u64 pfn;
337 424
425 if (req_type == _PAGE_CACHE_MODE_WP) {
426 if (new_type)
427 *new_type = _PAGE_CACHE_MODE_UC_MINUS;
428 return -EINVAL;
429 }
430
338 if (req_type == _PAGE_CACHE_MODE_UC) { 431 if (req_type == _PAGE_CACHE_MODE_UC) {
339 /* We do not support strong UC */ 432 /* We do not support strong UC */
340 WARN_ON_ONCE(1); 433 WARN_ON_ONCE(1);
@@ -346,8 +439,8 @@ static int reserve_ram_pages_type(u64 start, u64 end,
346 439
347 page = pfn_to_page(pfn); 440 page = pfn_to_page(pfn);
348 type = get_page_memtype(page); 441 type = get_page_memtype(page);
349 if (type != -1) { 442 if (type != _PAGE_CACHE_MODE_WB) {
350 pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n", 443 pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
351 start, end - 1, type, req_type); 444 start, end - 1, type, req_type);
352 if (new_type) 445 if (new_type)
353 *new_type = type; 446 *new_type = type;
@@ -373,7 +466,7 @@ static int free_ram_pages_type(u64 start, u64 end)
373 466
374 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { 467 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
375 page = pfn_to_page(pfn); 468 page = pfn_to_page(pfn);
376 set_page_memtype(page, -1); 469 set_page_memtype(page, _PAGE_CACHE_MODE_WB);
377 } 470 }
378 return 0; 471 return 0;
379} 472}
@@ -384,6 +477,7 @@ static int free_ram_pages_type(u64 start, u64 end)
384 * - _PAGE_CACHE_MODE_WC 477 * - _PAGE_CACHE_MODE_WC
385 * - _PAGE_CACHE_MODE_UC_MINUS 478 * - _PAGE_CACHE_MODE_UC_MINUS
386 * - _PAGE_CACHE_MODE_UC 479 * - _PAGE_CACHE_MODE_UC
480 * - _PAGE_CACHE_MODE_WT
387 * 481 *
388 * If new_type is NULL, function will return an error if it cannot reserve the 482 * If new_type is NULL, function will return an error if it cannot reserve the
389 * region with req_type. If new_type is non-NULL, function will return 483 * region with req_type. If new_type is non-NULL, function will return
@@ -400,14 +494,10 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
400 494
401 BUG_ON(start >= end); /* end is exclusive */ 495 BUG_ON(start >= end); /* end is exclusive */
402 496
403 if (!pat_enabled) { 497 if (!pat_enabled()) {
404 /* This is identical to page table setting without PAT */ 498 /* This is identical to page table setting without PAT */
405 if (new_type) { 499 if (new_type)
406 if (req_type == _PAGE_CACHE_MODE_WC) 500 *new_type = req_type;
407 *new_type = _PAGE_CACHE_MODE_UC_MINUS;
408 else
409 *new_type = req_type;
410 }
411 return 0; 501 return 0;
412 } 502 }
413 503
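With WT added to the supported set and the no-PAT path now passing the request straight through, the calling convention of reserve_memtype()/free_memtype() is unchanged: reserve, check what was actually granted, free when done. A hedged sketch of that pattern; the physical range and the decision to bail out on a downgrade are illustrative:

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/sizes.h>
#include <asm/pat.h>
#include <asm/pgtable_types.h>

static int example_reserve_wt(void)
{
        enum page_cache_mode new_type;
        u64 start = 0xfd000000, end = start + SZ_4M;   /* illustrative range */
        int ret;

        ret = reserve_memtype(start, end, _PAGE_CACHE_MODE_WT, &new_type);
        if (ret)
                return ret;

        if (new_type != _PAGE_CACHE_MODE_WT) {
                /* A conflicting reservation granted a compatible type instead. */
                free_memtype(start, end);
                return -EINVAL;
        }

        /* ... map and use the range as write-through ... */

        free_memtype(start, end);
        return 0;
}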
@@ -451,9 +541,9 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
451 541
452 err = rbt_memtype_check_insert(new, new_type); 542 err = rbt_memtype_check_insert(new, new_type);
453 if (err) { 543 if (err) {
454 printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", 544 pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
455 start, end - 1, 545 start, end - 1,
456 cattr_name(new->type), cattr_name(req_type)); 546 cattr_name(new->type), cattr_name(req_type));
457 kfree(new); 547 kfree(new);
458 spin_unlock(&memtype_lock); 548 spin_unlock(&memtype_lock);
459 549
@@ -475,7 +565,7 @@ int free_memtype(u64 start, u64 end)
475 int is_range_ram; 565 int is_range_ram;
476 struct memtype *entry; 566 struct memtype *entry;
477 567
478 if (!pat_enabled) 568 if (!pat_enabled())
479 return 0; 569 return 0;
480 570
481 /* Low ISA region is always mapped WB. No need to track */ 571 /* Low ISA region is always mapped WB. No need to track */
@@ -497,8 +587,8 @@ int free_memtype(u64 start, u64 end)
497 spin_unlock(&memtype_lock); 587 spin_unlock(&memtype_lock);
498 588
499 if (!entry) { 589 if (!entry) {
500 printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", 590 pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
501 current->comm, current->pid, start, end - 1); 591 current->comm, current->pid, start, end - 1);
502 return -EINVAL; 592 return -EINVAL;
503 } 593 }
504 594
@@ -517,7 +607,7 @@ int free_memtype(u64 start, u64 end)
517 * Only to be called when PAT is enabled 607 * Only to be called when PAT is enabled
518 * 608 *
519 * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS 609 * Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS
520 * or _PAGE_CACHE_MODE_UC 610 * or _PAGE_CACHE_MODE_WT.
521 */ 611 */
522static enum page_cache_mode lookup_memtype(u64 paddr) 612static enum page_cache_mode lookup_memtype(u64 paddr)
523{ 613{
@@ -529,16 +619,9 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
529 619
530 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { 620 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
531 struct page *page; 621 struct page *page;
532 page = pfn_to_page(paddr >> PAGE_SHIFT);
533 rettype = get_page_memtype(page);
534 /*
535 * -1 from get_page_memtype() implies RAM page is in its
536 * default state and not reserved, and hence of type WB
537 */
538 if (rettype == -1)
539 rettype = _PAGE_CACHE_MODE_WB;
540 622
541 return rettype; 623 page = pfn_to_page(paddr >> PAGE_SHIFT);
624 return get_page_memtype(page);
542 } 625 }
543 626
544 spin_lock(&memtype_lock); 627 spin_lock(&memtype_lock);
@@ -623,13 +706,13 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
623 u64 to = from + size; 706 u64 to = from + size;
624 u64 cursor = from; 707 u64 cursor = from;
625 708
626 if (!pat_enabled) 709 if (!pat_enabled())
627 return 1; 710 return 1;
628 711
629 while (cursor < to) { 712 while (cursor < to) {
630 if (!devmem_is_allowed(pfn)) { 713 if (!devmem_is_allowed(pfn)) {
631 printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", 714 pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
632 current->comm, from, to - 1); 715 current->comm, from, to - 1);
633 return 0; 716 return 0;
634 } 717 }
635 cursor += PAGE_SIZE; 718 cursor += PAGE_SIZE;
@@ -659,7 +742,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
659 * caching for the high addresses through the KEN pin, but 742 * caching for the high addresses through the KEN pin, but
660 * we maintain the tradition of paranoia in this code. 743 * we maintain the tradition of paranoia in this code.
661 */ 744 */
662 if (!pat_enabled && 745 if (!pat_enabled() &&
663 !(boot_cpu_has(X86_FEATURE_MTRR) || 746 !(boot_cpu_has(X86_FEATURE_MTRR) ||
664 boot_cpu_has(X86_FEATURE_K6_MTRR) || 747 boot_cpu_has(X86_FEATURE_K6_MTRR) ||
665 boot_cpu_has(X86_FEATURE_CYRIX_ARR) || 748 boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
@@ -698,8 +781,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
698 size; 781 size;
699 782
700 if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) { 783 if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
701 printk(KERN_INFO "%s:%d ioremap_change_attr failed %s " 784 pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
702 "for [mem %#010Lx-%#010Lx]\n",
703 current->comm, current->pid, 785 current->comm, current->pid,
704 cattr_name(pcm), 786 cattr_name(pcm),
705 base, (unsigned long long)(base + size-1)); 787 base, (unsigned long long)(base + size-1));
@@ -729,12 +811,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
729 * the type requested matches the type of first page in the range. 811 * the type requested matches the type of first page in the range.
730 */ 812 */
731 if (is_ram) { 813 if (is_ram) {
732 if (!pat_enabled) 814 if (!pat_enabled())
733 return 0; 815 return 0;
734 816
735 pcm = lookup_memtype(paddr); 817 pcm = lookup_memtype(paddr);
736 if (want_pcm != pcm) { 818 if (want_pcm != pcm) {
737 printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", 819 pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
738 current->comm, current->pid, 820 current->comm, current->pid,
739 cattr_name(want_pcm), 821 cattr_name(want_pcm),
740 (unsigned long long)paddr, 822 (unsigned long long)paddr,
@@ -755,13 +837,12 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
755 if (strict_prot || 837 if (strict_prot ||
756 !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { 838 !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
757 free_memtype(paddr, paddr + size); 839 free_memtype(paddr, paddr + size);
758 printk(KERN_ERR "%s:%d map pfn expected mapping type %s" 840 pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
759 " for [mem %#010Lx-%#010Lx], got %s\n", 841 current->comm, current->pid,
760 current->comm, current->pid, 842 cattr_name(want_pcm),
761 cattr_name(want_pcm), 843 (unsigned long long)paddr,
762 (unsigned long long)paddr, 844 (unsigned long long)(paddr + size - 1),
763 (unsigned long long)(paddr + size - 1), 845 cattr_name(pcm));
764 cattr_name(pcm));
765 return -EINVAL; 846 return -EINVAL;
766 } 847 }
767 /* 848 /*
@@ -844,7 +925,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
844 return ret; 925 return ret;
845 } 926 }
846 927
847 if (!pat_enabled) 928 if (!pat_enabled())
848 return 0; 929 return 0;
849 930
850 /* 931 /*
@@ -872,7 +953,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
872{ 953{
873 enum page_cache_mode pcm; 954 enum page_cache_mode pcm;
874 955
875 if (!pat_enabled) 956 if (!pat_enabled())
876 return 0; 957 return 0;
877 958
878 /* Set prot based on lookup */ 959 /* Set prot based on lookup */
@@ -913,14 +994,18 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
913 994
914pgprot_t pgprot_writecombine(pgprot_t prot) 995pgprot_t pgprot_writecombine(pgprot_t prot)
915{ 996{
916 if (pat_enabled) 997 return __pgprot(pgprot_val(prot) |
917 return __pgprot(pgprot_val(prot) |
918 cachemode2protval(_PAGE_CACHE_MODE_WC)); 998 cachemode2protval(_PAGE_CACHE_MODE_WC));
919 else
920 return pgprot_noncached(prot);
921} 999}
922EXPORT_SYMBOL_GPL(pgprot_writecombine); 1000EXPORT_SYMBOL_GPL(pgprot_writecombine);
923 1001
1002pgprot_t pgprot_writethrough(pgprot_t prot)
1003{
1004 return __pgprot(pgprot_val(prot) |
1005 cachemode2protval(_PAGE_CACHE_MODE_WT));
1006}
1007EXPORT_SYMBOL_GPL(pgprot_writethrough);
1008
924#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) 1009#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
925 1010
926static struct memtype *memtype_get_idx(loff_t pos) 1011static struct memtype *memtype_get_idx(loff_t pos)
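pgprot_writethrough() is exported alongside the existing pgprot_writecombine(), so a driver can request WT user mappings the same way it would request WC. A hedged sketch of an mmap handler using it; the driver, the PFN source and the file_operations wiring are hypothetical:

#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical mmap handler mapping a device buffer write-through. */
static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
        unsigned long pfn = vma->vm_pgoff;                /* illustrative */
        unsigned long size = vma->vm_end - vma->vm_start;

        vma->vm_page_prot = pgprot_writethrough(vma->vm_page_prot);
        return remap_pfn_range(vma, vma->vm_start, pfn, size,
                               vma->vm_page_prot);
}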
@@ -996,7 +1081,7 @@ static const struct file_operations memtype_fops = {
996 1081
997static int __init pat_memtype_list_init(void) 1082static int __init pat_memtype_list_init(void)
998{ 1083{
999 if (pat_enabled) { 1084 if (pat_enabled()) {
1000 debugfs_create_file("pat_memtype_list", S_IRUSR, 1085 debugfs_create_file("pat_memtype_list", S_IRUSR,
1001 arch_debugfs_dir, NULL, &memtype_fops); 1086 arch_debugfs_dir, NULL, &memtype_fops);
1002 } 1087 }
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
index f6411620305d..a739bfc40690 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat_internal.h
@@ -4,7 +4,7 @@
4extern int pat_debug_enable; 4extern int pat_debug_enable;
5 5
6#define dprintk(fmt, arg...) \ 6#define dprintk(fmt, arg...) \
7 do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) 7 do { if (pat_debug_enable) pr_info("x86/PAT: " fmt, ##arg); } while (0)
8 8
9struct memtype { 9struct memtype {
10 u64 start; 10 u64 start;
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 6582adcc8bd9..63931080366a 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -160,9 +160,9 @@ success:
160 return 0; 160 return 0;
161 161
162failure: 162failure:
163 printk(KERN_INFO "%s:%d conflicting memory types " 163 pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
164 "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, 164 current->comm, current->pid, start, end,
165 end, cattr_name(found_type), cattr_name(match->type)); 165 cattr_name(found_type), cattr_name(match->type));
166 return -EBUSY; 166 return -EBUSY;
167} 167}
168 168
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 0b97d2c75df3..fb0a9dd1d6e4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -563,16 +563,31 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
563} 563}
564 564
565#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP 565#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
566/**
567 * pud_set_huge - setup kernel PUD mapping
568 *
569 * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
570 * function sets up a huge page only if any of the following conditions are met:
571 *
572 * - MTRRs are disabled, or
573 *
574 * - MTRRs are enabled and the range is completely covered by a single MTRR, or
575 *
576 * - MTRRs are enabled and the corresponding MTRR memory type is WB, which
577 * has no effect on the requested PAT memory type.
578 *
579 * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
580 * page mapping attempt fails.
581 *
582 * Returns 1 on success and 0 on failure.
583 */
566int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) 584int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
567{ 585{
568 u8 mtrr; 586 u8 mtrr, uniform;
569 587
570 /* 588 mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
571 * Do not use a huge page when the range is covered by non-WB type 589 if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
572 * of MTRRs. 590 (mtrr != MTRR_TYPE_WRBACK))
573 */
574 mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE);
575 if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF))
576 return 0; 591 return 0;
577 592
578 prot = pgprot_4k_2_large(prot); 593 prot = pgprot_4k_2_large(prot);
@@ -584,17 +599,24 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
584 return 1; 599 return 1;
585} 600}
586 601
602/**
603 * pmd_set_huge - setup kernel PMD mapping
604 *
605 * See text over pud_set_huge() above.
606 *
607 * Returns 1 on success and 0 on failure.
608 */
587int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) 609int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
588{ 610{
589 u8 mtrr; 611 u8 mtrr, uniform;
590 612
591 /* 613 mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
592 * Do not use a huge page when the range is covered by non-WB type 614 if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
593 * of MTRRs. 615 (mtrr != MTRR_TYPE_WRBACK)) {
594 */ 616 pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
595 mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); 617 __func__, addr, addr + PMD_SIZE);
596 if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF))
597 return 0; 618 return 0;
619 }
598 620
599 prot = pgprot_4k_2_large(prot); 621 prot = pgprot_4k_2_large(prot);
600 622
@@ -605,6 +627,11 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
605 return 1; 627 return 1;
606} 628}
607 629
630/**
631 * pud_clear_huge - clear kernel PUD mapping when it is set
632 *
633 * Returns 1 on success and 0 on failure (no PUD map is found).
634 */
608int pud_clear_huge(pud_t *pud) 635int pud_clear_huge(pud_t *pud)
609{ 636{
610 if (pud_large(*pud)) { 637 if (pud_large(*pud)) {
@@ -615,6 +642,11 @@ int pud_clear_huge(pud_t *pud)
615 return 0; 642 return 0;
616} 643}
617 644
645/**
646 * pmd_clear_huge - clear kernel PMD mapping when it is set
647 *
648 * Returns 1 on success and 0 on failure (no PMD map is found).
649 */
618int pmd_clear_huge(pmd_t *pmd) 650int pmd_clear_huge(pmd_t *pmd)
619{ 651{
620 if (pmd_large(*pmd)) { 652 if (pmd_large(*pmd)) {
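The kernel-doc added to pud_set_huge()/pmd_set_huge() boils down to a single predicate on the MTRR lookup; a compact restatement, assuming the updated mtrr_type_lookup() signature that reports uniformity and returns MTRR_TYPE_INVALID when MTRRs are disabled:

#include <linux/types.h>
#include <asm/mtrr.h>

/* Sketch: may [addr, addr + size) be mapped with one huge kernel page? */
static bool huge_mapping_allowed(u64 addr, u64 size)
{
        u8 uniform;
        u8 type = mtrr_type_lookup(addr, addr + size, &uniform);

        /* MTRRs disabled, a single MTRR covering the whole range, or WB
         * (which never overrides the PAT type): a huge page is fine. */
        return type == MTRR_TYPE_INVALID || uniform ||
               type == MTRR_TYPE_WRBACK;
}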
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6440221ced0d..4093216b3791 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -8,7 +8,6 @@
8 * of the License. 8 * of the License.
9 */ 9 */
10#include <linux/linkage.h> 10#include <linux/linkage.h>
11#include <asm/dwarf2.h>
12 11
13/* 12/*
14 * Calling convention : 13 * Calling convention :
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 99f76103c6b7..ddeff4844a10 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -966,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
966 } 966 }
967 ctx.cleanup_addr = proglen; 967 ctx.cleanup_addr = proglen;
968 968
969 for (pass = 0; pass < 10; pass++) { 969 /* JITed image shrinks with every pass and the loop iterates
970 * until the image stops shrinking. Very large bpf programs
971 * may converge on the last pass. In such case do one more
972 * pass to emit the final image
973 */
974 for (pass = 0; pass < 10 || image; pass++) {
970 proglen = do_jit(prog, addrs, image, oldproglen, &ctx); 975 proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
971 if (proglen <= 0) { 976 if (proglen <= 0) {
972 image = NULL; 977 image = NULL;
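The new loop condition matters for programs whose image only stops shrinking on the very last counted pass: convergence sets image, and "pass < 10 || image" grants the extra pass that actually emits. A standalone sketch of the same control flow, with shrink() standing in for do_jit() and the starting size chosen so convergence is only detected on pass 9:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for do_jit(): each pass can only shrink the image. */
static int shrink(int len)
{
        return len > 64 ? len - 17 : len;
}

int main(void)
{
        int oldlen = 0, len = 210;   /* converges only on pass 9 */
        bool image = false;          /* true once the size has stabilized */
        int pass;

        for (pass = 0; pass < 10 || image; pass++) {
                len = shrink(len);
                if (image) {
                        printf("pass %d: emitted final image, %d bytes\n",
                               pass, len);
                        break;
                }
                if (len == oldlen)   /* converged: next pass emits for real */
                        image = true;
                oldlen = len;
        }
        return 0;
}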
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index d93963340c3c..14a63ed6fe09 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -482,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
482 482
483int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) 483int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
484{ 484{
485 struct pci_sysdata *sd = bridge->bus->sysdata; 485 /*
486 486 * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
487 ACPI_COMPANION_SET(&bridge->dev, sd->companion); 487 * here, pci_create_root_bus() has been called by someone else and
488 * sysdata is likely to be different from what we expect. Let it go in
489 * that case.
490 */
491 if (!bridge->dev.parent) {
492 struct pci_sysdata *sd = bridge->bus->sysdata;
493 ACPI_COMPANION_SET(&bridge->dev, sd->companion);
494 }
488 return 0; 495 return 0;
489} 496}
490 497
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 349c0d32cc0b..0a9f2caf358f 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -429,12 +429,12 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
429 * Caller can followup with UC MINUS request and add a WC mtrr if there 429 * Caller can followup with UC MINUS request and add a WC mtrr if there
430 * is a free mtrr slot. 430 * is a free mtrr slot.
431 */ 431 */
432 if (!pat_enabled && write_combine) 432 if (!pat_enabled() && write_combine)
433 return -EINVAL; 433 return -EINVAL;
434 434
435 if (pat_enabled && write_combine) 435 if (pat_enabled() && write_combine)
436 prot |= cachemode2protval(_PAGE_CACHE_MODE_WC); 436 prot |= cachemode2protval(_PAGE_CACHE_MODE_WC);
437 else if (pat_enabled || boot_cpu_data.x86 > 3) 437 else if (pat_enabled() || boot_cpu_data.x86 > 3)
438 /* 438 /*
439 * ioremap() and ioremap_nocache() defaults to UC MINUS for now. 439 * ioremap() and ioremap_nocache() defaults to UC MINUS for now.
440 * To avoid attribute conflicts, request UC MINUS here 440 * To avoid attribute conflicts, request UC MINUS here
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index a62e0be3a2f1..f1a6c8e86ddd 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -1,4 +1,5 @@
1# Platform specific code goes here 1# Platform specific code goes here
2obj-y += atom/
2obj-y += ce4100/ 3obj-y += ce4100/
3obj-y += efi/ 4obj-y += efi/
4obj-y += geode/ 5obj-y += geode/
diff --git a/arch/x86/platform/atom/Makefile b/arch/x86/platform/atom/Makefile
new file mode 100644
index 000000000000..0a3a40cbc794
--- /dev/null
+++ b/arch/x86/platform/atom/Makefile
@@ -0,0 +1 @@
 1obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
new file mode 100644
index 000000000000..5ca8ead91579
--- /dev/null
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -0,0 +1,183 @@
1/*
2 * Intel SOC Punit device state debug driver
3 * Punit controls power management for North Complex devices (Graphics
4 * blocks, Image Signal Processing, video processing, display, DSP etc.)
5 *
6 * Copyright (c) 2015, Intel Corporation.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 */
18
19#include <linux/module.h>
20#include <linux/init.h>
21#include <linux/device.h>
22#include <linux/debugfs.h>
23#include <linux/seq_file.h>
24#include <linux/io.h>
25#include <asm/cpu_device_id.h>
26#include <asm/iosf_mbi.h>
27
28/* Side band Interface port */
29#define PUNIT_PORT 0x04
30/* Power gate status reg */
31#define PWRGT_STATUS 0x61
32/* Subsystem config/status Video processor */
33#define VED_SS_PM0 0x32
34/* Subsystem config/status ISP (Image Signal Processor) */
35#define ISP_SS_PM0 0x39
36/* Subsystem config/status Input/output controller */
37#define MIO_SS_PM 0x3B
38/* Shift bits for getting status for video, isp and i/o */
39#define SSS_SHIFT 24
40/* Shift bits for getting status for graphics rendering */
41#define RENDER_POS 0
42/* Shift bits for getting status for media control */
43#define MEDIA_POS 2
44/* Shift bits for getting status for Valley View/Baytrail display */
45#define VLV_DISPLAY_POS 6
46/* Subsystem config/status display for Cherry Trail SOC */
47#define CHT_DSP_SSS 0x36
48/* Shift bits for getting status for display */
49#define CHT_DSP_SSS_POS 16
50
51struct punit_device {
52 char *name;
53 int reg;
54 int sss_pos;
55};
56
57static const struct punit_device punit_device_byt[] = {
58 { "GFX RENDER", PWRGT_STATUS, RENDER_POS },
59 { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS },
60 { "DISPLAY", PWRGT_STATUS, VLV_DISPLAY_POS },
61 { "VED", VED_SS_PM0, SSS_SHIFT },
62 { "ISP", ISP_SS_PM0, SSS_SHIFT },
63 { "MIO", MIO_SS_PM, SSS_SHIFT },
64 { NULL }
65};
66
67static const struct punit_device punit_device_cht[] = {
68 { "GFX RENDER", PWRGT_STATUS, RENDER_POS },
69 { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS },
70 { "DISPLAY", CHT_DSP_SSS, CHT_DSP_SSS_POS },
71 { "VED", VED_SS_PM0, SSS_SHIFT },
72 { "ISP", ISP_SS_PM0, SSS_SHIFT },
73 { "MIO", MIO_SS_PM, SSS_SHIFT },
74 { NULL }
75};
76
77static const char * const dstates[] = {"D0", "D0i1", "D0i2", "D0i3"};
78
79static int punit_dev_state_show(struct seq_file *seq_file, void *unused)
80{
81 u32 punit_pwr_status;
82 struct punit_device *punit_devp = seq_file->private;
83 int index;
84 int status;
85
86 seq_puts(seq_file, "\n\nPUNIT NORTH COMPLEX DEVICES :\n");
87 while (punit_devp->name) {
88 status = iosf_mbi_read(PUNIT_PORT, BT_MBI_PMC_READ,
89 punit_devp->reg,
90 &punit_pwr_status);
91 if (status) {
92 seq_printf(seq_file, "%9s : Read Failed\n",
93 punit_devp->name);
94 } else {
95 index = (punit_pwr_status >> punit_devp->sss_pos) & 3;
96 seq_printf(seq_file, "%9s : %s\n", punit_devp->name,
97 dstates[index]);
98 }
99 punit_devp++;
100 }
101
102 return 0;
103}
104
105static int punit_dev_state_open(struct inode *inode, struct file *file)
106{
107 return single_open(file, punit_dev_state_show, inode->i_private);
108}
109
110static const struct file_operations punit_dev_state_ops = {
111 .open = punit_dev_state_open,
112 .read = seq_read,
113 .llseek = seq_lseek,
114 .release = single_release,
115};
116
117static struct dentry *punit_dbg_file;
118
119static int punit_dbgfs_register(struct punit_device *punit_device)
120{
121 static struct dentry *dev_state;
122
123 punit_dbg_file = debugfs_create_dir("punit_atom", NULL);
124 if (!punit_dbg_file)
125 return -ENXIO;
126
127 dev_state = debugfs_create_file("dev_power_state", S_IFREG | S_IRUGO,
128 punit_dbg_file, punit_device,
129 &punit_dev_state_ops);
130 if (!dev_state) {
131 pr_err("punit_dev_state register failed\n");
132 debugfs_remove(punit_dbg_file);
133 return -ENXIO;
134 }
135
136 return 0;
137}
138
139static void punit_dbgfs_unregister(void)
140{
141 debugfs_remove_recursive(punit_dbg_file);
142}
143
144#define ICPU(model, drv_data) \
145 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT,\
146 (kernel_ulong_t)&drv_data }
147
148static const struct x86_cpu_id intel_punit_cpu_ids[] = {
149 ICPU(55, punit_device_byt), /* Valleyview, Bay Trail */
150 ICPU(76, punit_device_cht), /* Braswell, Cherry Trail */
151 {}
152};
153
154MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids);
155
156static int __init punit_atom_debug_init(void)
157{
158 const struct x86_cpu_id *id;
159 int ret;
160
161 id = x86_match_cpu(intel_punit_cpu_ids);
162 if (!id)
163 return -ENODEV;
164
165 ret = punit_dbgfs_register((struct punit_device *)id->driver_data);
166 if (ret < 0)
167 return ret;
168
169 return 0;
170}
171
172static void __exit punit_atom_debug_exit(void)
173{
174 punit_dbgfs_unregister();
175}
176
177module_init(punit_atom_debug_init);
178module_exit(punit_atom_debug_exit);
179
180MODULE_AUTHOR("Kumar P, Mahesh <mahesh.kumar.p@intel.com>");
181MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
182MODULE_DESCRIPTION("Driver for Punit devices states debugging");
183MODULE_LICENSE("GPL v2");
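Reading the file this driver creates (dev_power_state under the punit_atom debugfs directory) prints one 2-bit D-state per North Complex device. A user-space sketch of the same decoding applied to a raw status word; the register value, and the fact that Bay Trail bit positions are shown, are illustrative:

#include <stdio.h>

static const char * const dstates[] = { "D0", "D0i1", "D0i2", "D0i3" };

int main(void)
{
        unsigned int pwrgt_status = 0x000000c4;   /* illustrative raw value */

        /* Bit positions as used for Bay Trail in the driver above. */
        printf("GFX RENDER : %s\n", dstates[(pwrgt_status >> 0) & 3]);
        printf("GFX MEDIA  : %s\n", dstates[(pwrgt_status >> 2) & 3]);
        printf("DISPLAY    : %s\n", dstates[(pwrgt_status >> 6) & 3]);
        return 0;
}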
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index acb384d24669..a8fecc226946 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -26,7 +26,7 @@ else
26 26
27obj-y += syscalls_64.o vdso/ 27obj-y += syscalls_64.o vdso/
28 28
29subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \ 29subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \
30 ../lib/rwsem.o 30 ../lib/rwsem.o
31 31
32endif 32endif
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 7e8a1a650435..b9531d343134 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -39,7 +39,8 @@
39#define smp_mb() barrier() 39#define smp_mb() barrier()
40#define smp_rmb() barrier() 40#define smp_rmb() barrier()
41#define smp_wmb() barrier() 41#define smp_wmb() barrier()
42#define set_mb(var, value) do { var = value; barrier(); } while (0) 42
43#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
43 44
44#define read_barrier_depends() do { } while (0) 45#define read_barrier_depends() do { } while (0)
45#define smp_read_barrier_depends() do { } while (0) 46#define smp_read_barrier_depends() do { } while (0)
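smp_store_mb() keeps the old set_mb() semantics (store, then barrier) but performs the store through WRITE_ONCE(). A minimal sketch of the producer/consumer pattern it is typically paired with; the flag and wait queue are illustrative, not part of this change:

#include <linux/wait.h>
#include <linux/compiler.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static int demo_done;

static void demo_waker(void)
{
        /* Publish the condition; the barrier orders it before the wakeup. */
        smp_store_mb(demo_done, 1);
        wake_up(&demo_wq);
}

static void demo_sleeper(void)
{
        wait_event(demo_wq, READ_ONCE(demo_done));
}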
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fe969ac1c65e..a8f57a94785a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1468,6 +1468,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
1468{ 1468{
1469 struct physdev_set_iopl set_iopl; 1469 struct physdev_set_iopl set_iopl;
1470 unsigned long initrd_start = 0; 1470 unsigned long initrd_start = 0;
1471 u64 pat;
1471 int rc; 1472 int rc;
1472 1473
1473 if (!xen_start_info) 1474 if (!xen_start_info)
@@ -1575,8 +1576,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
1575 * Modify the cache mode translation tables to match Xen's PAT 1576 * Modify the cache mode translation tables to match Xen's PAT
1576 * configuration. 1577 * configuration.
1577 */ 1578 */
1578 1579 rdmsrl(MSR_IA32_CR_PAT, pat);
1579 pat_init_cache_modes(); 1580 pat_init_cache_modes(pat);
1580 1581
1581 /* keep using Xen gdt for now; no urgent need to change it */ 1582 /* keep using Xen gdt for now; no urgent need to change it */
1582 1583
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index b47124d4cd67..8b7f18e200aa 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -67,6 +67,7 @@
67#include <linux/seq_file.h> 67#include <linux/seq_file.h>
68#include <linux/bootmem.h> 68#include <linux/bootmem.h>
69#include <linux/slab.h> 69#include <linux/slab.h>
70#include <linux/vmalloc.h>
70 71
71#include <asm/cache.h> 72#include <asm/cache.h>
72#include <asm/setup.h> 73#include <asm/setup.h>
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 956374c1edbc..9e2ba5c6e1dd 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,6 +17,56 @@
17#include "xen-ops.h" 17#include "xen-ops.h"
18#include "debugfs.h" 18#include "debugfs.h"
19 19
20static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
21static DEFINE_PER_CPU(char *, irq_name);
22static bool xen_pvspin = true;
23
24#ifdef CONFIG_QUEUED_SPINLOCKS
25
26#include <asm/qspinlock.h>
27
28static void xen_qlock_kick(int cpu)
29{
30 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
31}
32
33/*
34 * Halt the current CPU & release it back to the host
35 */
36static void xen_qlock_wait(u8 *byte, u8 val)
37{
38 int irq = __this_cpu_read(lock_kicker_irq);
39
40 /* If kicker interrupts not initialized yet, just spin */
41 if (irq == -1)
42 return;
43
44 /* clear pending */
45 xen_clear_irq_pending(irq);
46 barrier();
47
48 /*
49 * We check the byte value after clearing pending IRQ to make sure
50 * that we won't miss a wakeup event because of the clearing.
51 *
52 * The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
53 * So it is effectively a memory barrier for x86.
54 */
55 if (READ_ONCE(*byte) != val)
56 return;
57
58 /*
59 * If an interrupt happens here, it will leave the wakeup irq
60 * pending, which will cause xen_poll_irq() to return
61 * immediately.
62 */
63
64 /* Block until irq becomes pending (or perhaps a spurious wakeup) */
65 xen_poll_irq(irq);
66}
67
68#else /* CONFIG_QUEUED_SPINLOCKS */
69
20enum xen_contention_stat { 70enum xen_contention_stat {
21 TAKEN_SLOW, 71 TAKEN_SLOW,
22 TAKEN_SLOW_PICKUP, 72 TAKEN_SLOW_PICKUP,
@@ -100,12 +150,9 @@ struct xen_lock_waiting {
100 __ticket_t want; 150 __ticket_t want;
101}; 151};
102 152
103static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
104static DEFINE_PER_CPU(char *, irq_name);
105static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); 153static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
106static cpumask_t waiting_cpus; 154static cpumask_t waiting_cpus;
107 155
108static bool xen_pvspin = true;
109__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) 156__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
110{ 157{
111 int irq = __this_cpu_read(lock_kicker_irq); 158 int irq = __this_cpu_read(lock_kicker_irq);
@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
217 } 264 }
218 } 265 }
219} 266}
267#endif /* CONFIG_QUEUED_SPINLOCKS */
220 268
221static irqreturn_t dummy_handler(int irq, void *dev_id) 269static irqreturn_t dummy_handler(int irq, void *dev_id)
222{ 270{
@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
280 return; 328 return;
281 } 329 }
282 printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); 330 printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
331#ifdef CONFIG_QUEUED_SPINLOCKS
332 __pv_init_lock_hash();
333 pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
334 pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
335 pv_lock_ops.wait = xen_qlock_wait;
336 pv_lock_ops.kick = xen_qlock_kick;
337#else
283 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); 338 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
284 pv_lock_ops.unlock_kick = xen_unlock_kick; 339 pv_lock_ops.unlock_kick = xen_unlock_kick;
340#endif
285} 341}
286 342
287/* 343/*
@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
310} 366}
311early_param("xen_nopvspin", xen_parse_nopvspin); 367early_param("xen_nopvspin", xen_parse_nopvspin);
312 368
313#ifdef CONFIG_XEN_DEBUG_FS 369#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
314 370
315static struct dentry *d_spin_debug; 371static struct dentry *d_spin_debug;
316 372
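The ordering inside xen_qlock_wait() is what prevents a lost wakeup: the pending event is cleared first, the lock byte is rechecked second, and only then does the CPU block, so a kick arriving after the clear is still seen by xen_poll_irq(). A compressed sketch of that handshake, with hypothetical stand-ins for the Xen event helpers:

#include <linux/types.h>
#include <linux/compiler.h>

/* Hypothetical stand-ins for xen_clear_irq_pending()/xen_poll_irq(). */
static void clear_event(int irq) { }
static void wait_for_event(int irq) { }

static void qlock_wait_sketch(u8 *byte, u8 val, int irq)
{
        clear_event(irq);              /* 1: drop any stale kick          */

        if (READ_ONCE(*byte) != val)   /* 2: recheck only after the clear */
                return;

        wait_for_event(irq);           /* 3: a kick sent after step 1 keeps
                                        *    this from blocking forever   */
}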
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 04529e620559..f22667abf7b9 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -114,7 +114,7 @@ RELOC(xen_sysret32, 1b+1)
114/* Normal 64-bit system call target */ 114/* Normal 64-bit system call target */
115ENTRY(xen_syscall_target) 115ENTRY(xen_syscall_target)
116 undo_xen_syscall 116 undo_xen_syscall
117 jmp system_call_after_swapgs 117 jmp entry_SYSCALL_64_after_swapgs
118ENDPROC(xen_syscall_target) 118ENDPROC(xen_syscall_target)
119 119
120#ifdef CONFIG_IA32_EMULATION 120#ifdef CONFIG_IA32_EMULATION
@@ -122,13 +122,13 @@ ENDPROC(xen_syscall_target)
122/* 32-bit compat syscall target */ 122/* 32-bit compat syscall target */
123ENTRY(xen_syscall32_target) 123ENTRY(xen_syscall32_target)
124 undo_xen_syscall 124 undo_xen_syscall
125 jmp ia32_cstar_target 125 jmp entry_SYSCALL_compat
126ENDPROC(xen_syscall32_target) 126ENDPROC(xen_syscall32_target)
127 127
128/* 32-bit compat sysenter target */ 128/* 32-bit compat sysenter target */
129ENTRY(xen_sysenter_target) 129ENTRY(xen_sysenter_target)
130 undo_xen_syscall 130 undo_xen_syscall
131 jmp ia32_sysenter_target 131 jmp entry_SYSENTER_compat
132ENDPROC(xen_sysenter_target) 132ENDPROC(xen_sysenter_target)
133 133
134#else /* !CONFIG_IA32_EMULATION */ 134#else /* !CONFIG_IA32_EMULATION */
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
index 172a02a6ad14..ba78ccf651e7 100644
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ b/arch/xtensa/include/asm/dma-mapping.h
@@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt,
185 return -EINVAL; 185 return -EINVAL;
186} 186}
187 187
188static inline void *dma_alloc_attrs(struct device *dev, size_t size,
189 dma_addr_t *dma_handle, gfp_t flag,
190 struct dma_attrs *attrs)
191{
192 return NULL;
193}
194
195static inline void dma_free_attrs(struct device *dev, size_t size,
196 void *vaddr, dma_addr_t dma_handle,
197 struct dma_attrs *attrs)
198{
199}
200
188#endif /* _XTENSA_DMA_MAPPING_H */ 201#endif /* _XTENSA_DMA_MAPPING_H */
diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h
index fe1600a09438..c39bb6e61911 100644
--- a/arch/xtensa/include/asm/io.h
+++ b/arch/xtensa/include/asm/io.h
@@ -59,6 +59,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset,
59} 59}
60 60
61#define ioremap_wc ioremap_nocache 61#define ioremap_wc ioremap_nocache
62#define ioremap_wt ioremap_nocache
62 63
63static inline void __iomem *ioremap(unsigned long offset, unsigned long size) 64static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
64{ 65{