Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/include/uapi/asm/Kbuild | 2
-rw-r--r--  arch/arc/boot/dts/axc003.dtsi | 8
-rw-r--r--  arch/arc/boot/dts/axc003_idu.dtsi | 8
-rw-r--r--  arch/arc/boot/dts/hsdk.dts | 8
-rw-r--r--  arch/arc/configs/hsdk_defconfig | 5
-rw-r--r--  arch/arc/include/asm/uaccess.h | 5
-rw-r--r--  arch/arc/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/arc/kernel/setup.c | 2
-rw-r--r--  arch/arc/kernel/stacktrace.c | 2
-rw-r--r--  arch/arc/kernel/traps.c | 14
-rw-r--r--  arch/arc/kernel/troubleshoot.c | 3
-rw-r--r--  arch/arc/plat-axs10x/axs10x.c | 18
-rw-r--r--  arch/arc/plat-hsdk/platform.c | 42
-rw-r--r--  arch/arm/boot/dts/am33xx.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/am4372.dtsi | 6
-rw-r--r--  arch/arm/boot/dts/am437x-cm-t43.dts | 4
-rw-r--r--  arch/arm/boot/dts/armada-385-db-ap.dts | 1
-rw-r--r--  arch/arm/boot/dts/armada-385-linksys.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/armada-385-synology-ds116.dts | 2
-rw-r--r--  arch/arm/boot/dts/armada-388-gp.dts | 2
-rw-r--r--  arch/arm/boot/dts/aspeed-g4.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/at91-tse850-3.dts | 1
-rw-r--r--  arch/arm/boot/dts/bcm-nsp.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/bcm283x.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/bcm958623hr.dts | 4
-rw-r--r--  arch/arm/boot/dts/bcm958625hr.dts | 4
-rw-r--r--  arch/arm/boot/dts/da850-lego-ev3.dts | 4
-rw-r--r--  arch/arm/boot/dts/dm814x.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/exynos5800-peach-pi.dts | 4
-rw-r--r--  arch/arm/boot/dts/imx53.dtsi | 9
-rw-r--r--  arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts | 3
-rw-r--r--  arch/arm/boot/dts/logicpd-som-lv.dtsi | 17
-rw-r--r--  arch/arm/boot/dts/ls1021a-qds.dts | 2
-rw-r--r--  arch/arm/boot/dts/ls1021a-twr.dts | 2
-rw-r--r--  arch/arm/boot/dts/meson.dtsi | 18
-rw-r--r--  arch/arm/boot/dts/nspire.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3-beagle-xm.dts | 1
-rw-r--r--  arch/arm/boot/dts/omap3-beagle.dts | 1
-rw-r--r--  arch/arm/boot/dts/omap3-cm-t3x.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/omap3-evm-common.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3-gta04.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3-igep0020-common.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3-igep0030-common.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3-lilly-a83x.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3-overo-base.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3-pandora-common.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3-tao3530.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap3.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap4-droid4-xt894.dts | 1
-rw-r--r--  arch/arm/boot/dts/omap4-duovero.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap4-panda-common.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap4-var-som-om44.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/omap4.dtsi | 5
-rw-r--r--  arch/arm/boot/dts/omap5-board-common.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/omap5-cm-t54.dts | 2
-rw-r--r--  arch/arm/boot/dts/omap5.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/r8a7790.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/r8a7792.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/r8a7793.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/r8a7794.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/rk3066a-marsboard.dts | 4
-rw-r--r--  arch/arm/boot/dts/rk3288.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/sun4i-a10.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/sun5i-a10s.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/sun6i-a31.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/sun7i-a20.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts | 1
-rw-r--r--  arch/arm/boot/dts/tango4-common.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/vf610-zii-dev-rev-c.dts | 6
-rw-r--r--  arch/arm/include/asm/kvm_arm.h | 3
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 5
-rw-r--r--  arch/arm/include/asm/pgtable-3level.h | 1
-rw-r--r--  arch/arm/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/arm/kernel/entry-header.S | 4
-rw-r--r--  arch/arm/kernel/traps.c | 1
-rw-r--r--  arch/arm/lib/csumpartialcopyuser.S | 4
-rw-r--r--  arch/arm/mach-davinci/dm365.c | 29
-rw-r--r--  arch/arm/mach-meson/platsmp.c | 2
-rw-r--r--  arch/arm/mach-omap2/cm_common.c | 6
-rw-r--r--  arch/arm/mach-omap2/omap-secure.c | 21
-rw-r--r--  arch/arm/mach-omap2/omap-secure.h | 4
-rw-r--r--  arch/arm/mach-omap2/omap_device.c | 10
-rw-r--r--  arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 1
-rw-r--r--  arch/arm/mach-omap2/pm.h | 4
-rw-r--r--  arch/arm/mach-omap2/pm34xx.c | 13
-rw-r--r--  arch/arm/mach-omap2/prcm-common.h | 1
-rw-r--r--  arch/arm/mach-omap2/prm33xx.c | 12
-rw-r--r--  arch/arm/mach-omap2/sleep34xx.S | 26
-rw-r--r--  arch/arm64/Kconfig | 12
-rw-r--r--  arch/arm64/Makefile | 3
-rw-r--r--  arch/arm64/boot/dts/Makefile | 2
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts | 1
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts | 1
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 3
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi | 11
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts | 2
-rw-r--r--  arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 4
-rw-r--r--  arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 6
-rw-r--r--  arch/arm64/boot/dts/renesas/salvator-common.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/renesas/ulcb.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 11
-rw-r--r--  arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts | 1
-rw-r--r--  arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts | 1
-rw-r--r--  arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts | 3
-rw-r--r--  arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi | 4
-rw-r--r--  arch/arm64/include/asm/assembler.h | 10
-rw-r--r--  arch/arm64/include/asm/cacheflush.h | 2
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 3
-rw-r--r--  arch/arm64/include/asm/cputype.h | 2
-rw-r--r--  arch/arm64/include/asm/efi.h | 4
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 3
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 1
-rw-r--r--  arch/arm64/include/asm/mmu_context.h | 46
-rw-r--r--  arch/arm64/include/asm/module.h | 46
-rw-r--r--  arch/arm64/include/asm/perf_event.h | 2
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 42
-rw-r--r--  arch/arm64/include/uapi/asm/bpf_perf_event.h | 9
-rw-r--r--  arch/arm64/kernel/Makefile | 3
-rw-r--r--  arch/arm64/kernel/cpu-reset.S | 1
-rw-r--r--  arch/arm64/kernel/cpu_ops.c | 6
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 3
-rw-r--r--  arch/arm64/kernel/efi-entry.S | 2
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 57
-rw-r--r--  arch/arm64/kernel/ftrace-mod.S | 18
-rw-r--r--  arch/arm64/kernel/ftrace.c | 14
-rw-r--r--  arch/arm64/kernel/head.S | 1
-rw-r--r--  arch/arm64/kernel/hw_breakpoint.c | 2
-rw-r--r--  arch/arm64/kernel/module-plts.c | 50
-rw-r--r--  arch/arm64/kernel/module.lds | 1
-rw-r--r--  arch/arm64/kernel/perf_event.c | 6
-rw-r--r--  arch/arm64/kernel/process.c | 9
-rw-r--r--  arch/arm64/kernel/relocate_kernel.S | 1
-rw-r--r--  arch/arm64/kvm/debug.c | 21
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 57
-rw-r--r--  arch/arm64/kvm/hyp-init.S | 1
-rw-r--r--  arch/arm64/kvm/hyp/debug-sr.c | 3
-rw-r--r--  arch/arm64/kvm/hyp/switch.c | 37
-rw-r--r--  arch/arm64/mm/context.c | 28
-rw-r--r--  arch/arm64/mm/dump.c | 2
-rw-r--r--  arch/arm64/mm/fault.c | 5
-rw-r--r--  arch/arm64/mm/init.c | 3
-rw-r--r--  arch/arm64/mm/pgd.c | 2
-rw-r--r--  arch/blackfin/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/c6x/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/cris/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/frv/include/uapi/asm/Kbuild | 2
-rw-r--r--  arch/h8300/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/hexagon/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/ia64/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/ia64/kernel/time.c | 2
-rw-r--r--  arch/m32r/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/m32r/kernel/traps.c | 1
-rw-r--r--  arch/m68k/configs/stmark2_defconfig | 1
-rw-r--r--  arch/m68k/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/m68k/kernel/vmlinux-nommu.lds | 2
-rw-r--r--  arch/m68k/kernel/vmlinux-std.lds | 2
-rw-r--r--  arch/m68k/kernel/vmlinux-sun3.lds | 2
-rw-r--r--  arch/metag/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/microblaze/include/asm/mmu_context_mm.h | 1
-rw-r--r--  arch/microblaze/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/mips/include/asm/Kbuild | 1
-rw-r--r--  arch/mips/include/asm/pgtable.h | 2
-rw-r--r--  arch/mips/include/asm/serial.h | 22
-rw-r--r--  arch/mips/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/mips/kernel/cps-vec.S | 2
-rw-r--r--  arch/mips/kernel/process.c | 12
-rw-r--r--  arch/mips/kernel/ptrace.c | 147
-rw-r--r--  arch/mips/kvm/mips.c | 7
-rw-r--r--  arch/mn10300/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/nios2/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/openrisc/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/parisc/boot/compressed/misc.c | 4
-rw-r--r--  arch/parisc/include/asm/ldcw.h | 2
-rw-r--r--  arch/parisc/include/asm/thread_info.h | 5
-rw-r--r--  arch/parisc/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/parisc/kernel/drivers.c | 2
-rw-r--r--  arch/parisc/kernel/entry.S | 25
-rw-r--r--  arch/parisc/kernel/hpmc.S | 1
-rw-r--r--  arch/parisc/kernel/pacache.S | 9
-rw-r--r--  arch/parisc/kernel/process.c | 39
-rw-r--r--  arch/parisc/kernel/unwind.c | 1
-rw-r--r--  arch/parisc/lib/delay.c | 2
-rw-r--r--  arch/parisc/mm/init.c | 10
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 1
-rw-r--r--  arch/powerpc/include/asm/exception-64e.h | 6
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 57
-rw-r--r--  arch/powerpc/include/asm/feature-fixups.h | 13
-rw-r--r--  arch/powerpc/include/asm/hvcall.h | 17
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 1
-rw-r--r--  arch/powerpc/include/asm/machdep.h | 1
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 5
-rw-r--r--  arch/powerpc/include/asm/paca.h | 10
-rw-r--r--  arch/powerpc/include/asm/plpar_wrappers.h | 14
-rw-r--r--  arch/powerpc/include/asm/setup.h | 14
-rw-r--r--  arch/powerpc/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 5
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 2
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 44
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 137
-rw-r--r--  arch/powerpc/kernel/fadump.c | 22
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 2
-rw-r--r--  arch/powerpc/kernel/process.c | 14
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 27
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 101
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 9
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 127
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 3
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 9
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_rmhandlers.S | 7
-rw-r--r--  arch/powerpc/kvm/book3s_segment.S | 4
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c | 7
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 7
-rw-r--r--  arch/powerpc/lib/feature-fixups.c | 41
-rw-r--r--  arch/powerpc/mm/fault.c | 7
-rw-r--r--  arch/powerpc/mm/hash_native_64.c | 15
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 6
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 12
-rw-r--r--  arch/powerpc/perf/imc-pmu.c | 17
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 49
-rw-r--r--  arch/powerpc/platforms/ps3/setup.c | 15
-rw-r--r--  arch/powerpc/platforms/pseries/dlpar.c | 21
-rw-r--r--  arch/powerpc/platforms/pseries/pseries.h | 2
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 36
-rw-r--r--  arch/powerpc/sysdev/fsl_msi.c | 4
-rw-r--r--  arch/powerpc/xmon/xmon.c | 10
-rw-r--r--  arch/riscv/configs/defconfig | 75
-rw-r--r--  arch/riscv/include/asm/Kbuild | 1
-rw-r--r--  arch/riscv/include/asm/asm.h | 12
-rw-r--r--  arch/riscv/include/asm/atomic.h | 103
-rw-r--r--  arch/riscv/include/asm/barrier.h | 36
-rw-r--r--  arch/riscv/include/asm/bitops.h | 2
-rw-r--r--  arch/riscv/include/asm/bug.h | 6
-rw-r--r--  arch/riscv/include/asm/cacheflush.h | 30
-rw-r--r--  arch/riscv/include/asm/csr.h | 8
-rw-r--r--  arch/riscv/include/asm/io.h | 20
-rw-r--r--  arch/riscv/include/asm/irqflags.h | 10
-rw-r--r--  arch/riscv/include/asm/mmu.h | 4
-rw-r--r--  arch/riscv/include/asm/mmu_context.h | 45
-rw-r--r--  arch/riscv/include/asm/pgtable.h | 62
-rw-r--r--  arch/riscv/include/asm/ptrace.h | 2
-rw-r--r--  arch/riscv/include/asm/spinlock.h | 11
-rw-r--r--  arch/riscv/include/asm/timex.h | 3
-rw-r--r--  arch/riscv/include/asm/tlbflush.h | 9
-rw-r--r--  arch/riscv/include/asm/uaccess.h | 12
-rw-r--r--  arch/riscv/include/asm/unistd.h | 1
-rw-r--r--  arch/riscv/include/asm/vdso.h | 4
-rw-r--r--  arch/riscv/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/riscv/include/uapi/asm/syscalls.h | 26
-rw-r--r--  arch/riscv/kernel/entry.S | 8
-rw-r--r--  arch/riscv/kernel/head.S | 3
-rw-r--r--  arch/riscv/kernel/process.c | 4
-rw-r--r--  arch/riscv/kernel/riscv_ksyms.c | 3
-rw-r--r--  arch/riscv/kernel/setup.c | 16
-rw-r--r--  arch/riscv/kernel/smp.c | 55
-rw-r--r--  arch/riscv/kernel/sys_riscv.c | 33
-rw-r--r--  arch/riscv/kernel/syscall_table.c | 1
-rw-r--r--  arch/riscv/kernel/vdso/Makefile | 7
-rw-r--r--  arch/riscv/kernel/vdso/clock_getres.S | 26
-rw-r--r--  arch/riscv/kernel/vdso/clock_gettime.S | 26
-rw-r--r--  arch/riscv/kernel/vdso/flush_icache.S | 30
-rw-r--r--  arch/riscv/kernel/vdso/getcpu.S | 26
-rw-r--r--  arch/riscv/kernel/vdso/gettimeofday.S | 26
-rw-r--r--  arch/riscv/kernel/vdso/vdso.lds.S | 7
-rw-r--r--  arch/riscv/lib/delay.c | 1
-rw-r--r--  arch/riscv/mm/Makefile | 1
-rw-r--r--  arch/riscv/mm/cacheflush.c | 23
-rw-r--r--  arch/riscv/mm/fault.c | 2
-rw-r--r--  arch/riscv/mm/ioremap.c | 2
-rw-r--r--  arch/s390/Kbuild | 1
-rw-r--r--  arch/s390/Makefile | 5
-rw-r--r--  arch/s390/appldata/Makefile | 1
-rw-r--r--  arch/s390/appldata/appldata_base.c | 1
-rw-r--r--  arch/s390/appldata/appldata_mem.c | 1
-rw-r--r--  arch/s390/appldata/appldata_net_sum.c | 1
-rw-r--r--  arch/s390/appldata/appldata_os.c | 1
-rw-r--r--  arch/s390/boot/compressed/vmlinux.scr | 1
-rw-r--r--  arch/s390/boot/install.sh | 5
-rw-r--r--  arch/s390/crypto/aes_s390.c | 7
-rw-r--r--  arch/s390/crypto/arch_random.c | 6
-rw-r--r--  arch/s390/crypto/crc32-vx.c | 1
-rw-r--r--  arch/s390/crypto/des_s390.c | 7
-rw-r--r--  arch/s390/crypto/ghash_s390.c | 1
-rw-r--r--  arch/s390/crypto/paes_s390.c | 6
-rw-r--r--  arch/s390/crypto/prng.c | 1
-rw-r--r--  arch/s390/crypto/sha.h | 7
-rw-r--r--  arch/s390/crypto/sha1_s390.c | 7
-rw-r--r--  arch/s390/crypto/sha256_s390.c | 7
-rw-r--r--  arch/s390/crypto/sha512_s390.c | 7
-rw-r--r--  arch/s390/crypto/sha_common.c | 7
-rw-r--r--  arch/s390/hypfs/Makefile | 1
-rw-r--r--  arch/s390/hypfs/inode.c | 2
-rw-r--r--  arch/s390/include/asm/Kbuild | 1
-rw-r--r--  arch/s390/include/asm/alternative.h | 1
-rw-r--r--  arch/s390/include/asm/ap.h | 5
-rw-r--r--  arch/s390/include/asm/bugs.h | 1
-rw-r--r--  arch/s390/include/asm/cpu_mf.h | 5
-rw-r--r--  arch/s390/include/asm/elf.h | 15
-rw-r--r--  arch/s390/include/asm/kprobes.h | 15
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 5
-rw-r--r--  arch/s390/include/asm/kvm_para.h | 7
-rw-r--r--  arch/s390/include/asm/livepatch.h | 8
-rw-r--r--  arch/s390/include/asm/mmu_context.h | 2
-rw-r--r--  arch/s390/include/asm/perf_event.h | 1
-rw-r--r--  arch/s390/include/asm/pgtable.h | 2
-rw-r--r--  arch/s390/include/asm/ptrace.h | 11
-rw-r--r--  arch/s390/include/asm/segment.h | 1
-rw-r--r--  arch/s390/include/asm/switch_to.h | 27
-rw-r--r--  arch/s390/include/asm/syscall.h | 5
-rw-r--r--  arch/s390/include/asm/sysinfo.h | 5
-rw-r--r--  arch/s390/include/asm/topology.h | 1
-rw-r--r--  arch/s390/include/asm/vga.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/s390/include/uapi/asm/bpf_perf_event.h | 9
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 4
-rw-r--r--  arch/s390/include/uapi/asm/kvm_para.h | 4
-rw-r--r--  arch/s390/include/uapi/asm/kvm_perf.h | 4
-rw-r--r--  arch/s390/include/uapi/asm/perf_regs.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/ptrace.h | 125
-rw-r--r--  arch/s390/include/uapi/asm/sthyi.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/virtio-ccw.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/vmcp.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/zcrypt.h | 14
-rw-r--r--  arch/s390/kernel/alternative.c | 1
-rw-r--r--  arch/s390/kernel/compat_linux.c | 1
-rw-r--r--  arch/s390/kernel/debug.c | 2
-rw-r--r--  arch/s390/kernel/dis.c | 10
-rw-r--r--  arch/s390/kernel/dumpstack.c | 1
-rw-r--r--  arch/s390/kernel/entry.S | 15
-rw-r--r--  arch/s390/kernel/ipl.c | 1
-rw-r--r--  arch/s390/kernel/kprobes.c | 15
-rw-r--r--  arch/s390/kernel/lgr.c | 1
-rw-r--r--  arch/s390/kernel/module.c | 15
-rw-r--r--  arch/s390/kernel/nmi.c | 1
-rw-r--r--  arch/s390/kernel/perf_cpum_cf.c | 5
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 5
-rw-r--r--  arch/s390/kernel/perf_event.c | 5
-rw-r--r--  arch/s390/kernel/perf_regs.c | 1
-rw-r--r--  arch/s390/kernel/ptrace.c | 8
-rw-r--r--  arch/s390/kernel/setup.c | 1
-rw-r--r--  arch/s390/kernel/smp.c | 1
-rw-r--r--  arch/s390/kernel/stacktrace.c | 1
-rw-r--r--  arch/s390/kernel/sthyi.c | 5
-rw-r--r--  arch/s390/kernel/syscalls.S | 6
-rw-r--r--  arch/s390/kernel/time.c | 1
-rw-r--r--  arch/s390/kernel/topology.c | 1
-rw-r--r--  arch/s390/kernel/vdso.c | 5
-rw-r--r--  arch/s390/kernel/vdso32/clock_getres.S | 5
-rw-r--r--  arch/s390/kernel/vdso32/clock_gettime.S | 5
-rw-r--r--  arch/s390/kernel/vdso32/gettimeofday.S | 5
-rw-r--r--  arch/s390/kernel/vdso64/clock_getres.S | 5
-rw-r--r--  arch/s390/kernel/vdso64/clock_gettime.S | 5
-rw-r--r--  arch/s390/kernel/vdso64/gettimeofday.S | 5
-rw-r--r--  arch/s390/kernel/vdso64/note.S | 1
-rw-r--r--  arch/s390/kernel/vtime.c | 1
-rw-r--r--  arch/s390/kvm/Makefile | 5
-rw-r--r--  arch/s390/kvm/diag.c | 5
-rw-r--r--  arch/s390/kvm/gaccess.h | 5
-rw-r--r--  arch/s390/kvm/guestdbg.c | 5
-rw-r--r--  arch/s390/kvm/intercept.c | 5
-rw-r--r--  arch/s390/kvm/interrupt.c | 5
-rw-r--r--  arch/s390/kvm/irq.h | 5
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 27
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 5
-rw-r--r--  arch/s390/kvm/priv.c | 18
-rw-r--r--  arch/s390/kvm/sigp.c | 5
-rw-r--r--  arch/s390/kvm/vsie.c | 5
-rw-r--r--  arch/s390/lib/uaccess.c | 2
-rw-r--r--  arch/s390/mm/cmm.c | 1
-rw-r--r--  arch/s390/mm/gmap.c | 1
-rw-r--r--  arch/s390/mm/mmap.c | 16
-rw-r--r--  arch/s390/mm/pgalloc.c | 2
-rw-r--r--  arch/s390/mm/pgtable.c | 1
-rw-r--r--  arch/s390/net/Makefile | 1
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 11
-rw-r--r--  arch/s390/numa/Makefile | 1
-rw-r--r--  arch/s390/pci/Makefile | 1
-rw-r--r--  arch/s390/pci/pci.c | 1
-rw-r--r--  arch/s390/pci/pci_debug.c | 1
-rw-r--r--  arch/s390/pci/pci_dma.c | 22
-rw-r--r--  arch/s390/pci/pci_insn.c | 4
-rw-r--r--  arch/s390/tools/gen_opcode_table.c | 1
-rw-r--r--  arch/score/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/sh/boards/mach-se/770x/setup.c | 24
-rw-r--r--  arch/sh/include/mach-se/mach/se.h | 1
-rw-r--r--  arch/sh/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h | 2
-rw-r--r--  arch/sparc/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/sparc/lib/Makefile | 2
-rw-r--r--  arch/sparc/lib/hweight.S | 4
-rw-r--r--  arch/sparc/mm/fault_32.c | 2
-rw-r--r--  arch/sparc/mm/fault_64.c | 2
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c | 6
-rw-r--r--  arch/tile/include/asm/pgtable.h | 1
-rw-r--r--  arch/tile/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/um/include/asm/Kbuild | 1
-rw-r--r--  arch/um/include/asm/mmu_context.h | 3
-rw-r--r--  arch/um/kernel/trap.c | 2
-rw-r--r--  arch/unicore32/include/asm/mmu_context.h | 5
-rw-r--r--  arch/unicore32/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/unicore32/kernel/traps.c | 1
-rw-r--r--  arch/x86/Kconfig | 18
-rw-r--r--  arch/x86/Kconfig.debug | 1
-rw-r--r--  arch/x86/Makefile | 8
-rw-r--r--  arch/x86/boot/compressed/Makefile | 1
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 16
-rw-r--r--  arch/x86/boot/compressed/misc.c | 16
-rw-r--r--  arch/x86/boot/compressed/pagetable.c | 3
-rw-r--r--  arch/x86/boot/compressed/pgtable_64.c | 28
-rw-r--r--  arch/x86/boot/genimage.sh | 32
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 5
-rw-r--r--  arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3
-rw-r--r--  arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3
-rw-r--r--  arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3
-rw-r--r--  arch/x86/crypto/salsa20_glue.c | 7
-rw-r--r--  arch/x86/entry/calling.h | 147
-rw-r--r--  arch/x86/entry/entry_32.S | 19
-rw-r--r--  arch/x86/entry/entry_64.S | 247
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 32
-rw-r--r--  arch/x86/entry/vdso/vclock_gettime.c | 2
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c | 38
-rw-r--r--  arch/x86/events/intel/bts.c | 18
-rw-r--r--  arch/x86/events/intel/core.c | 5
-rw-r--r--  arch/x86/events/intel/ds.c | 146
-rw-r--r--  arch/x86/events/perf_event.h | 23
-rw-r--r--  arch/x86/include/asm/alternative.h | 4
-rw-r--r--  arch/x86/include/asm/asm-prototypes.h | 25
-rw-r--r--  arch/x86/include/asm/asm.h | 2
-rw-r--r--  arch/x86/include/asm/cpu_entry_area.h | 81
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 2
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 9
-rw-r--r--  arch/x86/include/asm/desc.h | 14
-rw-r--r--  arch/x86/include/asm/disabled-features.h | 8
-rw-r--r--  arch/x86/include/asm/espfix.h | 7
-rw-r--r--  arch/x86/include/asm/fixmap.h | 7
-rw-r--r--  arch/x86/include/asm/hypervisor.h | 25
-rw-r--r--  arch/x86/include/asm/intel_ds.h | 36
-rw-r--r--  arch/x86/include/asm/invpcid.h | 53
-rw-r--r--  arch/x86/include/asm/irqdomain.h | 2
-rw-r--r--  arch/x86/include/asm/irqflags.h | 3
-rw-r--r--  arch/x86/include/asm/kdebug.h | 1
-rw-r--r--  arch/x86/include/asm/kmemcheck.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h | 2
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 19
-rw-r--r--  arch/x86/include/asm/mmu.h | 4
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 113
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 18
-rw-r--r--  arch/x86/include/asm/msr-index.h | 3
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 214
-rw-r--r--  arch/x86/include/asm/paravirt.h | 9
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 1
-rw-r--r--  arch/x86/include/asm/pgalloc.h | 11
-rw-r--r--  arch/x86/include/asm/pgtable.h | 38
-rw-r--r--  arch/x86/include/asm/pgtable_32_types.h | 15
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 92
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 59
-rw-r--r--  arch/x86/include/asm/processor-flags.h | 5
-rw-r--r--  arch/x86/include/asm/processor.h | 82
-rw-r--r--  arch/x86/include/asm/pti.h | 14
-rw-r--r--  arch/x86/include/asm/segment.h | 12
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 3
-rw-r--r--  arch/x86/include/asm/suspend_32.h | 8
-rw-r--r--  arch/x86/include/asm/suspend_64.h | 19
-rw-r--r--  arch/x86/include/asm/switch_to.h | 13
-rw-r--r--  arch/x86/include/asm/thread_info.h | 2
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 347
-rw-r--r--  arch/x86/include/asm/trace/irq_vectors.h | 16
-rw-r--r--  arch/x86/include/asm/traps.h | 1
-rw-r--r--  arch/x86/include/asm/unwind.h | 20
-rw-r--r--  arch/x86/include/asm/vsyscall.h | 1
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h | 5
-rw-r--r--  arch/x86/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/x86/include/uapi/asm/processor-flags.h | 7
-rw-r--r--  arch/x86/kernel/alternative.c | 7
-rw-r--r--  arch/x86/kernel/apic/apic.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 2
-rw-r--r--  arch/x86/kernel/apic/msi.c | 8
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/vector.c | 24
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 10
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 9
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 4
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 35
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 185
-rw-r--r--  arch/x86/kernel/cpu/common.c | 106
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 4
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 26
-rw-r--r--  arch/x86/kernel/doublefault.c | 36
-rw-r--r--  arch/x86/kernel/dumpstack.c | 98
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 6
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 12
-rw-r--r--  arch/x86/kernel/ftrace_32.S | 6
-rw-r--r--  arch/x86/kernel/ftrace_64.S | 8
-rw-r--r--  arch/x86/kernel/head_64.S | 30
-rw-r--r--  arch/x86/kernel/ioport.c | 2
-rw-r--r--  arch/x86/kernel/irq.c | 12
-rw-r--r--  arch/x86/kernel/irq_32.c | 9
-rw-r--r--  arch/x86/kernel/irq_64.c | 4
-rw-r--r--  arch/x86/kernel/ldt.c | 198
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 4
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 21
-rw-r--r--  arch/x86/kernel/process_32.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 14
-rw-r--r--  arch/x86/kernel/setup.c | 5
-rw-r--r--  arch/x86/kernel/smpboot.c | 21
-rw-r--r--  arch/x86/kernel/stacktrace.c | 8
-rw-r--r--  arch/x86/kernel/tboot.c | 11
-rw-r--r--  arch/x86/kernel/tls.c | 11
-rw-r--r--  arch/x86/kernel/traps.c | 77
-rw-r--r--  arch/x86/kernel/unwind_orc.c | 88
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 17
-rw-r--r--  arch/x86/kvm/cpuid.h | 2
-rw-r--r--  arch/x86/kvm/emulate.c | 93
-rw-r--r--  arch/x86/kvm/ioapic.c | 34
-rw-r--r--  arch/x86/kvm/lapic.c | 12
-rw-r--r--  arch/x86/kvm/mmu.c | 27
-rw-r--r--  arch/x86/kvm/svm.c | 25
-rw-r--r--  arch/x86/kvm/vmx.c | 106
-rw-r--r--  arch/x86/kvm/x86.c | 139
-rw-r--r--  arch/x86/lib/Makefile | 1
-rw-r--r--  arch/x86/lib/checksum_32.S | 7
-rw-r--r--  arch/x86/lib/delay.c | 4
-rw-r--r--  arch/x86/lib/retpoline.S | 48
-rw-r--r--  arch/x86/lib/x86-opcode-map.txt | 13
-rw-r--r--  arch/x86/mm/Makefile | 9
-rw-r--r--  arch/x86/mm/cpu_entry_area.c | 166
-rw-r--r--  arch/x86/mm/debug_pagetables.c | 80
-rw-r--r--  arch/x86/mm/dump_pagetables.c | 141
-rw-r--r--  arch/x86/mm/extable.c | 6
-rw-r--r--  arch/x86/mm/fault.c | 4
-rw-r--r--  arch/x86/mm/init.c | 82
-rw-r--r--  arch/x86/mm/init_32.c | 6
-rw-r--r--  arch/x86/mm/ioremap.c | 4
-rw-r--r--  arch/x86/mm/kasan_init_64.c | 23
-rw-r--r--  arch/x86/mm/kaslr.c | 32
-rw-r--r--  arch/x86/mm/kmemcheck/error.c | 1
-rw-r--r--  arch/x86/mm/kmemcheck/error.h | 1
-rw-r--r--  arch/x86/mm/kmemcheck/opcode.c | 1
-rw-r--r--  arch/x86/mm/kmemcheck/opcode.h | 1
-rw-r--r--  arch/x86/mm/kmemcheck/pte.c | 1
-rw-r--r--  arch/x86/mm/kmemcheck/pte.h | 1
-rw-r--r--  arch/x86/mm/kmemcheck/selftest.c | 1
-rw-r--r--  arch/x86/mm/kmemcheck/selftest.h | 1
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.h | 1
-rw-r--r--  arch/x86/mm/kmmio.c | 12
-rw-r--r--  arch/x86/mm/mem_encrypt.c | 4
-rw-r--r--  arch/x86/mm/pgtable.c | 5
-rw-r--r--  arch/x86/mm/pgtable_32.c | 1
-rw-r--r--  arch/x86/mm/pti.c | 368
-rw-r--r--  arch/x86/mm/tlb.c | 64
-rw-r--r--  arch/x86/pci/broadcom_bus.c | 2
-rw-r--r--  arch/x86/pci/common.c | 5
-rw-r--r--  arch/x86/pci/fixup.c | 46
-rw-r--r--  arch/x86/platform/efi/efi_64.c | 7
-rw-r--r--  arch/x86/platform/efi/quirks.c | 13
-rw-r--r--  arch/x86/platform/intel-mid/device_libs/platform_bt.c | 2
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 2
-rw-r--r--  arch/x86/platform/uv/uv_irq.c | 2
-rw-r--r--  arch/x86/platform/uv/uv_nmi.c | 4
-rw-r--r--  arch/x86/power/cpu.c | 112
-rw-r--r--  arch/x86/xen/apic.c | 2
-rw-r--r--  arch/x86/xen/enlighten.c | 81
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 42
-rw-r--r--  arch/x86/xen/mmu_pv.c | 22
-rw-r--r--  arch/x86/xen/setup.c | 6
-rw-r--r--  arch/x86/xen/xen-asm_64.S | 14
-rw-r--r--  arch/x86/xen/xen-ops.h | 2
-rw-r--r--  arch/xtensa/include/uapi/asm/Kbuild | 1
576 files changed, 6674 insertions, 2349 deletions
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index b15bf6bc0e94..14a2e9af97e9 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index 4e6e9f57e790..dc91c663bcc0 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -35,6 +35,14 @@
 			reg = <0x80 0x10>, <0x100 0x10>;
 			#clock-cells = <0>;
 			clocks = <&input_clk>;
+
+			/*
+			 * Set initial core pll output frequency to 90MHz.
+			 * It will be applied at the core pll driver probing
+			 * on early boot.
+			 */
+			assigned-clocks = <&core_clk>;
+			assigned-clock-rates = <90000000>;
 		};
 
 		core_intc: archs-intc@cpu {
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 63954a8b0100..69ff4895f2ba 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -35,6 +35,14 @@
 			reg = <0x80 0x10>, <0x100 0x10>;
 			#clock-cells = <0>;
 			clocks = <&input_clk>;
+
+			/*
+			 * Set initial core pll output frequency to 100MHz.
+			 * It will be applied at the core pll driver probing
+			 * on early boot.
+			 */
+			assigned-clocks = <&core_clk>;
+			assigned-clock-rates = <100000000>;
 		};
 
 		core_intc: archs-intc@cpu {
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 8f627c200d60..006aa3de5348 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -114,6 +114,14 @@
 			reg = <0x00 0x10>, <0x14B8 0x4>;
 			#clock-cells = <0>;
 			clocks = <&input_clk>;
+
+			/*
+			 * Set initial core pll output frequency to 1GHz.
+			 * It will be applied at the core pll driver probing
+			 * on early boot.
+			 */
+			assigned-clocks = <&core_clk>;
+			assigned-clock-rates = <1000000000>;
 		};
 
 		serial: serial@5000 {
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 7b8f8faf8a24..ac6b0ed8341e 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
+CONFIG_DRM=y
+# CONFIG_DRM_FBDEV_EMULATION is not set
+CONFIG_DRM_UDL=y
 CONFIG_FB=y
-CONFIG_FB_UDL=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index f35974ee7264..c9173c02081c 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
 		return 0;
 
 	__asm__ __volatile__(
+	"	mov	lp_count, %5	\n"
 	"	lp	3f		\n"
 	"1:	ldb.ab	%3, [%2, 1]	\n"
 	"	breq.d	%3, 0, 3f	\n"
@@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
684 " .word 1b, 4b \n" 685 " .word 1b, 4b \n"
685 " .previous \n" 686 " .previous \n"
686 : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) 687 : "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
687 : "g"(-EFAULT), "l"(count) 688 : "g"(-EFAULT), "r"(count)
688 : "memory"); 689 : "lp_count", "lp_start", "lp_end", "memory");
689 690
690 return res; 691 return res;
691} 692}
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index fa6d0ff4ff89..170b5db64afe 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 7ef7d9a8ff89..9d27331fe69a 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void)
 	unsigned int exec_ctrl;
 
 	READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-	cpu->extn.dual_enb = exec_ctrl & 1;
+	cpu->extn.dual_enb = !(exec_ctrl & 1);
 
 	/* dual issue always present for this core */
 	cpu->extn.dual = 1;
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index 74315f302971..bf40e06f3fb8 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
  */
 static int __print_sym(unsigned int address, void *unused)
 {
-	__print_symbol("  %s\n", address);
+	printk("  %pS\n", (void *)address);
 	return 0;
 }
 
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index bcd7c9fc5d0f..133a4dae41fe 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
 DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
+DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0)
 
 /*
  * Entry Point for Misaligned Data access Exception, for emulating in software
@@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
  * Thus TRAP_S <n> can be used for specific purpose
  *  -1 used for software breakpointing (gdb)
  *  -2 used by kprobes
+ *  -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as
+ *     -fno-isolate-erroneous-paths-dereference
  */
 void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 {
@@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 		kgdb_trap(regs);
 		break;
 
+	case 5:
+		do_trap5_error(address, regs);
+		break;
 	default:
 		break;
 	}
@@ -155,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs)
 
 	insterror_is_error(address, regs);
 }
+
+/*
+ * abort() call generated by older gcc for __builtin_trap()
+ */
+void abort(void)
+{
+	__asm__ __volatile__("trap_s  5\n");
+}
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 7d8c1d6c2f60..6e9a0a9a6a04 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs)
 	else
 		pr_cont("Bus Error, check PRM\n");
 #endif
+	} else if (vec == ECR_V_TRAP) {
+		if (regs->ecr_param == 5)
+			pr_cont("gcc generated __builtin_trap\n");
 	} else {
 		pr_cont("Check Programmer's Manual\n");
 	}
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index f1ac6790da5f..46544e88492d 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -317,25 +317,23 @@ static void __init axs103_early_init(void)
 	 * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
 	 * of fudging the freq in DT
 	 */
+#define AXS103_QUAD_CORE_CPU_FREQ_HZ	50000000
+
 	unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
 	if (num_cores > 2) {
-		u32 freq = 50, orig;
-		/*
-		 * TODO: use cpu node "cpu-freq" param instead of platform-specific
-		 * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu.
-		 */
+		u32 freq;
 		int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
 		const struct fdt_property *prop;
 
 		prop = fdt_get_property(initial_boot_params, off,
-					"clock-frequency", NULL);
-		orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000;
+					"assigned-clock-rates", NULL);
+		freq = be32_to_cpu(*(u32 *)(prop->data));
 
 		/* Patching .dtb in-place with new core clock value */
-		if (freq != orig ) {
-			freq = cpu_to_be32(freq * 1000000);
+		if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) {
+			freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ);
 			fdt_setprop_inplace(initial_boot_params, off,
-					    "clock-frequency", &freq, sizeof(freq));
+					    "assigned-clock-rates", &freq, sizeof(freq));
 		}
 	}
 #endif
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
index fd0ae5e38639..2958aedb649a 100644
--- a/arch/arc/plat-hsdk/platform.c
+++ b/arch/arc/plat-hsdk/platform.c
@@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu)
 #define CREG_PAE		(CREG_BASE + 0x180)
 #define CREG_PAE_UPDATE		(CREG_BASE + 0x194)
 
-#define CREG_CORE_IF_CLK_DIV	(CREG_BASE + 0x4B8)
-#define CREG_CORE_IF_CLK_DIV_2	0x1
-#define CGU_BASE		ARC_PERIPHERAL_BASE
-#define CGU_PLL_STATUS		(ARC_PERIPHERAL_BASE + 0x4)
-#define CGU_PLL_CTRL		(ARC_PERIPHERAL_BASE + 0x0)
-#define CGU_PLL_STATUS_LOCK	BIT(0)
-#define CGU_PLL_STATUS_ERR	BIT(1)
-#define CGU_PLL_CTRL_1GHZ	0x3A10
-#define HSDK_PLL_LOCK_TIMEOUT	500
-
-#define HSDK_PLL_LOCKED() \
-	!!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK)
-
-#define HSDK_PLL_ERR() \
-	!!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR)
-
-static void __init hsdk_set_cpu_freq_1ghz(void)
-{
-	u32 timeout = HSDK_PLL_LOCK_TIMEOUT;
-
-	/*
-	 * As we set cpu clock which exceeds 500MHz, the divider for the interface
-	 * clock must be programmed to div-by-2.
-	 */
-	iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV);
-
-	/* Set cpu clock to 1GHz */
-	iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL);
-
-	while (!HSDK_PLL_LOCKED() && timeout--)
-		cpu_relax();
-
-	if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR())
-		pr_err("Failed to setup CPU frequency to 1GHz!");
-}
-
 #define SDIO_BASE		(ARC_PERIPHERAL_BASE + 0xA000)
 #define SDIO_UHS_REG_EXT	(SDIO_BASE + 0x108)
 #define SDIO_UHS_REG_EXT_DIV_2	(2 << 30)
@@ -98,12 +62,6 @@ static void __init hsdk_init_early(void)
 	 * minimum possible div-by-2.
 	 */
 	iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT);
-
-	/*
-	 * Setup CPU frequency to 1GHz.
-	 * TODO: remove it after smart hsdk pll driver will be introduced.
-	 */
-	hsdk_set_cpu_freq_1ghz();
 }
 
 static const char *hsdk_compat[] __initconst = {
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 1b81c4e75772..d37f95025807 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -630,6 +630,7 @@
630 reg-names = "phy"; 630 reg-names = "phy";
631 status = "disabled"; 631 status = "disabled";
632 ti,ctrl_mod = <&usb_ctrl_mod>; 632 ti,ctrl_mod = <&usb_ctrl_mod>;
633 #phy-cells = <0>;
633 }; 634 };
634 635
635 usb0: usb@47401000 { 636 usb0: usb@47401000 {
@@ -678,6 +679,7 @@
678 reg-names = "phy"; 679 reg-names = "phy";
679 status = "disabled"; 680 status = "disabled";
680 ti,ctrl_mod = <&usb_ctrl_mod>; 681 ti,ctrl_mod = <&usb_ctrl_mod>;
682 #phy-cells = <0>;
681 }; 683 };
682 684
683 usb1: usb@47401800 { 685 usb1: usb@47401800 {
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index e5b061469bf8..4714a59fd86d 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -927,7 +927,8 @@
 			reg = <0x48038000 0x2000>,
 			      <0x46000000 0x400000>;
 			reg-names = "mpu", "dat";
-			interrupts = <80>, <81>;
+			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
 			interrupt-names = "tx", "rx";
 			status = "disabled";
 			dmas = <&edma 8 2>,
@@ -941,7 +942,8 @@
 			reg = <0x4803C000 0x2000>,
 			      <0x46400000 0x400000>;
 			reg-names = "mpu", "dat";
-			interrupts = <82>, <83>;
+			interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
 			interrupt-names = "tx", "rx";
 			status = "disabled";
 			dmas = <&edma 10 2>,
diff --git a/arch/arm/boot/dts/am437x-cm-t43.dts b/arch/arm/boot/dts/am437x-cm-t43.dts
index 9e92d480576b..3b9a94c274a7 100644
--- a/arch/arm/boot/dts/am437x-cm-t43.dts
+++ b/arch/arm/boot/dts/am437x-cm-t43.dts
@@ -301,8 +301,8 @@
301 status = "okay"; 301 status = "okay";
302 pinctrl-names = "default"; 302 pinctrl-names = "default";
303 pinctrl-0 = <&spi0_pins>; 303 pinctrl-0 = <&spi0_pins>;
304 dmas = <&edma 16 304 dmas = <&edma 16 0
305 &edma 17>; 305 &edma 17 0>;
306 dma-names = "tx0", "rx0"; 306 dma-names = "tx0", "rx0";
307 307
308 flash: w25q64cvzpig@0 { 308 flash: w25q64cvzpig@0 {
diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts
index 25d2d720dc0e..678aa023335d 100644
--- a/arch/arm/boot/dts/armada-385-db-ap.dts
+++ b/arch/arm/boot/dts/armada-385-db-ap.dts
@@ -236,6 +236,7 @@
 	usb3_phy: usb3_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_xhci0_vbus>;
+		#phy-cells = <0>;
 	};
 
 	reg_xhci0_vbus: xhci0-vbus {
diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
index e1f355ffc8f7..434dc9aaa5e4 100644
--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
+++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
@@ -66,6 +66,7 @@
 	usb3_1_phy: usb3_1-phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&usb3_1_vbus>;
+		#phy-cells = <0>;
 	};
 
 	usb3_1_vbus: usb3_1-vbus {
diff --git a/arch/arm/boot/dts/armada-385-synology-ds116.dts b/arch/arm/boot/dts/armada-385-synology-ds116.dts
index 36ad571e76f3..0a3552ebda3b 100644
--- a/arch/arm/boot/dts/armada-385-synology-ds116.dts
+++ b/arch/arm/boot/dts/armada-385-synology-ds116.dts
@@ -191,11 +191,13 @@
 	usb3_0_phy: usb3_0_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_usb3_0_vbus>;
+		#phy-cells = <0>;
 	};
 
 	usb3_1_phy: usb3_1_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_usb3_1_vbus>;
+		#phy-cells = <0>;
 	};
 
 	reg_usb3_0_vbus: usb3-vbus0 {
diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts
index f503955dbd3b..51b4ee6df130 100644
--- a/arch/arm/boot/dts/armada-388-gp.dts
+++ b/arch/arm/boot/dts/armada-388-gp.dts
@@ -276,11 +276,13 @@
 	usb2_1_phy: usb2_1_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_usb2_1_vbus>;
+		#phy-cells = <0>;
 	};
 
 	usb3_phy: usb3_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_usb3_vbus>;
+		#phy-cells = <0>;
 	};
 
 	reg_usb3_vbus: usb3-vbus {
diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi
index 45d815a86d42..de08d9045cb8 100644
--- a/arch/arm/boot/dts/aspeed-g4.dtsi
+++ b/arch/arm/boot/dts/aspeed-g4.dtsi
@@ -219,7 +219,7 @@
219 compatible = "aspeed,ast2400-vuart"; 219 compatible = "aspeed,ast2400-vuart";
220 reg = <0x1e787000 0x40>; 220 reg = <0x1e787000 0x40>;
221 reg-shift = <2>; 221 reg-shift = <2>;
222 interrupts = <10>; 222 interrupts = <8>;
223 clocks = <&clk_uart>; 223 clocks = <&clk_uart>;
224 no-loopback-test; 224 no-loopback-test;
225 status = "disabled"; 225 status = "disabled";
diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts
index 5f29010cdbd8..9b82cc8843e1 100644
--- a/arch/arm/boot/dts/at91-tse850-3.dts
+++ b/arch/arm/boot/dts/at91-tse850-3.dts
@@ -221,6 +221,7 @@
 	jc42@18 {
 		compatible = "nxp,se97b", "jedec,jc-42.4-temp";
 		reg = <0x18>;
+		smbus-timeout-disable;
 	};
 
 	dpot: mcp4651-104@28 {
diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi
index 528b9e3bc1da..dcc55aa84583 100644
--- a/arch/arm/boot/dts/bcm-nsp.dtsi
+++ b/arch/arm/boot/dts/bcm-nsp.dtsi
@@ -85,7 +85,7 @@
 		timer@20200 {
 			compatible = "arm,cortex-a9-global-timer";
 			reg = <0x20200 0x100>;
-			interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+			interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
 			clocks = <&periph_clk>;
 		};
 
@@ -93,7 +93,7 @@
93 compatible = "arm,cortex-a9-twd-timer"; 93 compatible = "arm,cortex-a9-twd-timer";
94 reg = <0x20600 0x20>; 94 reg = <0x20600 0x20>;
95 interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | 95 interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) |
96 IRQ_TYPE_LEVEL_HIGH)>; 96 IRQ_TYPE_EDGE_RISING)>;
97 clocks = <&periph_clk>; 97 clocks = <&periph_clk>;
98 }; 98 };
99 99
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 013431e3d7c3..dcde93c85c2d 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -639,5 +639,6 @@
 
 	usbphy: phy {
 		compatible = "usb-nop-xceiv";
+		#phy-cells = <0>;
 	};
 };
diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts
index 3bc50849d013..b8bde13de90a 100644
--- a/arch/arm/boot/dts/bcm958623hr.dts
+++ b/arch/arm/boot/dts/bcm958623hr.dts
@@ -141,10 +141,6 @@
141 status = "okay"; 141 status = "okay";
142}; 142};
143 143
144&sata {
145 status = "okay";
146};
147
148&qspi { 144&qspi {
149 bspi-sel = <0>; 145 bspi-sel = <0>;
150 flash: m25p80@0 { 146 flash: m25p80@0 {
diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts
index d94d14b3c745..6a44b8021702 100644
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -177,10 +177,6 @@
177 status = "okay"; 177 status = "okay";
178}; 178};
179 179
180&sata {
181 status = "okay";
182};
183
184&srab { 180&srab {
185 compatible = "brcm,bcm58625-srab", "brcm,nsp-srab"; 181 compatible = "brcm,bcm58625-srab", "brcm,nsp-srab";
186 status = "okay"; 182 status = "okay";
diff --git a/arch/arm/boot/dts/da850-lego-ev3.dts b/arch/arm/boot/dts/da850-lego-ev3.dts
index 413dbd5d9f64..81942ae83e1f 100644
--- a/arch/arm/boot/dts/da850-lego-ev3.dts
+++ b/arch/arm/boot/dts/da850-lego-ev3.dts
@@ -178,7 +178,7 @@
 	 */
 	battery {
 		pinctrl-names = "default";
-		pintctrl-0 = <&battery_pins>;
+		pinctrl-0 = <&battery_pins>;
 		compatible = "lego,ev3-battery";
 		io-channels = <&adc 4>, <&adc 3>;
 		io-channel-names = "voltage", "current";
@@ -392,7 +392,7 @@
 	batt_volt_en {
 		gpio-hog;
 		gpios = <6 GPIO_ACTIVE_HIGH>;
-		output-low;
+		output-high;
 	};
 };
 
diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi
index 9708157f5daf..681f5487406e 100644
--- a/arch/arm/boot/dts/dm814x.dtsi
+++ b/arch/arm/boot/dts/dm814x.dtsi
@@ -75,6 +75,7 @@
 				reg = <0x47401300 0x100>;
 				reg-names = "phy";
 				ti,ctrl_mod = <&usb_ctrl_mod>;
+				#phy-cells = <0>;
 			};
 
 			usb0: usb@47401000 {
@@ -385,6 +386,7 @@
 				reg = <0x1b00 0x100>;
 				reg-names = "phy";
 				ti,ctrl_mod = <&usb_ctrl_mod>;
+				#phy-cells = <0>;
 			};
 		};
 
diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts
index b2b95ff205e8..0029ec27819c 100644
--- a/arch/arm/boot/dts/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts
@@ -664,6 +664,10 @@
664 status = "okay"; 664 status = "okay";
665}; 665};
666 666
667&mixer {
668 status = "okay";
669};
670
667/* eMMC flash */ 671/* eMMC flash */
668&mmc_0 { 672&mmc_0 {
669 status = "okay"; 673 status = "okay";
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index 589a67c5f796..84f17f7abb71 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -433,15 +433,6 @@
 				clock-names = "ipg", "per";
 			};
 
-			srtc: srtc@53fa4000 {
-				compatible = "fsl,imx53-rtc", "fsl,imx25-rtc";
-				reg = <0x53fa4000 0x4000>;
-				interrupts = <24>;
-				interrupt-parent = <&tzic>;
-				clocks = <&clks IMX5_CLK_SRTC_GATE>;
-				clock-names = "ipg";
-			};
-
 			iomuxc: iomuxc@53fa8000 {
 				compatible = "fsl,imx53-iomuxc";
 				reg = <0x53fa8000 0x4000>;
diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
index 38faa90007d7..2fa5eb4bd402 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
@@ -72,7 +72,8 @@
 };
 
 &gpmc {
-	ranges = <1 0 0x08000000 0x1000000>;	/* CS1: 16MB for LAN9221 */
+	ranges = <0 0 0x30000000 0x1000000	/* CS0: 16MB for NAND */
+		  1 0 0x2c000000 0x1000000>;	/* CS1: 16MB for LAN9221 */
 
 	ethernet@gpmc {
 		pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index 26cce4d18405..29cb804d10cc 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -33,11 +33,12 @@
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; /* gpio_4 */
+		#phy-cells = <0>;
 	};
 };
 
 &gpmc {
-	ranges = <0 0 0x00000000 0x1000000>;	/* CS0: 16MB for NAND */
+	ranges = <0 0 0x30000000 0x1000000>;	/* CS0: 16MB for NAND */
 
 	nand@0,0 {
 		compatible = "ti,omap2-nand";
@@ -121,7 +122,7 @@
 
 &mmc3 {
 	interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>;
-	pinctrl-0 = <&mmc3_pins>;
+	pinctrl-0 = <&mmc3_pins &wl127x_gpio>;
 	pinctrl-names = "default";
 	vmmc-supply = <&wl12xx_vmmc>;
 	non-removable;
@@ -132,8 +133,8 @@
 	wlcore: wlcore@2 {
 		compatible = "ti,wl1273";
 		reg = <2>;
-		interrupt-parent = <&gpio5>;
-		interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
+		interrupt-parent = <&gpio1>;
+		interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; /* gpio 2 */
 		ref-clock-frequency = <26000000>;
 	};
 };
@@ -157,8 +158,6 @@
 			OMAP3_CORE1_IOPAD(0x2166, PIN_INPUT_PULLUP | MUX_MODE3)	/* sdmmc2_dat5.sdmmc3_dat1 */
 			OMAP3_CORE1_IOPAD(0x2168, PIN_INPUT_PULLUP | MUX_MODE3)	/* sdmmc2_dat6.sdmmc3_dat2 */
 			OMAP3_CORE1_IOPAD(0x216a, PIN_INPUT_PULLUP | MUX_MODE3)	/* sdmmc2_dat6.sdmmc3_dat3 */
-			OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT_PULLUP | MUX_MODE4)	/* mcbsp4_clkx.gpio_152 */
-			OMAP3_CORE1_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4)	/* sys_boot1.gpio_3 */
 			OMAP3_CORE1_IOPAD(0x21d0, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs1.sdmmc3_cmd */
 			OMAP3_CORE1_IOPAD(0x21d2, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs2.sdmmc_clk */
 		>;
@@ -228,6 +227,12 @@
 			OMAP3_WKUP_IOPAD(0x2a0e, PIN_OUTPUT | MUX_MODE4)	/* sys_boot2.gpio_4 */
 		>;
 	};
+	wl127x_gpio: pinmux_wl127x_gpio_pin {
+		pinctrl-single,pins = <
+			OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4)		/* sys_boot0.gpio_2 */
+			OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4)	/* sys_boot1.gpio_3 */
+		>;
+	};
 };
 
 &omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/ls1021a-qds.dts b/arch/arm/boot/dts/ls1021a-qds.dts
index 940875316d0f..67b4de0e3439 100644
--- a/arch/arm/boot/dts/ls1021a-qds.dts
+++ b/arch/arm/boot/dts/ls1021a-qds.dts
@@ -215,7 +215,7 @@
 			reg = <0x2a>;
 			VDDA-supply = <&reg_3p3v>;
 			VDDIO-supply = <&reg_3p3v>;
-			clocks = <&sys_mclk 1>;
+			clocks = <&sys_mclk>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts
index a8b148ad1dd2..44715c8ef756 100644
--- a/arch/arm/boot/dts/ls1021a-twr.dts
+++ b/arch/arm/boot/dts/ls1021a-twr.dts
@@ -187,7 +187,7 @@
 		reg = <0x0a>;
 		VDDA-supply = <&reg_3p3v>;
 		VDDIO-supply = <&reg_3p3v>;
-		clocks = <&sys_mclk 1>;
+		clocks = <&sys_mclk>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi
index 4926133077b3..0d9faf1a51ea 100644
--- a/arch/arm/boot/dts/meson.dtsi
+++ b/arch/arm/boot/dts/meson.dtsi
@@ -85,15 +85,6 @@
 			reg = <0x7c00 0x200>;
 		};
 
-		gpio_intc: interrupt-controller@9880 {
-			compatible = "amlogic,meson-gpio-intc";
-			reg = <0xc1109880 0x10>;
-			interrupt-controller;
-			#interrupt-cells = <2>;
-			amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
-			status = "disabled";
-		};
-
 		hwrng: rng@8100 {
 			compatible = "amlogic,meson-rng";
 			reg = <0x8100 0x8>;
@@ -191,6 +182,15 @@
191 status = "disabled"; 182 status = "disabled";
192 }; 183 };
193 184
185 gpio_intc: interrupt-controller@9880 {
186 compatible = "amlogic,meson-gpio-intc";
187 reg = <0x9880 0x10>;
188 interrupt-controller;
189 #interrupt-cells = <2>;
190 amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
191 status = "disabled";
192 };
193
194 wdt: watchdog@9900 { 194 wdt: watchdog@9900 {
195 compatible = "amlogic,meson6-wdt"; 195 compatible = "amlogic,meson6-wdt";
196 reg = <0x9900 0x8>; 196 reg = <0x9900 0x8>;
diff --git a/arch/arm/boot/dts/nspire.dtsi b/arch/arm/boot/dts/nspire.dtsi
index ec2283b1a638..1a5ae4cd107f 100644
--- a/arch/arm/boot/dts/nspire.dtsi
+++ b/arch/arm/boot/dts/nspire.dtsi
@@ -56,6 +56,7 @@
 
 	usb_phy: usb_phy {
 		compatible = "usb-nop-xceiv";
+		#phy-cells = <0>;
 	};
 
 	vbus_reg: vbus_reg {
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 683b96a8f73e..0349fcc9dc26 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -90,6 +90,7 @@
90 compatible = "usb-nop-xceiv"; 90 compatible = "usb-nop-xceiv";
91 reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */ 91 reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */
92 vcc-supply = <&hsusb2_power>; 92 vcc-supply = <&hsusb2_power>;
93 #phy-cells = <0>;
93 }; 94 };
94 95
95 tfp410: encoder0 { 96 tfp410: encoder0 {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index 4d2eaf843fa9..3ca8991a6c3e 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -64,6 +64,7 @@
64 compatible = "usb-nop-xceiv"; 64 compatible = "usb-nop-xceiv";
65 reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */ 65 reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */
66 vcc-supply = <&hsusb2_power>; 66 vcc-supply = <&hsusb2_power>;
67 #phy-cells = <0>;
67 }; 68 };
68 69
69 sound { 70 sound {
diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
index 31d5ebf38892..ab6003fe5a43 100644
--- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi
+++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
@@ -43,12 +43,14 @@
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&hsusb1_power>;
+		#phy-cells = <0>;
 	};
 
 	/* HS USB Host PHY on PORT 2 */
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	ads7846reg: ads7846-reg {
diff --git a/arch/arm/boot/dts/omap3-evm-common.dtsi b/arch/arm/boot/dts/omap3-evm-common.dtsi
index dbc3f030a16c..ee64191e41ca 100644
--- a/arch/arm/boot/dts/omap3-evm-common.dtsi
+++ b/arch/arm/boot/dts/omap3-evm-common.dtsi
@@ -29,6 +29,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio1 21 GPIO_ACTIVE_LOW>; /* gpio_21 */
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	leds {
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index 4504908c23fe..3dc56fb156b7 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -120,6 +120,7 @@
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio6 14 GPIO_ACTIVE_LOW>;
+		#phy-cells = <0>;
 	};
 
 	tv0: connector {
diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
index 667f96245729..ecbec23af49f 100644
--- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
@@ -58,6 +58,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio1 24 GPIO_ACTIVE_LOW>; /* gpio_24 */
 		vcc-supply = <&hsusb1_power>;
+		#phy-cells = <0>;
 	};
 
 	tfp410: encoder {
diff --git a/arch/arm/boot/dts/omap3-igep0030-common.dtsi b/arch/arm/boot/dts/omap3-igep0030-common.dtsi
index e94d9427450c..443f71707437 100644
--- a/arch/arm/boot/dts/omap3-igep0030-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0030-common.dtsi
@@ -37,6 +37,7 @@
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio2 22 GPIO_ACTIVE_LOW>; /* gpio_54 */
+		#phy-cells = <0>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
index 343a36d8031d..7ada1e93e166 100644
--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
@@ -51,6 +51,7 @@
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
 		vcc-supply = <&reg_vcc3>;
+		#phy-cells = <0>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi
index f25e158e7163..ac141fcd1742 100644
--- a/arch/arm/boot/dts/omap3-overo-base.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-base.dtsi
@@ -51,6 +51,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio6 23 GPIO_ACTIVE_LOW>; /* gpio_183 */
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	/* Regulator to trigger the nPoweron signal of the Wifi module */
diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi
index 53e007abdc71..cd53dc6c0051 100644
--- a/arch/arm/boot/dts/omap3-pandora-common.dtsi
+++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi
@@ -205,6 +205,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio1 16 GPIO_ACTIVE_LOW>; /* GPIO_16 */
 		vcc-supply = <&vaux2>;
+		#phy-cells = <0>;
 	};
 
 	/* HS USB Host VBUS supply
diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi
index 9a601d15247b..6f5bd027b717 100644
--- a/arch/arm/boot/dts/omap3-tao3530.dtsi
+++ b/arch/arm/boot/dts/omap3-tao3530.dtsi
@@ -46,6 +46,7 @@
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio6 2 GPIO_ACTIVE_LOW>; /* gpio_162 */
 		vcc-supply = <&hsusb2_power>;
+		#phy-cells = <0>;
 	};
 
 	sound {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index 90b5c7148feb..bb33935df7b0 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -715,6 +715,7 @@
 			compatible = "ti,ohci-omap3";
 			reg = <0x48064400 0x400>;
 			interrupts = <76>;
+			remote-wakeup-connected;
 		};
 
 		usbhsehci: ehci@48064800 {
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index 8b93d37310f2..24a463f8641f 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -73,6 +73,7 @@
 	/* HS USB Host PHY on PORT 1 */
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
+		#phy-cells = <0>;
 	};
 
 	/* LCD regulator from sw5 source */
diff --git a/arch/arm/boot/dts/omap4-duovero.dtsi b/arch/arm/boot/dts/omap4-duovero.dtsi
index 6e6810c258eb..eb123b24c8e3 100644
--- a/arch/arm/boot/dts/omap4-duovero.dtsi
+++ b/arch/arm/boot/dts/omap4-duovero.dtsi
@@ -43,6 +43,7 @@
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>; /* gpio_62 */
+		#phy-cells = <0>;
 
 		pinctrl-names = "default";
 		pinctrl-0 = <&hsusb1phy_pins>;
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index 22c1eee9b07a..5501d1b4e6cd 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -89,6 +89,7 @@
 	hsusb1_phy: hsusb1_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>; /* gpio_62 */
+		#phy-cells = <0>;
 		vcc-supply = <&hsusb1_power>;
 		clocks = <&auxclk3_ck>;
 		clock-names = "main_clk";
diff --git a/arch/arm/boot/dts/omap4-var-som-om44.dtsi b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
index 6500bfc8d130..10fce28ceb5b 100644
--- a/arch/arm/boot/dts/omap4-var-som-om44.dtsi
+++ b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
@@ -44,6 +44,7 @@
 
 		reset-gpios = <&gpio6 17 GPIO_ACTIVE_LOW>; /* gpio 177 */
 		vcc-supply = <&vbat>;
+		#phy-cells = <0>;
 
 		clocks = <&auxclk3_ck>;
 		clock-names = "main_clk";
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 1dc5a76b3c71..cc1a07a3620f 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -398,7 +398,7 @@
 		elm: elm@48078000 {
 			compatible = "ti,am3352-elm";
 			reg = <0x48078000 0x2000>;
-			interrupts = <4>;
+			interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
 			ti,hwmods = "elm";
 			status = "disabled";
 		};
@@ -1081,14 +1081,13 @@
 		usbhsohci: ohci@4a064800 {
 			compatible = "ti,ohci-omap3";
 			reg = <0x4a064800 0x400>;
-			interrupt-parent = <&gic>;
 			interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+			remote-wakeup-connected;
 		};
 
 		usbhsehci: ehci@4a064c00 {
 			compatible = "ti,ehci-omap";
 			reg = <0x4a064c00 0x400>;
-			interrupt-parent = <&gic>;
 			interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
 		};
 	};
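
The elm hunk above replaces a bare `interrupts = <4>` with the three-cell specifier the GIC interrupt parent expects. A sketch of what the cells mean; the constant values are quoted from the dt-bindings headers, and `elm_hwirq` is an illustrative name rather than kernel code:

/* From include/dt-bindings/interrupt-controller/arm-gic.h and irq.h */
#define GIC_SPI			0
#define GIC_PPI			1
#define IRQ_TYPE_LEVEL_HIGH	4

/* Cell 0 selects the SPI/PPI space, cell 1 is the interrupt number in
 * that space, cell 2 the trigger type. SPIs start at physical INTID
 * 32, so <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH> is hardware interrupt 36. */
static const unsigned int elm_hwirq = 32 + 4;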
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 575ecffb0e9e..1b20838bb9a4 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -73,12 +73,14 @@
 		clocks = <&auxclk1_ck>;
 		clock-names = "main_clk";
 		clock-frequency = <19200000>;
+		#phy-cells = <0>;
 	};
 
 	/* HS USB Host PHY on PORT 3 */
 	hsusb3_phy: hsusb3_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio3 15 GPIO_ACTIVE_LOW>; /* gpio3_79 ETH_NRESET */
+		#phy-cells = <0>;
 	};
 
 	tpd12s015: encoder {
diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts
index 5b172a04b6f1..5e21fb430a65 100644
--- a/arch/arm/boot/dts/omap5-cm-t54.dts
+++ b/arch/arm/boot/dts/omap5-cm-t54.dts
@@ -63,12 +63,14 @@
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio3 12 GPIO_ACTIVE_LOW>; /* gpio3_76 HUB_RESET */
+		#phy-cells = <0>;
 	};
 
 	/* HS USB Host PHY on PORT 3 */
 	hsusb3_phy: hsusb3_phy {
 		compatible = "usb-nop-xceiv";
 		reset-gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; /* gpio3_83 ETH_RESET */
+		#phy-cells = <0>;
 	};
 
 	leds {
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 4cd0005e462f..51a7fb3d7b9a 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -940,6 +940,7 @@
 			compatible = "ti,ohci-omap3";
 			reg = <0x4a064800 0x400>;
 			interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+			remote-wakeup-connected;
 		};
 
 		usbhsehci: ehci@4a064c00 {
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi
index 2f017fee4009..62baabd757b6 100644
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -1201,6 +1201,7 @@
 			clock-names = "extal", "usb_extal";
 			#clock-cells = <2>;
 			#power-domain-cells = <0>;
+			#reset-cells = <1>;
 		};
 
 		prr: chipid@ff000044 {
diff --git a/arch/arm/boot/dts/r8a7792.dtsi b/arch/arm/boot/dts/r8a7792.dtsi
index 131f65b0426e..3d080e07374c 100644
--- a/arch/arm/boot/dts/r8a7792.dtsi
+++ b/arch/arm/boot/dts/r8a7792.dtsi
@@ -829,6 +829,7 @@
 			clock-names = "extal";
 			#clock-cells = <2>;
 			#power-domain-cells = <0>;
+			#reset-cells = <1>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi
index 58eae569b4e0..0cd1035de1a4 100644
--- a/arch/arm/boot/dts/r8a7793.dtsi
+++ b/arch/arm/boot/dts/r8a7793.dtsi
@@ -1088,6 +1088,7 @@
 			clock-names = "extal", "usb_extal";
 			#clock-cells = <2>;
 			#power-domain-cells = <0>;
+			#reset-cells = <1>;
 		};
 
 		rst: reset-controller@e6160000 {
diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi
index 905e50c9b524..5643976c1356 100644
--- a/arch/arm/boot/dts/r8a7794.dtsi
+++ b/arch/arm/boot/dts/r8a7794.dtsi
@@ -1099,6 +1099,7 @@
 			clock-names = "extal", "usb_extal";
 			#clock-cells = <2>;
 			#power-domain-cells = <0>;
+			#reset-cells = <1>;
 		};
 
 		rst: reset-controller@e6160000 {
diff --git a/arch/arm/boot/dts/rk3066a-marsboard.dts b/arch/arm/boot/dts/rk3066a-marsboard.dts
index c6d92c25df42..d23ee6d911ac 100644
--- a/arch/arm/boot/dts/rk3066a-marsboard.dts
+++ b/arch/arm/boot/dts/rk3066a-marsboard.dts
@@ -83,6 +83,10 @@
 	};
 };
 
+&cpu0 {
+	cpu0-supply = <&vdd_arm>;
+};
+
 &i2c1 {
 	status = "okay";
 	clock-frequency = <400000>;
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index cd24894ee5c6..6102e4e7f35c 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -956,7 +956,7 @@
 		iep_mmu: iommu@ff900800 {
 			compatible = "rockchip,iommu";
 			reg = <0x0 0xff900800 0x0 0x40>;
-			interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH 0>;
+			interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
 			interrupt-names = "iep_mmu";
 			#iommu-cells = <0>;
 			status = "disabled";
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index b91300d49a31..5840f5c75c3b 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -502,8 +502,8 @@
 			reg = <0x01c16000 0x1000>;
 			interrupts = <58>;
 			clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
-				 <&ccu 9>,
-				 <&ccu 18>;
+				 <&ccu CLK_PLL_VIDEO0_2X>,
+				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "pll-0", "pll-1";
 			dmas = <&dma SUN4I_DMA_NORMAL 16>,
 			       <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 6ae4d95e230e..316cb8b2945b 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -82,8 +82,8 @@
 			reg = <0x01c16000 0x1000>;
 			interrupts = <58>;
 			clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
-				 <&ccu 9>,
-				 <&ccu 16>;
+				 <&ccu CLK_PLL_VIDEO0_2X>,
+				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "pll-0", "pll-1";
 			dmas = <&dma SUN4I_DMA_NORMAL 16>,
 			       <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index 8bfa12b548e0..72d3fe44ecaf 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -429,8 +429,8 @@
 			interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>,
 				 <&ccu CLK_HDMI_DDC>,
-				 <&ccu 7>,
-				 <&ccu 13>;
+				 <&ccu CLK_PLL_VIDEO0_2X>,
+				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1";
 			resets = <&ccu RST_AHB1_HDMI>;
 			reset-names = "ahb";
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 68dfa82544fc..59655e42e4b0 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -581,8 +581,8 @@
 			reg = <0x01c16000 0x1000>;
 			interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
-				 <&ccu 9>,
-				 <&ccu 18>;
+				 <&ccu CLK_PLL_VIDEO0_2X>,
+				 <&ccu CLK_PLL_VIDEO1_2X>;
 			clock-names = "ahb", "mod", "pll-0", "pll-1";
 			dmas = <&dma SUN4I_DMA_NORMAL 16>,
 			       <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
index 98715538932f..a021ee6da396 100644
--- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
@@ -146,6 +146,7 @@
 	status = "okay";
 
 	axp81x: pmic@3a3 {
+		compatible = "x-powers,axp813";
 		reg = <0x3a3>;
 		interrupt-parent = <&r_intc>;
 		interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
diff --git a/arch/arm/boot/dts/tango4-common.dtsi b/arch/arm/boot/dts/tango4-common.dtsi
index 0ec1b0a317b4..ff72a8efb73d 100644
--- a/arch/arm/boot/dts/tango4-common.dtsi
+++ b/arch/arm/boot/dts/tango4-common.dtsi
@@ -156,7 +156,6 @@
 			reg = <0x6e000 0x400>;
 			ranges = <0 0x6e000 0x400>;
 			interrupt-parent = <&gic>;
-			interrupt-controller;
 			#address-cells = <1>;
 			#size-cells = <1>;
 
diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
index 02a6227c717c..4b8edc8982cf 100644
--- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
+++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
@@ -121,7 +121,7 @@
 				switch0port10: port@10 {
 					reg = <10>;
 					label = "dsa";
-					phy-mode = "xgmii";
+					phy-mode = "xaui";
 					link = <&switch1port10>;
 				};
 			};
@@ -208,7 +208,7 @@
 				switch1port10: port@10 {
 					reg = <10>;
 					label = "dsa";
-					phy-mode = "xgmii";
+					phy-mode = "xaui";
 					link = <&switch0port10>;
 				};
 			};
@@ -359,7 +359,7 @@
 };
 
 &i2c1 {
-	at24mac602@0 {
+	at24mac602@50 {
 		compatible = "atmel,24c02";
 		reg = <0x50>;
 		read-only;
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index c8781450905b..3ab8b3781bfe 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
 #else
 #define VTTBR_X		(5 - KVM_T0SZ)
 #endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  _AC(48, ULL)
 #define VTTBR_VMID_MASK(size)	(_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
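
The dropped VTTBR_BADDR_SHIFT hid an off-by-one: shifting the mask by VTTBR_X - 1 produced bits [38:4] rather than the architected base-address field [39:VTTBR_X]. A standalone before/after sketch in plain C, assuming the VTTBR_X = 5 case from the #else branch above:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned int vttbr_x = 5;	/* assumed, per the #else branch */
	uint64_t old_mask = ((1ULL << (40 - vttbr_x)) - 1) << (vttbr_x - 1);
	uint64_t new_mask = ((1ULL << (40 - vttbr_x)) - 1) << vttbr_x;

	/* old: bits [38:4] -- wrongly includes bit 4 and misses bit 39
	 * new: bits [39:5] -- matches the field the hardware reads */
	printf("old %#llx\nnew %#llx\n",
	       (unsigned long long)old_mask, (unsigned long long)new_mask);
	return 0;
}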
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 242151ea6908..a9f7d3f47134 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -285,6 +285,11 @@ static inline void kvm_arm_init_debug(void) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu,
+					     struct kvm_run *run)
+{
+	return false;
+}
 
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 2a029bceaf2f..1a7a17b2a1ba 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
 }
 #define __HAVE_ARCH_PTE_SPECIAL
 
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		(pmd_isclear((pmd), L_PMD_SECT_RDONLY))
 #define pmd_dirty(pmd)		(pmd_isset((pmd), L_PMD_SECT_DIRTY))
 #define pud_page(pud)		pmd_page(__pmd(pud_val(pud)))
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 4d53de308ee0..4d1cc1847edf 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ generated-y += unistd-oabi.h
 generated-y += unistd-eabi.h
 
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 7f4d80c2db6b..0f07579af472 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -300,7 +300,7 @@
 	mov	r2, sp
 	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
-	tst	r1, #0xcf
+	tst	r1, #PSR_I_BIT | 0x0f
 	bne	1f
 	msr	spsr_cxsf, r1			@ save in spsr_svc
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
@@ -332,7 +332,7 @@
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [sp, #\offset + S_PC]	@ get pc
 	add	sp, sp, #\offset + S_SP
-	tst	r1, #0xcf
+	tst	r1, #PSR_I_BIT | 0x0f
 	bne	1f
 	msr	spsr_cxsf, r1			@ save in spsr_svc
 
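
For reference, the masks in the two hunks above decode as follows (PSR bit values as in arch/arm/include/uapi/asm/ptrace.h). The mechanical effect of the change is that a set F (FIQ-disable) flag in the saved PSR no longer fails the check; only the IRQ-disable bit and the low mode bits do. A compile-time sketch, not kernel code:

#define PSR_I_BIT	0x00000080	/* IRQs disabled */
#define PSR_F_BIT	0x00000040	/* FIQs disabled */

/* old literal: I | F | M[3:0]; new expression: I | M[3:0] */
_Static_assert((PSR_I_BIT | PSR_F_BIT | 0x0f) == 0xcf, "old mask");
_Static_assert((PSR_I_BIT | 0x0f) == 0x8f, "new mask");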
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 5cf04888c581..3e26c6f7a191 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -793,7 +793,6 @@ void abort(void)
 	/* if that doesn't kill us, halt */
 	panic("Oops failed to kill thread");
 }
-EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 1712f132b80d..b83fdc06286a 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -85,7 +85,11 @@
 	.pushsection .text.fixup,"ax"
 	.align	4
 9001:	mov	r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+	ldr	r5, [sp, #9*4]		@ *err_ptr
+#else
 	ldr	r5, [sp, #8*4]		@ *err_ptr
+#endif
 	str	r4, [r5]
 	ldmia	sp, {r1, r2}		@ retrieve dst, len
 	add	r2, r2, r1
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index 8be04ec95adf..5ace9380626a 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = {
 	{ "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) },
 	{ "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) },
 	{ "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) },
-	{ "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
-	{ "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
-	{ "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
-	{ "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
+	{ "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
+	{ "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
+	{ "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
+	{ "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
 };
 
 static struct edma_soc_info dm365_edma_pdata = {
@@ -925,12 +925,14 @@ static struct resource edma_resources[] = {
 	/* not using TC*_ERR */
 };
 
-static struct platform_device dm365_edma_device = {
+static const struct platform_device_info dm365_edma_device __initconst = {
 	.name			= "edma",
 	.id			= 0,
-	.dev.platform_data	= &dm365_edma_pdata,
-	.num_resources		= ARRAY_SIZE(edma_resources),
-	.resource		= edma_resources,
+	.dma_mask		= DMA_BIT_MASK(32),
+	.res			= edma_resources,
+	.num_res		= ARRAY_SIZE(edma_resources),
+	.data			= &dm365_edma_pdata,
+	.size_data		= sizeof(dm365_edma_pdata),
 };
 
 static struct resource dm365_asp_resources[] = {
@@ -1428,13 +1430,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg,
 
 static int __init dm365_init_devices(void)
 {
+	struct platform_device *edma_pdev;
 	int ret = 0;
 
 	if (!cpu_is_davinci_dm365())
 		return 0;
 
 	davinci_cfg_reg(DM365_INT_EDMA_CC);
-	platform_device_register(&dm365_edma_device);
+	edma_pdev = platform_device_register_full(&dm365_edma_device);
+	if (IS_ERR(edma_pdev)) {
+		pr_warn("%s: Failed to register eDMA\n", __func__);
+		return PTR_ERR(edma_pdev);
+	}
 
 	platform_device_register(&dm365_mdio_device);
 	platform_device_register(&dm365_emac_device);
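
The dm365 hunks above trade a static struct platform_device for a platform_device_info handed to platform_device_register_full(), which allocates the device, copies the resources and platform data, and installs dev.dma_mask from the .dma_mask field. A minimal sketch of the same pattern; the "demo" name and init hook are made up:

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/platform_device.h>

static const struct platform_device_info demo_dev_info __initconst = {
	.name		= "demo",
	.id		= -1,
	.dma_mask	= DMA_BIT_MASK(32),	/* register_full sets dev.dma_mask */
};

static int __init demo_init(void)
{
	struct platform_device *pdev;

	/* returns ERR_PTR() on failure, never NULL */
	pdev = platform_device_register_full(&demo_dev_info);
	return PTR_ERR_OR_ZERO(pdev);
}
device_initcall(demo_init);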
diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c
index 2555f9056a33..cad7ee8f0d6b 100644
--- a/arch/arm/mach-meson/platsmp.c
+++ b/arch/arm/mach-meson/platsmp.c
@@ -102,7 +102,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible,
 
 	scu_base = of_iomap(node, 0);
 	if (!scu_base) {
-		pr_err("Couln't map SCU registers\n");
+		pr_err("Couldn't map SCU registers\n");
 		return;
 	}
 
diff --git a/arch/arm/mach-omap2/cm_common.c b/arch/arm/mach-omap2/cm_common.c
index d555791cf349..83c6fa74cc31 100644
--- a/arch/arm/mach-omap2/cm_common.c
+++ b/arch/arm/mach-omap2/cm_common.c
@@ -68,14 +68,17 @@ void __init omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2)
 int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
 			u8 *idlest_reg_id)
 {
+	int ret;
 	if (!cm_ll_data->split_idlest_reg) {
 		WARN_ONCE(1, "cm: %s: no low-level function defined\n",
 			  __func__);
 		return -EINVAL;
 	}
 
-	return cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst,
-					   idlest_reg_id);
+	ret = cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst,
+					   idlest_reg_id);
+	*prcm_inst -= cm_base.offset;
+	return ret;
 }
 
 /**
@@ -337,6 +340,7 @@ int __init omap2_cm_base_init(void)
 	if (mem) {
 		mem->pa = res.start + data->offset;
 		mem->va = data->mem + data->offset;
+		mem->offset = data->offset;
 	}
 
 	data->np = np;
diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c
index 5ac122e88f67..fa7f308c9027 100644
--- a/arch/arm/mach-omap2/omap-secure.c
+++ b/arch/arm/mach-omap2/omap-secure.c
@@ -73,6 +73,27 @@ phys_addr_t omap_secure_ram_mempool_base(void)
 	return omap_secure_memblock_base;
 }
 
+#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM)
+u32 omap3_save_secure_ram(void __iomem *addr, int size)
+{
+	u32 ret;
+	u32 param[5];
+
+	if (size != OMAP3_SAVE_SECURE_RAM_SZ)
+		return OMAP3_SAVE_SECURE_RAM_SZ;
+
+	param[0] = 4;		/* Number of arguments */
+	param[1] = __pa(addr);	/* Physical address for saving */
+	param[2] = 0;
+	param[3] = 1;
+	param[4] = 1;
+
+	ret = save_secure_ram_context(__pa(param));
+
+	return ret;
+}
+#endif
+
 /**
  * rx51_secure_dispatcher: Routine to dispatch secure PPA API calls
  * @idx: The PPA API index
diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h
index bae263fba640..c509cde71f93 100644
--- a/arch/arm/mach-omap2/omap-secure.h
+++ b/arch/arm/mach-omap2/omap-secure.h
@@ -31,6 +31,8 @@
 /* Maximum Secure memory storage size */
 #define OMAP_SECURE_RAM_STORAGE	(88 * SZ_1K)
 
+#define OMAP3_SAVE_SECURE_RAM_SZ	0x803F
+
 /* Secure low power HAL API index */
 #define OMAP4_HAL_SAVESECURERAM_INDEX	0x1a
 #define OMAP4_HAL_SAVEHW_INDEX		0x1b
@@ -65,6 +67,8 @@ extern u32 omap_smc2(u32 id, u32 falg, u32 pargs);
 extern u32 omap_smc3(u32 id, u32 process, u32 flag, u32 pargs);
 extern phys_addr_t omap_secure_ram_mempool_base(void);
 extern int omap_secure_ram_reserve_memblock(void);
+extern u32 save_secure_ram_context(u32 args_pa);
+extern u32 omap3_save_secure_ram(void __iomem *save_regs, int size);
 
 extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs,
 				  u32 arg1, u32 arg2, u32 arg3, u32 arg4);
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index d45cbfdb4be6..f0388058b7da 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -391,10 +391,8 @@ omap_device_copy_resources(struct omap_hwmod *oh,
 	const char *name;
 	int error, irq = 0;
 
-	if (!oh || !oh->od || !oh->od->pdev) {
-		error = -EINVAL;
-		goto error;
-	}
+	if (!oh || !oh->od || !oh->od->pdev)
+		return -EINVAL;
 
 	np = oh->od->pdev->dev.of_node;
 	if (!np) {
@@ -516,8 +514,10 @@ struct platform_device __init *omap_device_build(const char *pdev_name,
 		goto odbs_exit1;
 
 	od = omap_device_alloc(pdev, &oh, 1);
-	if (IS_ERR(od))
+	if (IS_ERR(od)) {
+		ret = PTR_ERR(od);
 		goto odbs_exit1;
+	}
 
 	ret = platform_device_add_data(pdev, pdata, pdata_len);
 	if (ret)
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index d2106ae4410a..52c9d585b44d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -1646,6 +1646,7 @@ static struct omap_hwmod omap3xxx_mmc3_hwmod = {
 	.main_clk	= "mmchs3_fck",
 	.prcm = {
 		.omap2 = {
+			.module_offs = CORE_MOD,
 			.prcm_reg_id = 1,
 			.module_bit = OMAP3430_EN_MMC3_SHIFT,
 			.idlest_reg_id = 1,
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index b668719b9b25..8e30772cfe32 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -81,10 +81,6 @@ extern unsigned int omap3_do_wfi_sz;
 /* ... and its pointer from SRAM after copy */
 extern void (*omap3_do_wfi_sram)(void);
 
-/* save_secure_ram_context function pointer and size, for copy to SRAM */
-extern int save_secure_ram_context(u32 *addr);
-extern unsigned int save_secure_ram_context_sz;
-
 extern void omap3_save_scratchpad_contents(void);
 
 #define PM_RTA_ERRATUM_i608		(1 << 0)
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 841ba19d64a6..36c55547137c 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -48,6 +48,7 @@
 #include "prm3xxx.h"
 #include "pm.h"
 #include "sdrc.h"
+#include "omap-secure.h"
 #include "sram.h"
 #include "control.h"
 #include "vc.h"
@@ -66,7 +67,6 @@ struct power_state {
 
 static LIST_HEAD(pwrst_list);
 
-static int (*_omap_save_secure_sram)(u32 *addr);
 void (*omap3_do_wfi_sram)(void);
 
 static struct powerdomain *mpu_pwrdm, *neon_pwrdm;
@@ -121,8 +121,8 @@ static void omap3_save_secure_ram_context(void)
 	 * will hang the system.
 	 */
 	pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
-	ret = _omap_save_secure_sram((u32 *)(unsigned long)
-				     __pa(omap3_secure_ram_storage));
+	ret = omap3_save_secure_ram(omap3_secure_ram_storage,
+				    OMAP3_SAVE_SECURE_RAM_SZ);
 	pwrdm_set_next_pwrst(mpu_pwrdm, mpu_next_state);
 	/* Following is for error tracking, it should not happen */
 	if (ret) {
@@ -434,15 +434,10 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *unused)
  *
  * The minimum set of functions is pushed to SRAM for execution:
  * - omap3_do_wfi for erratum i581 WA,
- * - save_secure_ram_context for security extensions.
  */
 void omap_push_sram_idle(void)
 {
 	omap3_do_wfi_sram = omap_sram_push(omap3_do_wfi, omap3_do_wfi_sz);
-
-	if (omap_type() != OMAP2_DEVICE_TYPE_GP)
-		_omap_save_secure_sram = omap_sram_push(save_secure_ram_context,
-							save_secure_ram_context_sz);
 }
 
 static void __init pm_errata_configure(void)
@@ -553,7 +548,7 @@ int __init omap3_pm_init(void)
 	clkdm_add_wkdep(neon_clkdm, mpu_clkdm);
 	if (omap_type() != OMAP2_DEVICE_TYPE_GP) {
 		omap3_secure_ram_storage =
-			kmalloc(0x803F, GFP_KERNEL);
+			kmalloc(OMAP3_SAVE_SECURE_RAM_SZ, GFP_KERNEL);
 		if (!omap3_secure_ram_storage)
 			pr_err("Memory allocation failed when allocating for secure sram context\n");
 
diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h
index 0592b23902c6..0977da0dab76 100644
--- a/arch/arm/mach-omap2/prcm-common.h
+++ b/arch/arm/mach-omap2/prcm-common.h
@@ -528,6 +528,7 @@ struct omap_prcm_irq_setup {
 struct omap_domain_base {
 	u32 pa;
 	void __iomem *va;
+	s16 offset;
 };
 
 /**
diff --git a/arch/arm/mach-omap2/prm33xx.c b/arch/arm/mach-omap2/prm33xx.c
index d2c5bcabdbeb..ebaf80d72a10 100644
--- a/arch/arm/mach-omap2/prm33xx.c
+++ b/arch/arm/mach-omap2/prm33xx.c
@@ -176,17 +176,6 @@ static int am33xx_pwrdm_read_pwrst(struct powerdomain *pwrdm)
 	return v;
 }
 
-static int am33xx_pwrdm_read_prev_pwrst(struct powerdomain *pwrdm)
-{
-	u32 v;
-
-	v = am33xx_prm_read_reg(pwrdm->prcm_offs, pwrdm->pwrstst_offs);
-	v &= AM33XX_LASTPOWERSTATEENTERED_MASK;
-	v >>= AM33XX_LASTPOWERSTATEENTERED_SHIFT;
-
-	return v;
-}
-
 static int am33xx_pwrdm_set_lowpwrstchange(struct powerdomain *pwrdm)
 {
 	am33xx_prm_rmw_reg_bits(AM33XX_LOWPOWERSTATECHANGE_MASK,
@@ -357,7 +346,6 @@ struct pwrdm_ops am33xx_pwrdm_operations = {
 	.pwrdm_set_next_pwrst		= am33xx_pwrdm_set_next_pwrst,
 	.pwrdm_read_next_pwrst		= am33xx_pwrdm_read_next_pwrst,
 	.pwrdm_read_pwrst		= am33xx_pwrdm_read_pwrst,
-	.pwrdm_read_prev_pwrst		= am33xx_pwrdm_read_prev_pwrst,
 	.pwrdm_set_logic_retst		= am33xx_pwrdm_set_logic_retst,
 	.pwrdm_read_logic_pwrst		= am33xx_pwrdm_read_logic_pwrst,
 	.pwrdm_read_logic_retst		= am33xx_pwrdm_read_logic_retst,
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index fa5fd24f524c..22daf4efed68 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -93,20 +93,13 @@ ENTRY(enable_omap3630_toggle_l2_on_restore)
93ENDPROC(enable_omap3630_toggle_l2_on_restore) 93ENDPROC(enable_omap3630_toggle_l2_on_restore)
94 94
95/* 95/*
96 * Function to call rom code to save secure ram context. This gets 96 * Function to call rom code to save secure ram context.
97 * relocated to SRAM, so it can be all in .data section. Otherwise 97 *
98 * we need to initialize api_params separately. 98 * r0 = physical address of the parameters
99 */ 99 */
100 .data
101 .align 3
102ENTRY(save_secure_ram_context) 100ENTRY(save_secure_ram_context)
103 stmfd sp!, {r4 - r11, lr} @ save registers on stack 101 stmfd sp!, {r4 - r11, lr} @ save registers on stack
104 adr r3, api_params @ r3 points to parameters 102 mov r3, r0 @ physical address of parameters
105 str r0, [r3,#0x4] @ r0 has sdram address
106 ldr r12, high_mask
107 and r3, r3, r12
108 ldr r12, sram_phy_addr_mask
109 orr r3, r3, r12
110 mov r0, #25 @ set service ID for PPA 103 mov r0, #25 @ set service ID for PPA
111 mov r12, r0 @ copy secure service ID in r12 104 mov r12, r0 @ copy secure service ID in r12
112 mov r1, #0 @ set task id for ROM code in r1 105 mov r1, #0 @ set task id for ROM code in r1
@@ -120,18 +113,7 @@ ENTRY(save_secure_ram_context)
 	nop
 	nop
 	ldmfd	sp!, {r4 - r11, pc}
-	.align
-sram_phy_addr_mask:
-	.word	SRAM_BASE_P
-high_mask:
-	.word	0xffff
-api_params:
-	.word	0x4, 0x0, 0x0, 0x1, 0x1
 ENDPROC(save_secure_ram_context)
-ENTRY(save_secure_ram_context_sz)
-	.word	. - save_secure_ram_context
-
-	.text
 
 /*
  * ======================
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a93339f5178f..c9a7e9e1414f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065
 
 	  If unsure, say Y.
 
-
 config SOCIONEXT_SYNQUACER_PREITS
 	bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
 	default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
 	  a 128kB offset to be applied to the target address in this commands.
 
 	  If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_E1041
+	bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
+	default y
+	help
+	  Falkor CPU may speculatively fetch instructions from an improper
+	  memory location when MMU translation is changed from SCTLR_ELn[M]=1
+	  to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem.
+
+	  If unsure, say Y.
+
 endmenu
 
 
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b35788c909f1..b481b4a7c011 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -83,9 +83,6 @@ endif
 
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
-ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
-KBUILD_LDFLAGS_MODULE	+= $(objtree)/arch/arm64/kernel/ftrace-mod.o
-endif
 endif
 
 # Default value
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index d7c22d51bc50..4aa50b9b26bc 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -12,6 +12,7 @@ subdir-y += cavium
 subdir-y += exynos
 subdir-y += freescale
 subdir-y += hisilicon
+subdir-y += lg
 subdir-y += marvell
 subdir-y += mediatek
 subdir-y += nvidia
@@ -22,5 +23,4 @@ subdir-y += rockchip
 subdir-y += socionext
 subdir-y += sprd
 subdir-y += xilinx
-subdir-y += lg
 subdir-y += zte
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
index 45bdbfb96126..4a8d3f83a36e 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
@@ -75,6 +75,7 @@
 	pinctrl-0 = <&rgmii_pins>;
 	phy-mode = "rgmii";
 	phy-handle = <&ext_rgmii_phy>;
+	phy-supply = <&reg_dc1sw>;
 	status = "okay";
 };
 
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
index 806442d3e846..604cdaedac38 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
@@ -77,6 +77,7 @@
 	pinctrl-0 = <&rmii_pins>;
 	phy-mode = "rmii";
 	phy-handle = <&ext_rmii_phy1>;
+	phy-supply = <&reg_dc1sw>;
 	status = "okay";
 
 };
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
index 0eb2acedf8c3..abe179de35d7 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
@@ -82,6 +82,7 @@
 	pinctrl-0 = <&rgmii_pins>;
 	phy-mode = "rgmii";
 	phy-handle = <&ext_rgmii_phy>;
+	phy-supply = <&reg_dc1sw>;
 	status = "okay";
 };
 
@@ -95,7 +96,7 @@
 &mmc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc2_pins>;
-	vmmc-supply = <&reg_vcc3v3>;
+	vmmc-supply = <&reg_dcdc1>;
 	vqmmc-supply = <&reg_vcc1v8>;
 	bus-width = <8>;
 	non-removable;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
index a5da18a6f286..43418bd881d8 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
@@ -45,19 +45,10 @@
 
 #include "sun50i-a64.dtsi"
 
-/ {
-	reg_vcc3v3: vcc3v3 {
-		compatible = "regulator-fixed";
-		regulator-name = "vcc3v3";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-	};
-};
-
 &mmc0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc0_pins>;
-	vmmc-supply = <&reg_vcc3v3>;
+	vmmc-supply = <&reg_dcdc1>;
 	non-removable;
 	disable-wp;
 	bus-width = <4>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
index b6b7a561df8c..a42fd79a62a3 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
@@ -71,7 +71,7 @@
 	pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
 	vmmc-supply = <&reg_vcc3v3>;
 	bus-width = <4>;
-	cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>;
+	cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
 
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
index ead895a4e9a5..1fb8b9d6cb4e 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
@@ -753,12 +753,12 @@
 
 &uart_B {
 	clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &uart_C {
 	clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &vpu {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index 8ed981f59e5a..6524b89e7115 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -688,7 +688,7 @@
 
 &uart_A {
 	clocks = <&xtal>, <&clkc CLKID_UART0>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &uart_AO {
@@ -703,12 +703,12 @@
 
 &uart_B {
 	clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &uart_C {
 	clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>;
-	clock-names = "xtal", "core", "baud";
+	clock-names = "xtal", "pclk", "baud";
 };
 
 &vpu {
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index a298df74ca6c..dbe2648649db 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -255,7 +255,6 @@
 &avb {
 	pinctrl-0 = <&avb_pins>;
 	pinctrl-names = "default";
-	renesas,no-ether-link;
 	phy-handle = <&phy0>;
 	status = "okay";
 
diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi
index 0d85b315ce71..73439cf48659 100644
--- a/arch/arm64/boot/dts/renesas/ulcb.dtsi
+++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi
@@ -145,7 +145,6 @@
 &avb {
 	pinctrl-0 = <&avb_pins>;
 	pinctrl-names = "default";
-	renesas,no-ether-link;
 	phy-handle = <&phy0>;
 	status = "okay";
 
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index d4f80786e7c2..3890468678ce 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -132,6 +132,8 @@
 	assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
 	assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
 	clock_in_out = "input";
+	/* shows instability at 1GBit right now */
+	max-speed = <100>;
 	phy-supply = <&vcc_io>;
 	phy-mode = "rgmii";
 	pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 41d61840fb99..2426da631938 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -514,7 +514,7 @@
 		tsadc: tsadc@ff250000 {
 			compatible = "rockchip,rk3328-tsadc";
 			reg = <0x0 0xff250000 0x0 0x100>;
-			interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>;
+			interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
 			assigned-clocks = <&cru SCLK_TSADC>;
 			assigned-clock-rates = <50000>;
 			clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
index 910628d18add..1fc5060d7027 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
@@ -155,17 +155,6 @@
 		regulator-min-microvolt = <5000000>;
 		regulator-max-microvolt = <5000000>;
 	};
-
-	vdd_log: vdd-log {
-		compatible = "pwm-regulator";
-		pwms = <&pwm2 0 25000 0>;
-		regulator-name = "vdd_log";
-		regulator-min-microvolt = <800000>;
-		regulator-max-microvolt = <1400000>;
-		regulator-always-on;
-		regulator-boot-on;
-		status = "okay";
-	};
 };
 
 &cpu_b0 {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
index dd7193acc7df..6bdefb26b329 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
@@ -40,7 +40,6 @@
 };
 
 &ethsc {
-	interrupt-parent = <&gpio>;
 	interrupts = <0 8>;
 };
 
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
index d99e3731358c..254d6795c67e 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
@@ -40,7 +40,6 @@
 };
 
 &ethsc {
-	interrupt-parent = <&gpio>;
 	interrupts = <0 8>;
 };
 
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
index 864feeb35180..f9f06fcfb94a 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
@@ -38,8 +38,7 @@
 };
 
 &ethsc {
-	interrupt-parent = <&gpio>;
-	interrupts = <0 8>;
+	interrupts = <4 8>;
 };
 
 &serial0 {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
index 48e733136db4..0ac2ace82435 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -198,8 +198,8 @@
 			gpio-controller;
 			#gpio-cells = <2>;
 			gpio-ranges = <&pinctrl 0 0 0>,
-				      <&pinctrl 96 0 0>,
-				      <&pinctrl 160 0 0>;
+				      <&pinctrl 104 0 0>,
+				      <&pinctrl 168 0 0>;
 			gpio-ranges-group-names = "gpio_range0",
 						  "gpio_range1",
 						  "gpio_range2";
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index aef72d886677..8b168280976f 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -512,4 +512,14 @@ alternative_else_nop_endif
 #endif
 	.endm
 
+/**
+ * Errata workaround prior to disable MMU. Insert an ISB immediately prior
+ * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */
+	.macro	pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+	isb
+#endif
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 76d1cc85d5b1..955130762a3c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -38,7 +38,7 @@
  *
  * See Documentation/cachetlb.txt for more information. Please note that
  * the implementation assumes non-aliasing VIPT D-cache and (aliasing)
- * VIPT or ASID-tagged VIVT I-cache.
+ * VIPT I-cache.
  *
  *	flush_cache_mm(mm)
  *
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ac67cfc2585a..060e3a4008ab 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -60,6 +60,9 @@ enum ftr_type {
 #define FTR_VISIBLE	true	/* Feature visible to the user space */
 #define FTR_HIDDEN	false	/* Feature is hidden from the user */
 
+#define FTR_VISIBLE_IF_IS_ENABLED(config)		\
+	(IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
 struct arm64_ftr_bits {
 	bool		sign;	/* Value is signed ? */
 	bool		visible;
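
FTR_VISIBLE_IF_IS_ENABLED() simply collapses to one of the two constants at compile time. A freestanding C sketch with a stubbed IS_ENABLED() — the stub is an assumption for illustration; the real kernel macro inspects kconfig symbols:

#include <stdbool.h>
#include <stdio.h>

#define FTR_VISIBLE true
#define FTR_HIDDEN  false
#define IS_ENABLED(x) (x)	/* stand-in for the kconfig macro */
#define FTR_VISIBLE_IF_IS_ENABLED(config) \
	(IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)

int main(void)
{
	/* prints "1 0": visible when the option is enabled, hidden otherwise */
	printf("%d %d\n", FTR_VISIBLE_IF_IS_ENABLED(1),
	       FTR_VISIBLE_IF_IS_ENABLED(0));
	return 0;
}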
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 235e77d98261..cbf08d7cbf30 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -91,6 +91,7 @@
 #define BRCM_CPU_PART_VULCAN		0x516
 
 #define QCOM_CPU_PART_FALKOR_V1		0x800
+#define QCOM_CPU_PART_FALKOR		0xC00
 
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -99,6 +100,7 @@
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
 #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
+#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 650344d01124..c4cd5081d78b 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -132,11 +132,9 @@ static inline void efi_set_pgd(struct mm_struct *mm)
 		 * Defer the switch to the current thread's TTBR0_EL1
 		 * until uaccess_enable(). Restore the current
 		 * thread's saved ttbr0 corresponding to its active_mm
-		 * (if different from init_mm).
 		 */
 		cpu_set_reserved_ttbr0();
-		if (current->active_mm != &init_mm)
-			update_saved_ttbr0(current, current->active_mm);
+		update_saved_ttbr0(current, current->active_mm);
 	}
 }
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7f069ff37f06..715d395ef45b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -170,8 +170,7 @@
 #define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
 #define VTTBR_X				(VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
 
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  (UL(48))
 #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
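
The removed VTTBR_BADDR_SHIFT was off by one: shifting the mask by VTTBR_X - 1 misplaces the base-address field. A minimal standalone C sketch of the before/after mask computation — the PHYS_MASK_SHIFT and VTTBR_X values below are illustrative assumptions, not taken from any particular kernel configuration:

#include <stdio.h>
#include <stdint.h>

#define PHYS_MASK_SHIFT 48
#define VTTBR_X         37	/* assumed: depends on T0SZ and granule size */

int main(void)
{
	/* buggy: mask starts one bit too low, clipping the top baddr bit
	 * and letting bit (VTTBR_X - 1) leak through */
	uint64_t old_mask = ((1ULL << (PHYS_MASK_SHIFT - VTTBR_X)) - 1)
				<< (VTTBR_X - 1);
	/* fixed: mask covers exactly bits [PHYS_MASK_SHIFT-1 : VTTBR_X] */
	uint64_t new_mask = ((1ULL << (PHYS_MASK_SHIFT - VTTBR_X)) - 1)
				<< VTTBR_X;

	printf("old: %#018llx\nnew: %#018llx\n",
	       (unsigned long long)old_mask, (unsigned long long)new_mask);
	return 0;
}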
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 674912d7a571..ea6cb5b24258 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -370,6 +370,7 @@ void kvm_arm_init_debug(void);
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 3257895a9b5e..9d155fa9a507 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -156,29 +156,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
 
 #define init_new_context(tsk,mm)	({ atomic64_set(&(mm)->context.id, 0); 0; })
 
-/*
- * This is called when "tsk" is about to enter lazy TLB mode.
- *
- * mm:  describes the currently active mm context
- * tsk: task which is entering lazy tlb
- * cpu: cpu number which is entering lazy tlb
- *
- * tsk->mm will be NULL
- */
-static inline void
-enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 static inline void update_saved_ttbr0(struct task_struct *tsk,
 				      struct mm_struct *mm)
 {
-	if (system_uses_ttbr0_pan()) {
-		BUG_ON(mm->pgd == swapper_pg_dir);
-		task_thread_info(tsk)->ttbr0 =
-			virt_to_phys(mm->pgd) | ASID(mm) << 48;
-	}
+	u64 ttbr;
+
+	if (!system_uses_ttbr0_pan())
+		return;
+
+	if (mm == &init_mm)
+		ttbr = __pa_symbol(empty_zero_page);
+	else
+		ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+
+	task_thread_info(tsk)->ttbr0 = ttbr;
 }
 #else
 static inline void update_saved_ttbr0(struct task_struct *tsk,
@@ -187,6 +179,16 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
 }
 #endif
 
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+	/*
+	 * We don't actually care about the ttbr0 mapping, so point it at the
+	 * zero page.
+	 */
+	update_saved_ttbr0(tsk, &init_mm);
+}
+
 static inline void __switch_mm(struct mm_struct *next)
 {
 	unsigned int cpu = smp_processor_id();
@@ -214,11 +216,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
 	 * value may have not been initialised yet (activate_mm caller) or the
 	 * ASID has changed since the last run (following the context switch
-	 * of another thread of the same process). Avoid setting the reserved
-	 * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit).
+	 * of another thread of the same process).
 	 */
-	if (next != &init_mm)
-		update_saved_ttbr0(tsk, next);
+	update_saved_ttbr0(tsk, next);
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
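
The saved TTBR0 value is just the pgd's physical address with the ASID in bits 63:48, as the hunk above shows. A sketch of that composition with made-up example values (the pgd address and ASID are assumptions for illustration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pgd_phys = 0x40000000ULL;	/* assumed pgd physical address */
	uint64_t asid     = 0x2aULL;		/* assumed ASID */
	uint64_t ttbr0    = pgd_phys | (asid << 48);	/* ASID lives in bits 63:48 */

	printf("ttbr0 = %#018llx\n", (unsigned long long)ttbr0);
	return 0;
}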
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 19bd97671bb8..4f766178fa6f 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -32,7 +32,7 @@ struct mod_arch_specific {
 	struct mod_plt_sec	init;
 
 	/* for CONFIG_DYNAMIC_FTRACE */
-	void			*ftrace_trampoline;
+	struct plt_entry	*ftrace_trampoline;
 };
 #endif
 
@@ -45,4 +45,48 @@ extern u64 module_alloc_base;
 #define module_alloc_base	((u64)_etext - MODULES_VSIZE)
 #endif
 
+struct plt_entry {
+	/*
+	 * A program that conforms to the AArch64 Procedure Call Standard
+	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+	 * IP1 (x17) may be inserted at any branch instruction that is
+	 * exposed to a relocation that supports long branches. Since that
+	 * is exactly what we are dealing with here, we are free to use x16
+	 * as a scratch register in the PLT veneers.
+	 */
+	__le32	mov0;	/* movn	x16, #0x....			*/
+	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
+	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
+	__le32	br;	/* br	x16				*/
+};
+
+static inline struct plt_entry get_plt_entry(u64 val)
+{
+	/*
+	 * MOVK/MOVN/MOVZ opcode:
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 *
+	 * Rd  := 0x10 (x16)
+	 * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+	 * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+	 * sf  := 1 (64-bit variant)
+	 */
+	return (struct plt_entry){
+		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+		cpu_to_le32(0xd61f0200)
+	};
+}
+
+static inline bool plt_entries_equal(const struct plt_entry *a,
+				     const struct plt_entry *b)
+{
+	return a->mov0 == b->mov0 &&
+	       a->mov1 == b->mov1 &&
+	       a->mov2 == b->mov2;
+}
+
 #endif /* __ASM_MODULE_H */
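
The opcode comment above fully determines the veneer encoding, so it can be exercised outside the kernel. This standalone C sketch mirrors get_plt_entry() for one sample target address; the address, the plain uint32_t stand-in for __le32, and the little-endian host are assumptions for illustration:

#include <stdio.h>
#include <stdint.h>

struct plt_entry {
	uint32_t mov0;	/* movn x16, #0x....          */
	uint32_t mov1;	/* movk x16, #0x...., lsl #16 */
	uint32_t mov2;	/* movk x16, #0x...., lsl #32 */
	uint32_t br;	/* br   x16                   */
};

static struct plt_entry get_plt_entry(uint64_t val)
{
	/* same imm16 packing as the kernel helper above; movn takes the
	 * complemented low 16 bits, the movks patch in the higher fields */
	return (struct plt_entry){
		(uint32_t)(0x92800010u | ((~val         & 0xffff) << 5)),
		(uint32_t)(0xf2a00010u | (((val >> 16) & 0xffff) << 5)),
		(uint32_t)(0xf2c00010u | (((val >> 32) & 0xffff) << 5)),
		0xd61f0200u
	};
}

int main(void)
{
	/* sample kernel-text-like address, purely illustrative */
	struct plt_entry e = get_plt_entry(0xffff000008081000ULL);

	printf("%08x %08x %08x %08x\n", e.mov0, e.mov1, e.mov2, e.br);
	return 0;
}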
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 8d5cbec17d80..f9ccc36d3dc3 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -18,6 +18,7 @@
 #define __ASM_PERF_EVENT_H
 
 #include <asm/stack_pointer.h>
+#include <asm/ptrace.h>
 
 #define	ARMV8_PMU_MAX_COUNTERS	32
 #define	ARMV8_PMU_COUNTER_MASK	(ARMV8_PMU_MAX_COUNTERS - 1)
@@ -79,6 +80,7 @@ struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
 #endif
 
 #define perf_arch_fetch_caller_regs(regs, __ip) { \
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c9530b5b5ca8..bdcc7f1c9d06 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -42,6 +42,8 @@
 #include <asm/cmpxchg.h>
 #include <asm/fixmap.h>
 #include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
 
 extern void __pte_error(const char *file, int line, unsigned long val);
 extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+	if (pte_write(pte))
+		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
 }
 
 static inline pte_t pte_mkold(pte_t pte)
@@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 	}
 }
 
-struct mm_struct;
-struct vm_area_struct;
-
 extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 
 /*
@@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * hardware updates of the pte (ptep_set_access_flags safely changes
 	 * valid ptes without going through an invalid entry).
 	 */
-	if (pte_valid(*ptep) && pte_valid(pte)) {
+	if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+	    (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
 		VM_WARN_ONCE(!pte_young(pte),
 			     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
 			     __func__, pte_val(*ptep), pte_val(pte));
@@ -345,7 +353,6 @@ static inline int pmd_protnone(pmd_t pmd)
 
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
 
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 
 #define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
@@ -642,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
- * ptep_set_wrprotect - mark read-only while preserving the hardware update of
- * the Access Flag.
+ * ptep_set_wrprotect - mark read-only while transferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
  */
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
 	pte_t old_pte, pte;
 
-	/*
-	 * ptep_set_wrprotect() is only called on CoW mappings which are
-	 * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
-	 * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
-	 * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
-	 * protection_map[]. There is no race with the hardware update of the
-	 * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
-	 * is set.
-	 */
-	VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
-		     "%s: potential race with hardware DBM", __func__);
 	pte = READ_ONCE(*ptep);
 	do {
 		old_pte = pte;
+		/*
+		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+		 * clear), set the PTE_DIRTY bit.
+		 */
+		if (pte_hw_dirty(pte))
+			pte = pte_mkdirty(pte);
 		pte = pte_wrprotect(pte);
 		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
 					       pte_val(old_pte), pte_val(pte));
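
The interplay of PTE_WRITE/DBM, PTE_RDONLY and the software PTE_DIRTY bit can be modelled in a few lines. In this toy C sketch the bit positions are invented and only the transfer logic follows the hunk above; it is not the real arm64 PTE layout:

#include <assert.h>
#include <stdint.h>

#define PTE_WRITE  (1u << 0)	/* a.k.a. PTE_DBM */
#define PTE_RDONLY (1u << 1)
#define PTE_DIRTY  (1u << 2)	/* software dirty bit */

static int pte_hw_dirty(uint32_t pte)
{
	/* hardware DBM marks a page dirty by clearing PTE_RDONLY
	 * while PTE_WRITE/DBM is set */
	return (pte & PTE_WRITE) && !(pte & PTE_RDONLY);
}

static uint32_t pte_mkdirty(uint32_t pte)
{
	pte |= PTE_DIRTY;
	if (pte & PTE_WRITE)
		pte &= ~PTE_RDONLY;
	return pte;
}

static uint32_t ptep_set_wrprotect_model(uint32_t pte)
{
	/* transfer a pending hardware-dirty state into PTE_DIRTY
	 * before marking the entry read-only */
	if (pte_hw_dirty(pte))
		pte = pte_mkdirty(pte);
	pte &= ~PTE_WRITE;
	pte |= PTE_RDONLY;
	return pte;
}

int main(void)
{
	uint32_t pte = PTE_WRITE;		/* writable, hardware-dirtied */

	pte = ptep_set_wrprotect_model(pte);
	assert(pte & PTE_DIRTY);		/* dirtiness was not lost */
	assert(pte & PTE_RDONLY);
	return 0;
}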
diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..b551b741653d
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 8265dd790895..067baace74a0 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds
 ifeq ($(CONFIG_DEBUG_EFI),y)
 AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
 endif
-
-# will be included by each individual module but not by the core kernel itself
-extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 65f42d257414..2a752cb2a0f3 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
 	mrs	x12, sctlr_el1
 	ldr	x13, =SCTLR_ELx_FLAGS
 	bic	x12, x12, x13
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x12
 	isb
 
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index d16978213c5b..ea001241bdd4 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;
 
 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
 
-static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
 	&smp_spin_table_ops,
 	&cpu_psci_ops,
 	NULL,
 };
 
-static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
 	&acpi_parking_protocol_ops,
 #endif
@@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
 
 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-	const struct cpu_operations **ops;
+	const struct cpu_operations *const *ops;
 
 	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c5ba0097887f..a73a5928f09b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 4e6ad355bd05..6b9736c3fb56 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -96,6 +96,7 @@ ENTRY(entry)
 	mrs	x0, sctlr_el2
 	bic	x0, x0, #1 << 0			// clear SCTLR.M
 	bic	x0, x0, #1 << 2			// clear SCTLR.C
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x0
 	isb
 	b	2f
@@ -103,6 +104,7 @@ ENTRY(entry)
 	mrs	x0, sctlr_el1
 	bic	x0, x0, #1 << 0			// clear SCTLR.M
 	bic	x0, x0, #1 << 2			// clear SCTLR.C
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x0
 	isb
 2:
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 143b3e72c25e..fae81f7964b4 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -114,7 +114,12 @@
  * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
  * whatever is in the FPSIMD registers is not saved to memory, but discarded.
  */
-static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+struct fpsimd_last_state_struct {
+	struct fpsimd_state *st;
+	bool sve_in_use;
+};
+
+static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
 
 /* Default VL for tasks that don't set it explicitly: */
 static int sve_default_vl = -1;
@@ -905,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next)
 	 */
 	struct fpsimd_state *st = &next->thread.fpsimd_state;
 
-	if (__this_cpu_read(fpsimd_last_state) == st
+	if (__this_cpu_read(fpsimd_last_state.st) == st
 	    && st->cpu == smp_processor_id())
 		clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 	else
@@ -992,6 +997,21 @@ void fpsimd_signal_preserve_current_state(void)
 }
 
 /*
+ * Associate current's FPSIMD context with this cpu
+ * Preemption must be disabled when calling this function.
+ */
+static void fpsimd_bind_to_cpu(void)
+{
+	struct fpsimd_last_state_struct *last =
+		this_cpu_ptr(&fpsimd_last_state);
+	struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+	last->st = st;
+	last->sve_in_use = test_thread_flag(TIF_SVE);
+	st->cpu = smp_processor_id();
+}
+
+/*
  * Load the userland FPSIMD state of 'current' from memory, but only if the
  * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
  * state of 'current'
@@ -1004,11 +1024,8 @@ void fpsimd_restore_current_state(void)
 	local_bh_disable();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		struct fpsimd_state *st = &current->thread.fpsimd_state;
-
 		task_fpsimd_load();
-		__this_cpu_write(fpsimd_last_state, st);
-		st->cpu = smp_processor_id();
+		fpsimd_bind_to_cpu();
 	}
 
 	local_bh_enable();
@@ -1026,18 +1043,14 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 
 	local_bh_disable();
 
-	if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
-		current->thread.fpsimd_state = *state;
+	current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
+	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);
-	}
-	task_fpsimd_load();
 
-	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		struct fpsimd_state *st = &current->thread.fpsimd_state;
+	task_fpsimd_load();
 
-		__this_cpu_write(fpsimd_last_state, st);
-		st->cpu = smp_processor_id();
-	}
+	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
+		fpsimd_bind_to_cpu();
 
 	local_bh_enable();
 }
@@ -1052,7 +1065,7 @@ void fpsimd_flush_task_state(struct task_struct *t)
 
 static inline void fpsimd_flush_cpu_state(void)
 {
-	__this_cpu_write(fpsimd_last_state, NULL);
+	__this_cpu_write(fpsimd_last_state.st, NULL);
 }
 
 /*
@@ -1065,14 +1078,10 @@ static inline void fpsimd_flush_cpu_state(void)
 #ifdef CONFIG_ARM64_SVE
 void sve_flush_cpu_state(void)
 {
-	struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state);
-	struct task_struct *tsk;
-
-	if (!fpstate)
-		return;
+	struct fpsimd_last_state_struct const *last =
+		this_cpu_ptr(&fpsimd_last_state);
 
-	tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state);
-	if (test_tsk_thread_flag(tsk, TIF_SVE))
+	if (last->st && last->sve_in_use)
 		fpsimd_flush_cpu_state();
 }
 #endif /* CONFIG_ARM64_SVE */
@@ -1267,7 +1276,7 @@ static inline void fpsimd_pm_init(void) { }
 #ifdef CONFIG_HOTPLUG_CPU
 static int fpsimd_cpu_dead(unsigned int cpu)
 {
-	per_cpu(fpsimd_last_state, cpu) = NULL;
+	per_cpu(fpsimd_last_state.st, cpu) = NULL;
 	return 0;
 }
 
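
The fpsimd_last_state bookkeeping reduces to a pointer-plus-cpu check: the registers are live for a task only if this CPU last loaded them and nothing rebound the CPU since. A standalone C sketch with a single simulated CPU — the simulation is an assumption; the kernel uses per-CPU variables and preemption control:

#include <stdbool.h>
#include <stdio.h>

struct fpsimd_state { int cpu; };

struct fpsimd_last_state_struct {
	struct fpsimd_state *st;
	bool sve_in_use;
};

static struct fpsimd_last_state_struct fpsimd_last_state;	/* one fake CPU */
static int smp_processor_id(void) { return 0; }

static void fpsimd_bind_to_cpu(struct fpsimd_state *st, bool tif_sve)
{
	fpsimd_last_state.st = st;
	fpsimd_last_state.sve_in_use = tif_sve;
	st->cpu = smp_processor_id();
}

/* the register contents are still valid for 'st' only if this CPU last
 * loaded them and no other task rebound the CPU since */
static bool fpsimd_regs_live(const struct fpsimd_state *st)
{
	return fpsimd_last_state.st == st && st->cpu == smp_processor_id();
}

int main(void)
{
	struct fpsimd_state a = { -1 }, b = { -1 };

	fpsimd_bind_to_cpu(&a, false);
	printf("a live: %d\n", fpsimd_regs_live(&a));	/* 1 */
	fpsimd_bind_to_cpu(&b, true);
	printf("a live: %d\n", fpsimd_regs_live(&a));	/* 0: b took the CPU */
	return 0;
}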
diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S
deleted file mode 100644
index 00c4025be4ff..000000000000
--- a/arch/arm64/kernel/ftrace-mod.S
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-	.section	".text.ftrace_trampoline", "ax"
-	.align		3
-0:	.quad		0
-__ftrace_trampoline:
-	ldr		x16, 0b
-	br		x16
-ENDPROC(__ftrace_trampoline)
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index c13b1fca0e5b..50986e388d2b 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	if (offset < -SZ_128M || offset >= SZ_128M) {
 #ifdef CONFIG_ARM64_MODULE_PLTS
-		unsigned long *trampoline;
+		struct plt_entry trampoline;
 		struct module *mod;
 
 		/*
@@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		 * is added in the future, but for now, the pr_err() below
 		 * deals with a theoretical issue only.
 		 */
-		trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
-		if (trampoline[0] != addr) {
-			if (trampoline[0] != 0) {
+		trampoline = get_plt_entry(addr);
+		if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+				       &trampoline)) {
+			if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+					       &(struct plt_entry){})) {
 				pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
 				return -EINVAL;
 			}
 
 			/* point the trampoline to our ftrace entry point */
 			module_disable_ro(mod);
-			trampoline[0] = addr;
+			*mod->arch.ftrace_trampoline = trampoline;
 			module_enable_ro(mod, true);
 
 			/* update trampoline before patching in the branch */
 			smp_wmb();
 		}
-		addr = (unsigned long)&trampoline[1];
+		addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
 #else /* CONFIG_ARM64_MODULE_PLTS */
 		return -EINVAL;
 #endif /* CONFIG_ARM64_MODULE_PLTS */
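
The PLT path above is only taken when the branch target falls outside the ±128 MiB range of an AArch64 direct branch. A standalone C sketch of that range test, with illustrative addresses:

#include <stdint.h>
#include <stdio.h>

#define SZ_128M (128LL * 1024 * 1024)

static int needs_plt(uint64_t pc, uint64_t addr)
{
	/* signed wrap mirrors the kernel's long arithmetic */
	int64_t offset = (int64_t)(addr - pc);

	return offset < -SZ_128M || offset >= SZ_128M;
}

int main(void)
{
	uint64_t pc = 0xffff000008080000ULL;	/* assumed call site */

	printf("near: %d\n", needs_plt(pc, pc + 0x100000));	/* 0: direct branch ok */
	printf("far:  %d\n", needs_plt(pc, pc + 0x10000000));	/* 1: needs the veneer */
	return 0;
}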
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 67e86a0f57ac..e3cb9fbf96b6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -750,6 +750,7 @@ __primary_switch:
 	 * to take into account by discarding the current kernel mapping and
 	 * creating a new one.
 	 */
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x20			// disable the MMU
 	isb
 	bl	__create_page_tables		// recreate kernel mapping
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 749f81779420..74bb56f656ef 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -28,6 +28,7 @@
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/smp.h>
+#include <linux/uaccess.h>
 
 #include <asm/compat.h>
 #include <asm/current.h>
@@ -36,7 +37,6 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
-#include <asm/uaccess.h>
 
 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index d05dbe658409..ea640f92fe5a 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -11,21 +11,6 @@
 #include <linux/module.h>
 #include <linux/sort.h>
 
-struct plt_entry {
-	/*
-	 * A program that conforms to the AArch64 Procedure Call Standard
-	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
-	 * IP1 (x17) may be inserted at any branch instruction that is
-	 * exposed to a relocation that supports long branches. Since that
-	 * is exactly what we are dealing with here, we are free to use x16
-	 * as a scratch register in the PLT veneers.
-	 */
-	__le32	mov0;	/* movn	x16, #0x....			*/
-	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
-	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
-	__le32	br;	/* br	x16				*/
-};
-
 static bool in_init(const struct module *mod, void *loc)
 {
 	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 	int i = pltsec->plt_num_entries;
 	u64 val = sym->st_value + rela->r_addend;
 
-	/*
-	 * MOVK/MOVN/MOVZ opcode:
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 *
-	 * Rd  := 0x10 (x16)
-	 * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-	 * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-	 * sf  := 1 (64-bit variant)
-	 */
-	plt[i] = (struct plt_entry){
-		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-		cpu_to_le32(0xd61f0200)
-	};
+	plt[i] = get_plt_entry(val);
 
 	/*
 	 * Check if the entry we just created is a duplicate. Given that the
 	 * relocations are sorted, this will be the last entry we allocated.
 	 * (if one exists).
 	 */
-	if (i > 0 &&
-	    plt[i].mov0 == plt[i - 1].mov0 &&
-	    plt[i].mov1 == plt[i - 1].mov1 &&
-	    plt[i].mov2 == plt[i - 1].mov2)
+	if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
 		return (u64)&plt[i - 1];
 
 	pltsec->plt_num_entries++;
@@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	unsigned long core_plts = 0;
 	unsigned long init_plts = 0;
 	Elf64_Sym *syms = NULL;
+	Elf_Shdr *tramp = NULL;
 	int i;
 
 	/*
@@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			mod->arch.core.plt = sechdrs + i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
 			mod->arch.init.plt = sechdrs + i;
+		else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+			 !strcmp(secstrings + sechdrs[i].sh_name,
+				 ".text.ftrace_trampoline"))
+			tramp = sechdrs + i;
 		else if (sechdrs[i].sh_type == SHT_SYMTAB)
 			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
 	}
@@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	mod->arch.init.plt_num_entries = 0;
 	mod->arch.init.plt_max_entries = init_plts;
 
+	if (tramp) {
+		tramp->sh_type = SHT_NOBITS;
+		tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+		tramp->sh_addralign = __alignof__(struct plt_entry);
+		tramp->sh_size = sizeof(struct plt_entry);
+	}
+
 	return 0;
 }
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
index f7c9781a9d48..22e36a21c113 100644
--- a/arch/arm64/kernel/module.lds
+++ b/arch/arm64/kernel/module.lds
@@ -1,4 +1,5 @@
 SECTIONS {
 	.plt (NOLOAD) : { BYTE(0) }
 	.init.plt (NOLOAD) : { BYTE(0) }
+	.text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
 }
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9eaef51f83ff..3affca3dd96a 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
 	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
 	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
-	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
-	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
 
 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b2adcce7bc18..6b7dcf4310ac 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -314,6 +314,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_tsk_thread_flag(p, TIF_SVE);
 	p->thread.sve_state = NULL;
 
+	/*
+	 * In case p was allocated the same task_struct pointer as some
+	 * other recently-exited task, make sure p is disassociated from
+	 * any cpu that may have run that now-exited task recently.
+	 * Otherwise we could erroneously skip reloading the FPSIMD
+	 * registers for p.
+	 */
+	fpsimd_flush_task_state(p);
+
 	if (likely(!(p->flags & PF_KTHREAD))) {
 		*childregs = *current_pt_regs();
 		childregs->regs[0] = 0;
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index ce704a4aeadd..f407e422a720 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
 	mrs	x0, sctlr_el2
 	ldr	x1, =SCTLR_ELx_FLAGS
 	bic	x0, x0, x1
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x0
 	isb
 1:
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index dbadfaf850a7..fa63b28c65e0 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
 		}
 	}
 }
+
+
+/*
+ * After successfully emulating an instruction, we might want to
+ * return to user space with a KVM_EXIT_DEBUG. We can only do this
+ * once the emulation is complete, though, so for userspace emulations
+ * we have to wait until we have re-entered KVM before calling this
+ * helper.
+ *
+ * Return true (and set exit_reason) to return to userspace or false
+ * if no further action is required.
+ */
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		run->exit_reason = KVM_EXIT_DEBUG;
+		run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT;
+		return true;
+	}
+	return false;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index b71247995469..304203fa9e33 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -28,6 +28,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_psci.h>
+#include <asm/debug-monitors.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -187,14 +188,46 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 }
 
 /*
+ * We may be single-stepping an emulated instruction. If the emulation
+ * has been completed in the kernel, we can return to userspace with a
+ * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
+ * emulation first.
+ */
+static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	int handled;
+
+	/*
+	 * See ARM ARM B1.14.1: "Hyp traps on instructions
+	 * that fail their condition code check"
+	 */
+	if (!kvm_condition_valid(vcpu)) {
+		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+		handled = 1;
+	} else {
+		exit_handle_fn exit_handler;
+
+		exit_handler = kvm_get_exit_handler(vcpu);
+		handled = exit_handler(vcpu, run);
+	}
+
+	/*
+	 * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run
+	 * structure if we need to return to userspace.
+	 */
+	if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run))
+		handled = 0;
+
+	return handled;
+}
+
+/*
  * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
  * proper exit to userspace.
  */
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index)
 {
-	exit_handle_fn exit_handler;
-
 	if (ARM_SERROR_PENDING(exception_index)) {
 		u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
 
@@ -220,20 +253,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		return 1;
 	case ARM_EXCEPTION_EL1_SERROR:
 		kvm_inject_vabt(vcpu);
-		return 1;
-	case ARM_EXCEPTION_TRAP:
-		/*
-		 * See ARM ARM B1.14.1: "Hyp traps on instructions
-		 * that fail their condition code check"
-		 */
-		if (!kvm_condition_valid(vcpu)) {
-			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+		/* We may still need to return for single-step */
+		if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
+		    && kvm_arm_handle_step_debug(vcpu, run))
+			return 0;
+		else
 			return 1;
-		}
-
-		exit_handler = kvm_get_exit_handler(vcpu);
-
-		return exit_handler(vcpu, run);
+	case ARM_EXCEPTION_TRAP:
+		return handle_trap_exceptions(vcpu, run);
 	case ARM_EXCEPTION_HYP_GONE:
 		/*
 		 * EL2 has been reset to the hyp-stub. This happens when a guest
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3f9615582377..870828c364c5 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -151,6 +151,7 @@ reset:
 	mrs	x5, sctlr_el2
 	ldr	x6, =SCTLR_ELx_FLAGS
 	bic	x5, x5, x6		// Clear SCTL_M and etc
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x5
 	isb
 
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e..f4363d40e2cd 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
 	u64 reg;
 
+	/* Clear pmscr in case of early return */
+	*pmscr_el1 = 0;
+
 	/* SPE present on this CPU? */
 	if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
 						  ID_AA64DFR0_PMSVER_SHIFT))
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 525c01f48867..f7c651f3a8c0 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -22,6 +22,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 #include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
 
 static bool __hyp_text __fpsimd_enabled_nvhe(void)
 {
@@ -269,7 +270,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
 	return true;
 }
 
-static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
+/* Skip an instruction which has been emulated. Returns true if
+ * execution can continue or false if we need to exit hyp mode because
+ * single-step was in effect.
+ */
+static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 {
 	*vcpu_pc(vcpu) = read_sysreg_el2(elr);
 
@@ -282,6 +287,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	}
 
 	write_sysreg_el2(*vcpu_pc(vcpu), elr);
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		vcpu->arch.fault.esr_el2 =
+			(ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22;
+		return false;
+	} else {
+		return true;
+	}
 }
 
 int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -342,13 +355,21 @@ again:
 		int ret = __vgic_v2_perform_cpuif_access(vcpu);
 
 		if (ret == 1) {
-			__skip_instr(vcpu);
+			if (__skip_instr(vcpu))
 				goto again;
+			else
+				exit_code = ARM_EXCEPTION_TRAP;
 		}
 
 		if (ret == -1) {
-			/* Promote an illegal access to an SError */
-			__skip_instr(vcpu);
+			/* Promote an illegal access to an
+			 * SError. If we would be returning
+			 * due to single-step clear the SS
+			 * bit so handle_exit knows what to
+			 * do after dealing with the error.
+			 */
+			if (!__skip_instr(vcpu))
+				*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
 			exit_code = ARM_EXCEPTION_EL1_SERROR;
 		}
 
@@ -363,8 +384,10 @@ again:
 		int ret = __vgic_v3_perform_cpuif_access(vcpu);
 
 		if (ret == 1) {
-			__skip_instr(vcpu);
+			if (__skip_instr(vcpu))
 				goto again;
+			else
+				exit_code = ARM_EXCEPTION_TRAP;
 		}
 
 		/* 0 falls through to be handled out of EL2 */
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index ab9f5f0fb2c7..6f4017046323 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu)
 
 	set_reserved_asid_bits();
 
-	/*
-	 * Ensure the generation bump is observed before we xchg the
-	 * active_asids.
-	 */
-	smp_wmb();
-
 	for_each_possible_cpu(i) {
 		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
 		/*
@@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu)
 		per_cpu(reserved_asids, i) = asid;
 	}
 
-	/* Queue a TLB invalidate and flush the I-cache if necessary. */
+	/*
+	 * Queue a TLB invalidation for each CPU to perform on next
+	 * context-switch
+	 */
 	cpumask_setall(&tlb_flush_pending);
 }
 
@@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 	asid = atomic64_read(&mm->context.id);
 
 	/*
-	 * The memory ordering here is subtle. We rely on the control
-	 * dependency between the generation read and the update of
-	 * active_asids to ensure that we are synchronised with a
-	 * parallel rollover (i.e. this pairs with the smp_wmb() in
-	 * flush_context).
+	 * The memory ordering here is subtle.
+	 * If our ASID matches the current generation, then we update
+	 * our active_asids entry with a relaxed xchg. Racing with a
+	 * concurrent rollover means that either:
+	 *
+	 * - We get a zero back from the xchg and end up waiting on the
+	 *   lock. Taking the lock synchronises with the rollover and so
+	 *   we are forced to see the updated generation.
+	 *
+	 * - We get a valid ASID back from the xchg, which means the
+	 *   relaxed xchg in flush_context will treat us as reserved
+	 *   because atomic RmWs are totally ordered for a given location.
 	 */
 	if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
 	    && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
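
The fast path the new comment describes can be exercised with C11 atomics. This sketch assumes an invented 16-bit ASID/generation split and a single fake CPU, and only models the lock-free check; the slow path and the spinlock are omitted:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define ASID_BITS 16	/* assumed split: low 16 bits ASID, rest generation */

static _Atomic uint64_t asid_generation = (1ULL << ASID_BITS);
static _Atomic uint64_t active_asid;	/* one fake CPU; 0 means "cleared by rollover" */

static int fast_path(uint64_t mm_asid)
{
	/* generation mismatch: a rollover happened, take the slow path */
	if ((mm_asid ^ atomic_load(&asid_generation)) >> ASID_BITS)
		return 0;
	/* relaxed xchg: a zero return means a concurrent rollover cleared
	 * our entry, so we must fall back to the lock */
	return atomic_exchange_explicit(&active_asid, mm_asid,
					memory_order_relaxed) != 0;
}

int main(void)
{
	atomic_store(&active_asid, (1ULL << ASID_BITS) | 42);
	printf("fast path taken: %d\n", fast_path((1ULL << ASID_BITS) | 42));
	return 0;
}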
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index ca74a2aace42..7b60d62ac593 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -389,7 +389,7 @@ void ptdump_check_wx(void)
 		.check_wx = true,
 	};
 
-	walk_pgd(&st, &init_mm, 0);
+	walk_pgd(&st, &init_mm, VA_START);
 	note_page(&st, 0, 0, 0);
 	if (st.wx_pages || st.uxn_pages)
 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 22168cd0dde7..9b7f89df49db 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
 	struct siginfo info;
 	const struct fault_info *inf;
-	int ret = 0;
 
 	inf = esr_to_fault_info(esr);
 	pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
@@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	if (interrupts_enabled(regs))
 		nmi_enter();
 
-	ret = ghes_notify_sea();
+	ghes_notify_sea();
 
 	if (interrupts_enabled(regs))
 		nmi_exit();
@@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	info.si_addr  = (void __user *)addr;
 	arm64_notify_die("", regs, &info, esr);
 
-	return ret;
+	return 0;
 }
 
 static const struct fault_info fault_info[] = {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5960bef0170d..00e7b900ca41 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)
 
 	reserve_elfcorehdr();
 
+	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+
 	dma_contiguous_reserve(arm64_dma_phys_limit);
 
 	memblock_allow_resize();
@@ -502,7 +504,6 @@ void __init bootmem_init(void)
 	sparse_init();
 	zone_sizes_init(min, max);
 
-	high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
 	memblock_dump_all();
 }
 
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 371c5f03a170..051e71ec3335 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -26,7 +26,7 @@
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 
-static struct kmem_cache *pgd_cache;
+static struct kmem_cache *pgd_cache __ro_after_init;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index aa624b4ab655..2240b38c2915 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 67ee896a76a7..26644e15d854 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 3687b54bb18e..3470c6e9c7b9 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild
index b15bf6bc0e94..14a2e9af97e9 100644
--- a/arch/frv/include/uapi/asm/Kbuild
+++ b/arch/frv/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 187aed820e71..2f65f78792cb 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += bpf_perf_event.h
5generic-y += errno.h 6generic-y += errno.h
6generic-y += fcntl.h 7generic-y += fcntl.h
7generic-y += ioctl.h 8generic-y += ioctl.h
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index cb5df3aad3a8..41a176dbb53e 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += bpf_perf_event.h
5generic-y += errno.h 6generic-y += errno.h
6generic-y += fcntl.h 7generic-y += fcntl.h
7generic-y += ioctl.h 8generic-y += ioctl.h
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 13a97aa2285f..f5c6967a93bb 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generic-y += kvm_para.h
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index c6ecb97151a2..9025699049ca 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
88 } 88 }
89 89
90 if (ti->softirq_time) { 90 if (ti->softirq_time) {
91 delta = cycle_to_nsec(ti->softirq_time)); 91 delta = cycle_to_nsec(ti->softirq_time);
92 account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ); 92 account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
93 } 93 }
94 94
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index 1c44d3b3eba0..451bf6071c6e 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,5 +1,6 @@
1# UAPI Header export list 1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bpf_perf_event.h
4generic-y += kvm_para.h 5generic-y += kvm_para.h
5generic-y += siginfo.h 6generic-y += siginfo.h
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index cb79fba79d43..b88a8dd14933 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -122,7 +122,6 @@ void abort(void)
122 /* if that doesn't kill us, halt */ 122 /* if that doesn't kill us, halt */
123 panic("Oops failed to kill thread"); 123 panic("Oops failed to kill thread");
124} 124}
125EXPORT_SYMBOL(abort);
126 125
127void __init trap_init(void) 126void __init trap_init(void)
128{ 127{
diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig
index 55e55dbc2fb6..3d07b1de7eb0 100644
--- a/arch/m68k/configs/stmark2_defconfig
+++ b/arch/m68k/configs/stmark2_defconfig
@@ -5,7 +5,6 @@ CONFIG_SYSVIPC=y
5CONFIG_LOG_BUF_SHIFT=14 5CONFIG_LOG_BUF_SHIFT=14
6CONFIG_NAMESPACES=y 6CONFIG_NAMESPACES=y
7CONFIG_BLK_DEV_INITRD=y 7CONFIG_BLK_DEV_INITRD=y
8CONFIG_INITRAMFS_SOURCE="../uClinux-dist/romfs"
9# CONFIG_RD_BZIP2 is not set 8# CONFIG_RD_BZIP2 is not set
10# CONFIG_RD_LZMA is not set 9# CONFIG_RD_LZMA is not set
11# CONFIG_RD_XZ is not set 10# CONFIG_RD_XZ is not set
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 3717b64a620d..c2e26a44c482 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += bitsperlong.h 5generic-y += bitsperlong.h
6generic-y += bpf_perf_event.h
6generic-y += errno.h 7generic-y += errno.h
7generic-y += ioctl.h 8generic-y += ioctl.h
8generic-y += ipcbuf.h 9generic-y += ipcbuf.h
diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds
index 3aa571a513b5..cf6edda38971 100644
--- a/arch/m68k/kernel/vmlinux-nommu.lds
+++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -45,6 +45,8 @@ SECTIONS {
45 .text : { 45 .text : {
46 HEAD_TEXT 46 HEAD_TEXT
47 TEXT_TEXT 47 TEXT_TEXT
48 IRQENTRY_TEXT
49 SOFTIRQENTRY_TEXT
48 SCHED_TEXT 50 SCHED_TEXT
49 CPUIDLE_TEXT 51 CPUIDLE_TEXT
50 LOCK_TEXT 52 LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 89172b8974b9..625a5785804f 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -16,6 +16,8 @@ SECTIONS
16 .text : { 16 .text : {
17 HEAD_TEXT 17 HEAD_TEXT
18 TEXT_TEXT 18 TEXT_TEXT
19 IRQENTRY_TEXT
20 SOFTIRQENTRY_TEXT
19 SCHED_TEXT 21 SCHED_TEXT
20 CPUIDLE_TEXT 22 CPUIDLE_TEXT
21 LOCK_TEXT 23 LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 293990efc917..9868270b0984 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -16,6 +16,8 @@ SECTIONS
16 .text : { 16 .text : {
17 HEAD_TEXT 17 HEAD_TEXT
18 TEXT_TEXT 18 TEXT_TEXT
19 IRQENTRY_TEXT
20 SOFTIRQENTRY_TEXT
19 SCHED_TEXT 21 SCHED_TEXT
20 CPUIDLE_TEXT 22 CPUIDLE_TEXT
21 LOCK_TEXT 23 LOCK_TEXT
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
index 6ac763d9a3e3..f9eaf07d29f8 100644
--- a/arch/metag/include/uapi/asm/Kbuild
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += bitsperlong.h 5generic-y += bitsperlong.h
6generic-y += bpf_perf_event.h
6generic-y += errno.h 7generic-y += errno.h
7generic-y += fcntl.h 8generic-y += fcntl.h
8generic-y += ioctl.h 9generic-y += ioctl.h
diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h
index 99472d2ca340..97559fe0b953 100644
--- a/arch/microblaze/include/asm/mmu_context_mm.h
+++ b/arch/microblaze/include/asm/mmu_context_mm.h
@@ -13,6 +13,7 @@
13 13
14#include <linux/atomic.h> 14#include <linux/atomic.h>
15#include <linux/mm_types.h> 15#include <linux/mm_types.h>
16#include <linux/sched.h>
16 17
17#include <asm/bitops.h> 18#include <asm/bitops.h>
18#include <asm/mmu.h> 19#include <asm/mmu.h>
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 06609ca36115..2c6a6bffea32 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bitsperlong.h 4generic-y += bitsperlong.h
5generic-y += bpf_perf_event.h
5generic-y += errno.h 6generic-y += errno.h
6generic-y += fcntl.h 7generic-y += fcntl.h
7generic-y += ioctl.h 8generic-y += ioctl.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 7c8aab23bce8..b1f66699677d 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += qrwlock.h
16generic-y += qspinlock.h 16generic-y += qspinlock.h
17generic-y += sections.h 17generic-y += sections.h
18generic-y += segment.h 18generic-y += segment.h
19generic-y += serial.h
20generic-y += trace_clock.h 19generic-y += trace_clock.h
21generic-y += unaligned.h 20generic-y += unaligned.h
22generic-y += user.h 21generic-y += user.h
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9e9e94415d08..1a508a74d48d 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
552extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, 552extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
553 pmd_t *pmdp, pmd_t pmd); 553 pmd_t *pmdp, pmd_t pmd);
554 554
555#define __HAVE_ARCH_PMD_WRITE 555#define pmd_write pmd_write
556static inline int pmd_write(pmd_t pmd) 556static inline int pmd_write(pmd_t pmd)
557{ 557{
558 return !!(pmd_val(pmd) & _PAGE_WRITE); 558 return !!(pmd_val(pmd) & _PAGE_WRITE);
diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h
new file mode 100644
index 000000000000..1d830c6666c2
--- /dev/null
+++ b/arch/mips/include/asm/serial.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) 2017 MIPS Tech, LLC
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 */
9#ifndef __ASM__SERIAL_H
10#define __ASM__SERIAL_H
11
12#ifdef CONFIG_MIPS_GENERIC
13/*
14 * Generic kernels cannot know a correct value for all platforms at
15 * compile time. Set it to 0 to prevent 8250_early from using it.
16 */
17#define BASE_BAUD 0
18#else
19#include <asm-generic/serial.h>
20#endif
21
22#endif /* __ASM__SERIAL_H */
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
index a0266feba9e6..7a4becd8963a 100644
--- a/arch/mips/include/uapi/asm/Kbuild
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
1# UAPI Header export list 1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bpf_perf_event.h
4generic-y += ipcbuf.h 5generic-y += ipcbuf.h
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index c7ed26029cbb..e68e6e04063a 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -235,6 +235,7 @@ LEAF(mips_cps_core_init)
235 has_mt t0, 3f 235 has_mt t0, 3f
236 236
237 .set push 237 .set push
238 .set MIPS_ISA_LEVEL_RAW
238 .set mt 239 .set mt
239 240
240 /* Only allow 1 TC per VPE to execute... */ 241 /* Only allow 1 TC per VPE to execute... */
@@ -388,6 +389,7 @@ LEAF(mips_cps_boot_vpes)
388#elif defined(CONFIG_MIPS_MT) 389#elif defined(CONFIG_MIPS_MT)
389 390
390 .set push 391 .set push
392 .set MIPS_ISA_LEVEL_RAW
391 .set mt 393 .set mt
392 394
393 /* If the core doesn't support MT then return */ 395 /* If the core doesn't support MT then return */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 45d0b6b037ee..57028d49c202 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
705 struct task_struct *t; 705 struct task_struct *t;
706 int max_users; 706 int max_users;
707 707
708 /* If nothing to change, return right away, successfully. */
709 if (value == mips_get_process_fp_mode(task))
710 return 0;
711
712 /* Only accept a mode change if 64-bit FP enabled for o32. */
713 if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
714 return -EOPNOTSUPP;
715
716 /* And only for o32 tasks. */
717 if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
718 return -EOPNOTSUPP;
719
708 /* Check the value is valid */ 720 /* Check the value is valid */
709 if (value & ~known_bits) 721 if (value & ~known_bits)
710 return -EOPNOTSUPP; 722 return -EOPNOTSUPP;
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index efbd8df8b665..0b23b1ad99e6 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -419,63 +419,160 @@ static int gpr64_set(struct task_struct *target,
419 419
420#endif /* CONFIG_64BIT */ 420#endif /* CONFIG_64BIT */
421 421
422/*
423 * Copy the floating-point context to the supplied NT_PRFPREG buffer,
424 * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
425 * correspond 1:1 to buffer slots. Only general registers are copied.
426 */
427static int fpr_get_fpa(struct task_struct *target,
428 unsigned int *pos, unsigned int *count,
429 void **kbuf, void __user **ubuf)
430{
431 return user_regset_copyout(pos, count, kbuf, ubuf,
432 &target->thread.fpu,
433 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
434}
435
436/*
437 * Copy the floating-point context to the supplied NT_PRFPREG buffer,
438 * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
439 * general register slots are copied to buffer slots. Only general
440 * registers are copied.
441 */
442static int fpr_get_msa(struct task_struct *target,
443 unsigned int *pos, unsigned int *count,
444 void **kbuf, void __user **ubuf)
445{
446 unsigned int i;
447 u64 fpr_val;
448 int err;
449
450 BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
451 for (i = 0; i < NUM_FPU_REGS; i++) {
452 fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
453 err = user_regset_copyout(pos, count, kbuf, ubuf,
454 &fpr_val, i * sizeof(elf_fpreg_t),
455 (i + 1) * sizeof(elf_fpreg_t));
456 if (err)
457 return err;
458 }
459
460 return 0;
461}
462
463/*
464 * Copy the floating-point context to the supplied NT_PRFPREG buffer.
465 * Choose the appropriate helper for general registers, and then copy
466 * the FCSR register separately.
467 */
422static int fpr_get(struct task_struct *target, 468static int fpr_get(struct task_struct *target,
423 const struct user_regset *regset, 469 const struct user_regset *regset,
424 unsigned int pos, unsigned int count, 470 unsigned int pos, unsigned int count,
425 void *kbuf, void __user *ubuf) 471 void *kbuf, void __user *ubuf)
426{ 472{
427 unsigned i; 473 const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
428 int err; 474 int err;
429 u64 fpr_val;
430 475
431 /* XXX fcr31 */ 476 if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
477 err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
478 else
479 err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
480 if (err)
481 return err;
432 482
433 if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) 483 err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
434 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 484 &target->thread.fpu.fcr31,
435 &target->thread.fpu, 485 fcr31_pos, fcr31_pos + sizeof(u32));
436 0, sizeof(elf_fpregset_t));
437 486
438 for (i = 0; i < NUM_FPU_REGS; i++) { 487 return err;
439 fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); 488}
440 err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, 489
441 &fpr_val, i * sizeof(elf_fpreg_t), 490/*
442 (i + 1) * sizeof(elf_fpreg_t)); 491 * Copy the supplied NT_PRFPREG buffer to the floating-point context,
492 * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
493 * context's general register slots. Only general registers are copied.
494 */
495static int fpr_set_fpa(struct task_struct *target,
496 unsigned int *pos, unsigned int *count,
497 const void **kbuf, const void __user **ubuf)
498{
499 return user_regset_copyin(pos, count, kbuf, ubuf,
500 &target->thread.fpu,
501 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
502}
503
504/*
505 * Copy the supplied NT_PRFPREG buffer to the floating-point context,
506 * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
507 * bits only of FP context's general register slots. Only general
508 * registers are copied.
509 */
510static int fpr_set_msa(struct task_struct *target,
511 unsigned int *pos, unsigned int *count,
512 const void **kbuf, const void __user **ubuf)
513{
514 unsigned int i;
515 u64 fpr_val;
516 int err;
517
518 BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
519 for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
520 err = user_regset_copyin(pos, count, kbuf, ubuf,
521 &fpr_val, i * sizeof(elf_fpreg_t),
522 (i + 1) * sizeof(elf_fpreg_t));
443 if (err) 523 if (err)
444 return err; 524 return err;
525 set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
445 } 526 }
446 527
447 return 0; 528 return 0;
448} 529}
449 530
531/*
532 * Copy the supplied NT_PRFPREG buffer to the floating-point context.
533 * Choose the appropriate helper for general registers, and then copy
534 * the FCSR register separately.
535 *
536 * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
537 * which is supposed to have been guaranteed by the kernel before
538 * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
539 * so that we can safely avoid preinitializing temporaries for
540 * partial register writes.
541 */
450static int fpr_set(struct task_struct *target, 542static int fpr_set(struct task_struct *target,
451 const struct user_regset *regset, 543 const struct user_regset *regset,
452 unsigned int pos, unsigned int count, 544 unsigned int pos, unsigned int count,
453 const void *kbuf, const void __user *ubuf) 545 const void *kbuf, const void __user *ubuf)
454{ 546{
455 unsigned i; 547 const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
548 u32 fcr31;
456 int err; 549 int err;
457 u64 fpr_val;
458 550
459 /* XXX fcr31 */ 551 BUG_ON(count % sizeof(elf_fpreg_t));
552
553 if (pos + count > sizeof(elf_fpregset_t))
554 return -EIO;
460 555
461 init_fp_ctx(target); 556 init_fp_ctx(target);
462 557
463 if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) 558 if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
464 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 559 err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
465 &target->thread.fpu, 560 else
466 0, sizeof(elf_fpregset_t)); 561 err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
562 if (err)
563 return err;
467 564
468 BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); 565 if (count > 0) {
469 for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
470 err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 566 err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
471 &fpr_val, i * sizeof(elf_fpreg_t), 567 &fcr31,
472 (i + 1) * sizeof(elf_fpreg_t)); 568 fcr31_pos, fcr31_pos + sizeof(u32));
473 if (err) 569 if (err)
474 return err; 570 return err;
475 set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val); 571
572 ptrace_setfcr31(target, fcr31);
476 } 573 }
477 574
478 return 0; 575 return err;
479} 576}
480 577
481enum mips_regset { 578enum mips_regset {
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index d535edc01434..75fdeaa8c62f 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -445,10 +445,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
445int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 445int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
446{ 446{
447 int r = -EINTR; 447 int r = -EINTR;
448 sigset_t sigsaved;
449 448
450 if (vcpu->sigset_active) 449 kvm_sigset_activate(vcpu);
451 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
452 450
453 if (vcpu->mmio_needed) { 451 if (vcpu->mmio_needed) {
454 if (!vcpu->mmio_is_write) 452 if (!vcpu->mmio_is_write)
@@ -480,8 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
480 local_irq_enable(); 478 local_irq_enable();
481 479
482out: 480out:
483 if (vcpu->sigset_active) 481 kvm_sigset_deactivate(vcpu);
484 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
485 482
486 return r; 483 return r;
487} 484}
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
index c94ee54210bc..81271d3af47c 100644
--- a/arch/mn10300/include/uapi/asm/Kbuild
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
1# UAPI Header export list 1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bpf_perf_event.h
4generic-y += siginfo.h 5generic-y += siginfo.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index ffca24da7647..13a3d77b4d7b 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += bitsperlong.h 5generic-y += bitsperlong.h
6generic-y += bpf_perf_event.h
6generic-y += errno.h 7generic-y += errno.h
7generic-y += fcntl.h 8generic-y += fcntl.h
8generic-y += ioctl.h 9generic-y += ioctl.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 62286dbeb904..130c16ccba0a 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += bitsperlong.h 5generic-y += bitsperlong.h
6generic-y += bpf_perf_event.h
6generic-y += errno.h 7generic-y += errno.h
7generic-y += fcntl.h 8generic-y += fcntl.h
8generic-y += ioctl.h 9generic-y += ioctl.h
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 9345b44b86f0..f57118e1f6b4 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
123 while ((nuline = strchr(s, '\n')) != NULL) { 123 while ((nuline = strchr(s, '\n')) != NULL) {
124 if (nuline != s) 124 if (nuline != s)
125 pdc_iodc_print(s, nuline - s); 125 pdc_iodc_print(s, nuline - s);
126 pdc_iodc_print("\r\n", 2); 126 pdc_iodc_print("\r\n", 2);
127 s = nuline + 1; 127 s = nuline + 1;
128 } 128 }
129 if (*s != '\0') 129 if (*s != '\0')
130 pdc_iodc_print(s, strlen(s)); 130 pdc_iodc_print(s, strlen(s));
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index dd5a08aaa4da..3eb4bfc1fb36 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -12,6 +12,7 @@
12 for the semaphore. */ 12 for the semaphore. */
13 13
14#define __PA_LDCW_ALIGNMENT 16 14#define __PA_LDCW_ALIGNMENT 16
15#define __PA_LDCW_ALIGN_ORDER 4
15#define __ldcw_align(a) ({ \ 16#define __ldcw_align(a) ({ \
16 unsigned long __ret = (unsigned long) &(a)->lock[0]; \ 17 unsigned long __ret = (unsigned long) &(a)->lock[0]; \
17 __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ 18 __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \
@@ -29,6 +30,7 @@
29 ldcd). */ 30 ldcd). */
30 31
31#define __PA_LDCW_ALIGNMENT 4 32#define __PA_LDCW_ALIGNMENT 4
33#define __PA_LDCW_ALIGN_ORDER 2
32#define __ldcw_align(a) (&(a)->slock) 34#define __ldcw_align(a) (&(a)->slock)
33#define __LDCW "ldcw,co" 35#define __LDCW "ldcw,co"
34 36
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index c980a02a52bc..598c8d60fa5e 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
35 35
36/* thread information allocation */ 36/* thread information allocation */
37 37
38#ifdef CONFIG_IRQSTACKS
39#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
40#else
38#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ 41#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */
42#endif
43
39/* Be sure to hunt all references to this down when you change the size of 44/* Be sure to hunt all references to this down when you change the size of
40 * the kernel stack */ 45 * the kernel stack */
41#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) 46#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 196d2a4efb31..286ef5a5904b 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += bpf_perf_event.h
5generic-y += kvm_para.h 6generic-y += kvm_para.h
6generic-y += param.h 7generic-y += param.h
7generic-y += poll.h 8generic-y += poll.h
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index d8f77358e2ba..29b99b8964aa 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev)
870 static int count; 870 static int count;
871 871
872 print_pa_hwpath(dev, hw_path); 872 print_pa_hwpath(dev, hw_path);
873 printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", 873 printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
874 ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, 874 ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type,
875 dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); 875 dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);
876 876
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index a4fd296c958e..e95207c0565e 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -35,6 +35,7 @@
35#include <asm/pgtable.h> 35#include <asm/pgtable.h>
36#include <asm/signal.h> 36#include <asm/signal.h>
37#include <asm/unistd.h> 37#include <asm/unistd.h>
38#include <asm/ldcw.h>
38#include <asm/thread_info.h> 39#include <asm/thread_info.h>
39 40
40#include <linux/linkage.h> 41#include <linux/linkage.h>
@@ -46,6 +47,14 @@
46#endif 47#endif
47 48
48 .import pa_tlb_lock,data 49 .import pa_tlb_lock,data
50 .macro load_pa_tlb_lock reg
51#if __PA_LDCW_ALIGNMENT > 4
52 load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
53 depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg
54#else
55 load32 PA(pa_tlb_lock), \reg
56#endif
57 .endm
49 58
50 /* space_to_prot macro creates a prot id from a space id */ 59 /* space_to_prot macro creates a prot id from a space id */
51 60
@@ -457,7 +466,7 @@
457 .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault 466 .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
458#ifdef CONFIG_SMP 467#ifdef CONFIG_SMP
459 cmpib,COND(=),n 0,\spc,2f 468 cmpib,COND(=),n 0,\spc,2f
460 load32 PA(pa_tlb_lock),\tmp 469 load_pa_tlb_lock \tmp
4611: LDCW 0(\tmp),\tmp1 4701: LDCW 0(\tmp),\tmp1
462 cmpib,COND(=) 0,\tmp1,1b 471 cmpib,COND(=) 0,\tmp1,1b
463 nop 472 nop
@@ -480,7 +489,7 @@
480 /* Release pa_tlb_lock lock. */ 489 /* Release pa_tlb_lock lock. */
481 .macro tlb_unlock1 spc,tmp 490 .macro tlb_unlock1 spc,tmp
482#ifdef CONFIG_SMP 491#ifdef CONFIG_SMP
483 load32 PA(pa_tlb_lock),\tmp 492 load_pa_tlb_lock \tmp
484 tlb_unlock0 \spc,\tmp 493 tlb_unlock0 \spc,\tmp
485#endif 494#endif
486 .endm 495 .endm
@@ -878,9 +887,6 @@ ENTRY_CFI(syscall_exit_rfi)
878 STREG %r19,PT_SR7(%r16) 887 STREG %r19,PT_SR7(%r16)
879 888
880intr_return: 889intr_return:
881 /* NOTE: Need to enable interrupts in case we schedule. */
882 ssm PSW_SM_I, %r0
883
884 /* check for reschedule */ 890 /* check for reschedule */
885 mfctl %cr30,%r1 891 mfctl %cr30,%r1
886 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ 892 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +913,11 @@ intr_check_sig:
907 LDREG PT_IASQ1(%r16), %r20 913 LDREG PT_IASQ1(%r16), %r20
908 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ 914 cmpib,COND(=),n 0,%r20,intr_restore /* backward */
909 915
916 /* NOTE: We need to enable interrupts if we have to deliver
917 * signals. We used to do this earlier but it caused kernel
918 * stack overflows. */
919 ssm PSW_SM_I, %r0
920
910 copy %r0, %r25 /* long in_syscall = 0 */ 921 copy %r0, %r25 /* long in_syscall = 0 */
911#ifdef CONFIG_64BIT 922#ifdef CONFIG_64BIT
912 ldo -16(%r30),%r29 /* Reference param save area */ 923 ldo -16(%r30),%r29 /* Reference param save area */
@@ -958,6 +969,10 @@ intr_do_resched:
958 cmpib,COND(=) 0, %r20, intr_do_preempt 969 cmpib,COND(=) 0, %r20, intr_do_preempt
959 nop 970 nop
960 971
972 /* NOTE: We need to enable interrupts if we schedule. We used
973 * to do this earlier but it caused kernel stack overflows. */
974 ssm PSW_SM_I, %r0
975
961#ifdef CONFIG_64BIT 976#ifdef CONFIG_64BIT
962 ldo -16(%r30),%r29 /* Reference param save area */ 977 ldo -16(%r30),%r29 /* Reference param save area */
963#endif 978#endif
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index e3a8e5e4d5de..8d072c44f300 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
305 305
306 306
307 __INITRODATA 307 __INITRODATA
308 .align 4
308 .export os_hpmc_size 309 .export os_hpmc_size
309os_hpmc_size: 310os_hpmc_size:
310 .word .os_hpmc_end-.os_hpmc 311 .word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index adf7187f8951..2d40c4ff3f69 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -36,6 +36,7 @@
36#include <asm/assembly.h> 36#include <asm/assembly.h>
37#include <asm/pgtable.h> 37#include <asm/pgtable.h>
38#include <asm/cache.h> 38#include <asm/cache.h>
39#include <asm/ldcw.h>
39#include <linux/linkage.h> 40#include <linux/linkage.h>
40 41
41 .text 42 .text
@@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local)
333 334
334 .macro tlb_lock la,flags,tmp 335 .macro tlb_lock la,flags,tmp
335#ifdef CONFIG_SMP 336#ifdef CONFIG_SMP
336 ldil L%pa_tlb_lock,%r1 337#if __PA_LDCW_ALIGNMENT > 4
337 ldo R%pa_tlb_lock(%r1),\la 338 load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
339 depi 0,31,__PA_LDCW_ALIGN_ORDER, \la
340#else
341 load32 pa_tlb_lock, \la
342#endif
338 rsm PSW_SM_I,\flags 343 rsm PSW_SM_I,\flags
3391: LDCW 0(\la),\tmp 3441: LDCW 0(\la),\tmp
340 cmpib,<>,n 0,\tmp,3f 345 cmpib,<>,n 0,\tmp,3f
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 30f92391a93e..cad3e8661cd6 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -39,6 +39,7 @@
39#include <linux/kernel.h> 39#include <linux/kernel.h>
40#include <linux/mm.h> 40#include <linux/mm.h>
41#include <linux/fs.h> 41#include <linux/fs.h>
42#include <linux/cpu.h>
42#include <linux/module.h> 43#include <linux/module.h>
43#include <linux/personality.h> 44#include <linux/personality.h>
44#include <linux/ptrace.h> 45#include <linux/ptrace.h>
@@ -184,6 +185,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
184} 185}
185 186
186/* 187/*
188 * Idle thread support
189 *
190 * Detect when running on QEMU with SeaBIOS PDC Firmware and let
191 * QEMU idle the host too.
192 */
193
194int running_on_qemu __read_mostly;
195
196void __cpuidle arch_cpu_idle_dead(void)
197{
198 /* nop on real hardware, qemu will offline CPU. */
199 asm volatile("or %%r31,%%r31,%%r31\n":::);
200}
201
202void __cpuidle arch_cpu_idle(void)
203{
204 local_irq_enable();
205
206 /* nop on real hardware, qemu will idle sleep. */
207 asm volatile("or %%r10,%%r10,%%r10\n":::);
208}
209
210static int __init parisc_idle_init(void)
211{
212 const char *marker;
213
214 /* check QEMU/SeaBIOS marker in PAGE0 */
215 marker = (char *) &PAGE0->pad0;
216 running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
217
218 if (!running_on_qemu)
219 cpu_idle_poll_ctrl(1);
220
221 return 0;
222}
223arch_initcall(parisc_idle_init);
224
225/*
187 * Copy architecture-specific thread state 226 * Copy architecture-specific thread state
188 */ 227 */
189int 228int
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5a657986ebbf..143f90e2f9f3 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/kallsyms.h> 16#include <linux/kallsyms.h>
17#include <linux/sort.h> 17#include <linux/sort.h>
18#include <linux/sched.h>
19 18
20#include <linux/uaccess.h> 19#include <linux/uaccess.h>
21#include <asm/assembly.h> 20#include <asm/assembly.h>
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
index 7eab4bb8abe6..66e506520505 100644
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
16#include <linux/preempt.h> 16#include <linux/preempt.h>
17#include <linux/init.h> 17#include <linux/init.h>
18 18
19#include <asm/processor.h>
20#include <asm/delay.h> 19#include <asm/delay.h>
21
22#include <asm/special_insns.h> /* for mfctl() */ 20#include <asm/special_insns.h> /* for mfctl() */
23#include <asm/processor.h> /* for boot_cpu_data */ 21#include <asm/processor.h> /* for boot_cpu_data */
24 22
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 13f7854e0d49..48f41399fc0b 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -631,11 +631,11 @@ void __init mem_init(void)
631 mem_init_print_info(NULL); 631 mem_init_print_info(NULL);
632#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ 632#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
633 printk("virtual kernel memory layout:\n" 633 printk("virtual kernel memory layout:\n"
634 " vmalloc : 0x%p - 0x%p (%4ld MB)\n" 634 " vmalloc : 0x%px - 0x%px (%4ld MB)\n"
635 " memory : 0x%p - 0x%p (%4ld MB)\n" 635 " memory : 0x%px - 0x%px (%4ld MB)\n"
636 " .init : 0x%p - 0x%p (%4ld kB)\n" 636 " .init : 0x%px - 0x%px (%4ld kB)\n"
637 " .data : 0x%p - 0x%p (%4ld kB)\n" 637 " .data : 0x%px - 0x%px (%4ld kB)\n"
638 " .text : 0x%p - 0x%p (%4ld kB)\n", 638 " .text : 0x%px - 0x%px (%4ld kB)\n",
639 639
640 (void*)VMALLOC_START, (void*)VMALLOC_END, 640 (void*)VMALLOC_START, (void*)VMALLOC_END,
641 (VMALLOC_END - VMALLOC_START) >> 20, 641 (VMALLOC_END - VMALLOC_START) >> 20,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9a677cd5997f..44697817ccc6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd)
1005} 1005}
1006#endif /* CONFIG_NUMA_BALANCING */ 1006#endif /* CONFIG_NUMA_BALANCING */
1007 1007
1008#define __HAVE_ARCH_PMD_WRITE
1009#define pmd_write(pmd) pte_write(pmd_pte(pmd)) 1008#define pmd_write(pmd) pte_write(pmd_pte(pmd))
1010#define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) 1009#define __pmd_write(pmd) __pte_write(pmd_pte(pmd))
1011#define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) 1010#define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd))
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index a703452d67b6..555e22d5e07f 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -209,5 +209,11 @@ exc_##label##_book3e:
209 ori r3,r3,vector_offset@l; \ 209 ori r3,r3,vector_offset@l; \
210 mtspr SPRN_IVOR##vector_number,r3; 210 mtspr SPRN_IVOR##vector_number,r3;
211 211
212#define RFI_TO_KERNEL \
213 rfi
214
215#define RFI_TO_USER \
216 rfi
217
212#endif /* _ASM_POWERPC_EXCEPTION_64E_H */ 218#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
213 219
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index b27205297e1d..7197b179c1b1 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,59 @@
74 */ 74 */
75#define EX_R3 EX_DAR 75#define EX_R3 EX_DAR
76 76
77/*
78 * Macros for annotating the expected destination of (h)rfid
79 *
80 * The nop instructions allow us to insert one or more instructions to flush the
81 * L1-D cache when returning to userspace or a guest.
82 */
83#define RFI_FLUSH_SLOT \
84 RFI_FLUSH_FIXUP_SECTION; \
85 nop; \
86 nop; \
87 nop
88
89#define RFI_TO_KERNEL \
90 rfid
91
92#define RFI_TO_USER \
93 RFI_FLUSH_SLOT; \
94 rfid; \
95 b rfi_flush_fallback
96
97#define RFI_TO_USER_OR_KERNEL \
98 RFI_FLUSH_SLOT; \
99 rfid; \
100 b rfi_flush_fallback
101
102#define RFI_TO_GUEST \
103 RFI_FLUSH_SLOT; \
104 rfid; \
105 b rfi_flush_fallback
106
107#define HRFI_TO_KERNEL \
108 hrfid
109
110#define HRFI_TO_USER \
111 RFI_FLUSH_SLOT; \
112 hrfid; \
113 b hrfi_flush_fallback
114
115#define HRFI_TO_USER_OR_KERNEL \
116 RFI_FLUSH_SLOT; \
117 hrfid; \
118 b hrfi_flush_fallback
119
120#define HRFI_TO_GUEST \
121 RFI_FLUSH_SLOT; \
122 hrfid; \
123 b hrfi_flush_fallback
124
125#define HRFI_TO_UNKNOWN \
126 RFI_FLUSH_SLOT; \
127 hrfid; \
128 b hrfi_flush_fallback
129
77#ifdef CONFIG_RELOCATABLE 130#ifdef CONFIG_RELOCATABLE
78#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ 131#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
79 mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ 132 mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
218 mtspr SPRN_##h##SRR0,r12; \ 271 mtspr SPRN_##h##SRR0,r12; \
219 mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ 272 mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
220 mtspr SPRN_##h##SRR1,r10; \ 273 mtspr SPRN_##h##SRR1,r10; \
221 h##rfid; \ 274 h##RFI_TO_KERNEL; \
222 b . /* prevent speculative execution */ 275 b . /* prevent speculative execution */
223#define EXCEPTION_PROLOG_PSERIES_1(label, h) \ 276#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
224 __EXCEPTION_PROLOG_PSERIES_1(label, h) 277 __EXCEPTION_PROLOG_PSERIES_1(label, h)
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
232 mtspr SPRN_##h##SRR0,r12; \ 285 mtspr SPRN_##h##SRR0,r12; \
233 mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ 286 mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
234 mtspr SPRN_##h##SRR1,r10; \ 287 mtspr SPRN_##h##SRR1,r10; \
235 h##rfid; \ 288 h##RFI_TO_KERNEL; \
236 b . /* prevent speculative execution */ 289 b . /* prevent speculative execution */
237 290
238#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ 291#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 8f88f771cc55..1e82eb3caabd 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,7 +187,20 @@ label##3: \
187 FTR_ENTRY_OFFSET label##1b-label##3b; \ 187 FTR_ENTRY_OFFSET label##1b-label##3b; \
188 .popsection; 188 .popsection;
189 189
190#define RFI_FLUSH_FIXUP_SECTION \
191951: \
192 .pushsection __rfi_flush_fixup,"a"; \
193 .align 2; \
194952: \
195 FTR_ENTRY_OFFSET 951b-952b; \
196 .popsection;
197
198
190#ifndef __ASSEMBLY__ 199#ifndef __ASSEMBLY__
200#include <linux/types.h>
201
202extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
203
191void apply_feature_fixups(void); 204void apply_feature_fixups(void);
192void setup_feature_keys(void); 205void setup_feature_keys(void);
193#endif 206#endif
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a409177be8bd..f0461618bf7b 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -241,6 +241,7 @@
241#define H_GET_HCA_INFO 0x1B8 241#define H_GET_HCA_INFO 0x1B8
242#define H_GET_PERF_COUNT 0x1BC 242#define H_GET_PERF_COUNT 0x1BC
243#define H_MANAGE_TRACE 0x1C0 243#define H_MANAGE_TRACE 0x1C0
244#define H_GET_CPU_CHARACTERISTICS 0x1C8
244#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 245#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
245#define H_QUERY_INT_STATE 0x1E4 246#define H_QUERY_INT_STATE 0x1E4
246#define H_POLL_PENDING 0x1D8 247#define H_POLL_PENDING 0x1D8
@@ -330,6 +331,17 @@
330#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 331#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
331/* >= 0 values are CPU number */ 332/* >= 0 values are CPU number */
332 333
334/* H_GET_CPU_CHARACTERISTICS return values */
335#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
336#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
337#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
338#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
339#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
340
341#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
342#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
343#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
344
333/* Flag values used in H_REGISTER_PROC_TBL hcall */ 345/* Flag values used in H_REGISTER_PROC_TBL hcall */
334#define PROC_TABLE_OP_MASK 0x18 346#define PROC_TABLE_OP_MASK 0x18
335#define PROC_TABLE_DEREG 0x10 347#define PROC_TABLE_DEREG 0x10
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
436 } 448 }
437} 449}
438 450
451struct h_cpu_char_result {
452 u64 character;
453 u64 behaviour;
454};
455
439#endif /* __ASSEMBLY__ */ 456#endif /* __ASSEMBLY__ */
440#endif /* __KERNEL__ */ 457#endif /* __KERNEL__ */
441#endif /* _ASM_POWERPC_HVCALL_H */ 458#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 96753f3aac6d..941c2a3f231b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -180,6 +180,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
180 struct iommu_group *grp); 180 struct iommu_group *grp);
181extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm); 181extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
182extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm); 182extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
183extern void kvmppc_setup_partition_table(struct kvm *kvm);
183 184
184extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 185extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
185 struct kvm_create_spapr_tce_64 *args); 186 struct kvm_create_spapr_tce_64 *args);
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 73b92017b6d7..cd2fc1cc1cc7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -76,6 +76,7 @@ struct machdep_calls {
76 76
77 void __noreturn (*restart)(char *cmd); 77 void __noreturn (*restart)(char *cmd);
78 void __noreturn (*halt)(void); 78 void __noreturn (*halt)(void);
79 void (*panic)(char *str);
79 void (*cpu_die)(void); 80 void (*cpu_die)(void);
80 81
81 long (*time_init)(void); /* Optional, may be NULL */ 82 long (*time_init)(void); /* Optional, may be NULL */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6177d43f0ce8..e2a2b8400490 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
160#endif 160#endif
161} 161}
162 162
163static inline void arch_dup_mmap(struct mm_struct *oldmm, 163static inline int arch_dup_mmap(struct mm_struct *oldmm,
164 struct mm_struct *mm) 164 struct mm_struct *mm)
165{ 165{
166 return 0;
166} 167}
167 168
168#ifndef CONFIG_PPC_BOOK3S_64 169#ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 3892db93b837..23ac7fc0af23 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -232,6 +232,16 @@ struct paca_struct {
232 struct sibling_subcore_state *sibling_subcore_state; 232 struct sibling_subcore_state *sibling_subcore_state;
233#endif 233#endif
234#endif 234#endif
235#ifdef CONFIG_PPC_BOOK3S_64
236 /*
237 * rfi fallback flush must be in its own cacheline to prevent
238 * other paca data leaking into the L1d
239 */
240 u64 exrfi[EX_SIZE] __aligned(0x80);
241 void *rfi_flush_fallback_area;
242 u64 l1d_flush_congruence;
243 u64 l1d_flush_sets;
244#endif
235}; 245};
236 246
237extern void copy_mm_to_paca(struct mm_struct *mm); 247extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 7f01b22fa6cb..55eddf50d149 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
326 return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); 326 return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
327} 327}
328 328
329static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
330{
331 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
332 long rc;
333
334 rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
335 if (rc == H_SUCCESS) {
336 p->character = retbuf[0];
337 p->behaviour = retbuf[1];
338 }
339
340 return rc;
341}
342
329#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ 343#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 257d23dbf55d..469b7fdc9be4 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -24,6 +24,7 @@ extern void reloc_got2(unsigned long);
24 24
25void check_for_initrd(void); 25void check_for_initrd(void);
26void initmem_init(void); 26void initmem_init(void);
27void setup_panic(void);
27#define ARCH_PANIC_TIMEOUT 180 28#define ARCH_PANIC_TIMEOUT 180
28 29
29#ifdef CONFIG_PPC_PSERIES 30#ifdef CONFIG_PPC_PSERIES
@@ -38,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
38static inline void pseries_little_endian_exceptions(void) {} 39static inline void pseries_little_endian_exceptions(void) {}
39#endif /* CONFIG_PPC_PSERIES */ 40#endif /* CONFIG_PPC_PSERIES */
40 41
42void rfi_flush_enable(bool enable);
43
44/* These are bit flags */
45enum l1d_flush_type {
46 L1D_FLUSH_NONE = 0x1,
47 L1D_FLUSH_FALLBACK = 0x2,
48 L1D_FLUSH_ORI = 0x4,
49 L1D_FLUSH_MTTRIG = 0x8,
50};
51
52void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
53void do_rfi_flush_fixups(enum l1d_flush_type types);
54
41#endif /* !__ASSEMBLY__ */ 55#endif /* !__ASSEMBLY__ */
42 56
43#endif /* _ASM_POWERPC_SETUP_H */ 57#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 0d960ef78a9a..1a6ed5919ffd 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
1# UAPI Header export list 1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bpf_perf_event.h
4generic-y += param.h 5generic-y += param.h
5generic-y += poll.h 6generic-y += poll.h
6generic-y += resource.h 7generic-y += resource.h
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6b958414b4e0..f390d57cf2e1 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -237,6 +237,11 @@ int main(void)
237 OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); 237 OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
238 OFFSET(PACA_IN_MCE, paca_struct, in_mce); 238 OFFSET(PACA_IN_MCE, paca_struct, in_mce);
239 OFFSET(PACA_IN_NMI, paca_struct, in_nmi); 239 OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
240 OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
241 OFFSET(PACA_EXRFI, paca_struct, exrfi);
242 OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
243 OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
244
240#endif 245#endif
241 OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); 246 OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
242 OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); 247 OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 610955fe8b81..679bbe714e85 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -102,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
102 li r0,0 102 li r0,0
103 mtspr SPRN_PSSCR,r0 103 mtspr SPRN_PSSCR,r0
104 mtspr SPRN_LPID,r0 104 mtspr SPRN_LPID,r0
105 mtspr SPRN_PID,r0
105 mfspr r3,SPRN_LPCR 106 mfspr r3,SPRN_LPCR
106 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) 107 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
107 or r3, r3, r4 108 or r3, r3, r4
@@ -126,6 +127,7 @@ _GLOBAL(__restore_cpu_power9)
126 li r0,0 127 li r0,0
127 mtspr SPRN_PSSCR,r0 128 mtspr SPRN_PSSCR,r0
128 mtspr SPRN_LPID,r0 129 mtspr SPRN_LPID,r0
130 mtspr SPRN_PID,r0
129 mfspr r3,SPRN_LPCR 131 mfspr r3,SPRN_LPCR
130 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) 132 LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
131 or r3, r3, r4 133 or r3, r3, r4
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 3320bcac7192..2748584b767d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -37,6 +37,11 @@
37#include <asm/tm.h> 37#include <asm/tm.h>
38#include <asm/ppc-opcode.h> 38#include <asm/ppc-opcode.h>
39#include <asm/export.h> 39#include <asm/export.h>
40#ifdef CONFIG_PPC_BOOK3S
41#include <asm/exception-64s.h>
42#else
43#include <asm/exception-64e.h>
44#endif
40 45
41/* 46/*
42 * System calls. 47 * System calls.
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
262END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 267END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
263 268
264 ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ 269 ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
270 ld r2,GPR2(r1)
271 ld r1,GPR1(r1)
272 mtlr r4
273 mtcr r5
274 mtspr SPRN_SRR0,r7
275 mtspr SPRN_SRR1,r8
276 RFI_TO_USER
277 b . /* prevent speculative execution */
278
279 /* exit to kernel */
2651: ld r2,GPR2(r1) 2801: ld r2,GPR2(r1)
266 ld r1,GPR1(r1) 281 ld r1,GPR1(r1)
267 mtlr r4 282 mtlr r4
268 mtcr r5 283 mtcr r5
269 mtspr SPRN_SRR0,r7 284 mtspr SPRN_SRR0,r7
270 mtspr SPRN_SRR1,r8 285 mtspr SPRN_SRR1,r8
271 RFI 286 RFI_TO_KERNEL
272 b . /* prevent speculative execution */ 287 b . /* prevent speculative execution */
273 288
274.Lsyscall_error: 289.Lsyscall_error:
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
397 mtmsrd r10, 1 412 mtmsrd r10, 1
398 mtspr SPRN_SRR0, r11 413 mtspr SPRN_SRR0, r11
399 mtspr SPRN_SRR1, r12 414 mtspr SPRN_SRR1, r12
400 415 RFI_TO_USER
401 rfid
402 b . /* prevent speculative execution */ 416 b . /* prevent speculative execution */
403#endif 417#endif
404_ASM_NOKPROBE_SYMBOL(system_call_common); 418_ASM_NOKPROBE_SYMBOL(system_call_common);
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
878END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 892END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
879 ACCOUNT_CPU_USER_EXIT(r13, r2, r4) 893 ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
880 REST_GPR(13, r1) 894 REST_GPR(13, r1)
8811: 895
882 mtspr SPRN_SRR1,r3 896 mtspr SPRN_SRR1,r3
883 897
884 ld r2,_CCR(r1) 898 ld r2,_CCR(r1)
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
891 ld r3,GPR3(r1) 905 ld r3,GPR3(r1)
892 ld r4,GPR4(r1) 906 ld r4,GPR4(r1)
893 ld r1,GPR1(r1) 907 ld r1,GPR1(r1)
908 RFI_TO_USER
909 b . /* prevent speculative execution */
894 910
895 rfid 9111: mtspr SPRN_SRR1,r3
912
913 ld r2,_CCR(r1)
914 mtcrf 0xFF,r2
915 ld r2,_NIP(r1)
916 mtspr SPRN_SRR0,r2
917
918 ld r0,GPR0(r1)
919 ld r2,GPR2(r1)
920 ld r3,GPR3(r1)
921 ld r4,GPR4(r1)
922 ld r1,GPR1(r1)
923 RFI_TO_KERNEL
896 b . /* prevent speculative execution */ 924 b . /* prevent speculative execution */
897 925
898#endif /* CONFIG_PPC_BOOK3E */ 926#endif /* CONFIG_PPC_BOOK3E */
@@ -1073,7 +1101,7 @@ __enter_rtas:
1073 1101
1074 mtspr SPRN_SRR0,r5 1102 mtspr SPRN_SRR0,r5
1075 mtspr SPRN_SRR1,r6 1103 mtspr SPRN_SRR1,r6
1076 rfid 1104 RFI_TO_KERNEL
1077 b . /* prevent speculative execution */ 1105 b . /* prevent speculative execution */
1078 1106
1079rtas_return_loc: 1107rtas_return_loc:
@@ -1098,7 +1126,7 @@ rtas_return_loc:
1098 1126
1099 mtspr SPRN_SRR0,r3 1127 mtspr SPRN_SRR0,r3
1100 mtspr SPRN_SRR1,r4 1128 mtspr SPRN_SRR1,r4
1101 rfid 1129 RFI_TO_KERNEL
1102 b . /* prevent speculative execution */ 1130 b . /* prevent speculative execution */
1103_ASM_NOKPROBE_SYMBOL(__enter_rtas) 1131_ASM_NOKPROBE_SYMBOL(__enter_rtas)
1104_ASM_NOKPROBE_SYMBOL(rtas_return_loc) 1132_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
1171 LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) 1199 LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
1172 andc r11,r11,r12 1200 andc r11,r11,r12
1173 mtsrr1 r11 1201 mtsrr1 r11
1174 rfid 1202 RFI_TO_KERNEL
1175#endif /* CONFIG_PPC_BOOK3E */ 1203#endif /* CONFIG_PPC_BOOK3E */
1176 1204
11771: /* Return from OF */ 12051: /* Return from OF */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e441b469dc8f..2dc10bf646b8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
256 LOAD_HANDLER(r12, machine_check_handle_early) 256 LOAD_HANDLER(r12, machine_check_handle_early)
2571: mtspr SPRN_SRR0,r12 2571: mtspr SPRN_SRR0,r12
258 mtspr SPRN_SRR1,r11 258 mtspr SPRN_SRR1,r11
259 rfid 259 RFI_TO_KERNEL
260 b . /* prevent speculative execution */ 260 b . /* prevent speculative execution */
2612: 2612:
262 /* Stack overflow. Stay on emergency stack and panic. 262 /* Stack overflow. Stay on emergency stack and panic.
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
445 li r3,MSR_ME 445 li r3,MSR_ME
446 andc r10,r10,r3 /* Turn off MSR_ME */ 446 andc r10,r10,r3 /* Turn off MSR_ME */
447 mtspr SPRN_SRR1,r10 447 mtspr SPRN_SRR1,r10
448 rfid 448 RFI_TO_KERNEL
449 b . 449 b .
4502: 4502:
451 /* 451 /*
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
463 */ 463 */
464 bl machine_check_queue_event 464 bl machine_check_queue_event
465 MACHINE_CHECK_HANDLER_WINDUP 465 MACHINE_CHECK_HANDLER_WINDUP
466 rfid 466 RFI_TO_USER_OR_KERNEL
4679: 4679:
468 /* Deliver the machine check to host kernel in V mode. */ 468 /* Deliver the machine check to host kernel in V mode. */
469 MACHINE_CHECK_HANDLER_WINDUP 469 MACHINE_CHECK_HANDLER_WINDUP
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
598 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ 598 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
599 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ 599 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
600 600
601 andi. r9,r11,MSR_PR // Check for exception from userspace
602 cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
603
601 /* 604 /*
602 * Test MSR_RI before calling slb_allocate_realmode, because the 605 * Test MSR_RI before calling slb_allocate_realmode, because the
603 * MSR in r11 gets clobbered. However we still want to allocate 606 * MSR in r11 gets clobbered. However we still want to allocate
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
624 627
625 /* All done -- return from exception. */ 628 /* All done -- return from exception. */
626 629
630 bne cr4,1f /* returning to kernel */
631
627.machine push 632.machine push
628.machine "power4" 633.machine "power4"
629 mtcrf 0x80,r9 634 mtcrf 0x80,r9
635 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
630 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ 636 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
631 mtcrf 0x02,r9 /* I/D indication is in cr6 */ 637 mtcrf 0x02,r9 /* I/D indication is in cr6 */
632 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ 638 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
640 ld r11,PACA_EXSLB+EX_R11(r13) 646 ld r11,PACA_EXSLB+EX_R11(r13)
641 ld r12,PACA_EXSLB+EX_R12(r13) 647 ld r12,PACA_EXSLB+EX_R12(r13)
642 ld r13,PACA_EXSLB+EX_R13(r13) 648 ld r13,PACA_EXSLB+EX_R13(r13)
643 rfid 649 RFI_TO_USER
650 b . /* prevent speculative execution */
6511:
652.machine push
653.machine "power4"
654 mtcrf 0x80,r9
655 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
656 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
657 mtcrf 0x02,r9 /* I/D indication is in cr6 */
658 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
659.machine pop
660
661 RESTORE_CTR(r9, PACA_EXSLB)
662 RESTORE_PPR_PACA(PACA_EXSLB, r9)
663 mr r3,r12
664 ld r9,PACA_EXSLB+EX_R9(r13)
665 ld r10,PACA_EXSLB+EX_R10(r13)
666 ld r11,PACA_EXSLB+EX_R11(r13)
667 ld r12,PACA_EXSLB+EX_R12(r13)
668 ld r13,PACA_EXSLB+EX_R13(r13)
669 RFI_TO_KERNEL
644 b . /* prevent speculative execution */ 670 b . /* prevent speculative execution */
645 671
672
6462: std r3,PACA_EXSLB+EX_DAR(r13) 6732: std r3,PACA_EXSLB+EX_DAR(r13)
647 mr r3,r12 674 mr r3,r12
648 mfspr r11,SPRN_SRR0 675 mfspr r11,SPRN_SRR0
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
651 mtspr SPRN_SRR0,r10 678 mtspr SPRN_SRR0,r10
652 ld r10,PACAKMSR(r13) 679 ld r10,PACAKMSR(r13)
653 mtspr SPRN_SRR1,r10 680 mtspr SPRN_SRR1,r10
654 rfid 681 RFI_TO_KERNEL
655 b . 682 b .
656 683
6578: std r3,PACA_EXSLB+EX_DAR(r13) 6848: std r3,PACA_EXSLB+EX_DAR(r13)
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
662 mtspr SPRN_SRR0,r10 689 mtspr SPRN_SRR0,r10
663 ld r10,PACAKMSR(r13) 690 ld r10,PACAKMSR(r13)
664 mtspr SPRN_SRR1,r10 691 mtspr SPRN_SRR1,r10
665 rfid 692 RFI_TO_KERNEL
666 b . 693 b .
667 694
668EXC_COMMON_BEGIN(unrecov_slb) 695EXC_COMMON_BEGIN(unrecov_slb)
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
901 mtspr SPRN_SRR0,r10 ; \ 928 mtspr SPRN_SRR0,r10 ; \
902 ld r10,PACAKMSR(r13) ; \ 929 ld r10,PACAKMSR(r13) ; \
903 mtspr SPRN_SRR1,r10 ; \ 930 mtspr SPRN_SRR1,r10 ; \
904 rfid ; \ 931 RFI_TO_KERNEL ; \
905 b . ; /* prevent speculative execution */ 932 b . ; /* prevent speculative execution */
906 933
907#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH 934#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
917 xori r12,r12,MSR_LE ; \ 944 xori r12,r12,MSR_LE ; \
918 mtspr SPRN_SRR1,r12 ; \ 945 mtspr SPRN_SRR1,r12 ; \
919 mr r13,r9 ; \ 946 mr r13,r9 ; \
920 rfid ; /* return to userspace */ \ 947 RFI_TO_USER ; /* return to userspace */ \
921 b . ; /* prevent speculative execution */ 948 b . ; /* prevent speculative execution */
922#else 949#else
923#define SYSCALL_FASTENDIAN_TEST 950#define SYSCALL_FASTENDIAN_TEST
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
1063 mtcr r11 1090 mtcr r11
1064 REST_GPR(11, r1) 1091 REST_GPR(11, r1)
1065 ld r1,GPR1(r1) 1092 ld r1,GPR1(r1)
1066 hrfid 1093 HRFI_TO_USER_OR_KERNEL
1067 1094
10681: mtcr r11 10951: mtcr r11
1069 REST_GPR(11, r1) 1096 REST_GPR(11, r1)
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1314 ld r11,PACA_EXGEN+EX_R11(r13) 1341 ld r11,PACA_EXGEN+EX_R11(r13)
1315 ld r12,PACA_EXGEN+EX_R12(r13) 1342 ld r12,PACA_EXGEN+EX_R12(r13)
1316 ld r13,PACA_EXGEN+EX_R13(r13) 1343 ld r13,PACA_EXGEN+EX_R13(r13)
1317 HRFID 1344 HRFI_TO_UNKNOWN
1318 b . 1345 b .
1319#endif 1346#endif
1320 1347
@@ -1418,10 +1445,94 @@ masked_##_H##interrupt: \
1418 ld r10,PACA_EXGEN+EX_R10(r13); \ 1445 ld r10,PACA_EXGEN+EX_R10(r13); \
1419 ld r11,PACA_EXGEN+EX_R11(r13); \ 1446 ld r11,PACA_EXGEN+EX_R11(r13); \
1420 /* returns to kernel where r13 must be set up, so don't restore it */ \ 1447 /* returns to kernel where r13 must be set up, so don't restore it */ \
1421 ##_H##rfid; \ 1448 ##_H##RFI_TO_KERNEL; \
1422 b .; \ 1449 b .; \
1423 MASKED_DEC_HANDLER(_H) 1450 MASKED_DEC_HANDLER(_H)
1424 1451
1452TRAMP_REAL_BEGIN(rfi_flush_fallback)
1453 SET_SCRATCH0(r13);
1454 GET_PACA(r13);
1455 std r9,PACA_EXRFI+EX_R9(r13)
1456 std r10,PACA_EXRFI+EX_R10(r13)
1457 std r11,PACA_EXRFI+EX_R11(r13)
1458 std r12,PACA_EXRFI+EX_R12(r13)
1459 std r8,PACA_EXRFI+EX_R13(r13)
1460 mfctr r9
1461 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
1462 ld r11,PACA_L1D_FLUSH_SETS(r13)
1463 ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
1464 /*
1465 * The load addresses are at staggered offsets within cachelines,
1466 * which suits some pipelines better (on others it should not
1467 * hurt).
1468 */
1469 addi r12,r12,8
1470 mtctr r11
1471 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
1472
1473 /* order ld/st prior to dcbt stop all streams with flushing */
1474 sync
14751: li r8,0
1476 .rept 8 /* 8-way set associative */
1477 ldx r11,r10,r8
1478 add r8,r8,r12
1479 xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
1480 add r8,r8,r11 // Add 0, this creates a dependency on the ldx
1481 .endr
1482 addi r10,r10,128 /* 128 byte cache line */
1483 bdnz 1b
1484
1485 mtctr r9
1486 ld r9,PACA_EXRFI+EX_R9(r13)
1487 ld r10,PACA_EXRFI+EX_R10(r13)
1488 ld r11,PACA_EXRFI+EX_R11(r13)
1489 ld r12,PACA_EXRFI+EX_R12(r13)
1490 ld r8,PACA_EXRFI+EX_R13(r13)
1491 GET_SCRATCH0(r13);
1492 rfid
1493
1494TRAMP_REAL_BEGIN(hrfi_flush_fallback)
1495 SET_SCRATCH0(r13);
1496 GET_PACA(r13);
1497 std r9,PACA_EXRFI+EX_R9(r13)
1498 std r10,PACA_EXRFI+EX_R10(r13)
1499 std r11,PACA_EXRFI+EX_R11(r13)
1500 std r12,PACA_EXRFI+EX_R12(r13)
1501 std r8,PACA_EXRFI+EX_R13(r13)
1502 mfctr r9
1503 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
1504 ld r11,PACA_L1D_FLUSH_SETS(r13)
1505 ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
1506 /*
1507 * The load addresses are at staggered offsets within cachelines,
1508 * which suits some pipelines better (on others it should not
1509 * hurt).
1510 */
1511 addi r12,r12,8
1512 mtctr r11
1513 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
1514
1515 /* order ld/st prior to dcbt stop all streams with flushing */
1516 sync
15171: li r8,0
1518 .rept 8 /* 8-way set associative */
1519 ldx r11,r10,r8
1520 add r8,r8,r12
1521 xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
1522 add r8,r8,r11 // Add 0, this creates a dependency on the ldx
1523 .endr
1524 addi r10,r10,128 /* 128 byte cache line */
1525 bdnz 1b
1526
1527 mtctr r9
1528 ld r9,PACA_EXRFI+EX_R9(r13)
1529 ld r10,PACA_EXRFI+EX_R10(r13)
1530 ld r11,PACA_EXRFI+EX_R11(r13)
1531 ld r12,PACA_EXRFI+EX_R12(r13)
1532 ld r8,PACA_EXRFI+EX_R13(r13)
1533 GET_SCRATCH0(r13);
1534 hrfid
1535
1425/* 1536/*
1426 * Real mode exceptions actually use this too, but alternate 1537 * Real mode exceptions actually use this too, but alternate
1427 * instruction code patches (which end up in the common .text area) 1538 * instruction code patches (which end up in the common .text area)
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
1441 addi r13, r13, 4 1552 addi r13, r13, 4
1442 mtspr SPRN_SRR0, r13 1553 mtspr SPRN_SRR0, r13
1443 GET_SCRATCH0(r13) 1554 GET_SCRATCH0(r13)
1444 rfid 1555 RFI_TO_KERNEL
1445 b . 1556 b .
1446 1557
1447TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) 1558TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
1453 addi r13, r13, 4 1564 addi r13, r13, 4
1454 mtspr SPRN_HSRR0, r13 1565 mtspr SPRN_HSRR0, r13
1455 GET_SCRATCH0(r13) 1566 GET_SCRATCH0(r13)
1456 hrfid 1567 HRFI_TO_KERNEL
1457 b . 1568 b .
1458#endif 1569#endif
1459 1570
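
The two fallback trampolines above are identical displacement flushes: with prefetch streams stopped, they read one line from each of the 8 ways per congruence class until every L1d line has been replaced by data from the per-cpu fallback area. A minimal C model of the loop (names invented; illustration only, the real flush must stay in assembly, and the model omits the load-to-address dependency trick the assembly uses to order the loads):

    #include <stdint.h>

    #define L1D_LINE_SIZE 128       /* 128 byte cache lines are mandatory */
    #define L1D_WAYS        8       /* the loop is coded for 8-way associativity */

    static void displacement_flush(const char *fallback_area,
                                   uint64_t sets, uint64_t congruence)
    {
            const char *line = fallback_area;
            uint64_t stride = congruence + 8;       /* staggered offsets */
            volatile uint64_t sink = 0;

            for (uint64_t set = 0; set < sets; set++) {
                    uint64_t off = 0;

                    for (int way = 0; way < L1D_WAYS; way++) {
                            /* one load per way; the asm also folds the loaded
                             * value back into the address to serialize loads */
                            sink += *(const volatile uint64_t *)(line + off);
                            off += stride;
                    }
                    line += L1D_LINE_SIZE;          /* next congruence class */
            }
            (void)sink;
    }
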
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 04ea5c04fd24..3c2c2688918f 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1462,25 +1462,6 @@ static void fadump_init_files(void)
1462 return; 1462 return;
1463} 1463}
1464 1464
1465static int fadump_panic_event(struct notifier_block *this,
1466 unsigned long event, void *ptr)
1467{
1468 /*
1469 * If firmware-assisted dump has been registered then trigger
1470 * firmware-assisted dump and let firmware handle everything
1471 * else. If this returns, then fadump was not registered, so
1472 * go through the rest of the panic path.
1473 */
1474 crash_fadump(NULL, ptr);
1475
1476 return NOTIFY_DONE;
1477}
1478
1479static struct notifier_block fadump_panic_block = {
1480 .notifier_call = fadump_panic_event,
1481 .priority = INT_MIN /* may not return; must be done last */
1482};
1483
1484/* 1465/*
1485 * Prepare for firmware-assisted dump. 1466 * Prepare for firmware-assisted dump.
1486 */ 1467 */
@@ -1513,9 +1494,6 @@ int __init setup_fadump(void)
1513 init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); 1494 init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
1514 fadump_init_files(); 1495 fadump_init_files();
1515 1496
1516 atomic_notifier_chain_register(&panic_notifier_list,
1517 &fadump_panic_block);
1518
1519 return 1; 1497 return 1;
1520} 1498}
1521subsys_initcall(setup_fadump); 1499subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 8ac0bd2bddb0..3280953a82cf 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -623,7 +623,9 @@ BEGIN_FTR_SECTION
623 * NOTE, we rely on r0 being 0 from above. 623 * NOTE, we rely on r0 being 0 from above.
624 */ 624 */
625 mtspr SPRN_IAMR,r0 625 mtspr SPRN_IAMR,r0
626BEGIN_FTR_SECTION_NESTED(42)
626 mtspr SPRN_AMOR,r0 627 mtspr SPRN_AMOR,r0
628END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42)
627END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 629END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
628 630
629 /* save regs for local vars on new stack. 631 /* save regs for local vars on new stack.
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index bfdd783e3916..72be0c32e902 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
1403 1403
1404 printk("NIP: "REG" LR: "REG" CTR: "REG"\n", 1404 printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
1405 regs->nip, regs->link, regs->ctr); 1405 regs->nip, regs->link, regs->ctr);
1406 printk("REGS: %p TRAP: %04lx %s (%s)\n", 1406 printk("REGS: %px TRAP: %04lx %s (%s)\n",
1407 regs, regs->trap, print_tainted(), init_utsname()->release); 1407 regs, regs->trap, print_tainted(), init_utsname()->release);
1408 printk("MSR: "REG" ", regs->msr); 1408 printk("MSR: "REG" ", regs->msr);
1409 print_msr_bits(regs->msr); 1409 print_msr_bits(regs->msr);
@@ -1569,16 +1569,22 @@ void arch_release_task_struct(struct task_struct *t)
1569 */ 1569 */
1570int set_thread_tidr(struct task_struct *t) 1570int set_thread_tidr(struct task_struct *t)
1571{ 1571{
1572 int rc;
1573
1572 if (!cpu_has_feature(CPU_FTR_ARCH_300)) 1574 if (!cpu_has_feature(CPU_FTR_ARCH_300))
1573 return -EINVAL; 1575 return -EINVAL;
1574 1576
1575 if (t != current) 1577 if (t != current)
1576 return -EINVAL; 1578 return -EINVAL;
1577 1579
1578 t->thread.tidr = assign_thread_tidr(); 1580 if (t->thread.tidr)
1579 if (t->thread.tidr < 0) 1581 return 0;
1580 return t->thread.tidr; 1582
1583 rc = assign_thread_tidr();
1584 if (rc < 0)
1585 return rc;
1581 1586
1587 t->thread.tidr = rc;
1582 mtspr(SPRN_TIDR, t->thread.tidr); 1588 mtspr(SPRN_TIDR, t->thread.tidr);
1583 1589
1584 return 0; 1590 return 0;
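
The reworked set_thread_tidr() above follows a common idempotent-allocation shape: bail out if an ID already exists, and only publish the new value on success. A self-contained sketch (struct thr and alloc_id are invented stand-ins, not kernel symbols):

    struct thr { int tidr; };

    static int alloc_id(void)               /* stand-in for assign_thread_tidr() */
    {
            static int next = 1;
            return next++;                  /* or a negative errno on failure */
    }

    static int set_id(struct thr *t)
    {
            int rc;

            if (t->tidr)                    /* already assigned: nothing to do */
                    return 0;

            rc = alloc_id();
            if (rc < 0)
                    return rc;              /* never store an error in tidr */

            t->tidr = rc;                   /* publish only on success */
            return 0;
    }
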
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2075322cd225..9d213542a48b 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -704,6 +704,30 @@ int check_legacy_ioport(unsigned long base_port)
704} 704}
705EXPORT_SYMBOL(check_legacy_ioport); 705EXPORT_SYMBOL(check_legacy_ioport);
706 706
707static int ppc_panic_event(struct notifier_block *this,
708 unsigned long event, void *ptr)
709{
710 /*
711 * If firmware-assisted dump has been registered then trigger
712 * firmware-assisted dump and let firmware handle everything else.
713 */
714 crash_fadump(NULL, ptr);
715 ppc_md.panic(ptr); /* May not return */
716 return NOTIFY_DONE;
717}
718
719static struct notifier_block ppc_panic_block = {
720 .notifier_call = ppc_panic_event,
721 .priority = INT_MIN /* may not return; must be done last */
722};
723
724void __init setup_panic(void)
725{
726 if (!ppc_md.panic)
727 return;
728 atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
729}
730
707#ifdef CONFIG_CHECK_CACHE_COHERENCY 731#ifdef CONFIG_CHECK_CACHE_COHERENCY
708/* 732/*
709 * For platforms that have configurable cache-coherency. This function 733 * For platforms that have configurable cache-coherency. This function
@@ -848,6 +872,9 @@ void __init setup_arch(char **cmdline_p)
848 /* Probe the machine type, establish ppc_md. */ 872 /* Probe the machine type, establish ppc_md. */
849 probe_machine(); 873 probe_machine();
850 874
875 /* Setup panic notifier if requested by the platform. */
876 setup_panic();
877
851 /* 878 /*
852 * Configure ppc_md.power_save (ppc32 only, 64-bit machines do 879 * Configure ppc_md.power_save (ppc32 only, 64-bit machines do
853 * it from their respective probe() function. 880 * it from their respective probe() function.
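
The panic notifier moves from fadump into generic setup code so that any platform providing ppc_md.panic gets hooked in. A hedged sketch of the same registration pattern (the my_* names are placeholders, not kernel symbols):

    #include <linux/init.h>
    #include <linux/kernel.h>
    #include <linux/notifier.h>

    static int my_panic_event(struct notifier_block *nb, unsigned long event,
                              void *ptr)
    {
            /* last-chance platform handling; like ppc_md.panic, may not return */
            return NOTIFY_DONE;
    }

    static struct notifier_block my_panic_block = {
            .notifier_call = my_panic_event,
            .priority = INT_MIN,    /* run after every other panic notifier */
    };

    static void __init my_setup_panic(void)
    {
            atomic_notifier_chain_register(&panic_notifier_list, &my_panic_block);
    }
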
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8956a9856604..491be4179ddd 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void)
801 return 0; 801 return 0;
802} 802}
803early_initcall(disable_hardlockup_detector); 803early_initcall(disable_hardlockup_detector);
804
805#ifdef CONFIG_PPC_BOOK3S_64
806static enum l1d_flush_type enabled_flush_types;
807static void *l1d_flush_fallback_area;
808static bool no_rfi_flush;
809bool rfi_flush;
810
811static int __init handle_no_rfi_flush(char *p)
812{
813 pr_info("rfi-flush: disabled on command line.\n");
814 no_rfi_flush = true;
815 return 0;
816}
817early_param("no_rfi_flush", handle_no_rfi_flush);
818
819/*
820 * The RFI flush is not KPTI, but because users will see doco that says to use
821 * nopti we hijack that option here to also disable the RFI flush.
822 */
823static int __init handle_no_pti(char *p)
824{
825 pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
826 handle_no_rfi_flush(NULL);
827 return 0;
828}
829early_param("nopti", handle_no_pti);
830
831static void do_nothing(void *unused)
832{
833 /*
834 * We don't need to do the flush explicitly; just entering and exiting the
835 * kernel is sufficient, as the RFI exit handlers will do the right thing.
836 */
837}
838
839void rfi_flush_enable(bool enable)
840{
841 if (rfi_flush == enable)
842 return;
843
844 if (enable) {
845 do_rfi_flush_fixups(enabled_flush_types);
846 on_each_cpu(do_nothing, NULL, 1);
847 } else
848 do_rfi_flush_fixups(L1D_FLUSH_NONE);
849
850 rfi_flush = enable;
851}
852
853static void init_fallback_flush(void)
854{
855 u64 l1d_size, limit;
856 int cpu;
857
858 l1d_size = ppc64_caches.l1d.size;
859 limit = min(safe_stack_limit(), ppc64_rma_size);
860
861 /*
862 * Align to L1d size, and size it at 2x L1d size, to catch possible
863 * hardware prefetch runoff. We don't have a recipe for load patterns to
864 * reliably avoid the prefetcher.
865 */
866 l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
867 memset(l1d_flush_fallback_area, 0, l1d_size * 2);
868
869 for_each_possible_cpu(cpu) {
870 /*
871 * The fallback flush is currently coded for 8-way
872 * associativity. Different associativity is possible, but it
873 * will be treated as 8-way and may not evict the lines as
874 * effectively.
875 *
876 * 128 byte lines are mandatory.
877 */
878 u64 c = l1d_size / 8;
879
880 paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
881 paca[cpu].l1d_flush_congruence = c;
882 paca[cpu].l1d_flush_sets = c / 128;
883 }
884}
885
886void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
887{
888 if (types & L1D_FLUSH_FALLBACK) {
889 pr_info("rfi-flush: Using fallback displacement flush\n");
890 init_fallback_flush();
891 }
892
893 if (types & L1D_FLUSH_ORI)
894 pr_info("rfi-flush: Using ori type flush\n");
895
896 if (types & L1D_FLUSH_MTTRIG)
897 pr_info("rfi-flush: Using mttrig type flush\n");
898
899 enabled_flush_types = types;
900
901 if (!no_rfi_flush)
902 rfi_flush_enable(enable);
903}
904#endif /* CONFIG_PPC_BOOK3S_64 */
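
A worked example of the init_fallback_flush() geometry above, assuming a 32 KiB, 8-way, 128 B/line L1d (the common case on these CPUs): 8 ways times 32 sets covers all 256 lines of the cache exactly once.

    #include <stdio.h>

    int main(void)
    {
            unsigned long l1d_size = 32 * 1024;      /* assumed 32 KiB L1d */
            unsigned long congruence = l1d_size / 8; /* bytes per way: 4096 */
            unsigned long sets = congruence / 128;   /* 128 B lines: 32 sets */

            /* 8 ways * 32 sets = 256 lines = 32 KiB / 128 B: full coverage */
            printf("congruence=%lu sets=%lu lines=%lu\n",
                   congruence, sets, sets * 8);
            return 0;
    }
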
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 0494e1566ee2..307843d23682 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -132,6 +132,15 @@ SECTIONS
132 /* Read-only data */ 132 /* Read-only data */
133 RO_DATA(PAGE_SIZE) 133 RO_DATA(PAGE_SIZE)
134 134
135#ifdef CONFIG_PPC64
136 . = ALIGN(8);
137 __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
138 __start___rfi_flush_fixup = .;
139 *(__rfi_flush_fixup)
140 __stop___rfi_flush_fixup = .;
141 }
142#endif
143
135 EXCEPTION_TABLE(0) 144 EXCEPTION_TABLE(0)
136 145
137 NOTES :kernel :notes 146 NOTES :kernel :notes
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 29ebe2fd5867..a93d719edc90 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
235 gpte->may_read = true; 235 gpte->may_read = true;
236 gpte->may_write = true; 236 gpte->may_write = true;
237 gpte->page_size = MMU_PAGE_4K; 237 gpte->page_size = MMU_PAGE_4K;
238 gpte->wimg = HPTE_R_M;
238 239
239 return 0; 240 return 0;
240 } 241 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 235319c2574e..b73dbc9e797d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
65 u32 order; 65 u32 order;
66 66
67 /* These fields protected by kvm->lock */ 67 /* These fields protected by kvm->lock */
68
69 /* Possible values and their usage:
70 * <0 an error occurred during allocation,
71 * -EBUSY allocation is in progress,
72 * 0 allocation made successfully.
73 */
68 int error; 74 int error;
69 bool prepare_done;
70 75
71 /* Private to the work thread, until prepare_done is true, 76 /* Private to the work thread, until error != -EBUSY,
72 * then protected by kvm->resize_hpt_sem */ 77 * then protected by kvm->lock.
78 */
73 struct kvm_hpt_info hpt; 79 struct kvm_hpt_info hpt;
74}; 80};
75 81
@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
159 * Reset all the reverse-mapping chains for all memslots 165 * Reset all the reverse-mapping chains for all memslots
160 */ 166 */
161 kvmppc_rmap_reset(kvm); 167 kvmppc_rmap_reset(kvm);
162 /* Ensure that each vcpu will flush its TLB on next entry. */
163 cpumask_setall(&kvm->arch.need_tlb_flush);
164 err = 0; 168 err = 0;
165 goto out; 169 goto out;
166 } 170 }
@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
176 kvmppc_set_hpt(kvm, &info); 180 kvmppc_set_hpt(kvm, &info);
177 181
178out: 182out:
183 if (err == 0)
184 /* Ensure that each vcpu will flush its TLB on next entry. */
185 cpumask_setall(&kvm->arch.need_tlb_flush);
186
179 mutex_unlock(&kvm->lock); 187 mutex_unlock(&kvm->lock);
180 return err; 188 return err;
181} 189}
@@ -1238,8 +1246,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
1238 unsigned long vpte, rpte, guest_rpte; 1246 unsigned long vpte, rpte, guest_rpte;
1239 int ret; 1247 int ret;
1240 struct revmap_entry *rev; 1248 struct revmap_entry *rev;
1241 unsigned long apsize, psize, avpn, pteg, hash; 1249 unsigned long apsize, avpn, pteg, hash;
1242 unsigned long new_idx, new_pteg, replace_vpte; 1250 unsigned long new_idx, new_pteg, replace_vpte;
1251 int pshift;
1243 1252
1244 hptep = (__be64 *)(old->virt + (idx << 4)); 1253 hptep = (__be64 *)(old->virt + (idx << 4));
1245 1254
@@ -1298,8 +1307,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
1298 goto out; 1307 goto out;
1299 1308
1300 rpte = be64_to_cpu(hptep[1]); 1309 rpte = be64_to_cpu(hptep[1]);
1301 psize = hpte_base_page_size(vpte, rpte); 1310 pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
1302 avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); 1311 avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
1303 pteg = idx / HPTES_PER_GROUP; 1312 pteg = idx / HPTES_PER_GROUP;
1304 if (vpte & HPTE_V_SECONDARY) 1313 if (vpte & HPTE_V_SECONDARY)
1305 pteg = ~pteg; 1314 pteg = ~pteg;
@@ -1311,20 +1320,20 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
1311 offset = (avpn & 0x1f) << 23; 1320 offset = (avpn & 0x1f) << 23;
1312 vsid = avpn >> 5; 1321 vsid = avpn >> 5;
1313 /* We can find more bits from the pteg value */ 1322 /* We can find more bits from the pteg value */
1314 if (psize < (1ULL << 23)) 1323 if (pshift < 23)
1315 offset |= ((vsid ^ pteg) & old_hash_mask) * psize; 1324 offset |= ((vsid ^ pteg) & old_hash_mask) << pshift;
1316 1325
1317 hash = vsid ^ (offset / psize); 1326 hash = vsid ^ (offset >> pshift);
1318 } else { 1327 } else {
1319 unsigned long offset, vsid; 1328 unsigned long offset, vsid;
1320 1329
1321 /* We only have 40 - 23 bits of seg_off in avpn */ 1330 /* We only have 40 - 23 bits of seg_off in avpn */
1322 offset = (avpn & 0x1ffff) << 23; 1331 offset = (avpn & 0x1ffff) << 23;
1323 vsid = avpn >> 17; 1332 vsid = avpn >> 17;
1324 if (psize < (1ULL << 23)) 1333 if (pshift < 23)
1325 offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; 1334 offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift;
1326 1335
1327 hash = vsid ^ (vsid << 25) ^ (offset / psize); 1336 hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
1328 } 1337 }
1329 1338
1330 new_pteg = hash & new_hash_mask; 1339 new_pteg = hash & new_hash_mask;
@@ -1412,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
1412 1421
1413static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) 1422static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
1414{ 1423{
1415 BUG_ON(kvm->arch.resize_hpt != resize); 1424 if (WARN_ON(!mutex_is_locked(&kvm->lock)))
1425 return;
1416 1426
1417 if (!resize) 1427 if (!resize)
1418 return; 1428 return;
1419 1429
1420 if (resize->hpt.virt) 1430 if (resize->error != -EBUSY) {
1421 kvmppc_free_hpt(&resize->hpt); 1431 if (resize->hpt.virt)
1432 kvmppc_free_hpt(&resize->hpt);
1433 kfree(resize);
1434 }
1422 1435
1423 kvm->arch.resize_hpt = NULL; 1436 if (kvm->arch.resize_hpt == resize)
1424 kfree(resize); 1437 kvm->arch.resize_hpt = NULL;
1425} 1438}
1426 1439
1427static void resize_hpt_prepare_work(struct work_struct *work) 1440static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1430,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
1430 struct kvm_resize_hpt, 1443 struct kvm_resize_hpt,
1431 work); 1444 work);
1432 struct kvm *kvm = resize->kvm; 1445 struct kvm *kvm = resize->kvm;
1433 int err; 1446 int err = 0;
1434 1447
1435 resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", 1448 if (WARN_ON(resize->error != -EBUSY))
1436 resize->order); 1449 return;
1437
1438 err = resize_hpt_allocate(resize);
1439 1450
1440 mutex_lock(&kvm->lock); 1451 mutex_lock(&kvm->lock);
1441 1452
1453 /* Request is still current? */
1454 if (kvm->arch.resize_hpt == resize) {
1455 /* We may request large allocations here:
1456 * do not hold kvm->lock while sleeping for a long time.
1457 */
1458 mutex_unlock(&kvm->lock);
1459
1460 resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
1461 resize->order);
1462
1463 err = resize_hpt_allocate(resize);
1464
1465 /* We have a strict assumption about -EBUSY
1466 * when preparing for HPT resize.
1467 */
1468 if (WARN_ON(err == -EBUSY))
1469 err = -EINPROGRESS;
1470
1471 mutex_lock(&kvm->lock);
1472 /* It is possible that kvm->arch.resize_hpt != resize
1473 * after we grab kvm->lock again.
1474 */
1475 }
1476
1442 resize->error = err; 1477 resize->error = err;
1443 resize->prepare_done = true; 1478
1479 if (kvm->arch.resize_hpt != resize)
1480 resize_hpt_release(kvm, resize);
1444 1481
1445 mutex_unlock(&kvm->lock); 1482 mutex_unlock(&kvm->lock);
1446} 1483}
@@ -1465,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
1465 1502
1466 if (resize) { 1503 if (resize) {
1467 if (resize->order == shift) { 1504 if (resize->order == shift) {
1468 /* Suitable resize in progress */ 1505 /* Suitable resize in progress? */
1469 if (resize->prepare_done) { 1506 ret = resize->error;
1470 ret = resize->error; 1507 if (ret == -EBUSY)
1471 if (ret != 0)
1472 resize_hpt_release(kvm, resize);
1473 } else {
1474 ret = 100; /* estimated time in ms */ 1508 ret = 100; /* estimated time in ms */
1475 } 1509 else if (ret)
1510 resize_hpt_release(kvm, resize);
1476 1511
1477 goto out; 1512 goto out;
1478 } 1513 }
@@ -1492,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
1492 ret = -ENOMEM; 1527 ret = -ENOMEM;
1493 goto out; 1528 goto out;
1494 } 1529 }
1530
1531 resize->error = -EBUSY;
1495 resize->order = shift; 1532 resize->order = shift;
1496 resize->kvm = kvm; 1533 resize->kvm = kvm;
1497 INIT_WORK(&resize->work, resize_hpt_prepare_work); 1534 INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1546,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
1546 if (!resize || (resize->order != shift)) 1583 if (!resize || (resize->order != shift))
1547 goto out; 1584 goto out;
1548 1585
1549 ret = -EBUSY;
1550 if (!resize->prepare_done)
1551 goto out;
1552
1553 ret = resize->error; 1586 ret = resize->error;
1554 if (ret != 0) 1587 if (ret)
1555 goto out; 1588 goto out;
1556 1589
1557 ret = resize_hpt_rehash(resize); 1590 ret = resize_hpt_rehash(resize);
1558 if (ret != 0) 1591 if (ret)
1559 goto out; 1592 goto out;
1560 1593
1561 resize_hpt_pivot(resize); 1594 resize_hpt_pivot(resize);
@@ -1801,6 +1834,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1801 ssize_t nb; 1834 ssize_t nb;
1802 long int err, ret; 1835 long int err, ret;
1803 int mmu_ready; 1836 int mmu_ready;
1837 int pshift;
1804 1838
1805 if (!access_ok(VERIFY_READ, buf, count)) 1839 if (!access_ok(VERIFY_READ, buf, count))
1806 return -EFAULT; 1840 return -EFAULT;
@@ -1855,6 +1889,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1855 err = -EINVAL; 1889 err = -EINVAL;
1856 if (!(v & HPTE_V_VALID)) 1890 if (!(v & HPTE_V_VALID))
1857 goto out; 1891 goto out;
1892 pshift = kvmppc_hpte_base_page_shift(v, r);
1893 if (pshift <= 0)
1894 goto out;
1858 lbuf += 2; 1895 lbuf += 2;
1859 nb += HPTE_SIZE; 1896 nb += HPTE_SIZE;
1860 1897
@@ -1869,14 +1906,18 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1869 goto out; 1906 goto out;
1870 } 1907 }
1871 if (!mmu_ready && is_vrma_hpte(v)) { 1908 if (!mmu_ready && is_vrma_hpte(v)) {
1872 unsigned long psize = hpte_base_page_size(v, r); 1909 unsigned long senc, lpcr;
1873 unsigned long senc = slb_pgsize_encoding(psize);
1874 unsigned long lpcr;
1875 1910
1911 senc = slb_pgsize_encoding(1ul << pshift);
1876 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1912 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1877 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1913 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1878 lpcr = senc << (LPCR_VRMASD_SH - 4); 1914 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1879 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); 1915 lpcr = senc << (LPCR_VRMASD_SH - 4);
1916 kvmppc_update_lpcr(kvm, lpcr,
1917 LPCR_VRMASD);
1918 } else {
1919 kvmppc_setup_partition_table(kvm);
1920 }
1880 mmu_ready = 1; 1921 mmu_ready = 1;
1881 } 1922 }
1882 ++i; 1923 ++i;
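
The rework above collapses error and prepare_done into a single three-state field. A sketch of the resulting status check, as kvm_vm_ioctl_resize_hpt_prepare() now applies it (illustration only):

    #include <errno.h>

    static long resize_prepare_status(int error)
    {
            if (error == -EBUSY)
                    return 100;     /* still allocating: estimated time in ms */
            if (error < 0)
                    return error;   /* allocation failed: release and report */
            return 0;               /* ready for the commit step */
    }
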
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 79ea3d9269db..2d46037ce936 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
120 120
121static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 121static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
122static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 122static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
123static void kvmppc_setup_partition_table(struct kvm *kvm);
124 123
125static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, 124static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
126 int *ip) 125 int *ip)
@@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
3574 return; 3573 return;
3575} 3574}
3576 3575
3577static void kvmppc_setup_partition_table(struct kvm *kvm) 3576void kvmppc_setup_partition_table(struct kvm *kvm)
3578{ 3577{
3579 unsigned long dw0, dw1; 3578 unsigned long dw0, dw1;
3580 3579
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2659844784b8..9c61f736c75b 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
79 mtmsrd r0,1 /* clear RI in MSR */ 79 mtmsrd r0,1 /* clear RI in MSR */
80 mtsrr0 r5 80 mtsrr0 r5
81 mtsrr1 r6 81 mtsrr1 r6
82 RFI 82 RFI_TO_KERNEL
83 83
84kvmppc_call_hv_entry: 84kvmppc_call_hv_entry:
85BEGIN_FTR_SECTION 85BEGIN_FTR_SECTION
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
199 mtmsrd r6, 1 /* Clear RI in MSR */ 199 mtmsrd r6, 1 /* Clear RI in MSR */
200 mtsrr0 r8 200 mtsrr0 r8
201 mtsrr1 r7 201 mtsrr1 r7
202 RFI 202 RFI_TO_KERNEL
203 203
204 /* Virtual-mode return */ 204 /* Virtual-mode return */
205.Lvirt_return: 205.Lvirt_return:
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1167 1167
1168 ld r0, VCPU_GPR(R0)(r4) 1168 ld r0, VCPU_GPR(R0)(r4)
1169 ld r4, VCPU_GPR(R4)(r4) 1169 ld r4, VCPU_GPR(R4)(r4)
1170 1170 HRFI_TO_GUEST
1171 hrfid
1172 b . 1171 b .
1173 1172
1174secondary_too_late: 1173secondary_too_late:
@@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
3320 ld r4, PACAKMSR(r13) 3319 ld r4, PACAKMSR(r13)
3321 mtspr SPRN_SRR0, r3 3320 mtspr SPRN_SRR0, r3
3322 mtspr SPRN_SRR1, r4 3321 mtspr SPRN_SRR1, r4
3323 rfid 3322 RFI_TO_KERNEL
33249: addi r3, r1, STACK_FRAME_OVERHEAD 33239: addi r3, r1, STACK_FRAME_OVERHEAD
3325 bl kvmppc_bad_interrupt 3324 bl kvmppc_bad_interrupt
3326 b 9b 3325 b 9b
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d0dc8624198f..7deaeeb14b93 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
60#define MSR_USER32 MSR_USER 60#define MSR_USER32 MSR_USER
61#define MSR_USER64 MSR_USER 61#define MSR_USER64 MSR_USER
62#define HW_PAGE_SIZE PAGE_SIZE 62#define HW_PAGE_SIZE PAGE_SIZE
63#define HPTE_R_M _PAGE_COHERENT
63#endif 64#endif
64 65
65static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) 66static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
557 pte.eaddr = eaddr; 558 pte.eaddr = eaddr;
558 pte.vpage = eaddr >> 12; 559 pte.vpage = eaddr >> 12;
559 pte.page_size = MMU_PAGE_64K; 560 pte.page_size = MMU_PAGE_64K;
561 pte.wimg = HPTE_R_M;
560 } 562 }
561 563
562 switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) { 564 switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 42a4b237df5f..34a5adeff084 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -46,6 +46,9 @@
46 46
47#define FUNC(name) name 47#define FUNC(name) name
48 48
49#define RFI_TO_KERNEL RFI
50#define RFI_TO_GUEST RFI
51
49.macro INTERRUPT_TRAMPOLINE intno 52.macro INTERRUPT_TRAMPOLINE intno
50 53
51.global kvmppc_trampoline_\intno 54.global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
141 GET_SCRATCH0(r13) 144 GET_SCRATCH0(r13)
142 145
143 /* And get back into the code */ 146 /* And get back into the code */
144 RFI 147 RFI_TO_KERNEL
145#endif 148#endif
146 149
147/* 150/*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
164 ori r5, r5, MSR_EE 167 ori r5, r5, MSR_EE
165 mtsrr0 r7 168 mtsrr0 r7
166 mtsrr1 r6 169 mtsrr1 r6
167 RFI 170 RFI_TO_KERNEL
168 171
169#include "book3s_segment.S" 172#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 2a2b96d53999..93a180ceefad 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -156,7 +156,7 @@ no_dcbz32_on:
156 PPC_LL r9, SVCPU_R9(r3) 156 PPC_LL r9, SVCPU_R9(r3)
157 PPC_LL r3, (SVCPU_R3)(r3) 157 PPC_LL r3, (SVCPU_R3)(r3)
158 158
159 RFI 159 RFI_TO_GUEST
160kvmppc_handler_trampoline_enter_end: 160kvmppc_handler_trampoline_enter_end:
161 161
162 162
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
407 cmpwi r12, BOOK3S_INTERRUPT_DOORBELL 407 cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
408 beqa BOOK3S_INTERRUPT_DOORBELL 408 beqa BOOK3S_INTERRUPT_DOORBELL
409 409
410 RFI 410 RFI_TO_KERNEL
411kvmppc_handler_trampoline_exit_end: 411kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e032..0d750d274c4e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
725 725
726 /* Return the per-cpu state for state saving/migration */ 726 /* Return the per-cpu state for state saving/migration */
727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | 727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; 728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
729 (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
729} 730}
730 731
731int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) 732int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1558 1559
1559 /* 1560 /*
1560 * Restore P and Q. If the interrupt was pending, we 1561 * Restore P and Q. If the interrupt was pending, we
1561 * force both P and Q, which will trigger a resend. 1562 * force Q and !P, which will trigger a resend.
1562 * 1563 *
1563 * That means that a guest that had both an interrupt 1564 * That means that a guest that had both an interrupt
1564 * pending (queued) and Q set will restore with only 1565 * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1566 * is perfectly fine as coalescing interrupts that haven't 1567 * is perfectly fine as coalescing interrupts that haven't
1567 * been presented yet is always allowed. 1568 * been presented yet is always allowed.
1568 */ 1569 */
1569 if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) 1570 if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
1570 state->old_p = true; 1571 state->old_p = true;
1571 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) 1572 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
1572 state->old_q = true; 1573 state->old_q = true;
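
The xive_set_source() fix above changes the restore rule for a pending interrupt from "force both P and Q" to "force Q and clear P", so the EOI path resends it instead of losing it. Restated as a stand-alone sketch with invented types:

    #include <stdbool.h>

    struct pq { bool p, q; };

    static struct pq restore_pq(bool presented, bool queued, bool pending)
    {
            struct pq s = { false, false };

            if (presented && !pending)
                    s.p = true;     /* in flight but not pending: keep P */
            if (queued || pending)
                    s.q = true;     /* pending forces Q with !P -> resend */
            return s;
    }
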
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6b6c53c42ac9..1915e86cef6f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1407,7 +1407,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1407int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 1407int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
1408{ 1408{
1409 int r; 1409 int r;
1410 sigset_t sigsaved;
1411 1410
1412 if (vcpu->mmio_needed) { 1411 if (vcpu->mmio_needed) {
1413 vcpu->mmio_needed = 0; 1412 vcpu->mmio_needed = 0;
@@ -1448,16 +1447,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
1448#endif 1447#endif
1449 } 1448 }
1450 1449
1451 if (vcpu->sigset_active) 1450 kvm_sigset_activate(vcpu);
1452 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1453 1451
1454 if (run->immediate_exit) 1452 if (run->immediate_exit)
1455 r = -EINTR; 1453 r = -EINTR;
1456 else 1454 else
1457 r = kvmppc_vcpu_run(run, vcpu); 1455 r = kvmppc_vcpu_run(run, vcpu);
1458 1456
1459 if (vcpu->sigset_active) 1457 kvm_sigset_deactivate(vcpu);
1460 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1461 1458
1462 return r; 1459 return r;
1463} 1460}
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 41cf5ae273cf..a95ea007d654 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
116 } 116 }
117} 117}
118 118
119#ifdef CONFIG_PPC_BOOK3S_64
120void do_rfi_flush_fixups(enum l1d_flush_type types)
121{
122 unsigned int instrs[3], *dest;
123 long *start, *end;
124 int i;
125
126 start = PTRRELOC(&__start___rfi_flush_fixup);
127 end = PTRRELOC(&__stop___rfi_flush_fixup);
128
129 instrs[0] = 0x60000000; /* nop */
130 instrs[1] = 0x60000000; /* nop */
131 instrs[2] = 0x60000000; /* nop */
132
133 if (types & L1D_FLUSH_FALLBACK)
134 /* b .+16 to fallback flush */
135 instrs[0] = 0x48000010;
136
137 i = 0;
138 if (types & L1D_FLUSH_ORI) {
139 instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
140 instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
141 }
142
143 if (types & L1D_FLUSH_MTTRIG)
144 instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
145
146 for (i = 0; start < end; start++, i++) {
147 dest = (void *)start + *start;
148
149 pr_devel("patching dest %lx\n", (unsigned long)dest);
150
151 patch_instruction(dest, instrs[0]);
152 patch_instruction(dest + 1, instrs[1]);
153 patch_instruction(dest + 2, instrs[2]);
154 }
155
156 printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
157}
158#endif /* CONFIG_PPC_BOOK3S_64 */
159
119void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) 160void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
120{ 161{
121 long *start, *end; 162 long *start, *end;
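
The fixup table walked by do_rfi_flush_fixups() above stores self-relative offsets: each __rfi_flush_fixup entry points at a 3-instruction patch site. A sketch of that walk over a hypothetical table (illustration only):

    #include <stdio.h>

    static void walk_fixups(long *start, long *end)
    {
            for (long *entry = start; entry < end; entry++) {
                    /* dest = (void *)entry + *entry: entry-relative offset */
                    unsigned int *site =
                            (unsigned int *)((char *)entry + *entry);

                    printf("would patch 3 instructions at %p\n", (void *)site);
            }
    }
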
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 4797d08581ce..6e1e39035380 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
145 return __bad_area(regs, address, SEGV_MAPERR); 145 return __bad_area(regs, address, SEGV_MAPERR);
146} 146}
147 147
148static noinline int bad_access(struct pt_regs *regs, unsigned long address)
149{
150 return __bad_area(regs, address, SEGV_ACCERR);
151}
152
148static int do_sigbus(struct pt_regs *regs, unsigned long address, 153static int do_sigbus(struct pt_regs *regs, unsigned long address,
149 unsigned int fault) 154 unsigned int fault)
150{ 155{
@@ -490,7 +495,7 @@ retry:
490 495
491good_area: 496good_area:
492 if (unlikely(access_error(is_write, is_exec, vma))) 497 if (unlikely(access_error(is_write, is_exec, vma)))
493 return bad_area(regs, address); 498 return bad_access(regs, address);
494 499
495 /* 500 /*
496 * If for any reason at all we couldn't handle the fault, 501 * If for any reason at all we couldn't handle the fault,
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 3848af167df9..640cf566e986 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -47,7 +47,8 @@
47 47
48DEFINE_RAW_SPINLOCK(native_tlbie_lock); 48DEFINE_RAW_SPINLOCK(native_tlbie_lock);
49 49
50static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) 50static inline unsigned long ___tlbie(unsigned long vpn, int psize,
51 int apsize, int ssize)
51{ 52{
52 unsigned long va; 53 unsigned long va;
53 unsigned int penc; 54 unsigned int penc;
@@ -100,7 +101,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
100 : "memory"); 101 : "memory");
101 break; 102 break;
102 } 103 }
103 trace_tlbie(0, 0, va, 0, 0, 0, 0); 104 return va;
105}
106
107static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
108{
109 unsigned long rb;
110
111 rb = ___tlbie(vpn, psize, apsize, ssize);
112 trace_tlbie(0, 0, rb, 0, 0, 0, 0);
104} 113}
105 114
106static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) 115static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
@@ -652,7 +661,7 @@ static void native_hpte_clear(void)
652 if (hpte_v & HPTE_V_VALID) { 661 if (hpte_v & HPTE_V_VALID) {
653 hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn); 662 hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
654 hptep->v = 0; 663 hptep->v = 0;
655 __tlbie(vpn, psize, apsize, ssize); 664 ___tlbie(vpn, psize, apsize, ssize);
656 } 665 }
657 } 666 }
658 667
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e81aff1..d183b4801bdb 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ emit_clear:
763 func = (u8 *) __bpf_call_base + imm; 763 func = (u8 *) __bpf_call_base + imm;
764 764
765 /* Save skb pointer if we need to re-cache skb data */ 765 /* Save skb pointer if we need to re-cache skb data */
766 if (bpf_helper_changes_pkt_data(func)) 766 if ((ctx->seen & SEEN_SKB) &&
767 bpf_helper_changes_pkt_data(func))
767 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); 768 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
768 769
769 bpf_jit_emit_func_call(image, ctx, (u64)func); 770 bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
772 PPC_MR(b2p[BPF_REG_0], 3); 773 PPC_MR(b2p[BPF_REG_0], 3);
773 774
774 /* refresh skb cache */ 775 /* refresh skb cache */
775 if (bpf_helper_changes_pkt_data(func)) { 776 if ((ctx->seen & SEEN_SKB) &&
777 bpf_helper_changes_pkt_data(func)) {
776 /* reload skb pointer to r3 */ 778 /* reload skb pointer to r3 */
777 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); 779 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
778 bpf_jit_emit_skb_loads(image, ctx); 780 bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 9e3da168d54c..fce545774d50 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
410 int ret; 410 int ret;
411 __u64 target; 411 __u64 target;
412 412
413 if (is_kernel_addr(addr)) 413 if (is_kernel_addr(addr)) {
414 return branch_target((unsigned int *)addr); 414 if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
415 return 0;
416
417 return branch_target(&instr);
418 }
415 419
416 /* Userspace: need copy instruction here then translate it */ 420 /* Userspace: need copy instruction here then translate it */
417 pagefault_disable(); 421 pagefault_disable();
@@ -1415,7 +1419,7 @@ static int collect_events(struct perf_event *group, int max_count,
1415 int n = 0; 1419 int n = 0;
1416 struct perf_event *event; 1420 struct perf_event *event;
1417 1421
1418 if (!is_software_event(group)) { 1422 if (group->pmu->task_ctx_nr == perf_hw_context) {
1419 if (n >= max_count) 1423 if (n >= max_count)
1420 return -1; 1424 return -1;
1421 ctrs[n] = group; 1425 ctrs[n] = group;
@@ -1423,7 +1427,7 @@ static int collect_events(struct perf_event *group, int max_count,
1423 events[n++] = group->hw.config; 1427 events[n++] = group->hw.config;
1424 } 1428 }
1425 list_for_each_entry(event, &group->sibling_list, group_entry) { 1429 list_for_each_entry(event, &group->sibling_list, group_entry) {
1426 if (!is_software_event(event) && 1430 if (event->pmu->task_ctx_nr == perf_hw_context &&
1427 event->state != PERF_EVENT_STATE_OFF) { 1431 event->state != PERF_EVENT_STATE_OFF) {
1428 if (n >= max_count) 1432 if (n >= max_count)
1429 return -1; 1433 return -1;
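
The BHRB fix above stops dereferencing kernel addresses reported by hardware, since they may point at unmapped memory. The guarded-read shape in isolation (sketch; kernel context assumed):

    #include <linux/uaccess.h>

    static unsigned int read_branch_instr(unsigned long addr)
    {
            unsigned int instr;

            /* returns non-zero if addr is not safely readable */
            if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
                    return 0;       /* treat as "no branch target" */

            return instr;
    }
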
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd73caa..be4e7f84f70a 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
310 return 0; 310 return 0;
311 311
312 /* 312 /*
313 * Check whether nest_imc is registered. We could end up here if the
314 * cpuhotplug callback registration fails, i.e., the callback invokes the
315 * offline path for all successfully registered nodes. At this stage,
316 * nest_imc pmu will not be registered and we should return here.
317 *
318 * We return with a zero since this is not an offline failure. And
319 * cpuhp_setup_state() returns the actual failure reason to the caller,
320 * which in turn will call the cleanup routine.
321 */
322 if (!nest_pmus)
323 return 0;
324
325 /*
313 * Now that this cpu is one of the designated, 326 * Now that this cpu is one of the designated,
314 * find a next cpu a) which is online and b) in same chip. 327 * find a next cpu a) which is online and b) in same chip.
315 */ 328 */
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1171 if (nest_pmus == 1) { 1184 if (nest_pmus == 1) {
1172 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); 1185 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
1173 kfree(nest_imc_refc); 1186 kfree(nest_imc_refc);
1187 kfree(per_nest_pmu_arr);
1174 } 1188 }
1175 1189
1176 if (nest_pmus > 0) 1190 if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1195 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); 1209 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
1196 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); 1210 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
1197 kfree(pmu_ptr); 1211 kfree(pmu_ptr);
1198 kfree(per_nest_pmu_arr);
1199 return; 1212 return;
1200} 1213}
1201 1214
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
1309 ret = nest_pmu_cpumask_init(); 1322 ret = nest_pmu_cpumask_init();
1310 if (ret) { 1323 if (ret) {
1311 mutex_unlock(&nest_init_lock); 1324 mutex_unlock(&nest_init_lock);
1325 kfree(nest_imc_refc);
1326 kfree(per_nest_pmu_arr);
1312 goto err_free; 1327 goto err_free;
1313 } 1328 }
1314 } 1329 }
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 1edfbc1e40f4..4fb21e17504a 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -37,13 +37,62 @@
37#include <asm/kexec.h> 37#include <asm/kexec.h>
38#include <asm/smp.h> 38#include <asm/smp.h>
39#include <asm/tm.h> 39#include <asm/tm.h>
40#include <asm/setup.h>
40 41
41#include "powernv.h" 42#include "powernv.h"
42 43
44static void pnv_setup_rfi_flush(void)
45{
46 struct device_node *np, *fw_features;
47 enum l1d_flush_type type;
48 int enable;
49
50 /* Default to fallback in case fw-features are not available */
51 type = L1D_FLUSH_FALLBACK;
52 enable = 1;
53
54 np = of_find_node_by_name(NULL, "ibm,opal");
55 fw_features = of_get_child_by_name(np, "fw-features");
56 of_node_put(np);
57
58 if (fw_features) {
59 np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
60 if (np && of_property_read_bool(np, "enabled"))
61 type = L1D_FLUSH_MTTRIG;
62
63 of_node_put(np);
64
65 np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
66 if (np && of_property_read_bool(np, "enabled"))
67 type = L1D_FLUSH_ORI;
68
69 of_node_put(np);
70
71 /* Enable unless firmware says NOT to */
72 enable = 2;
73 np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
74 if (np && of_property_read_bool(np, "disabled"))
75 enable--;
76
77 of_node_put(np);
78
79 np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
80 if (np && of_property_read_bool(np, "disabled"))
81 enable--;
82
83 of_node_put(np);
84 of_node_put(fw_features);
85 }
86
87 setup_rfi_flush(type, enable > 0);
88}
89
43static void __init pnv_setup_arch(void) 90static void __init pnv_setup_arch(void)
44{ 91{
45 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); 92 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
46 93
94 pnv_setup_rfi_flush();
95
47 /* Initialize SMP */ 96 /* Initialize SMP */
48 pnv_smp_init(); 97 pnv_smp_init();
49 98
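
When fw-features are present, the enable counter above starts at 2 and is decremented once per firmware "disabled" property, so the flush stays on unless firmware disables both the HV and PR flush requirements. Equivalently, as a sketch:

    #include <stdbool.h>

    static bool want_rfi_flush(bool hv_flush_disabled, bool pr_flush_disabled)
    {
            int enable = 2;

            if (hv_flush_disabled)
                    enable--;
            if (pr_flush_disabled)
                    enable--;

            return enable > 0;      /* off only if firmware disabled both */
    }
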
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 9dabea6e1443..6244bc849469 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -104,6 +104,20 @@ static void __noreturn ps3_halt(void)
104 ps3_sys_manager_halt(); /* never returns */ 104 ps3_sys_manager_halt(); /* never returns */
105} 105}
106 106
107static void ps3_panic(char *str)
108{
109 DBG("%s:%d %s\n", __func__, __LINE__, str);
110
111 smp_send_stop();
112 printk("\n");
113 printk(" System does not reboot automatically.\n");
114 printk(" Please press POWER button.\n");
115 printk("\n");
116
117 while(1)
118 lv1_pause(1);
119}
120
107#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \ 121#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
108 defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE) 122 defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
109static void __init prealloc(struct ps3_prealloc *p) 123static void __init prealloc(struct ps3_prealloc *p)
@@ -255,6 +269,7 @@ define_machine(ps3) {
255 .probe = ps3_probe, 269 .probe = ps3_probe,
256 .setup_arch = ps3_setup_arch, 270 .setup_arch = ps3_setup_arch,
257 .init_IRQ = ps3_init_IRQ, 271 .init_IRQ = ps3_init_IRQ,
272 .panic = ps3_panic,
258 .get_boot_time = ps3_get_boot_time, 273 .get_boot_time = ps3_get_boot_time,
259 .set_dabr = ps3_set_dabr, 274 .set_dabr = ps3_set_dabr,
260 .calibrate_decr = ps3_calibrate_decr, 275 .calibrate_decr = ps3_calibrate_decr,
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 6e35780c5962..a0b20c03f078 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
574 574
575static CLASS_ATTR_RW(dlpar); 575static CLASS_ATTR_RW(dlpar);
576 576
577static int __init pseries_dlpar_init(void) 577int __init dlpar_workqueue_init(void)
578{ 578{
579 if (pseries_hp_wq)
580 return 0;
581
579 pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", 582 pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
580 WQ_UNBOUND, 1); 583 WQ_UNBOUND, 1);
584
585 return pseries_hp_wq ? 0 : -ENOMEM;
586}
587
588static int __init dlpar_sysfs_init(void)
589{
590 int rc;
591
592 rc = dlpar_workqueue_init();
593 if (rc)
594 return rc;
595
581 return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); 596 return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
582} 597}
583machine_device_initcall(pseries, pseries_dlpar_init); 598machine_device_initcall(pseries, dlpar_sysfs_init);
584 599
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 4470a3194311..1ae1d9f4dbe9 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
98 return CMO_PageSize; 98 return CMO_PageSize;
99} 99}
100 100
101int dlpar_workqueue_init(void);
102
101#endif /* _PSERIES_PSERIES_H */ 103#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 4923ffe230cf..81d8614e7379 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
69 /* Hotplug Events */ 69 /* Hotplug Events */
70 np = of_find_node_by_path("/event-sources/hot-plug-events"); 70 np = of_find_node_by_path("/event-sources/hot-plug-events");
71 if (np != NULL) { 71 if (np != NULL) {
72 request_event_sources_irqs(np, ras_hotplug_interrupt, 72 if (dlpar_workqueue_init() == 0)
73 request_event_sources_irqs(np, ras_hotplug_interrupt,
73 "RAS_HOTPLUG"); 74 "RAS_HOTPLUG");
74 of_node_put(np); 75 of_node_put(np);
75 } 76 }
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 5f1beb8367ac..ae4f596273b5 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
459 of_pci_check_probe_only(); 459 of_pci_check_probe_only();
460} 460}
461 461
462static void pseries_setup_rfi_flush(void)
463{
464 struct h_cpu_char_result result;
465 enum l1d_flush_type types;
466 bool enable;
467 long rc;
468
469 /* Enable by default */
470 enable = true;
471
472 rc = plpar_get_cpu_characteristics(&result);
473 if (rc == H_SUCCESS) {
474 types = L1D_FLUSH_NONE;
475
476 if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
477 types |= L1D_FLUSH_MTTRIG;
478 if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
479 types |= L1D_FLUSH_ORI;
480
481 /* Use fallback if nothing set in hcall */
482 if (types == L1D_FLUSH_NONE)
483 types = L1D_FLUSH_FALLBACK;
484
485 if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
486 enable = false;
487 } else {
488 /* Default to fallback in case the hcall is not available */
489 types = L1D_FLUSH_FALLBACK;
490 }
491
492 setup_rfi_flush(types, enable);
493}
494
462static void __init pSeries_setup_arch(void) 495static void __init pSeries_setup_arch(void)
463{ 496{
464 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); 497 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
476 509
477 fwnmi_init(); 510 fwnmi_init();
478 511
512 pseries_setup_rfi_flush();
513
479 /* By default, only probe PCI (can be overridden by rtas_pci) */ 514 /* By default, only probe PCI (can be overridden by rtas_pci) */
480 pci_add_flags(PCI_PROBE_ONLY); 515 pci_add_flags(PCI_PROBE_ONLY);
481 516
@@ -726,6 +761,7 @@ define_machine(pseries) {
726 .pcibios_fixup = pSeries_final_fixup, 761 .pcibios_fixup = pSeries_final_fixup,
727 .restart = rtas_restart, 762 .restart = rtas_restart,
728 .halt = rtas_halt, 763 .halt = rtas_halt,
764 .panic = rtas_os_term,
729 .get_boot_time = rtas_get_boot_time, 765 .get_boot_time = rtas_get_boot_time,
730 .get_rtc_time = rtas_get_rtc_time, 766 .get_rtc_time = rtas_get_rtc_time,
731 .set_rtc_time = rtas_set_rtc_time, 767 .set_rtc_time = rtas_set_rtc_time,
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 44cbf4c12ea1..df95102e732c 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
354} 354}
355 355
356static struct lock_class_key fsl_msi_irq_class; 356static struct lock_class_key fsl_msi_irq_class;
357static struct lock_class_key fsl_msi_irq_request_class;
357 358
358static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, 359static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
359 int offset, int irq_index) 360 int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
373 dev_err(&dev->dev, "No memory for MSI cascade data\n"); 374 dev_err(&dev->dev, "No memory for MSI cascade data\n");
374 return -ENOMEM; 375 return -ENOMEM;
375 } 376 }
376 irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); 377 irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
378 &fsl_msi_irq_request_class);
377 cascade_data->index = offset; 379 cascade_data->index = offset;
378 cascade_data->msi_data = msi; 380 cascade_data->msi_data = msi;
379 cascade_data->virq = virt_msir; 381 cascade_data->virq = virt_msir;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1b2d8cb49abb..cab24f549e7c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1590,7 +1590,7 @@ static void print_bug_trap(struct pt_regs *regs)
1590 printf("kernel BUG at %s:%u!\n", 1590 printf("kernel BUG at %s:%u!\n",
1591 bug->file, bug->line); 1591 bug->file, bug->line);
1592#else 1592#else
1593 printf("kernel BUG at %p!\n", (void *)bug->bug_addr); 1593 printf("kernel BUG at %px!\n", (void *)bug->bug_addr);
1594#endif 1594#endif
1595#endif /* CONFIG_BUG */ 1595#endif /* CONFIG_BUG */
1596} 1596}
@@ -2329,7 +2329,7 @@ static void dump_one_paca(int cpu)
2329 2329
2330 p = &paca[cpu]; 2330 p = &paca[cpu];
2331 2331
2332 printf("paca for cpu 0x%x @ %p:\n", cpu, p); 2332 printf("paca for cpu 0x%x @ %px:\n", cpu, p);
2333 2333
2334 printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no"); 2334 printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no");
2335 printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no"); 2335 printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
@@ -2945,7 +2945,7 @@ static void show_task(struct task_struct *tsk)
2945 (tsk->exit_state & EXIT_DEAD) ? 'E' : 2945 (tsk->exit_state & EXIT_DEAD) ? 'E' :
2946 (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; 2946 (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
2947 2947
2948 printf("%p %016lx %6d %6d %c %2d %s\n", tsk, 2948 printf("%px %016lx %6d %6d %c %2d %s\n", tsk,
2949 tsk->thread.ksp, 2949 tsk->thread.ksp,
2950 tsk->pid, tsk->parent->pid, 2950 tsk->pid, tsk->parent->pid,
2951 state, task_thread_info(tsk)->cpu, 2951 state, task_thread_info(tsk)->cpu,
@@ -2988,7 +2988,7 @@ static void show_pte(unsigned long addr)
2988 2988
2989 if (setjmp(bus_error_jmp) != 0) { 2989 if (setjmp(bus_error_jmp) != 0) {
2990 catch_memory_errors = 0; 2990 catch_memory_errors = 0;
2991 printf("*** Error dumping pte for task %p\n", tsk); 2991 printf("*** Error dumping pte for task %px\n", tsk);
2992 return; 2992 return;
2993 } 2993 }
2994 2994
@@ -3074,7 +3074,7 @@ static void show_tasks(void)
3074 3074
3075 if (setjmp(bus_error_jmp) != 0) { 3075 if (setjmp(bus_error_jmp) != 0) {
3076 catch_memory_errors = 0; 3076 catch_memory_errors = 0;
3077 printf("*** Error dumping task %p\n", tsk); 3077 printf("*** Error dumping task %px\n", tsk);
3078 return; 3078 return;
3079 } 3079 }
3080 3080
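
The xmon conversions above follow the 4.15 printk change: %p output is hashed to avoid leaking kernel addresses, while the new %px prints the raw value for debugger output where the real address is the point. For example (sketch):

    #include <linux/printk.h>

    static void show_ptr(void *ptr)
    {
            pr_info("hashed: %p\n", ptr);   /* obfuscated by default */
            pr_info("raw:    %px\n", ptr);  /* real address, xmon-style */
    }
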
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index e69de29bb2d1..47dacf06c679 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -0,0 +1,75 @@
1CONFIG_SMP=y
2CONFIG_PCI=y
3CONFIG_PCIE_XILINX=y
4CONFIG_SYSVIPC=y
5CONFIG_POSIX_MQUEUE=y
6CONFIG_IKCONFIG=y
7CONFIG_IKCONFIG_PROC=y
8CONFIG_CGROUPS=y
9CONFIG_CGROUP_SCHED=y
10CONFIG_CFS_BANDWIDTH=y
11CONFIG_CGROUP_BPF=y
12CONFIG_NAMESPACES=y
13CONFIG_USER_NS=y
14CONFIG_BLK_DEV_INITRD=y
15CONFIG_EXPERT=y
16CONFIG_CHECKPOINT_RESTORE=y
17CONFIG_BPF_SYSCALL=y
18CONFIG_NET=y
19CONFIG_PACKET=y
20CONFIG_UNIX=y
21CONFIG_INET=y
22CONFIG_IP_MULTICAST=y
23CONFIG_IP_ADVANCED_ROUTER=y
24CONFIG_IP_PNP=y
25CONFIG_IP_PNP_DHCP=y
26CONFIG_IP_PNP_BOOTP=y
27CONFIG_IP_PNP_RARP=y
28CONFIG_NETLINK_DIAG=y
29CONFIG_DEVTMPFS=y
30CONFIG_BLK_DEV_LOOP=y
31CONFIG_VIRTIO_BLK=y
32CONFIG_BLK_DEV_SD=y
33CONFIG_BLK_DEV_SR=y
34CONFIG_ATA=y
35CONFIG_SATA_AHCI=y
36CONFIG_SATA_AHCI_PLATFORM=y
37CONFIG_NETDEVICES=y
38CONFIG_VIRTIO_NET=y
39CONFIG_MACB=y
40CONFIG_E1000E=y
41CONFIG_R8169=y
42CONFIG_MICROSEMI_PHY=y
43CONFIG_INPUT_MOUSEDEV=y
44CONFIG_SERIAL_8250=y
45CONFIG_SERIAL_8250_CONSOLE=y
46CONFIG_SERIAL_OF_PLATFORM=y
47# CONFIG_PTP_1588_CLOCK is not set
48CONFIG_DRM=y
49CONFIG_DRM_RADEON=y
50CONFIG_FRAMEBUFFER_CONSOLE=y
51CONFIG_USB=y
52CONFIG_USB_XHCI_HCD=y
53CONFIG_USB_XHCI_PLATFORM=y
54CONFIG_USB_EHCI_HCD=y
55CONFIG_USB_EHCI_HCD_PLATFORM=y
56CONFIG_USB_OHCI_HCD=y
57CONFIG_USB_OHCI_HCD_PLATFORM=y
58CONFIG_USB_STORAGE=y
59CONFIG_USB_UAS=y
60CONFIG_VIRTIO_MMIO=y
61CONFIG_RAS=y
62CONFIG_EXT4_FS=y
63CONFIG_EXT4_FS_POSIX_ACL=y
64CONFIG_AUTOFS4_FS=y
65CONFIG_MSDOS_FS=y
66CONFIG_VFAT_FS=y
67CONFIG_TMPFS=y
68CONFIG_TMPFS_POSIX_ACL=y
69CONFIG_NFS_FS=y
70CONFIG_NFS_V4=y
71CONFIG_NFS_V4_1=y
72CONFIG_NFS_V4_2=y
73CONFIG_ROOT_NFS=y
74# CONFIG_RCU_TRACE is not set
75CONFIG_CRYPTO_USER_API_HASH=y
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 18158be62a2b..970460a0b492 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -40,6 +40,7 @@ generic-y += resource.h
40generic-y += scatterlist.h 40generic-y += scatterlist.h
41generic-y += sections.h 41generic-y += sections.h
42generic-y += sembuf.h 42generic-y += sembuf.h
43generic-y += serial.h
43generic-y += setup.h 44generic-y += setup.h
44generic-y += shmbuf.h 45generic-y += shmbuf.h
45generic-y += shmparam.h 46generic-y += shmparam.h
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 6cbbb6a68d76..5ad4cb622bed 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -58,17 +58,17 @@
58#endif 58#endif
59 59
60#if (__SIZEOF_INT__ == 4) 60#if (__SIZEOF_INT__ == 4)
61#define INT __ASM_STR(.word) 61#define RISCV_INT __ASM_STR(.word)
62#define SZINT __ASM_STR(4) 62#define RISCV_SZINT __ASM_STR(4)
63#define LGINT __ASM_STR(2) 63#define RISCV_LGINT __ASM_STR(2)
64#else 64#else
65#error "Unexpected __SIZEOF_INT__" 65#error "Unexpected __SIZEOF_INT__"
66#endif 66#endif
67 67
68#if (__SIZEOF_SHORT__ == 2) 68#if (__SIZEOF_SHORT__ == 2)
69#define SHORT __ASM_STR(.half) 69#define RISCV_SHORT __ASM_STR(.half)
70#define SZSHORT __ASM_STR(2) 70#define RISCV_SZSHORT __ASM_STR(2)
71#define LGSHORT __ASM_STR(1) 71#define RISCV_LGSHORT __ASM_STR(1)
72#else 72#else
73#error "Unexpected __SIZEOF_SHORT__" 73#error "Unexpected __SIZEOF_SHORT__"
74#endif 74#endif
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index e2e37c57cbeb..e65d1cd89e28 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
50 * have the AQ or RL bits set. These don't return anything, so there's only 50 * have the AQ or RL bits set. These don't return anything, so there's only
51 * one version to worry about. 51 * one version to worry about.
52 */ 52 */
53#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix) \ 53#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
54static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ 54static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
55{ \ 55{ \
56 __asm__ __volatile__ ( \ 56 __asm__ __volatile__ ( \
57 "amo" #asm_op "." #asm_type " zero, %1, %0" \ 57 "amo" #asm_op "." #asm_type " zero, %1, %0" \
58 : "+A" (v->counter) \ 58 : "+A" (v->counter) \
59 : "r" (I) \ 59 : "r" (I) \
60 : "memory"); \ 60 : "memory"); \
61} 61}
62 62
63#ifdef CONFIG_GENERIC_ATOMIC64 63#ifdef CONFIG_GENERIC_ATOMIC64
64#define ATOMIC_OPS(op, asm_op, c_op, I) \ 64#define ATOMIC_OPS(op, asm_op, I) \
65 ATOMIC_OP (op, asm_op, c_op, I, w, int, ) 65 ATOMIC_OP (op, asm_op, I, w, int, )
66#else 66#else
67#define ATOMIC_OPS(op, asm_op, c_op, I) \ 67#define ATOMIC_OPS(op, asm_op, I) \
68 ATOMIC_OP (op, asm_op, c_op, I, w, int, ) \ 68 ATOMIC_OP (op, asm_op, I, w, int, ) \
69 ATOMIC_OP (op, asm_op, c_op, I, d, long, 64) 69 ATOMIC_OP (op, asm_op, I, d, long, 64)
70#endif 70#endif
71 71
72ATOMIC_OPS(add, add, +, i) 72ATOMIC_OPS(add, add, i)
73ATOMIC_OPS(sub, add, +, -i) 73ATOMIC_OPS(sub, add, -i)
74ATOMIC_OPS(and, and, &, i) 74ATOMIC_OPS(and, and, i)
75ATOMIC_OPS( or, or, |, i) 75ATOMIC_OPS( or, or, i)
76ATOMIC_OPS(xor, xor, ^, i) 76ATOMIC_OPS(xor, xor, i)
77 77
78#undef ATOMIC_OP 78#undef ATOMIC_OP
79#undef ATOMIC_OPS 79#undef ATOMIC_OPS
@@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^, i)
83 * There are two flavors of these: the arithmetic ops have both fetch and return 83 * There are two flavors of these: the arithmetic ops have both fetch and return
84 * versions, while the logical ops only have fetch versions. 84 * versions, while the logical ops only have fetch versions.
85 */ 85 */
86#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \ 86#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \
87static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \ 87static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \
88{ \ 88{ \
89 register c_type ret; \ 89 register c_type ret; \
@@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato
103 103
104#ifdef CONFIG_GENERIC_ATOMIC64 104#ifdef CONFIG_GENERIC_ATOMIC64
105#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ 105#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
106 ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ 106 ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \
107 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) 107 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, )
108#else 108#else
109#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ 109#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
110 ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ 110 ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \
111 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ 111 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
112 ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64) \ 112 ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \
113 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) 113 ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
114#endif 114#endif
115 115
@@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl, )
126#undef ATOMIC_OPS 126#undef ATOMIC_OPS
127 127
128#ifdef CONFIG_GENERIC_ATOMIC64 128#ifdef CONFIG_GENERIC_ATOMIC64
129#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ 129#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \
130 ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) 130 ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, )
131#else 131#else
132#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ 132#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \
133 ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ 133 ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \
134 ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) 134 ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64)
135#endif 135#endif
136 136
137ATOMIC_OPS(and, and, &, i, , _relaxed) 137ATOMIC_OPS(and, and, i, , _relaxed)
138ATOMIC_OPS(and, and, &, i, .aq , _acquire) 138ATOMIC_OPS(and, and, i, .aq , _acquire)
139ATOMIC_OPS(and, and, &, i, .rl , _release) 139ATOMIC_OPS(and, and, i, .rl , _release)
140ATOMIC_OPS(and, and, &, i, .aqrl, ) 140ATOMIC_OPS(and, and, i, .aqrl, )
141 141
142ATOMIC_OPS( or, or, |, i, , _relaxed) 142ATOMIC_OPS( or, or, i, , _relaxed)
143ATOMIC_OPS( or, or, |, i, .aq , _acquire) 143ATOMIC_OPS( or, or, i, .aq , _acquire)
144ATOMIC_OPS( or, or, |, i, .rl , _release) 144ATOMIC_OPS( or, or, i, .rl , _release)
145ATOMIC_OPS( or, or, |, i, .aqrl, ) 145ATOMIC_OPS( or, or, i, .aqrl, )
146 146
147ATOMIC_OPS(xor, xor, ^, i, , _relaxed) 147ATOMIC_OPS(xor, xor, i, , _relaxed)
148ATOMIC_OPS(xor, xor, ^, i, .aq , _acquire) 148ATOMIC_OPS(xor, xor, i, .aq , _acquire)
149ATOMIC_OPS(xor, xor, ^, i, .rl , _release) 149ATOMIC_OPS(xor, xor, i, .rl , _release)
150ATOMIC_OPS(xor, xor, ^, i, .aqrl, ) 150ATOMIC_OPS(xor, xor, i, .aqrl, )
151 151
152#undef ATOMIC_OPS 152#undef ATOMIC_OPS
153 153
@@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add, <, 0)
182#undef ATOMIC_OP 182#undef ATOMIC_OP
183#undef ATOMIC_OPS 183#undef ATOMIC_OPS
184 184
185#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix) \ 185#define ATOMIC_OP(op, func_op, I, c_type, prefix) \
186static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \ 186static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \
187{ \ 187{ \
188 atomic##prefix##_##func_op(I, v); \ 188 atomic##prefix##_##func_op(I, v); \
189} 189}
190 190
191#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix) \ 191#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \
192static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ 192static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \
193{ \ 193{ \
194 return atomic##prefix##_fetch_##func_op(I, v); \ 194 return atomic##prefix##_fetch_##func_op(I, v); \
@@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t
202 202
203#ifdef CONFIG_GENERIC_ATOMIC64 203#ifdef CONFIG_GENERIC_ATOMIC64
204#define ATOMIC_OPS(op, asm_op, c_op, I) \ 204#define ATOMIC_OPS(op, asm_op, c_op, I) \
205 ATOMIC_OP (op, asm_op, c_op, I, int, ) \ 205 ATOMIC_OP (op, asm_op, I, int, ) \
206 ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ 206 ATOMIC_FETCH_OP (op, asm_op, I, int, ) \
207 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) 207 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, )
208#else 208#else
209#define ATOMIC_OPS(op, asm_op, c_op, I) \ 209#define ATOMIC_OPS(op, asm_op, c_op, I) \
210 ATOMIC_OP (op, asm_op, c_op, I, int, ) \ 210 ATOMIC_OP (op, asm_op, I, int, ) \
211 ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ 211 ATOMIC_FETCH_OP (op, asm_op, I, int, ) \
212 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ 212 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \
213 ATOMIC_OP (op, asm_op, c_op, I, long, 64) \ 213 ATOMIC_OP (op, asm_op, I, long, 64) \
214 ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64) \ 214 ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \
215 ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64) 215 ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
216#endif 216#endif
217 217
@@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
300 300
301/* 301/*
302 * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as 302 * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
303 * {cmp,}xchg and the operations that return, so they need a barrier. We just 303 * {cmp,}xchg and the operations that return, so they need a barrier.
304 * use the other implementations directly. 304 */
305/*
306 * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
307 * assigning the same barrier to both the LR and SC operations, but that might
308 * not make any sense. We're waiting on a memory model specification to
309 * determine exactly what the right thing to do is here.
305 */ 310 */
306#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \ 311#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \
307static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \ 312static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 183534b7c39b..c0319cbf1eec 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -39,27 +39,23 @@
39#define smp_wmb() RISCV_FENCE(w,w) 39#define smp_wmb() RISCV_FENCE(w,w)
40 40
41/* 41/*
42 * These fences exist to enforce ordering around the relaxed AMOs. The 42 * This is a very specific barrier: it's currently only used in two places in
43 * documentation defines that 43 * the kernel, both in the scheduler. See include/linux/spinlock.h for the two
44 * " 44 * orderings it guarantees, but the "critical section is RCsc" guarantee
45 * atomic_fetch_add(); 45 * mandates a barrier on RISC-V. The sequence looks like:
46 * is equivalent to: 46 *
47 * smp_mb__before_atomic(); 47 * lr.aq lock
48 * atomic_fetch_add_relaxed(); 48 * sc lock <= LOCKED
49 * smp_mb__after_atomic(); 49 * smp_mb__after_spinlock()
50 * " 50 * // critical section
51 * So we emit full fences on both sides. 51 * lr lock
52 */ 52 * sc.rl lock <= UNLOCKED
53#define __smb_mb__before_atomic() smp_mb() 53 *
54#define __smb_mb__after_atomic() smp_mb() 54 * The AQ/RL pair provides a RCpc critical section, but there's not really any
55 55 * way we can take advantage of that here because the ordering is only enforced
56/* 56 * on that one lock. Thus, we're just doing a full fence.
57 * These barriers prevent accesses performed outside a spinlock from being moved
58 * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only
59 * enforce release consistency, we need full fences here.
60 */ 57 */
61#define smb_mb__before_spinlock() smp_mb() 58#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw)
62#define smb_mb__after_spinlock() smp_mb()
63 59
64#include <asm-generic/barrier.h> 60#include <asm-generic/barrier.h>
65 61
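
The new comment describes the lock-acquisition shape this barrier is meant to follow. A schematic of the call site (illustrative names, not the scheduler's literal code):

    raw_spin_lock(&rq->lock);       /* lr.aq/sc pair: acquire is only RCpc */
    smp_mb__after_spinlock();       /* full rw,rw fence: upgrades the critical section to RCsc */
    /* critical section: now ordered against accesses before any prior unlock */
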
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 7c281ef1d583..f30daf26f08f 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -67,7 +67,7 @@
67 : "memory"); 67 : "memory");
68 68
69#define __test_and_op_bit(op, mod, nr, addr) \ 69#define __test_and_op_bit(op, mod, nr, addr) \
70 __test_and_op_bit_ord(op, mod, nr, addr, ) 70 __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
71#define __op_bit(op, mod, nr, addr) \ 71#define __op_bit(op, mod, nr, addr) \
72 __op_bit_ord(op, mod, nr, addr, ) 72 __op_bit_ord(op, mod, nr, addr, )
73 73
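
Appending .aqrl makes the test-and-set AMOs fully ordered, matching the generic contract that test_and_set_bit() and friends act as full barriers. Assuming the usual shape of __test_and_op_bit_ord() (only its tail is visible in this hunk), test_and_set_bit() on RV64 now emits roughly:

    amoor.d.aqrl  t0, t1, (t2)      /* previously plain amoor.d, with no ordering bits */

where t1 holds BIT_MASK(nr), (t2) addresses the target word, and the old bit value is recovered from t0.
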
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index c3e13764a943..bfc7f099ab1f 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -27,8 +27,8 @@
27typedef u32 bug_insn_t; 27typedef u32 bug_insn_t;
28 28
29#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS 29#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
30#define __BUG_ENTRY_ADDR INT " 1b - 2b" 30#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b"
31#define __BUG_ENTRY_FILE INT " %0 - 2b" 31#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b"
32#else 32#else
33#define __BUG_ENTRY_ADDR RISCV_PTR " 1b" 33#define __BUG_ENTRY_ADDR RISCV_PTR " 1b"
34#define __BUG_ENTRY_FILE RISCV_PTR " %0" 34#define __BUG_ENTRY_FILE RISCV_PTR " %0"
@@ -38,7 +38,7 @@ typedef u32 bug_insn_t;
38#define __BUG_ENTRY \ 38#define __BUG_ENTRY \
39 __BUG_ENTRY_ADDR "\n\t" \ 39 __BUG_ENTRY_ADDR "\n\t" \
40 __BUG_ENTRY_FILE "\n\t" \ 40 __BUG_ENTRY_FILE "\n\t" \
41 SHORT " %1" 41 RISCV_SHORT " %1"
42#else 42#else
43#define __BUG_ENTRY \ 43#define __BUG_ENTRY \
44 __BUG_ENTRY_ADDR 44 __BUG_ENTRY_ADDR
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 0595585013b0..efd89a88d2d0 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -18,22 +18,44 @@
18 18
19#undef flush_icache_range 19#undef flush_icache_range
20#undef flush_icache_user_range 20#undef flush_icache_user_range
21#undef flush_dcache_page
21 22
22static inline void local_flush_icache_all(void) 23static inline void local_flush_icache_all(void)
23{ 24{
24 asm volatile ("fence.i" ::: "memory"); 25 asm volatile ("fence.i" ::: "memory");
25} 26}
26 27
28#define PG_dcache_clean PG_arch_1
29
30static inline void flush_dcache_page(struct page *page)
31{
32 if (test_bit(PG_dcache_clean, &page->flags))
33 clear_bit(PG_dcache_clean, &page->flags);
34}
35
36/*
37 * RISC-V doesn't have an instruction to flush parts of the instruction cache,
38 * so instead we just flush the whole thing.
39 */
40#define flush_icache_range(start, end) flush_icache_all()
41#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
42
27#ifndef CONFIG_SMP 43#ifndef CONFIG_SMP
28 44
29#define flush_icache_range(start, end) local_flush_icache_all() 45#define flush_icache_all() local_flush_icache_all()
30#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all() 46#define flush_icache_mm(mm, local) flush_icache_all()
31 47
32#else /* CONFIG_SMP */ 48#else /* CONFIG_SMP */
33 49
34#define flush_icache_range(start, end) sbi_remote_fence_i(0) 50#define flush_icache_all() sbi_remote_fence_i(0)
35#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0) 51void flush_icache_mm(struct mm_struct *mm, bool local);
36 52
37#endif /* CONFIG_SMP */ 53#endif /* CONFIG_SMP */
38 54
55/*
56 * Bits in sys_riscv_flush_icache()'s flags argument.
57 */
58#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL
59#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL)
60
39#endif /* _ASM_RISCV_CACHEFLUSH_H */ 61#endif /* _ASM_RISCV_CACHEFLUSH_H */
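
Since flush_icache_range() is now a whole-icache flush (RISC-V's fence.i has no ranged form), every code-patching caller pays the full-flush cost regardless of range size. A minimal sketch of such a caller, with hypothetical names (patch_insn and its call site are assumptions, not part of this patch):

    static void patch_insn(u32 *site, u32 insn)
    {
            *site = insn;                               /* store the new instruction */
            flush_icache_range((unsigned long)site,
                               (unsigned long)(site + 1)); /* whole I$ is flushed */
    }
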
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 0d64bc9f4f91..3c7a2c97e377 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -17,10 +17,10 @@
17#include <linux/const.h> 17#include <linux/const.h>
18 18
19/* Status register flags */ 19/* Status register flags */
20#define SR_IE _AC(0x00000002, UL) /* Interrupt Enable */ 20#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
21#define SR_PIE _AC(0x00000020, UL) /* Previous IE */ 21#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */
22#define SR_PS _AC(0x00000100, UL) /* Previously Supervisor */ 22#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
23#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */ 23#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */
24 24
25#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */ 25#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */
26#define SR_FS_OFF _AC(0x00000000, UL) 26#define SR_FS_OFF _AC(0x00000000, UL)
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index c1f32cfcc79b..b269451e7e85 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -19,7 +19,7 @@
19#ifndef _ASM_RISCV_IO_H 19#ifndef _ASM_RISCV_IO_H
20#define _ASM_RISCV_IO_H 20#define _ASM_RISCV_IO_H
21 21
22#ifdef CONFIG_MMU 22#include <linux/types.h>
23 23
24extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); 24extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
25 25
@@ -32,9 +32,7 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
32#define ioremap_wc(addr, size) ioremap((addr), (size)) 32#define ioremap_wc(addr, size) ioremap((addr), (size))
33#define ioremap_wt(addr, size) ioremap((addr), (size)) 33#define ioremap_wt(addr, size) ioremap((addr), (size))
34 34
35extern void iounmap(void __iomem *addr); 35extern void iounmap(volatile void __iomem *addr);
36
37#endif /* CONFIG_MMU */
38 36
39/* Generic IO read/write. These perform native-endian accesses. */ 37/* Generic IO read/write. These perform native-endian accesses. */
40#define __raw_writeb __raw_writeb 38#define __raw_writeb __raw_writeb
@@ -250,7 +248,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
250 const ctype *buf = buffer; \ 248 const ctype *buf = buffer; \
251 \ 249 \
252 do { \ 250 do { \
253 __raw_writeq(*buf++, addr); \ 251 __raw_write ## len(*buf++, addr); \
254 } while (--count); \ 252 } while (--count); \
255 } \ 253 } \
256 afence; \ 254 afence; \
@@ -266,9 +264,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar())
266__io_reads_ins(ins, u8, b, __io_pbr(), __io_par()) 264__io_reads_ins(ins, u8, b, __io_pbr(), __io_par())
267__io_reads_ins(ins, u16, w, __io_pbr(), __io_par()) 265__io_reads_ins(ins, u16, w, __io_pbr(), __io_par())
268__io_reads_ins(ins, u32, l, __io_pbr(), __io_par()) 266__io_reads_ins(ins, u32, l, __io_pbr(), __io_par())
269#define insb(addr, buffer, count) __insb((void __iomem *)addr, buffer, count) 267#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count)
270#define insw(addr, buffer, count) __insw((void __iomem *)addr, buffer, count) 268#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count)
271#define insl(addr, buffer, count) __insl((void __iomem *)addr, buffer, count) 269#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count)
272 270
273__io_writes_outs(writes, u8, b, __io_bw(), __io_aw()) 271__io_writes_outs(writes, u8, b, __io_bw(), __io_aw())
274__io_writes_outs(writes, u16, w, __io_bw(), __io_aw()) 272__io_writes_outs(writes, u16, w, __io_bw(), __io_aw())
@@ -280,9 +278,9 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw())
280__io_writes_outs(outs, u8, b, __io_pbw(), __io_paw()) 278__io_writes_outs(outs, u8, b, __io_pbw(), __io_paw())
281__io_writes_outs(outs, u16, w, __io_pbw(), __io_paw()) 279__io_writes_outs(outs, u16, w, __io_pbw(), __io_paw())
282__io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) 280__io_writes_outs(outs, u32, l, __io_pbw(), __io_paw())
283#define outsb(addr, buffer, count) __outsb((void __iomem *)addr, buffer, count) 281#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count)
284#define outsw(addr, buffer, count) __outsw((void __iomem *)addr, buffer, count) 282#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count)
285#define outsl(addr, buffer, count) __outsl((void __iomem *)addr, buffer, count) 283#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count)
286 284
287#ifdef CONFIG_64BIT 285#ifdef CONFIG_64BIT
288__io_reads_ins(reads, u64, q, __io_br(), __io_ar()) 286__io_reads_ins(reads, u64, q, __io_br(), __io_ar())
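
The __raw_write##len fix matters because the old loop body issued a 64-bit __raw_writeq() store for every element, whatever the declared width. A sketch of the u8 instantiation after the fix (the signature and fence placement are assumed from the generic accessor shape; only the loop body appears in the hunk):

    static inline void writesb(volatile void __iomem *addr,
                               const void *buffer, unsigned int count)
    {
            __io_bw();                      /* the 'bfence' macro argument */
            if (count) {
                    const u8 *buf = buffer;

                    do {
                            __raw_writeb(*buf++, addr); /* one byte-wide store per element */
                    } while (--count);
            }
            __io_aw();                      /* the 'afence' macro argument */
    }
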
diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h
index 6fdc860d7f84..07a3c6d5706f 100644
--- a/arch/riscv/include/asm/irqflags.h
+++ b/arch/riscv/include/asm/irqflags.h
@@ -27,25 +27,25 @@ static inline unsigned long arch_local_save_flags(void)
27/* unconditionally enable interrupts */ 27/* unconditionally enable interrupts */
28static inline void arch_local_irq_enable(void) 28static inline void arch_local_irq_enable(void)
29{ 29{
30 csr_set(sstatus, SR_IE); 30 csr_set(sstatus, SR_SIE);
31} 31}
32 32
33/* unconditionally disable interrupts */ 33/* unconditionally disable interrupts */
34static inline void arch_local_irq_disable(void) 34static inline void arch_local_irq_disable(void)
35{ 35{
36 csr_clear(sstatus, SR_IE); 36 csr_clear(sstatus, SR_SIE);
37} 37}
38 38
39/* get status and disable interrupts */ 39/* get status and disable interrupts */
40static inline unsigned long arch_local_irq_save(void) 40static inline unsigned long arch_local_irq_save(void)
41{ 41{
42 return csr_read_clear(sstatus, SR_IE); 42 return csr_read_clear(sstatus, SR_SIE);
43} 43}
44 44
45/* test flags */ 45/* test flags */
46static inline int arch_irqs_disabled_flags(unsigned long flags) 46static inline int arch_irqs_disabled_flags(unsigned long flags)
47{ 47{
48 return !(flags & SR_IE); 48 return !(flags & SR_SIE);
49} 49}
50 50
51/* test hardware interrupt enable bit */ 51/* test hardware interrupt enable bit */
@@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void)
57/* set interrupt enabled status */ 57/* set interrupt enabled status */
58static inline void arch_local_irq_restore(unsigned long flags) 58static inline void arch_local_irq_restore(unsigned long flags)
59{ 59{
60 csr_set(sstatus, flags & SR_IE); 60 csr_set(sstatus, flags & SR_SIE);
61} 61}
62 62
63#endif /* _ASM_RISCV_IRQFLAGS_H */ 63#endif /* _ASM_RISCV_IRQFLAGS_H */
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 66805cba9a27..5df2dccdba12 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -19,6 +19,10 @@
19 19
20typedef struct { 20typedef struct {
21 void *vdso; 21 void *vdso;
22#ifdef CONFIG_SMP
23 /* A local icache flush is needed before user execution can resume. */
24 cpumask_t icache_stale_mask;
25#endif
22} mm_context_t; 26} mm_context_t;
23 27
24#endif /* __ASSEMBLY__ */ 28#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
index de1fc1631fc4..97424834dce2 100644
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (C) 2012 Regents of the University of California 2 * Copyright (C) 2012 Regents of the University of California
3 * Copyright (C) 2017 SiFive
3 * 4 *
4 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 6 * modify it under the terms of the GNU General Public License
@@ -14,11 +15,13 @@
14#ifndef _ASM_RISCV_MMU_CONTEXT_H 15#ifndef _ASM_RISCV_MMU_CONTEXT_H
15#define _ASM_RISCV_MMU_CONTEXT_H 16#define _ASM_RISCV_MMU_CONTEXT_H
16 17
18#include <linux/mm_types.h>
17#include <asm-generic/mm_hooks.h> 19#include <asm-generic/mm_hooks.h>
18 20
19#include <linux/mm.h> 21#include <linux/mm.h>
20#include <linux/sched.h> 22#include <linux/sched.h>
21#include <asm/tlbflush.h> 23#include <asm/tlbflush.h>
24#include <asm/cacheflush.h>
22 25
23static inline void enter_lazy_tlb(struct mm_struct *mm, 26static inline void enter_lazy_tlb(struct mm_struct *mm,
24 struct task_struct *task) 27 struct task_struct *task)
@@ -46,12 +49,54 @@ static inline void set_pgdir(pgd_t *pgd)
46 csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); 49 csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
47} 50}
48 51
52/*
53 * When necessary, performs a deferred icache flush for the given MM context,
54 * on the local CPU. RISC-V has no direct mechanism for instruction cache
55 * shootdowns, so instead we send an IPI that informs the remote harts they
56 * need to flush their local instruction caches. To avoid pathologically slow
57 * behavior in a common case (a bunch of single-hart processes on a many-hart
58 * machine, i.e. 'make -j') we avoid the IPIs for harts that are not currently
59 * executing a MM context and instead schedule a deferred local instruction
60 * cache flush to be performed before execution resumes on each hart. This
61 * actually performs that local instruction cache flush, which implicitly only
62 * refers to the current hart.
63 */
64static inline void flush_icache_deferred(struct mm_struct *mm)
65{
66#ifdef CONFIG_SMP
67 unsigned int cpu = smp_processor_id();
68 cpumask_t *mask = &mm->context.icache_stale_mask;
69
70 if (cpumask_test_cpu(cpu, mask)) {
71 cpumask_clear_cpu(cpu, mask);
72 /*
73 * Ensure the remote hart's writes are visible to this hart.
74 * This pairs with a barrier in flush_icache_mm.
75 */
76 smp_mb();
77 local_flush_icache_all();
78 }
79#endif
80}
81
49static inline void switch_mm(struct mm_struct *prev, 82static inline void switch_mm(struct mm_struct *prev,
50 struct mm_struct *next, struct task_struct *task) 83 struct mm_struct *next, struct task_struct *task)
51{ 84{
52 if (likely(prev != next)) { 85 if (likely(prev != next)) {
86 /*
87 * Mark the current MM context as inactive, and the next as
88 * active. This is at least used by the icache flushing
89 * routines in order to determine who should receive a deferred icache flush.
90 */
91 unsigned int cpu = smp_processor_id();
92
93 cpumask_clear_cpu(cpu, mm_cpumask(prev));
94 cpumask_set_cpu(cpu, mm_cpumask(next));
95
53 set_pgdir(next->pgd); 96 set_pgdir(next->pgd);
54 local_flush_tlb_all(); 97 local_flush_tlb_all();
98
99 flush_icache_deferred(next);
55 } 100 }
56} 101}
57 102
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 3399257780b2..16301966d65b 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -20,8 +20,6 @@
20 20
21#ifndef __ASSEMBLY__ 21#ifndef __ASSEMBLY__
22 22
23#ifdef CONFIG_MMU
24
25/* Page Upper Directory not used in RISC-V */ 23/* Page Upper Directory not used in RISC-V */
26#include <asm-generic/pgtable-nopud.h> 24#include <asm-generic/pgtable-nopud.h>
27#include <asm/page.h> 25#include <asm/page.h>
@@ -178,28 +176,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
178#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) 176#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr))
179#define pte_unmap(pte) ((void)(pte)) 177#define pte_unmap(pte) ((void)(pte))
180 178
181/*
182 * Certain architectures need to do special things when PTEs within
183 * a page table are directly modified. Thus, the following hook is
184 * made available.
185 */
186static inline void set_pte(pte_t *ptep, pte_t pteval)
187{
188 *ptep = pteval;
189}
190
191static inline void set_pte_at(struct mm_struct *mm,
192 unsigned long addr, pte_t *ptep, pte_t pteval)
193{
194 set_pte(ptep, pteval);
195}
196
197static inline void pte_clear(struct mm_struct *mm,
198 unsigned long addr, pte_t *ptep)
199{
200 set_pte_at(mm, addr, ptep, __pte(0));
201}
202
203static inline int pte_present(pte_t pte) 179static inline int pte_present(pte_t pte)
204{ 180{
205 return (pte_val(pte) & _PAGE_PRESENT); 181 return (pte_val(pte) & _PAGE_PRESENT);
@@ -210,21 +186,22 @@ static inline int pte_none(pte_t pte)
210 return (pte_val(pte) == 0); 186 return (pte_val(pte) == 0);
211} 187}
212 188
213/* static inline int pte_read(pte_t pte) */
214
215static inline int pte_write(pte_t pte) 189static inline int pte_write(pte_t pte)
216{ 190{
217 return pte_val(pte) & _PAGE_WRITE; 191 return pte_val(pte) & _PAGE_WRITE;
218} 192}
219 193
194static inline int pte_exec(pte_t pte)
195{
196 return pte_val(pte) & _PAGE_EXEC;
197}
198
220static inline int pte_huge(pte_t pte) 199static inline int pte_huge(pte_t pte)
221{ 200{
222 return pte_present(pte) 201 return pte_present(pte)
223 && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); 202 && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
224} 203}
225 204
226/* static inline int pte_exec(pte_t pte) */
227
228static inline int pte_dirty(pte_t pte) 205static inline int pte_dirty(pte_t pte)
229{ 206{
230 return pte_val(pte) & _PAGE_DIRTY; 207 return pte_val(pte) & _PAGE_DIRTY;
@@ -311,6 +288,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
311 return pte_val(pte_a) == pte_val(pte_b); 288 return pte_val(pte_a) == pte_val(pte_b);
312} 289}
313 290
291/*
292 * Certain architectures need to do special things when PTEs within
293 * a page table are directly modified. Thus, the following hook is
294 * made available.
295 */
296static inline void set_pte(pte_t *ptep, pte_t pteval)
297{
298 *ptep = pteval;
299}
300
301void flush_icache_pte(pte_t pte);
302
303static inline void set_pte_at(struct mm_struct *mm,
304 unsigned long addr, pte_t *ptep, pte_t pteval)
305{
306 if (pte_present(pteval) && pte_exec(pteval))
307 flush_icache_pte(pteval);
308
309 set_pte(ptep, pteval);
310}
311
312static inline void pte_clear(struct mm_struct *mm,
313 unsigned long addr, pte_t *ptep)
314{
315 set_pte_at(mm, addr, ptep, __pte(0));
316}
317
314#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS 318#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
315static inline int ptep_set_access_flags(struct vm_area_struct *vma, 319static inline int ptep_set_access_flags(struct vm_area_struct *vma,
316 unsigned long address, pte_t *ptep, 320 unsigned long address, pte_t *ptep,
@@ -407,8 +411,6 @@ static inline void pgtable_cache_init(void)
407 /* No page table caches to initialize */ 411 /* No page table caches to initialize */
408} 412}
409 413
410#endif /* CONFIG_MMU */
411
412#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) 414#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
413#define VMALLOC_END (PAGE_OFFSET - 1) 415#define VMALLOC_END (PAGE_OFFSET - 1)
414#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) 416#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 93b8956e25e4..2c5df945d43c 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -66,7 +66,7 @@ struct pt_regs {
66#define REG_FMT "%08lx" 66#define REG_FMT "%08lx"
67#endif 67#endif
68 68
69#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0) 69#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
70 70
71 71
72/* Helpers for working with the instruction pointer */ 72/* Helpers for working with the instruction pointer */
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
index 04c71d938afd..2fd27e8ef1fd 100644
--- a/arch/riscv/include/asm/spinlock.h
+++ b/arch/riscv/include/asm/spinlock.h
@@ -24,7 +24,7 @@
24 24
25/* FIXME: Replace this with a ticket lock, like MIPS. */ 25/* FIXME: Replace this with a ticket lock, like MIPS. */
26 26
27#define arch_spin_is_locked(x) ((x)->lock != 0) 27#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
28 28
29static inline void arch_spin_unlock(arch_spinlock_t *lock) 29static inline void arch_spin_unlock(arch_spinlock_t *lock)
30{ 30{
@@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
58 } 58 }
59} 59}
60 60
61static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
62{
63 smp_rmb();
64 do {
65 cpu_relax();
66 } while (arch_spin_is_locked(lock));
67 smp_acquire__after_ctrl_dep();
68}
69
70/***********************************************************/ 61/***********************************************************/
71 62
72static inline void arch_read_lock(arch_rwlock_t *lock) 63static inline void arch_read_lock(arch_rwlock_t *lock)
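
READ_ONCE() guarantees a single, non-torn load per call instead of leaving the compiler free to reload or fuse the plain access; a sketch of the difference at a polling site (illustrative only):

    /* Plain read: the compiler may merge, reload, or tear these loads. */
    while (lock->lock != 0)
            cpu_relax();

    /* READ_ONCE(): each iteration performs exactly one volatile load. */
    while (arch_spin_is_locked(lock))
            cpu_relax();

arch_spin_unlock_wait() can be deleted outright, presumably because the core kernel removed the spin_unlock_wait() API and with it the last callers.
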
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index 3df4932d8964..2f26989cb864 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -18,7 +18,7 @@
18 18
19typedef unsigned long cycles_t; 19typedef unsigned long cycles_t;
20 20
21static inline cycles_t get_cycles(void) 21static inline cycles_t get_cycles_inline(void)
22{ 22{
23 cycles_t n; 23 cycles_t n;
24 24
@@ -27,6 +27,7 @@ static inline cycles_t get_cycles(void)
27 : "=r" (n)); 27 : "=r" (n));
28 return n; 28 return n;
29} 29}
30#define get_cycles get_cycles_inline
30 31
31#ifdef CONFIG_64BIT 32#ifdef CONFIG_64BIT
32static inline uint64_t get_cycles64(void) 33static inline uint64_t get_cycles64(void)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 5ee4ae370b5e..7b9c24ebdf52 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -15,9 +15,12 @@
15#ifndef _ASM_RISCV_TLBFLUSH_H 15#ifndef _ASM_RISCV_TLBFLUSH_H
16#define _ASM_RISCV_TLBFLUSH_H 16#define _ASM_RISCV_TLBFLUSH_H
17 17
18#ifdef CONFIG_MMU 18#include <linux/mm_types.h>
19 19
20/* Flush entire local TLB */ 20/*
21 * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
22 * cache as well, so a 'fence.i' is not necessary.
23 */
21static inline void local_flush_tlb_all(void) 24static inline void local_flush_tlb_all(void)
22{ 25{
23 __asm__ __volatile__ ("sfence.vma" : : : "memory"); 26 __asm__ __volatile__ ("sfence.vma" : : : "memory");
@@ -59,6 +62,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
59 flush_tlb_all(); 62 flush_tlb_all();
60} 63}
61 64
62#endif /* CONFIG_MMU */
63
64#endif /* _ASM_RISCV_TLBFLUSH_H */ 65#endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 27b90d64814b..14b0b22fb578 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -127,7 +127,6 @@ extern int fixup_exception(struct pt_regs *state);
127 * call. 127 * call.
128 */ 128 */
129 129
130#ifdef CONFIG_MMU
131#define __get_user_asm(insn, x, ptr, err) \ 130#define __get_user_asm(insn, x, ptr, err) \
132do { \ 131do { \
133 uintptr_t __tmp; \ 132 uintptr_t __tmp; \
@@ -153,13 +152,11 @@ do { \
153 __disable_user_access(); \ 152 __disable_user_access(); \
154 (x) = __x; \ 153 (x) = __x; \
155} while (0) 154} while (0)
156#endif /* CONFIG_MMU */
157 155
158#ifdef CONFIG_64BIT 156#ifdef CONFIG_64BIT
159#define __get_user_8(x, ptr, err) \ 157#define __get_user_8(x, ptr, err) \
160 __get_user_asm("ld", x, ptr, err) 158 __get_user_asm("ld", x, ptr, err)
161#else /* !CONFIG_64BIT */ 159#else /* !CONFIG_64BIT */
162#ifdef CONFIG_MMU
163#define __get_user_8(x, ptr, err) \ 160#define __get_user_8(x, ptr, err) \
164do { \ 161do { \
165 u32 __user *__ptr = (u32 __user *)(ptr); \ 162 u32 __user *__ptr = (u32 __user *)(ptr); \
@@ -193,7 +190,6 @@ do { \
193 (x) = (__typeof__(x))((__typeof__((x)-(x)))( \ 190 (x) = (__typeof__(x))((__typeof__((x)-(x)))( \
194 (((u64)__hi << 32) | __lo))); \ 191 (((u64)__hi << 32) | __lo))); \
195} while (0) 192} while (0)
196#endif /* CONFIG_MMU */
197#endif /* CONFIG_64BIT */ 193#endif /* CONFIG_64BIT */
198 194
199 195
@@ -267,8 +263,6 @@ do { \
267 ((x) = 0, -EFAULT); \ 263 ((x) = 0, -EFAULT); \
268}) 264})
269 265
270
271#ifdef CONFIG_MMU
272#define __put_user_asm(insn, x, ptr, err) \ 266#define __put_user_asm(insn, x, ptr, err) \
273do { \ 267do { \
274 uintptr_t __tmp; \ 268 uintptr_t __tmp; \
@@ -292,14 +286,11 @@ do { \
292 : "rJ" (__x), "i" (-EFAULT)); \ 286 : "rJ" (__x), "i" (-EFAULT)); \
293 __disable_user_access(); \ 287 __disable_user_access(); \
294} while (0) 288} while (0)
295#endif /* CONFIG_MMU */
296
297 289
298#ifdef CONFIG_64BIT 290#ifdef CONFIG_64BIT
299#define __put_user_8(x, ptr, err) \ 291#define __put_user_8(x, ptr, err) \
300 __put_user_asm("sd", x, ptr, err) 292 __put_user_asm("sd", x, ptr, err)
301#else /* !CONFIG_64BIT */ 293#else /* !CONFIG_64BIT */
302#ifdef CONFIG_MMU
303#define __put_user_8(x, ptr, err) \ 294#define __put_user_8(x, ptr, err) \
304do { \ 295do { \
305 u32 __user *__ptr = (u32 __user *)(ptr); \ 296 u32 __user *__ptr = (u32 __user *)(ptr); \
@@ -329,7 +320,6 @@ do { \
329 : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \ 320 : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \
330 __disable_user_access(); \ 321 __disable_user_access(); \
331} while (0) 322} while (0)
332#endif /* CONFIG_MMU */
333#endif /* CONFIG_64BIT */ 323#endif /* CONFIG_64BIT */
334 324
335 325
@@ -438,7 +428,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
438 * will set "err" to -EFAULT, while successful accesses return the previous 428 * will set "err" to -EFAULT, while successful accesses return the previous
439 * value. 429 * value.
440 */ 430 */
441#ifdef CONFIG_MMU
442#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \ 431#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \
443({ \ 432({ \
444 __typeof__(ptr) __ptr = (ptr); \ 433 __typeof__(ptr) __ptr = (ptr); \
@@ -508,6 +497,5 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
508 (err) = __err; \ 497 (err) = __err; \
509 __ret; \ 498 __ret; \
510}) 499})
511#endif /* CONFIG_MMU */
512 500
513#endif /* _ASM_RISCV_UACCESS_H */ 501#endif /* _ASM_RISCV_UACCESS_H */
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index 9f250ed007cd..2f704a5c4196 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -14,3 +14,4 @@
14#define __ARCH_HAVE_MMU 14#define __ARCH_HAVE_MMU
15#define __ARCH_WANT_SYS_CLONE 15#define __ARCH_WANT_SYS_CLONE
16#include <uapi/asm/unistd.h> 16#include <uapi/asm/unistd.h>
17#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 602f61257553..541544d64c33 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -38,4 +38,8 @@ struct vdso_data {
38 (void __user *)((unsigned long)(base) + __vdso_##name); \ 38 (void __user *)((unsigned long)(base) + __vdso_##name); \
39}) 39})
40 40
41#ifdef CONFIG_SMP
42asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
43#endif
44
41#endif /* _ASM_RISCV_VDSO_H */ 45#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/uapi/asm/Kbuild b/arch/riscv/include/uapi/asm/Kbuild
index 5ded96b06352..7e91f4850475 100644
--- a/arch/riscv/include/uapi/asm/Kbuild
+++ b/arch/riscv/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += setup.h 4generic-y += setup.h
5generic-y += unistd.h 5generic-y += unistd.h
6generic-y += bpf_perf_event.h
6generic-y += errno.h 7generic-y += errno.h
7generic-y += fcntl.h 8generic-y += fcntl.h
8generic-y += ioctl.h 9generic-y += ioctl.h
diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/syscalls.h
new file mode 100644
index 000000000000..818655b0d535
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/syscalls.h
@@ -0,0 +1,26 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2017 SiFive
4 */
5
6#ifndef _ASM__UAPI__SYSCALLS_H
7#define _ASM__UAPI__SYSCALLS_H
8
9/*
10 * Allows the instruction cache to be flushed from userspace. Despite RISC-V
11 * having a direct 'fence.i' instruction available to userspace (which we
12 * can't trap!), that's not actually viable when running on Linux because the
13 * kernel might schedule a process on another hart. There is no way for
14 * userspace to handle this without invoking the kernel (as it doesn't know the
15 * thread->hart mappings), so we've defined a RISC-V specific system call to
16 * flush the instruction cache.
17 *
18 * __NR_riscv_flush_icache is defined to flush the instruction cache over an
19 * address range, with the flush applying to either all threads or just the
20 * caller. We don't currently do anything with the address range, that's just
21 * in there for forwards compatibility.
22 */
23#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
24__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
25
26#endif
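
A userspace sketch of invoking the new system call after writing JIT code; the fallback __NR definition mirrors the header above, and error handling is elided:

    #include <stdint.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_riscv_flush_icache
    #define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
    #endif

    /* flags == 0 flushes for all threads; bit 0 (LOCAL) restricts to the caller. */
    static long flush_icache(void *start, void *end)
    {
            return syscall(__NR_riscv_flush_icache,
                           (uintptr_t)start, (uintptr_t)end, 0UL);
    }
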
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 20ee86f782a9..7404ec222406 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -196,7 +196,7 @@ handle_syscall:
196 addi s2, s2, 0x4 196 addi s2, s2, 0x4
197 REG_S s2, PT_SEPC(sp) 197 REG_S s2, PT_SEPC(sp)
198 /* System calls run with interrupts enabled */ 198 /* System calls run with interrupts enabled */
199 csrs sstatus, SR_IE 199 csrs sstatus, SR_SIE
200 /* Trace syscalls, but only if requested by the user. */ 200 /* Trace syscalls, but only if requested by the user. */
201 REG_L t0, TASK_TI_FLAGS(tp) 201 REG_L t0, TASK_TI_FLAGS(tp)
202 andi t0, t0, _TIF_SYSCALL_TRACE 202 andi t0, t0, _TIF_SYSCALL_TRACE
@@ -224,8 +224,8 @@ ret_from_syscall:
224 224
225ret_from_exception: 225ret_from_exception:
226 REG_L s0, PT_SSTATUS(sp) 226 REG_L s0, PT_SSTATUS(sp)
227 csrc sstatus, SR_IE 227 csrc sstatus, SR_SIE
228 andi s0, s0, SR_PS 228 andi s0, s0, SR_SPP
229 bnez s0, restore_all 229 bnez s0, restore_all
230 230
231resume_userspace: 231resume_userspace:
@@ -255,7 +255,7 @@ work_pending:
255 bnez s1, work_resched 255 bnez s1, work_resched
256work_notifysig: 256work_notifysig:
257 /* Handle pending signals and notify-resume requests */ 257 /* Handle pending signals and notify-resume requests */
258 csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */ 258 csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */
259 move a0, sp /* pt_regs */ 259 move a0, sp /* pt_regs */
260 move a1, s0 /* current_thread_info->flags */ 260 move a1, s0 /* current_thread_info->flags */
261 tail do_notify_resume 261 tail do_notify_resume
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 76af908f87c1..78f670d70133 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -152,6 +152,3 @@ END(_start)
152__PAGE_ALIGNED_BSS 152__PAGE_ALIGNED_BSS
153 /* Empty zero page */ 153 /* Empty zero page */
154 .balign PAGE_SIZE 154 .balign PAGE_SIZE
155ENTRY(empty_zero_page)
156 .fill (empty_zero_page + PAGE_SIZE) - ., 1, 0x00
157END(empty_zero_page)
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 0d90dcc1fbd3..d74d4adf2d54 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -76,7 +76,7 @@ void show_regs(struct pt_regs *regs)
76void start_thread(struct pt_regs *regs, unsigned long pc, 76void start_thread(struct pt_regs *regs, unsigned long pc,
77 unsigned long sp) 77 unsigned long sp)
78{ 78{
79 regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL; 79 regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
80 regs->sepc = pc; 80 regs->sepc = pc;
81 regs->sp = sp; 81 regs->sp = sp;
82 set_fs(USER_DS); 82 set_fs(USER_DS);
@@ -110,7 +110,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
110 const register unsigned long gp __asm__ ("gp"); 110 const register unsigned long gp __asm__ ("gp");
111 memset(childregs, 0, sizeof(struct pt_regs)); 111 memset(childregs, 0, sizeof(struct pt_regs));
112 childregs->gp = gp; 112 childregs->gp = gp;
113 childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */ 113 childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
114 114
115 p->thread.ra = (unsigned long)ret_from_kernel_thread; 115 p->thread.ra = (unsigned long)ret_from_kernel_thread;
116 p->thread.s[0] = usp; /* fn */ 116 p->thread.s[0] = usp; /* fn */
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index 23cc81ec9e94..551734248748 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -12,4 +12,7 @@
12/* 12/*
13 * Assembly functions that may be used (directly or indirectly) by modules 13 * Assembly functions that may be used (directly or indirectly) by modules
14 */ 14 */
15EXPORT_SYMBOL(__clear_user);
15EXPORT_SYMBOL(__copy_user); 16EXPORT_SYMBOL(__copy_user);
17EXPORT_SYMBOL(memset);
18EXPORT_SYMBOL(memcpy);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index de7db114c315..cb7b0c63014e 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -38,10 +38,6 @@
38#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
39#include <asm/thread_info.h> 39#include <asm/thread_info.h>
40 40
41#ifdef CONFIG_HVC_RISCV_SBI
42#include <asm/hvc_riscv_sbi.h>
43#endif
44
45#ifdef CONFIG_DUMMY_CONSOLE 41#ifdef CONFIG_DUMMY_CONSOLE
46struct screen_info screen_info = { 42struct screen_info screen_info = {
47 .orig_video_lines = 30, 43 .orig_video_lines = 30,
@@ -58,7 +54,12 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
58#endif /* CONFIG_CMDLINE_BOOL */ 54#endif /* CONFIG_CMDLINE_BOOL */
59 55
60unsigned long va_pa_offset; 56unsigned long va_pa_offset;
57EXPORT_SYMBOL(va_pa_offset);
61unsigned long pfn_base; 58unsigned long pfn_base;
59EXPORT_SYMBOL(pfn_base);
60
61unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
62EXPORT_SYMBOL(empty_zero_page);
62 63
63/* The lucky hart to first increment this variable will boot the other cores */ 64/* The lucky hart to first increment this variable will boot the other cores */
64atomic_t hart_lottery; 65atomic_t hart_lottery;
@@ -207,13 +208,6 @@ static void __init setup_bootmem(void)
207 208
208void __init setup_arch(char **cmdline_p) 209void __init setup_arch(char **cmdline_p)
209{ 210{
210#if defined(CONFIG_HVC_RISCV_SBI)
211 if (likely(early_console == NULL)) {
212 early_console = &riscv_sbi_early_console_dev;
213 register_console(early_console);
214 }
215#endif
216
217#ifdef CONFIG_CMDLINE_BOOL 211#ifdef CONFIG_CMDLINE_BOOL
218#ifdef CONFIG_CMDLINE_OVERRIDE 212#ifdef CONFIG_CMDLINE_OVERRIDE
219 strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); 213 strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index b4a71ec5906f..6d3962435720 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -38,6 +38,13 @@ enum ipi_message_type {
38 IPI_MAX 38 IPI_MAX
39}; 39};
40 40
41
42/* Unsupported */
43int setup_profiling_timer(unsigned int multiplier)
44{
45 return -EINVAL;
46}
47
41irqreturn_t handle_ipi(void) 48irqreturn_t handle_ipi(void)
42{ 49{
43 unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; 50 unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
@@ -108,3 +115,51 @@ void smp_send_reschedule(int cpu)
108{ 115{
109 send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); 116 send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
110} 117}
118
119/*
120 * Performs an icache flush for the given MM context. RISC-V has no direct
121 * mechanism for instruction cache shootdowns, so instead we send an IPI that
122 * informs the remote harts they need to flush their local instruction caches.
123 * To avoid pathologically slow behavior in a common case (a bunch of
124 * single-hart processes on a many-hart machine, i.e. 'make -j') we avoid the
125 * IPIs for harts that are not currently executing a MM context and instead
126 * schedule a deferred local instruction cache flush to be performed before
127 * execution resumes on each hart.
128 */
129void flush_icache_mm(struct mm_struct *mm, bool local)
130{
131 unsigned int cpu;
132 cpumask_t others, *mask;
133
134 preempt_disable();
135
136 /* Mark every hart's icache as needing a flush for this MM. */
137 mask = &mm->context.icache_stale_mask;
138 cpumask_setall(mask);
139 /* Flush this hart's I$ now, and mark it as flushed. */
140 cpu = smp_processor_id();
141 cpumask_clear_cpu(cpu, mask);
142 local_flush_icache_all();
143
144 /*
145 * Flush the I$ of other harts concurrently executing, and mark them as
146 * flushed.
147 */
148 cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
149 local |= cpumask_empty(&others);
150 if (mm != current->active_mm || !local)
151 sbi_remote_fence_i(others.bits);
152 else {
153 /*
154 * It's assumed that at least one strongly ordered operation is
155 * performed on this hart between setting a hart's cpumask bit
156 * and scheduling this MM context on that hart. Sending an SBI
157 * remote message will do this, but in the case where no
158 * messages are sent we still need to order this hart's writes
159 * with flush_icache_deferred().
160 */
161 smp_mb();
162 }
163
164 preempt_enable();
165}
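
A worked example of the strategy above, for an mm currently running on harts 0 and 2 of a four-hart machine, with hart 0 the caller:

    /*
     * flush_icache_mm(mm, false) on hart 0:
     *
     *   icache_stale_mask <- {0,1,2,3}   every hart marked stale
     *   clear bit 0, fence.i             the caller flushes itself now
     *   sbi_remote_fence_i({2})          hart 2 is the only other hart
     *                                    currently running this mm
     *   harts 1 and 3                    flush lazily in flush_icache_deferred()
     *                                    the next time switch_mm() selects this mm
     */
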
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 4351be7d0533..79c78668258e 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -14,8 +14,8 @@
14 */ 14 */
15 15
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <asm/cmpxchg.h>
18#include <asm/unistd.h> 17#include <asm/unistd.h>
18#include <asm/cacheflush.h>
19 19
20static long riscv_sys_mmap(unsigned long addr, unsigned long len, 20static long riscv_sys_mmap(unsigned long addr, unsigned long len,
21 unsigned long prot, unsigned long flags, 21 unsigned long prot, unsigned long flags,
@@ -47,3 +47,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
47 return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); 47 return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
48} 48}
49#endif /* !CONFIG_64BIT */ 49#endif /* !CONFIG_64BIT */
50
51#ifdef CONFIG_SMP
52/*
53 * Allows the instruction cache to be flushed from userspace. Despite RISC-V
54 * having a direct 'fence.i' instruction available to userspace (which we
55 * can't trap!), that's not actually viable when running on Linux because the
56 * kernel might schedule a process on another hart. There is no way for
57 * userspace to handle this without invoking the kernel (as it doesn't know the
58 * thread->hart mappings), so we've defined a RISC-V specific system call to
59 * flush the instruction cache.
60 *
61 * sys_riscv_flush_icache() is defined to flush the instruction cache over an
62 * address range, with the flush applying to either all threads or just the
63 * caller. We don't currently do anything with the address range, that's just
64 * in there for forwards compatibility.
65 */
66SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
67 uintptr_t, flags)
68{
69 struct mm_struct *mm = current->mm;
70 bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
71
72 /* Check the reserved flags. */
73 if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
74 return -EINVAL;
75
76 flush_icache_mm(mm, local);
77
78 return 0;
79}
80#endif
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index 4e30dc5fb593..ade52b903a43 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -15,6 +15,7 @@
15#include <linux/linkage.h> 15#include <linux/linkage.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <asm-generic/syscalls.h> 17#include <asm-generic/syscalls.h>
18#include <asm/vdso.h>
18 19
19#undef __SYSCALL 20#undef __SYSCALL
20#define __SYSCALL(nr, call) [nr] = (call), 21#define __SYSCALL(nr, call) [nr] = (call),
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 523d0a8ac8db..324568d33921 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -1,7 +1,12 @@
1# Copied from arch/tile/kernel/vdso/Makefile 1# Copied from arch/tile/kernel/vdso/Makefile
2 2
3# Symbols present in the vdso 3# Symbols present in the vdso
4vdso-syms = rt_sigreturn 4vdso-syms = rt_sigreturn
5vdso-syms += gettimeofday
6vdso-syms += clock_gettime
7vdso-syms += clock_getres
8vdso-syms += getcpu
9vdso-syms += flush_icache
5 10
6# Files to link into the vdso 11# Files to link into the vdso
7obj-vdso = $(patsubst %, %.o, $(vdso-syms)) 12obj-vdso = $(patsubst %, %.o, $(vdso-syms))
diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S
new file mode 100644
index 000000000000..edf7e2339648
--- /dev/null
+++ b/arch/riscv/kernel/vdso/clock_getres.S
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2017 SiFive
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/linkage.h>
15#include <asm/unistd.h>
16
17 .text
18/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */
19ENTRY(__vdso_clock_getres)
20 .cfi_startproc
21 /* For now, just do the syscall. */
22 li a7, __NR_clock_getres
23 ecall
24 ret
25 .cfi_endproc
26ENDPROC(__vdso_clock_getres)
diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S
new file mode 100644
index 000000000000..aac65676c6d5
--- /dev/null
+++ b/arch/riscv/kernel/vdso/clock_gettime.S
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2017 SiFive
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/linkage.h>
15#include <asm/unistd.h>
16
17 .text
18/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */
19ENTRY(__vdso_clock_gettime)
20 .cfi_startproc
21 /* For now, just do the syscall. */
22 li a7, __NR_clock_gettime
23 ecall
24 ret
25 .cfi_endproc
26ENDPROC(__vdso_clock_gettime)
diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S
new file mode 100644
index 000000000000..023e4d4aef58
--- /dev/null
+++ b/arch/riscv/kernel/vdso/flush_icache.S
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) 2017 SiFive
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/linkage.h>
15#include <asm/unistd.h>
16
17 .text
18/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
19ENTRY(__vdso_flush_icache)
20 .cfi_startproc
21#ifdef CONFIG_SMP
22 li a7, __NR_riscv_flush_icache
23 ecall
24#else
25 fence.i
26 li a0, 0
27#endif
28 ret
29 .cfi_endproc
30ENDPROC(__vdso_flush_icache)
diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S
new file mode 100644
index 000000000000..cc7e98924484
--- /dev/null
+++ b/arch/riscv/kernel/vdso/getcpu.S
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2017 SiFive
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/linkage.h>
15#include <asm/unistd.h>
16
17 .text
18/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */
19ENTRY(__vdso_getcpu)
20 .cfi_startproc
21 /* For now, just do the syscall. */
22 li a7, __NR_getcpu
23 ecall
24 ret
25 .cfi_endproc
26ENDPROC(__vdso_getcpu)
diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S
new file mode 100644
index 000000000000..da85d33e8990
--- /dev/null
+++ b/arch/riscv/kernel/vdso/gettimeofday.S
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2017 SiFive
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/linkage.h>
15#include <asm/unistd.h>
16
17 .text
18/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */
19ENTRY(__vdso_gettimeofday)
20 .cfi_startproc
21 /* For now, just do the syscall. */
22 li a7, __NR_gettimeofday
23 ecall
24 ret
25 .cfi_endproc
26ENDPROC(__vdso_gettimeofday)
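The clock_gettime, getcpu and gettimeofday stubs are plain syscall trampolines for now; routing them through the vDSO still pays off because the kernel can later swap in implementations that read shared memory without trapping, and existing binaries pick that up automatically. User space finds the vDSO through the auxiliary vector; a minimal sketch:

	#include <stdio.h>
	#include <sys/auxv.h>

	int main(void)
	{
		/* The kernel maps the vDSO into every process and passes
		 * the address of its ELF header in AT_SYSINFO_EHDR; a libc
		 * walks the dynamic symbol table found there to bind the
		 * __vdso_* entry points. */
		unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

		printf("vDSO ELF header at %#lx\n", vdso);
		return 0;
	}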
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index 8c9dce95c11d..cd1d47e0724b 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -70,8 +70,11 @@ VERSION
70 LINUX_4.15 { 70 LINUX_4.15 {
71 global: 71 global:
72 __vdso_rt_sigreturn; 72 __vdso_rt_sigreturn;
73 __vdso_cmpxchg32; 73 __vdso_gettimeofday;
74 __vdso_cmpxchg64; 74 __vdso_clock_gettime;
75 __vdso_clock_getres;
76 __vdso_getcpu;
77 __vdso_flush_icache;
75 local: *; 78 local: *;
76 }; 79 };
77} 80}
diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c
index 1cc4ac3964b4..dce8ae24c6d3 100644
--- a/arch/riscv/lib/delay.c
+++ b/arch/riscv/lib/delay.c
@@ -84,6 +84,7 @@ void __delay(unsigned long cycles)
84 while ((unsigned long)(get_cycles() - t0) < cycles) 84 while ((unsigned long)(get_cycles() - t0) < cycles)
85 cpu_relax(); 85 cpu_relax();
86} 86}
87EXPORT_SYMBOL(__delay);
87 88
88void udelay(unsigned long usecs) 89void udelay(unsigned long usecs)
89{ 90{
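Exporting __delay lets modular code reach the cycle-counting loop above; drivers normally go through udelay()/mdelay(), which on this port end up in __delay(). A minimal module sketch (illustrative):

	#include <linux/delay.h>
	#include <linux/module.h>

	static int __init demo_init(void)
	{
		udelay(50);	/* busy-waits ~50us via __delay() */
		return 0;
	}
	module_init(demo_init);

	MODULE_LICENSE("GPL");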
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 81f7d9ce6d88..eb22ab49b3e0 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -2,3 +2,4 @@ obj-y += init.o
2obj-y += fault.o 2obj-y += fault.o
3obj-y += extable.o 3obj-y += extable.o
4obj-y += ioremap.o 4obj-y += ioremap.o
5obj-y += cacheflush.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
new file mode 100644
index 000000000000..498c0a0814fe
--- /dev/null
+++ b/arch/riscv/mm/cacheflush.c
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) 2017 SiFive
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <asm/pgtable.h>
15#include <asm/cacheflush.h>
16
17void flush_icache_pte(pte_t pte)
18{
19 struct page *page = pte_page(pte);
20
21 if (!test_and_set_bit(PG_dcache_clean, &page->flags))
22 flush_icache_all();
23}
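flush_icache_pte() is the usual lazy icache-flush idiom: PG_dcache_clean is set the first time a page is mapped, so only that first mapping pays for flush_icache_all(), and later mappings of the same unmodified page fall through. Sketch of the once-per-page behaviour (illustrative):

	/* first call for a given page: bit was clear -> global flush */
	flush_icache_pte(pte);

	/* second call for the same page: test_and_set_bit() returns
	 * nonzero, so the expensive flush_icache_all() is skipped */
	flush_icache_pte(pte);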
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index df2ca3c65048..0713f3c67ab4 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -63,7 +63,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
63 goto vmalloc_fault; 63 goto vmalloc_fault;
64 64
65 /* Enable interrupts if they were enabled in the parent context. */ 65 /* Enable interrupts if they were enabled in the parent context. */
66 if (likely(regs->sstatus & SR_PIE)) 66 if (likely(regs->sstatus & SR_SPIE))
67 local_irq_enable(); 67 local_irq_enable();
68 68
69 /* 69 /*
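SR_PIE was the pre-1.10 spelling; in the RISC-V privileged spec v1.10 the bit that records whether supervisor interrupts were enabled before the trap is sstatus.SPIE, hence the rename. For reference (bit position per the v1.10 spec; the literal is an assumption matching asm/csr.h):

	#define SR_SPIE	0x00000020UL	/* sstatus bit 5: Supervisor
					 * Previous Interrupt Enable */

	/* the fault handler may re-enable interrupts only if they were
	 * enabled in the interrupted context: */
	if (regs->sstatus & SR_SPIE)
		local_irq_enable();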
diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c
index e99194a4077e..70ef2724cdf6 100644
--- a/arch/riscv/mm/ioremap.c
+++ b/arch/riscv/mm/ioremap.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(ioremap);
85 * 85 *
86 * Caller must ensure there is only one unmapping for the same pointer. 86 * Caller must ensure there is only one unmapping for the same pointer.
87 */ 87 */
88void iounmap(void __iomem *addr) 88void iounmap(volatile void __iomem *addr)
89{ 89{
90 vunmap((void *)((unsigned long)addr & PAGE_MASK)); 90 vunmap((void *)((unsigned long)addr & PAGE_MASK));
91} 91}
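The volatile qualifier aligns iounmap() with the prototype used elsewhere in the tree, so a caller that carries its MMIO cookie with the qualifier no longer needs a cast (phys_base and size below are placeholders):

	volatile void __iomem *regs = ioremap(phys_base, size);

	/* ... readl()/writel() accesses ... */

	iounmap(regs);	/* matches the volatile-qualified prototype */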
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index eae2c64cf69d..9fdff3fe1a42 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-y += kernel/ 2obj-y += kernel/
2obj-y += mm/ 3obj-y += mm/
3obj-$(CONFIG_KVM) += kvm/ 4obj-$(CONFIG_KVM) += kvm/
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 6b3f41985f28..de54cfc6109d 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# s390/Makefile 3# s390/Makefile
3# 4#
@@ -6,10 +7,6 @@
6# for "archclean" and "archdep" for cleaning up and making dependencies for 7# for "archclean" and "archdep" for cleaning up and making dependencies for
7# this architecture 8# this architecture
8# 9#
9# This file is subject to the terms and conditions of the GNU General Public
10# License. See the file "COPYING" in the main directory of this archive
11# for more details.
12#
13# Copyright (C) 1994 by Linus Torvalds 10# Copyright (C) 1994 by Linus Torvalds
14# 11#
15 12
diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile
index 99f1cf071304..b06def4a4f2f 100644
--- a/arch/s390/appldata/Makefile
+++ b/arch/s390/appldata/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the Linux - z/VM Monitor Stream. 3# Makefile for the Linux - z/VM Monitor Stream.
3# 4#
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index ef3fb1b9201f..cb6e8066b1ad 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1. 3 * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1.
3 * Exports appldata_register_ops() and appldata_unregister_ops() for the 4 * Exports appldata_register_ops() and appldata_unregister_ops() for the
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 598df5708501..e68136c3c23a 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Data gathering module for Linux-VM Monitor Stream, Stage 1. 3 * Data gathering module for Linux-VM Monitor Stream, Stage 1.
3 * Collects data related to memory management. 4 * Collects data related to memory management.
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 66037d2622b4..8bc14b0d1def 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Data gathering module for Linux-VM Monitor Stream, Stage 1. 3 * Data gathering module for Linux-VM Monitor Stream, Stage 1.
3 * Collects accumulated network statistics (Packets received/transmitted, 4 * Collects accumulated network statistics (Packets received/transmitted,
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 45b3178200ab..433a994b1a89 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Data gathering module for Linux-VM Monitor Stream, Stage 1. 3 * Data gathering module for Linux-VM Monitor Stream, Stage 1.
3 * Collects misc. OS related data (CPU utilization, running processes). 4 * Collects misc. OS related data (CPU utilization, running processes).
diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr
index f02382ae5c48..42a242597f34 100644
--- a/arch/s390/boot/compressed/vmlinux.scr
+++ b/arch/s390/boot/compressed/vmlinux.scr
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1SECTIONS 2SECTIONS
2{ 3{
3 .rodata.compressed : { 4 .rodata.compressed : {
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index aed3069699bd..bed227f267ae 100644
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -1,11 +1,8 @@
1#!/bin/sh 1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
2# 3#
3# arch/s390x/boot/install.sh 4# arch/s390x/boot/install.sh
4# 5#
5# This file is subject to the terms and conditions of the GNU General Public
6# License. See the file "COPYING" in the main directory of this archive
7# for more details.
8#
9# Copyright (C) 1995 by Linus Torvalds 6# Copyright (C) 1995 by Linus Torvalds
10# 7#
11# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin 8# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index b48e20dd94e9..d60798737d86 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
@@ -11,12 +12,6 @@
11 * Harald Freudenberger <freude@de.ibm.com> 12 * Harald Freudenberger <freude@de.ibm.com>
12 * 13 *
13 * Derived from "crypto/aes_generic.c" 14 * Derived from "crypto/aes_generic.c"
14 *
15 * This program is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 *
20 */ 15 */
21 16
22#define KMSG_COMPONENT "aes_s390" 17#define KMSG_COMPONENT "aes_s390"
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 36aefc07d10c..8720e9203ecf 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -1,13 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * s390 arch random implementation. 3 * s390 arch random implementation.
3 * 4 *
4 * Copyright IBM Corp. 2017 5 * Copyright IBM Corp. 2017
5 * Author(s): Harald Freudenberger <freude@de.ibm.com> 6 * Author(s): Harald Freudenberger <freude@de.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 *
11 */ 7 */
12 8
13#include <linux/kernel.h> 9#include <linux/kernel.h>
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 992e630c227b..436865926c26 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Crypto-API module for CRC-32 algorithms implemented with the 3 * Crypto-API module for CRC-32 algorithms implemented with the
3 * z/Architecture Vector Extension Facility. 4 * z/Architecture Vector Extension Facility.
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 0d296662bbf0..5346b5a80bb6 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
@@ -6,12 +7,6 @@
6 * Copyright IBM Corp. 2003, 2011 7 * Copyright IBM Corp. 2003, 2011
7 * Author(s): Thomas Spatzier 8 * Author(s): Thomas Spatzier
8 * Jan Glauber (jan.glauber@de.ibm.com) 9 * Jan Glauber (jan.glauber@de.ibm.com)
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 */ 10 */
16 11
17#include <linux/init.h> 12#include <linux/init.h>
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 564616d48d8b..3b7f96c9eead 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index a4e903ed7e21..003932db8d12 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
@@ -7,11 +8,6 @@
7 * Copyright IBM Corp. 2017 8 * Copyright IBM Corp. 2017
8 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 9 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
9 * Harald Freudenberger <freude@de.ibm.com> 10 * Harald Freudenberger <freude@de.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License (version 2 only)
13 * as published by the Free Software Foundation.
14 *
15 */ 11 */
16 12
17#define KMSG_COMPONENT "paes_s390" 13#define KMSG_COMPONENT "paes_s390"
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 3e47c4a0f18b..a97a1802cfb4 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright IBM Corp. 2006, 2015 3 * Copyright IBM Corp. 2006, 2015
3 * Author(s): Jan Glauber <jan.glauber@de.ibm.com> 4 * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index 10f200790079..d6f8258b44df 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0+ */
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
@@ -5,12 +6,6 @@
5 * 6 *
6 * Copyright IBM Corp. 2007 7 * Copyright IBM Corp. 2007
7 * Author(s): Jan Glauber (jang@de.ibm.com) 8 * Author(s): Jan Glauber (jang@de.ibm.com)
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 */ 9 */
15#ifndef _CRYPTO_ARCH_S390_SHA_H 10#ifndef _CRYPTO_ARCH_S390_SHA_H
16#define _CRYPTO_ARCH_S390_SHA_H 11#define _CRYPTO_ARCH_S390_SHA_H
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index c7de53d8da75..a00c17f761c1 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
@@ -16,12 +17,6 @@
16 * Copyright (c) Alan Smithee. 17 * Copyright (c) Alan Smithee.
17 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> 18 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
18 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> 19 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
19 *
20 * This program is free software; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the Free
22 * Software Foundation; either version 2 of the License, or (at your option)
23 * any later version.
24 *
25 */ 20 */
26#include <crypto/internal/hash.h> 21#include <crypto/internal/hash.h>
27#include <linux/init.h> 22#include <linux/init.h>
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 53c277999a28..944aa6b237cd 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
@@ -6,12 +7,6 @@
6 * s390 Version: 7 * s390 Version:
7 * Copyright IBM Corp. 2005, 2011 8 * Copyright IBM Corp. 2005, 2011
8 * Author(s): Jan Glauber (jang@de.ibm.com) 9 * Author(s): Jan Glauber (jang@de.ibm.com)
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 *
15 */ 10 */
16#include <crypto/internal/hash.h> 11#include <crypto/internal/hash.h>
17#include <linux/init.h> 12#include <linux/init.h>
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 2f4caa1ef123..b17eded532b1 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
@@ -5,12 +6,6 @@
5 * 6 *
6 * Copyright IBM Corp. 2007 7 * Copyright IBM Corp. 2007
7 * Author(s): Jan Glauber (jang@de.ibm.com) 8 * Author(s): Jan Glauber (jang@de.ibm.com)
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 */ 9 */
15#include <crypto/internal/hash.h> 10#include <crypto/internal/hash.h>
16#include <crypto/sha.h> 11#include <crypto/sha.h>
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index c740f77285b2..cf0718d121bc 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Cryptographic API. 3 * Cryptographic API.
3 * 4 *
@@ -5,12 +6,6 @@
5 * 6 *
6 * Copyright IBM Corp. 2007 7 * Copyright IBM Corp. 2007
7 * Author(s): Jan Glauber (jang@de.ibm.com) 8 * Author(s): Jan Glauber (jang@de.ibm.com)
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 */ 9 */
15 10
16#include <crypto/internal/hash.h> 11#include <crypto/internal/hash.h>
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index 2ee25ba252d6..06f601509ce9 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the linux hypfs filesystem routines. 3# Makefile for the linux hypfs filesystem routines.
3# 4#
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index cf8a2d92467f..43bbe63e2992 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -1,9 +1,9 @@
1// SPDX-License-Identifier: GPL-1.0+
1/* 2/*
2 * Hypervisor filesystem for Linux on s390. 3 * Hypervisor filesystem for Linux on s390.
3 * 4 *
4 * Copyright IBM Corp. 2006, 2008 5 * Copyright IBM Corp. 2006, 2008
5 * Author(s): Michael Holzheu <holzheu@de.ibm.com> 6 * Author(s): Michael Holzheu <holzheu@de.ibm.com>
6 * License: GPL
7 */ 7 */
8 8
9#define KMSG_COMPONENT "hypfs" 9#define KMSG_COMPONENT "hypfs"
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 41c211a4d8b1..048450869328 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1generic-y += asm-offsets.h 2generic-y += asm-offsets.h
2generic-y += cacheflush.h 3generic-y += cacheflush.h
3generic-y += clkdev.h 4generic-y += clkdev.h
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index a72002056b54..c2cf7bcdef9b 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _ASM_S390_ALTERNATIVE_H 2#ifndef _ASM_S390_ALTERNATIVE_H
2#define _ASM_S390_ALTERNATIVE_H 3#define _ASM_S390_ALTERNATIVE_H
3 4
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index c02f4aba88a6..cfce6835b109 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -1,12 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Adjunct processor (AP) interfaces 3 * Adjunct processor (AP) interfaces
3 * 4 *
4 * Copyright IBM Corp. 2017 5 * Copyright IBM Corp. 2017
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com> 7 * Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com>
11 * Martin Schwidefsky <schwidefsky@de.ibm.com> 8 * Martin Schwidefsky <schwidefsky@de.ibm.com>
12 * Harald Freudenberger <freude@de.ibm.com> 9 * Harald Freudenberger <freude@de.ibm.com>
diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h
index 0f5bd894f4dc..aa42a179be33 100644
--- a/arch/s390/include/asm/bugs.h
+++ b/arch/s390/include/asm/bugs.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * S390 version 3 * S390 version
3 * Copyright IBM Corp. 1999 4 * Copyright IBM Corp. 1999
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 792cda339af1..dd08db491b89 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -1,13 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * CPU-measurement facilities 3 * CPU-measurement facilities
3 * 4 *
4 * Copyright IBM Corp. 2012 5 * Copyright IBM Corp. 2012
5 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> 6 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
6 * Jan Glauber <jang@linux.vnet.ibm.com> 7 * Jan Glauber <jang@linux.vnet.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License (version 2 only)
10 * as published by the Free Software Foundation.
11 */ 8 */
12#ifndef _ASM_S390_CPU_MF_H 9#ifndef _ASM_S390_CPU_MF_H
13#define _ASM_S390_CPU_MF_H 10#define _ASM_S390_CPU_MF_H
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 9a3cb3983c01..1a61b1b997f2 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -194,13 +194,14 @@ struct arch_elf_state {
194#define CORE_DUMP_USE_REGSET 194#define CORE_DUMP_USE_REGSET
195#define ELF_EXEC_PAGESIZE PAGE_SIZE 195#define ELF_EXEC_PAGESIZE PAGE_SIZE
196 196
197/* 197/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
198 * This is the base location for PIE (ET_DYN with INTERP) loads. On 198 use of this is to invoke "./ld.so someprog" to test out a new version of
199 * 64-bit, this is raised to 4GB to leave the entire 32-bit address 199 the loader. We need to make sure that it is out of the way of the program
200 * space open for things that want to use the area for 32-bit pointers. 200 that it will "exec", and that there is sufficient room for the brk. 64-bit
201 */ 201 tasks are aligned to 4GB. */
202#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ 202#define ELF_ET_DYN_BASE (is_compat_task() ? \
203 0x100000000UL) 203 (STACK_TOP / 3 * 2) : \
204 (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
204 205
205/* This yields a mask that user programs can use to figure out what 206/* This yields a mask that user programs can use to figure out what
206 instruction set this CPU supports. */ 207 instruction set this CPU supports. */
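The revert restores the pre-4.13 formula. Worked example, assuming STACK_TOP = 2^42: 2^42 / 3 * 2 = 0x2aaaaaaaaaa, and masking with ~((1UL << 32) - 1) rounds that down to 0x2aa00000000 -- a 4GiB-aligned ET_DYN base about two thirds of the way up the address space; compat tasks take the unaligned STACK_TOP / 3 * 2 directly.

	/* sketch of the arithmetic (STACK_TOP value assumed): */
	unsigned long stack_top = 1UL << 42;
	unsigned long base = (stack_top / 3 * 2) & ~((1UL << 32) - 1);
	/* base == 0x2aa00000000: 4GiB-aligned */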
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 921391f2341e..13de80cf741c 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -1,22 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0+ */
1#ifndef _ASM_S390_KPROBES_H 2#ifndef _ASM_S390_KPROBES_H
2#define _ASM_S390_KPROBES_H 3#define _ASM_S390_KPROBES_H
3/* 4/*
4 * Kernel Probes (KProbes) 5 * Kernel Probes (KProbes)
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright IBM Corp. 2002, 2006 7 * Copyright IBM Corp. 2002, 2006
21 * 8 *
22 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel 9 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index f3a9b5a445b6..e14f381757f6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -1,12 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * definition for kernel virtual machines on s390 3 * definition for kernel virtual machines on s390
3 * 4 *
4 * Copyright IBM Corp. 2008, 2009 5 * Copyright IBM Corp. 2008, 2009
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 */ 8 */
12 9
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index 41393052ac57..74eeec9c0a80 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -1,12 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * definition for paravirtual devices on s390 3 * definition for paravirtual devices on s390
3 * 4 *
4 * Copyright IBM Corp. 2008 5 * Copyright IBM Corp. 2008
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> 7 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
11 */ 8 */
12/* 9/*
@@ -20,8 +17,6 @@
20 * 17 *
21 * Copyright IBM Corp. 2007,2008 18 * Copyright IBM Corp. 2007,2008
22 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> 19 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
23 *
24 * This work is licensed under the terms of the GNU GPL, version 2.
25 */ 20 */
26#ifndef __S390_KVM_PARA_H 21#ifndef __S390_KVM_PARA_H
27#define __S390_KVM_PARA_H 22#define __S390_KVM_PARA_H
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
index 6de5c6cb0061..672f95b12d40 100644
--- a/arch/s390/include/asm/livepatch.h
+++ b/arch/s390/include/asm/livepatch.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0+ */
1/* 2/*
2 * livepatch.h - s390-specific Kernel Live Patching Core 3 * livepatch.h - s390-specific Kernel Live Patching Core
3 * 4 *
@@ -7,13 +8,6 @@
7 * Jiri Slaby 8 * Jiri Slaby
8 */ 9 */
9 10
10/*
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 */
16
17#ifndef ASM_LIVEPATCH_H 11#ifndef ASM_LIVEPATCH_H
18#define ASM_LIVEPATCH_H 12#define ASM_LIVEPATCH_H
19 13
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index f4a07f788f78..65154eaa3714 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -28,7 +28,7 @@ static inline int init_new_context(struct task_struct *tsk,
28#ifdef CONFIG_PGSTE 28#ifdef CONFIG_PGSTE
29 mm->context.alloc_pgste = page_table_allocate_pgste || 29 mm->context.alloc_pgste = page_table_allocate_pgste ||
30 test_thread_flag(TIF_PGSTE) || 30 test_thread_flag(TIF_PGSTE) ||
31 current->mm->context.alloc_pgste; 31 (current->mm && current->mm->context.alloc_pgste);
32 mm->context.has_pgste = 0; 32 mm->context.has_pgste = 0;
33 mm->context.use_skey = 0; 33 mm->context.use_skey = 0;
34 mm->context.use_cmma = 0; 34 mm->context.use_cmma = 0;
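The added check guards against current->mm being NULL: init_new_context() can run on behalf of a kernel thread, which has no mm, and the old code dereferenced the pointer unconditionally. The pattern in isolation:

	/* kernel threads run with current->mm == NULL, so the inherited
	 * alloc_pgste setting is only consulted when an mm exists: */
	mm->context.alloc_pgste = page_table_allocate_pgste ||
				  test_thread_flag(TIF_PGSTE) ||
				  (current->mm && current->mm->context.alloc_pgste);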
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index d6c9d1e0dc2d..b9c0e361748b 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -40,6 +40,7 @@ struct pt_regs;
40extern unsigned long perf_instruction_pointer(struct pt_regs *regs); 40extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
41extern unsigned long perf_misc_flags(struct pt_regs *regs); 41extern unsigned long perf_misc_flags(struct pt_regs *regs);
42#define perf_misc_flags(regs) perf_misc_flags(regs) 42#define perf_misc_flags(regs) perf_misc_flags(regs)
43#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
43 44
44/* Perf pt_regs extension for sample-data-entry indicators */ 45/* Perf pt_regs extension for sample-data-entry indicators */
45struct perf_sf_sde_regs { 46struct perf_sf_sde_regs {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index d7fe9838084d..0a6b0286c32e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
709 return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; 709 return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
710} 710}
711 711
712#define __HAVE_ARCH_PMD_WRITE 712#define pmd_write pmd_write
713static inline int pmd_write(pmd_t pmd) 713static inline int pmd_write(pmd_t pmd)
714{ 714{
715 return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; 715 return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
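Replacing __HAVE_ARCH_PMD_WRITE with #define pmd_write pmd_write follows the kernel-wide idiom for overridable helpers: generic headers supply a fallback only when the name is not already defined, roughly:

	/* generic-header side of the idiom (sketch): */
	#ifndef pmd_write
	static inline int pmd_write(pmd_t pmd)
	{
		BUG();
		return 0;
	}
	#endif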
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index a3788dafc0e1..6f70d81c40f2 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -74,9 +74,14 @@ enum {
74 */ 74 */
75struct pt_regs 75struct pt_regs
76{ 76{
77 unsigned long args[1]; 77 union {
78 psw_t psw; 78 user_pt_regs user_regs;
79 unsigned long gprs[NUM_GPRS]; 79 struct {
80 unsigned long args[1];
81 psw_t psw;
82 unsigned long gprs[NUM_GPRS];
83 };
84 };
80 unsigned long orig_gpr2; 85 unsigned long orig_gpr2;
81 unsigned int int_code; 86 unsigned int int_code;
82 unsigned int int_parm; 87 unsigned int int_parm;
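The anonymous union keeps the in-kernel pt_regs layout byte-for-byte unchanged while letting its leading fields be viewed as the exported user_pt_regs. An illustrative check of the invariant this relies on (not part of the patch):

	#include <stddef.h>

	_Static_assert(offsetof(struct pt_regs, psw) ==
		       offsetof(user_pt_regs, psw), "psw must line up");
	_Static_assert(offsetof(struct pt_regs, gprs) ==
		       offsetof(user_pt_regs, gprs), "gprs must line up");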
diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h
index 8bfce3475b1c..97a0582b8d0f 100644
--- a/arch/s390/include/asm/segment.h
+++ b/arch/s390/include/asm/segment.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _ASM_SEGMENT_H 2#ifndef _ASM_SEGMENT_H
2#define _ASM_SEGMENT_H 3#define _ASM_SEGMENT_H
3 4
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index ec7b476c1ac5..c61b2cc1a8a8 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -30,21 +30,20 @@ static inline void restore_access_regs(unsigned int *acrs)
30 asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); 30 asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
31} 31}
32 32
33#define switch_to(prev,next,last) do { \ 33#define switch_to(prev, next, last) do { \
34 if (prev->mm) { \ 34 /* save_fpu_regs() sets the CIF_FPU flag, which enforces \
35 save_fpu_regs(); \ 35 * a restore of the floating point / vector registers as \
36 save_access_regs(&prev->thread.acrs[0]); \ 36 * soon as the next task returns to user space \
37 save_ri_cb(prev->thread.ri_cb); \ 37 */ \
38 save_gs_cb(prev->thread.gs_cb); \ 38 save_fpu_regs(); \
39 } \ 39 save_access_regs(&prev->thread.acrs[0]); \
40 save_ri_cb(prev->thread.ri_cb); \
41 save_gs_cb(prev->thread.gs_cb); \
40 update_cr_regs(next); \ 42 update_cr_regs(next); \
41 if (next->mm) { \ 43 restore_access_regs(&next->thread.acrs[0]); \
42 set_cpu_flag(CIF_FPU); \ 44 restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
43 restore_access_regs(&next->thread.acrs[0]); \ 45 restore_gs_cb(next->thread.gs_cb); \
44 restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ 46 prev = __switch_to(prev, next); \
45 restore_gs_cb(next->thread.gs_cb); \
46 } \
47 prev = __switch_to(prev,next); \
48} while (0) 47} while (0)
49 48
50#endif /* __ASM_SWITCH_TO_H */ 49#endif /* __ASM_SWITCH_TO_H */
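Dropping the prev->mm/next->mm tests makes the save/restore unconditional: save_fpu_regs() sets CIF_FPU, and the actual register reload is deferred until the task next returns to user space, so kernel threads no longer need special-casing. The lazy half of the protocol, roughly (the exact placement in entry.S is an assumption):

	/* on the exit-to-user path: */
	if (test_cpu_flag(CIF_FPU))
		load_fpu_regs();	/* reload FP/VX registers once */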
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 6bc941be6921..96f9a9151fde 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -1,12 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Access to user system call parameters and results 3 * Access to user system call parameters and results
3 * 4 *
4 * Copyright IBM Corp. 2008 5 * Copyright IBM Corp. 2008
5 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 */ 7 */
11 8
12#ifndef _ASM_SYSCALL_H 9#ifndef _ASM_SYSCALL_H
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index a702cb9d4269..25057c118d56 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -1,12 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * definition for store system information stsi 3 * definition for store system information stsi
3 * 4 *
4 * Copyright IBM Corp. 2001, 2008 5 * Copyright IBM Corp. 2001, 2008
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Ulrich Weigand <weigand@de.ibm.com> 7 * Author(s): Ulrich Weigand <weigand@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */ 9 */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 1807229b292f..cca406fdbe51 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -53,6 +53,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
53static inline void topology_init_early(void) { } 53static inline void topology_init_early(void) { }
54static inline void topology_schedule_update(void) { } 54static inline void topology_schedule_update(void) { }
55static inline int topology_cpu_init(struct cpu *cpu) { return 0; } 55static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
56static inline int topology_cpu_dedicated(int cpu_nr) { return 0; }
56static inline void topology_expect_change(void) { } 57static inline void topology_expect_change(void) { }
57 58
58#endif /* CONFIG_SCHED_TOPOLOGY */ 59#endif /* CONFIG_SCHED_TOPOLOGY */
diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h
index d375526c261f..605dc46bac5e 100644
--- a/arch/s390/include/asm/vga.h
+++ b/arch/s390/include/asm/vga.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1#ifndef _ASM_S390_VGA_H 2#ifndef _ASM_S390_VGA_H
2#define _ASM_S390_VGA_H 3#define _ASM_S390_VGA_H
3 4
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 098f28778a13..92b7c9b3e641 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# UAPI Header export list 2# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 3include include/uapi/asm-generic/Kbuild.asm
3 4
diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..cefe7c7cd4f6
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
3#define _UAPI__ASM_BPF_PERF_EVENT_H__
4
5#include <asm/ptrace.h>
6
7typedef user_pt_regs bpf_user_pt_regs_t;
8
9#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
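Together with the perf_arch_bpf_user_pt_regs() hook added above, this gives BPF programs attached to perf events a stable register view: bpf_user_pt_regs_t is the exported prefix of pt_regs, not the whole kernel structure. A hedged sketch of a consumer:

	#include <asm/bpf_perf_event.h>

	/* only the exported psw/gprs prefix is visible here: */
	static unsigned long sample_ip(bpf_user_pt_regs_t *regs)
	{
		return regs->psw.addr;
	}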
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9ad172dcd912..38535a57fef8 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -6,10 +6,6 @@
6 * 6 *
7 * Copyright IBM Corp. 2008 7 * Copyright IBM Corp. 2008
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License (version 2 only)
11 * as published by the Free Software Foundation.
12 *
13 * Author(s): Carsten Otte <cotte@de.ibm.com> 9 * Author(s): Carsten Otte <cotte@de.ibm.com>
14 * Christian Borntraeger <borntraeger@de.ibm.com> 10 * Christian Borntraeger <borntraeger@de.ibm.com>
15 */ 11 */
diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h
index 0dc86b3a7cb0..b9ab584adf43 100644
--- a/arch/s390/include/uapi/asm/kvm_para.h
+++ b/arch/s390/include/uapi/asm/kvm_para.h
@@ -4,9 +4,5 @@
4 * 4 *
5 * Copyright IBM Corp. 2008 5 * Copyright IBM Corp. 2008
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 *
11 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> 7 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
12 */ 8 */
diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h
index c36c97ffdc6f..84606b8cc49e 100644
--- a/arch/s390/include/uapi/asm/kvm_perf.h
+++ b/arch/s390/include/uapi/asm/kvm_perf.h
@@ -4,10 +4,6 @@
4 * 4 *
5 * Copyright 2014 IBM Corp. 5 * Copyright 2014 IBM Corp.
6 * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com> 6 * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License (version 2 only)
10 * as published by the Free Software Foundation.
11 */ 7 */
12 8
13#ifndef __LINUX_KVM_PERF_S390_H 9#ifndef __LINUX_KVM_PERF_S390_H
diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h
index 7c8564f98205..d17dd9e5d516 100644
--- a/arch/s390/include/uapi/asm/perf_regs.h
+++ b/arch/s390/include/uapi/asm/perf_regs.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
1#ifndef _ASM_S390_PERF_REGS_H 2#ifndef _ASM_S390_PERF_REGS_H
2#define _ASM_S390_PERF_REGS_H 3#define _ASM_S390_PERF_REGS_H
3 4
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index 0d23c8ff2900..543dd70e12c8 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -162,7 +162,7 @@
162#define GPR_SIZE 8 162#define GPR_SIZE 8
163#define CR_SIZE 8 163#define CR_SIZE 8
164 164
165#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ 165#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
166 166
167#endif /* __s390x__ */ 167#endif /* __s390x__ */
168 168
@@ -179,17 +179,16 @@
179#define ACR_SIZE 4 179#define ACR_SIZE 4
180 180
181 181
182#define PTRACE_OLDSETOPTIONS 21 182#define PTRACE_OLDSETOPTIONS 21
183 183
184#ifndef __ASSEMBLY__ 184#ifndef __ASSEMBLY__
185#include <linux/stddef.h> 185#include <linux/stddef.h>
186#include <linux/types.h> 186#include <linux/types.h>
187 187
188typedef union 188typedef union {
189{ 189 float f;
190 float f; 190 double d;
191 double d; 191 __u64 ui;
192 __u64 ui;
193 struct 192 struct
194 { 193 {
195 __u32 hi; 194 __u32 hi;
@@ -197,23 +196,21 @@ typedef union
197 } fp; 196 } fp;
198} freg_t; 197} freg_t;
199 198
200typedef struct 199typedef struct {
201{ 200 __u32 fpc;
202 __u32 fpc;
203 __u32 pad; 201 __u32 pad;
204 freg_t fprs[NUM_FPRS]; 202 freg_t fprs[NUM_FPRS];
205} s390_fp_regs; 203} s390_fp_regs;
206 204
207#define FPC_EXCEPTION_MASK 0xF8000000 205#define FPC_EXCEPTION_MASK 0xF8000000
208#define FPC_FLAGS_MASK 0x00F80000 206#define FPC_FLAGS_MASK 0x00F80000
209#define FPC_DXC_MASK 0x0000FF00 207#define FPC_DXC_MASK 0x0000FF00
210#define FPC_RM_MASK 0x00000003 208#define FPC_RM_MASK 0x00000003
211 209
212/* this typedef defines how a Program Status Word looks like */ 210/* this typedef defines how a Program Status Word looks like */
213typedef struct 211typedef struct {
214{ 212 unsigned long mask;
215 unsigned long mask; 213 unsigned long addr;
216 unsigned long addr;
217} __attribute__ ((aligned(8))) psw_t; 214} __attribute__ ((aligned(8))) psw_t;
218 215
219#ifndef __s390x__ 216#ifndef __s390x__
@@ -282,8 +279,7 @@ typedef struct
282/* 279/*
283 * The s390_regs structure is used to define the elf_gregset_t. 280 * The s390_regs structure is used to define the elf_gregset_t.
284 */ 281 */
285typedef struct 282typedef struct {
286{
287 psw_t psw; 283 psw_t psw;
288 unsigned long gprs[NUM_GPRS]; 284 unsigned long gprs[NUM_GPRS];
289 unsigned int acrs[NUM_ACRS]; 285 unsigned int acrs[NUM_ACRS];
@@ -291,24 +287,32 @@ typedef struct
291} s390_regs; 287} s390_regs;
292 288
293/* 289/*
290 * The user_pt_regs structure exports the beginning of
291 * the in-kernel pt_regs structure to user space.
292 */
293typedef struct {
294 unsigned long args[1];
295 psw_t psw;
296 unsigned long gprs[NUM_GPRS];
297} user_pt_regs;
298
299/*
294 * Now for the user space program event recording (trace) definitions. 300 * Now for the user space program event recording (trace) definitions.
295 * The following structures are used only for the ptrace interface, don't 301 * The following structures are used only for the ptrace interface, don't
296 * touch or even look at it if you don't want to modify the user-space 302 * touch or even look at it if you don't want to modify the user-space
297 * ptrace interface. In particular stay away from it for in-kernel PER. 303 * ptrace interface. In particular stay away from it for in-kernel PER.
298 */ 304 */
299typedef struct 305typedef struct {
300{
301 unsigned long cr[NUM_CR_WORDS]; 306 unsigned long cr[NUM_CR_WORDS];
302} per_cr_words; 307} per_cr_words;
303 308
304#define PER_EM_MASK 0xE8000000UL 309#define PER_EM_MASK 0xE8000000UL
305 310
306typedef struct 311typedef struct {
307{
308#ifdef __s390x__ 312#ifdef __s390x__
309 unsigned : 32; 313 unsigned : 32;
310#endif /* __s390x__ */ 314#endif /* __s390x__ */
311 unsigned em_branching : 1; 315 unsigned em_branching : 1;
312 unsigned em_instruction_fetch : 1; 316 unsigned em_instruction_fetch : 1;
313 /* 317 /*
314 * Switching on storage alteration automatically fixes 318 * Switching on storage alteration automatically fixes
@@ -317,44 +321,41 @@ typedef struct
317 unsigned em_storage_alteration : 1; 321 unsigned em_storage_alteration : 1;
318 unsigned em_gpr_alt_unused : 1; 322 unsigned em_gpr_alt_unused : 1;
319 unsigned em_store_real_address : 1; 323 unsigned em_store_real_address : 1;
320 unsigned : 3; 324 unsigned : 3;
321 unsigned branch_addr_ctl : 1; 325 unsigned branch_addr_ctl : 1;
322 unsigned : 1; 326 unsigned : 1;
323 unsigned storage_alt_space_ctl : 1; 327 unsigned storage_alt_space_ctl : 1;
324 unsigned : 21; 328 unsigned : 21;
325 unsigned long starting_addr; 329 unsigned long starting_addr;
326 unsigned long ending_addr; 330 unsigned long ending_addr;
327} per_cr_bits; 331} per_cr_bits;
328 332
329typedef struct 333typedef struct {
330{
331 unsigned short perc_atmid; 334 unsigned short perc_atmid;
332 unsigned long address; 335 unsigned long address;
333 unsigned char access_id; 336 unsigned char access_id;
334} per_lowcore_words; 337} per_lowcore_words;
335 338
336typedef struct 339typedef struct {
337{ 340 unsigned perc_branching : 1;
338 unsigned perc_branching : 1;
339 unsigned perc_instruction_fetch : 1; 341 unsigned perc_instruction_fetch : 1;
340 unsigned perc_storage_alteration : 1; 342 unsigned perc_storage_alteration : 1;
341 unsigned perc_gpr_alt_unused : 1; 343 unsigned perc_gpr_alt_unused : 1;
342 unsigned perc_store_real_address : 1; 344 unsigned perc_store_real_address : 1;
343 unsigned : 3; 345 unsigned : 3;
344 unsigned atmid_psw_bit_31 : 1; 346 unsigned atmid_psw_bit_31 : 1;
345 unsigned atmid_validity_bit : 1; 347 unsigned atmid_validity_bit : 1;
346 unsigned atmid_psw_bit_32 : 1; 348 unsigned atmid_psw_bit_32 : 1;
347 unsigned atmid_psw_bit_5 : 1; 349 unsigned atmid_psw_bit_5 : 1;
348 unsigned atmid_psw_bit_16 : 1; 350 unsigned atmid_psw_bit_16 : 1;
349 unsigned atmid_psw_bit_17 : 1; 351 unsigned atmid_psw_bit_17 : 1;
350 unsigned si : 2; 352 unsigned si : 2;
351 unsigned long address; 353 unsigned long address;
352 unsigned : 4; 354 unsigned : 4;
353 unsigned access_id : 4; 355 unsigned access_id : 4;
354} per_lowcore_bits; 356} per_lowcore_bits;
355 357
356typedef struct 358typedef struct {
357{
358 union { 359 union {
359 per_cr_words words; 360 per_cr_words words;
360 per_cr_bits bits; 361 per_cr_bits bits;
@@ -364,9 +365,9 @@ typedef struct
364 * the kernel always sets them to zero. To enable single 365 * the kernel always sets them to zero. To enable single
365 * stepping use ptrace(PTRACE_SINGLESTEP) instead. 366 * stepping use ptrace(PTRACE_SINGLESTEP) instead.
366 */ 367 */
367 unsigned single_step : 1; 368 unsigned single_step : 1;
368 unsigned instruction_fetch : 1; 369 unsigned instruction_fetch : 1;
369 unsigned : 30; 370 unsigned : 30;
370 /* 371 /*
371 * These addresses are copied into cr10 & cr11 if single 372 * These addresses are copied into cr10 & cr11 if single
372 * stepping is switched off 373 * stepping is switched off
@@ -376,11 +377,10 @@ typedef struct
376 union { 377 union {
377 per_lowcore_words words; 378 per_lowcore_words words;
378 per_lowcore_bits bits; 379 per_lowcore_bits bits;
379 } lowcore; 380 } lowcore;
380} per_struct; 381} per_struct;
381 382
382typedef struct 383typedef struct {
383{
384 unsigned int len; 384 unsigned int len;
385 unsigned long kernel_addr; 385 unsigned long kernel_addr;
386 unsigned long process_addr; 386 unsigned long process_addr;
@@ -390,12 +390,12 @@ typedef struct
390 * S/390 specific non posix ptrace requests. I chose unusual values so 390 * S/390 specific non posix ptrace requests. I chose unusual values so
391 * they are unlikely to clash with future ptrace definitions. 391 * they are unlikely to clash with future ptrace definitions.
392 */ 392 */
393#define PTRACE_PEEKUSR_AREA 0x5000 393#define PTRACE_PEEKUSR_AREA 0x5000
394#define PTRACE_POKEUSR_AREA 0x5001 394#define PTRACE_POKEUSR_AREA 0x5001
395#define PTRACE_PEEKTEXT_AREA 0x5002 395#define PTRACE_PEEKTEXT_AREA 0x5002
396#define PTRACE_PEEKDATA_AREA 0x5003 396#define PTRACE_PEEKDATA_AREA 0x5003
397#define PTRACE_POKETEXT_AREA 0x5004 397#define PTRACE_POKETEXT_AREA 0x5004
398#define PTRACE_POKEDATA_AREA 0x5005 398#define PTRACE_POKEDATA_AREA 0x5005
399#define PTRACE_GET_LAST_BREAK 0x5006 399#define PTRACE_GET_LAST_BREAK 0x5006
400#define PTRACE_PEEK_SYSTEM_CALL 0x5007 400#define PTRACE_PEEK_SYSTEM_CALL 0x5007
401#define PTRACE_POKE_SYSTEM_CALL 0x5008 401#define PTRACE_POKE_SYSTEM_CALL 0x5008
@@ -413,21 +413,19 @@ typedef struct
413 * PT_PROT definition is loosely based on hppa bsd definition in 413 * PT_PROT definition is loosely based on hppa bsd definition in
414 * gdb/hppab-nat.c 414 * gdb/hppab-nat.c
415 */ 415 */
416#define PTRACE_PROT 21 416#define PTRACE_PROT 21
417 417
418typedef enum 418typedef enum {
419{
420 ptprot_set_access_watchpoint, 419 ptprot_set_access_watchpoint,
421 ptprot_set_write_watchpoint, 420 ptprot_set_write_watchpoint,
422 ptprot_disable_watchpoint 421 ptprot_disable_watchpoint
423} ptprot_flags; 422} ptprot_flags;
424 423
425typedef struct 424typedef struct {
426{
427 unsigned long lowaddr; 425 unsigned long lowaddr;
428 unsigned long hiaddr; 426 unsigned long hiaddr;
429 ptprot_flags prot; 427 ptprot_flags prot;
430} ptprot_area; 428} ptprot_area;
431 429
432/* Sequence of bytes for breakpoint illegal instruction. */ 430/* Sequence of bytes for breakpoint illegal instruction. */
433#define S390_BREAKPOINT {0x0,0x1} 431#define S390_BREAKPOINT {0x0,0x1}
@@ -439,8 +437,7 @@ typedef struct
439 * The user_regs_struct defines the way the user registers are 437 * The user_regs_struct defines the way the user registers are
440 * store on the stack for signal handling. 438 * store on the stack for signal handling.
441 */ 439 */
442struct user_regs_struct 440struct user_regs_struct {
443{
444 psw_t psw; 441 psw_t psw;
445 unsigned long gprs[NUM_GPRS]; 442 unsigned long gprs[NUM_GPRS];
446 unsigned int acrs[NUM_ACRS]; 443 unsigned int acrs[NUM_ACRS];
diff --git a/arch/s390/include/uapi/asm/sthyi.h b/arch/s390/include/uapi/asm/sthyi.h
index ec113db4eb7e..b1b022316983 100644
--- a/arch/s390/include/uapi/asm/sthyi.h
+++ b/arch/s390/include/uapi/asm/sthyi.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
1#ifndef _UAPI_ASM_STHYI_H 2#ifndef _UAPI_ASM_STHYI_H
2#define _UAPI_ASM_STHYI_H 3#define _UAPI_ASM_STHYI_H
3 4
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
index 967aad390105..2b605f7e8483 100644
--- a/arch/s390/include/uapi/asm/virtio-ccw.h
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -1,13 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 1/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
2/* 2/*
3 * Definitions for virtio-ccw devices. 3 * Definitions for virtio-ccw devices.
4 * 4 *
5 * Copyright IBM Corp. 2013 5 * Copyright IBM Corp. 2013
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 *
11 * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> 7 * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
12 */ 8 */
13#ifndef __KVM_VIRTIO_CCW_H 9#ifndef __KVM_VIRTIO_CCW_H
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
index 4caf71714a55..aeaaa030030e 100644
--- a/arch/s390/include/uapi/asm/vmcp.h
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
1/* 2/*
2 * Copyright IBM Corp. 2004, 2005 3 * Copyright IBM Corp. 2004, 2005
3 * Interface implementation for communication with the z/VM control program 4 * Interface implementation for communication with the z/VM control program
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index 137ef473584e..d568307321fc 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -9,20 +9,6 @@
9 * Eric Rossman (edrossma@us.ibm.com) 9 * Eric Rossman (edrossma@us.ibm.com)
10 * 10 *
11 * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) 11 * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2, or (at your option)
16 * any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 */ 12 */
27 13
28#ifndef __ASM_S390_ZCRYPT_H 14#ifndef __ASM_S390_ZCRYPT_H
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 315986a06cf5..574e77622c04 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/module.h> 2#include <linux/module.h>
2#include <asm/alternative.h> 3#include <asm/alternative.h>
3#include <asm/facility.h> 4#include <asm/facility.h>
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index f04db3779b34..59eea9c65d3e 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
263 return retval; 263 return retval;
264 } 264 }
265 265
266 groups_sort(group_info);
266 retval = set_current_groups(group_info); 267 retval = set_current_groups(group_info);
267 put_group_info(group_info); 268 put_group_info(group_info);
268 269
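groups_sort() must run before set_current_groups() because membership tests (groups_search() in kernel/groups.c) binary-search the gid array; an unsorted list makes in_group_p() return wrong answers. The required call order, in isolation:

	groups_sort(group_info);			/* sort gids ascending */
	retval = set_current_groups(group_info);	/* binary search now valid */
	put_group_info(group_info);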
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 58b9e127b615..80e974adb9e8 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1392,7 +1392,7 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
1392 else 1392 else
1393 except_str = "-"; 1393 except_str = "-";
1394 caller = (unsigned long) entry->caller; 1394 caller = (unsigned long) entry->caller;
1395 rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %p ", 1395 rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ",
1396 area, sec, usec, level, except_str, 1396 area, sec, usec, level, except_str,
1397 entry->id.fields.cpuid, (void *)caller); 1397 entry->id.fields.cpuid, (void *)caller);
1398 return rc; 1398 return rc;
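Using %pK instead of %p makes the printed caller address honour the kptr_restrict sysctl, so the debug facility no longer leaks kernel addresses to unprivileged readers; with kptr_restrict >= 1 the value is censored. Usage pattern:

	/* unprivileged readers see a zeroed address when restricted: */
	pr_info("caller %pK\n", (void *)caller);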
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 3be829721cf9..b2c68fbf2634 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Disassemble s390 instructions. 3 * Disassemble s390 instructions.
3 * 4 *
@@ -396,9 +397,14 @@ struct s390_insn *find_insn(unsigned char *code)
396 unsigned char opfrag; 397 unsigned char opfrag;
397 int i; 398 int i;
398 399
400 /* Search the opcode offset table to find an entry which
401 * matches the beginning of the opcode. If there is no match
402 * the last entry will be used, which is the default entry for
403 * unknown instructions as well as 1-byte opcode instructions.
404 */
399 for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) { 405 for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) {
400 entry = &opcode_offset[i]; 406 entry = &opcode_offset[i];
401 if (entry->opcode == code[0] || entry->opcode == 0) 407 if (entry->opcode == code[0])
402 break; 408 break;
403 } 409 }
404 410
@@ -543,7 +549,7 @@ void show_code(struct pt_regs *regs)
543 start += opsize; 549 start += opsize;
544 pr_cont("%s", buffer); 550 pr_cont("%s", buffer);
545 ptr = buffer; 551 ptr = buffer;
546 ptr += sprintf(ptr, "\n\t "); 552 ptr += sprintf(ptr, "\n ");
547 hops++; 553 hops++;
548 } 554 }
549 pr_cont("\n"); 555 pr_cont("\n");
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 2aa545dca4d5..5b23c4f6e50c 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Stack dumping functions 3 * Stack dumping functions
3 * 4 *
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a316cd6999ad..9e5f6cd8e4c2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -180,18 +180,17 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
180 */ 180 */
181ENTRY(__switch_to) 181ENTRY(__switch_to)
182 stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task 182 stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
183 lgr %r1,%r2 183 lghi %r4,__TASK_stack
184 aghi %r1,__TASK_thread # thread_struct of prev task 184 lghi %r1,__TASK_thread
185 lg %r5,__TASK_stack(%r3) # start of kernel stack of next 185 lg %r5,0(%r4,%r3) # start of kernel stack of next
186 stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev 186 stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
187 lgr %r1,%r3
188 aghi %r1,__TASK_thread # thread_struct of next task
189 lgr %r15,%r5 187 lgr %r15,%r5
190 aghi %r15,STACK_INIT # end of kernel stack of next 188 aghi %r15,STACK_INIT # end of kernel stack of next
191 stg %r3,__LC_CURRENT # store task struct of next 189 stg %r3,__LC_CURRENT # store task struct of next
192 stg %r15,__LC_KERNEL_STACK # store end of kernel stack 190 stg %r15,__LC_KERNEL_STACK # store end of kernel stack
193 lg %r15,__THREAD_ksp(%r1) # load kernel stack of next 191 lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
194 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next 192 aghi %r3,__TASK_pid
193 mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
195 lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task 194 lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
196 TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP 195 TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
197 bzr %r14 196 bzr %r14
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 310e59e6eb4b..8ecb8726ac47 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * ipl/reipl/dump support for Linux on s390. 3 * ipl/reipl/dump support for Linux on s390.
3 * 4 *
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 1a6521af1751..af3722c28fd9 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -1,20 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Kernel Probes (KProbes) 3 * Kernel Probes (KProbes)
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corp. 2002, 2006 5 * Copyright IBM Corp. 2002, 2006
19 * 6 *
20 * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com> 7 * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index bf9622f0e6b1..452502f9a0d9 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Linux Guest Relocation (LGR) detection 3 * Linux Guest Relocation (LGR) detection
3 * 4 *
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 7b87991416fd..b7abfad4fd7d 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Kernel module help for s390. 3 * Kernel module help for s390.
3 * 4 *
@@ -8,20 +9,6 @@
8 * 9 *
9 * based on i386 version 10 * based on i386 version
10 * Copyright (C) 2001 Rusty Russell. 11 * Copyright (C) 2001 Rusty Russell.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */ 12 */
26#include <linux/module.h> 13#include <linux/module.h>
27#include <linux/elf.h> 14#include <linux/elf.h>
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 6ff169253cae..c7a627620e5e 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Machine check handler 3 * Machine check handler
3 * 4 *
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 746d03423333..cc085e2d2ce9 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Performance event support for s390x - CPU-measurement Counter Facility 3 * Performance event support for s390x - CPU-measurement Counter Facility
3 * 4 *
4 * Copyright IBM Corp. 2012, 2017 5 * Copyright IBM Corp. 2012, 2017
5 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> 6 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 */ 7 */
11#define KMSG_COMPONENT "cpum_cf" 8#define KMSG_COMPONENT "cpum_cf"
12#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 9#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 227b38bd82c9..1c9ddd7aa5ec 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Performance event support for the System z CPU-measurement Sampling Facility 3 * Performance event support for the System z CPU-measurement Sampling Facility
3 * 4 *
4 * Copyright IBM Corp. 2013 5 * Copyright IBM Corp. 2013
5 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> 6 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 */ 7 */
11#define KMSG_COMPONENT "cpum_sf" 8#define KMSG_COMPONENT "cpum_sf"
12#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 9#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 93a386f4a3b5..0d770e513abf 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Performance event support for s390x 3 * Performance event support for s390x
3 * 4 *
4 * Copyright IBM Corp. 2012, 2013 5 * Copyright IBM Corp. 2012, 2013
5 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> 6 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 */ 7 */
11#define KMSG_COMPONENT "perf" 8#define KMSG_COMPONENT "perf"
12#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 9#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index f8603ebed669..54e2d634b849 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/perf_event.h> 2#include <linux/perf_event.h>
2#include <linux/perf_regs.h> 3#include <linux/perf_regs.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 26c0523c1488..cd3df5514552 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1651,6 +1651,14 @@ static const struct user_regset s390_compat_regsets[] = {
1651 .set = s390_gs_cb_set, 1651 .set = s390_gs_cb_set,
1652 }, 1652 },
1653 { 1653 {
1654 .core_note_type = NT_S390_GS_BC,
1655 .n = sizeof(struct gs_cb) / sizeof(__u64),
1656 .size = sizeof(__u64),
1657 .align = sizeof(__u64),
1658 .get = s390_gs_bc_get,
1659 .set = s390_gs_bc_set,
1660 },
1661 {
1654 .core_note_type = NT_S390_RI_CB, 1662 .core_note_type = NT_S390_RI_CB,
1655 .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), 1663 .n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
1656 .size = sizeof(__u64), 1664 .size = sizeof(__u64),
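The added compat regset mirrors the NT_S390_GS_BC (guarded-storage broadcast control block) entry that the native 64-bit regset list already carries, so 31-bit tracers can reach it too. Hypothetical tracer-side usage — assuming a libc elf.h new enough to define NT_S390_GS_BC; the buffer size follows the .n/.size pair in the hunk (sizeof(struct gs_cb) worth of __u64s):

    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>
    #include <elf.h>

    static int read_gs_bc(pid_t pid)
    {
        unsigned long long buf[4];      /* struct gs_cb is 4 __u64s */
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

        if (ptrace(PTRACE_GETREGSET, pid,
                   (void *)(long)NT_S390_GS_BC, &iov) == -1) {
            perror("PTRACE_GETREGSET");
            return -1;
        }
        printf("got %zu bytes of guarded-storage BC data\n", iov.iov_len);
        return 0;
    }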
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 090053cf279b..793da97f9a6e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * S390 version 3 * S390 version
3 * Copyright IBM Corp. 1999, 2012 4 * Copyright IBM Corp. 1999, 2012
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index cd4334e80b64..b8c1a85bcf2d 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -55,6 +55,7 @@
55#include <asm/sigp.h> 55#include <asm/sigp.h>
56#include <asm/idle.h> 56#include <asm/idle.h>
57#include <asm/nmi.h> 57#include <asm/nmi.h>
58#include <asm/topology.h>
58#include "entry.h" 59#include "entry.h"
59 60
60enum { 61enum {
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index e66687dc6144..460dcfba7d4e 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Stack trace management functions 3 * Stack trace management functions
3 * 4 *
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 12981e197f01..80b862e9c53c 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -1,10 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * store hypervisor information instruction emulation functions. 3 * store hypervisor information instruction emulation functions.
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License (version 2 only)
6 * as published by the Free Software Foundation.
7 *
8 * Copyright IBM Corp. 2016 5 * Copyright IBM Corp. 2016
9 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> 6 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
10 */ 7 */
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 308a7b63348b..f7fc63385553 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -370,10 +370,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg)
370SYSCALL(sys_sendmmsg,compat_sys_sendmmsg) 370SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)
371SYSCALL(sys_socket,sys_socket) 371SYSCALL(sys_socket,sys_socket)
372SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */ 372SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */
373SYSCALL(sys_bind,sys_bind) 373SYSCALL(sys_bind,compat_sys_bind)
374SYSCALL(sys_connect,sys_connect) 374SYSCALL(sys_connect,compat_sys_connect)
375SYSCALL(sys_listen,sys_listen) 375SYSCALL(sys_listen,sys_listen)
376SYSCALL(sys_accept4,sys_accept4) 376SYSCALL(sys_accept4,compat_sys_accept4)
377SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */ 377SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */
378SYSCALL(sys_setsockopt,compat_sys_setsockopt) 378SYSCALL(sys_setsockopt,compat_sys_setsockopt)
379SYSCALL(sys_getsockname,compat_sys_getsockname) 379SYSCALL(sys_getsockname,compat_sys_getsockname)
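Routing bind/connect/accept4 through compat_sys_* matches the surrounding socket calls: a 31-bit task only guarantees the low 31 bits of a register, so the compat wrappers zero-extend the user pointer (s390's compat_ptr() masks with 0x7fffffff) instead of trusting the high word. A userspace demo of why naive widening goes wrong:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t uptr = 0x80000004u;    /* 31-bit address, high bit dirty */

        /* widening through a signed type sign-extends: */
        uint64_t bad  = (uint64_t)(int32_t)uptr;
        /* a compat wrapper masks deliberately, like compat_ptr(): */
        uint64_t good = (uint64_t)(uptr & 0x7fffffffu);

        printf("bad  = %#llx\n", (unsigned long long)bad);
        printf("good = %#llx\n", (unsigned long long)good);
        return 0;
    }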
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index be6198193ec2..cf561160ea88 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Time of day based timer functions. 3 * Time of day based timer functions.
3 * 4 *
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index f9b393d4a078..4d5b65e527b5 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright IBM Corp. 2007, 2011 3 * Copyright IBM Corp. 2007, 2011
3 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 4 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 39a218703c50..f3a1c7c6824e 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * vdso setup for s390 3 * vdso setup for s390
3 * 4 *
4 * Copyright IBM Corp. 2008 5 * Copyright IBM Corp. 2008
5 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
10 */ 7 */
11 8
12#include <linux/init.h> 9#include <linux/init.h>
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
index eca3f001f081..f61df5253c23 100644
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ b/arch/s390/kernel/vdso32/clock_getres.S
@@ -1,13 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Userland implementation of clock_getres() for 32 bits processes in a 3 * Userland implementation of clock_getres() for 32 bits processes in a
3 * s390 kernel for use in the vDSO 4 * s390 kernel for use in the vDSO
4 * 5 *
5 * Copyright IBM Corp. 2008 6 * Copyright IBM Corp. 2008
6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 7 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License (version 2 only)
10 * as published by the Free Software Foundation.
11 */ 8 */
12#include <asm/vdso.h> 9#include <asm/vdso.h>
13#include <asm/asm-offsets.h> 10#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
index a5769b83d90e..2d6ec3abe095 100644
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -1,13 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Userland implementation of clock_gettime() for 32 bits processes in a 3 * Userland implementation of clock_gettime() for 32 bits processes in a
3 * s390 kernel for use in the vDSO 4 * s390 kernel for use in the vDSO
4 * 5 *
5 * Copyright IBM Corp. 2008 6 * Copyright IBM Corp. 2008
6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 7 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License (version 2 only)
10 * as published by the Free Software Foundation.
11 */ 8 */
12#include <asm/vdso.h> 9#include <asm/vdso.h>
13#include <asm/asm-offsets.h> 10#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
index 63b86dceb0bf..aa8bf13a2edb 100644
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -1,13 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Userland implementation of gettimeofday() for 32 bits processes in a 3 * Userland implementation of gettimeofday() for 32 bits processes in a
3 * s390 kernel for use in the vDSO 4 * s390 kernel for use in the vDSO
4 * 5 *
5 * Copyright IBM Corp. 2008 6 * Copyright IBM Corp. 2008
6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 7 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License (version 2 only)
10 * as published by the Free Software Foundation.
11 */ 8 */
12#include <asm/vdso.h> 9#include <asm/vdso.h>
13#include <asm/asm-offsets.h> 10#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index c8513deb8c66..faf5213b15df 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -1,13 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Userland implementation of clock_getres() for 64 bits processes in a 3 * Userland implementation of clock_getres() for 64 bits processes in a
3 * s390 kernel for use in the vDSO 4 * s390 kernel for use in the vDSO
4 * 5 *
5 * Copyright IBM Corp. 2008 6 * Copyright IBM Corp. 2008
6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 7 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License (version 2 only)
10 * as published by the Free Software Foundation.
11 */ 8 */
12#include <asm/vdso.h> 9#include <asm/vdso.h>
13#include <asm/asm-offsets.h> 10#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 5d7b56b49458..6046b3bfca46 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -1,13 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Userland implementation of clock_gettime() for 64 bits processes in a 3 * Userland implementation of clock_gettime() for 64 bits processes in a
3 * s390 kernel for use in the vDSO 4 * s390 kernel for use in the vDSO
4 * 5 *
5 * Copyright IBM Corp. 2008 6 * Copyright IBM Corp. 2008
6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 7 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License (version 2 only)
10 * as published by the Free Software Foundation.
11 */ 8 */
12#include <asm/vdso.h> 9#include <asm/vdso.h>
13#include <asm/asm-offsets.h> 10#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
index b02e62f3bc12..cc9dbc27da6f 100644
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -1,13 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Userland implementation of gettimeofday() for 64 bits processes in a 3 * Userland implementation of gettimeofday() for 64 bits processes in a
3 * s390 kernel for use in the vDSO 4 * s390 kernel for use in the vDSO
4 * 5 *
5 * Copyright IBM Corp. 2008 6 * Copyright IBM Corp. 2008
6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) 7 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License (version 2 only)
10 * as published by the Free Software Foundation.
11 */ 8 */
12#include <asm/vdso.h> 9#include <asm/vdso.h>
13#include <asm/asm-offsets.h> 10#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
index 79a071e4357e..db19d0680a0a 100644
--- a/arch/s390/kernel/vdso64/note.S
+++ b/arch/s390/kernel/vdso64/note.S
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. 3 * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
3 * Here we can supply some information useful to userland. 4 * Here we can supply some information useful to userland.
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index dd7178fbb4f3..f24395a01918 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Virtual cpu timer based timer functions. 3 * Virtual cpu timer based timer functions.
3 * 4 *
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 6048b1c6e580..05ee90a5ea08 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -1,10 +1,7 @@
1# SPDX-License-Identifier: GPL-2.0
1# Makefile for kernel virtual machines on s390 2# Makefile for kernel virtual machines on s390
2# 3#
3# Copyright IBM Corp. 2008 4# Copyright IBM Corp. 2008
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License (version 2 only)
7# as published by the Free Software Foundation.
8 5
9KVM := ../../../virt/kvm 6KVM := ../../../virt/kvm
10common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o 7common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index d93a2c0474bf..89aa114a2cba 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * handling diagnose instructions 3 * handling diagnose instructions
3 * 4 *
4 * Copyright IBM Corp. 2008, 2011 5 * Copyright IBM Corp. 2008, 2011
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */ 9 */
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index bec42b852246..f4c51756c462 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,12 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * access guest memory 3 * access guest memory
3 * 4 *
4 * Copyright IBM Corp. 2008, 2014 5 * Copyright IBM Corp. 2008, 2014
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 */ 8 */
12 9
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index bcbd86621d01..b5f3e82006d0 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * kvm guest debug support 3 * kvm guest debug support
3 * 4 *
4 * Copyright IBM Corp. 2014 5 * Copyright IBM Corp. 2014
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com> 7 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
11 */ 8 */
12#include <linux/kvm_host.h> 9#include <linux/kvm_host.h>
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 8fe034beb623..9c7d70715862 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * in-kernel handling for sie intercepts 3 * in-kernel handling for sie intercepts
3 * 4 *
4 * Copyright IBM Corp. 2008, 2014 5 * Copyright IBM Corp. 2008, 2014
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */ 9 */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fa557372d600..024ad8bcc516 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * handling kvm guest interrupts 3 * handling kvm guest interrupts
3 * 4 *
4 * Copyright IBM Corp. 2008, 2015 5 * Copyright IBM Corp. 2008, 2015
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 */ 8 */
12 9
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e4159643d..484608c71dd0 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -1,12 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * s390 irqchip routines 3 * s390 irqchip routines
3 * 4 *
4 * Copyright IBM Corp. 2014 5 * Copyright IBM Corp. 2014
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> 7 * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
11 */ 8 */
12#ifndef __KVM_IRQ_H 9#ifndef __KVM_IRQ_H
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 98ad8b9e0360..2c93cbbcd15e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1,11 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * hosting zSeries kernel virtual machines 3 * hosting IBM Z kernel virtual machines (s390x)
3 * 4 *
4 * Copyright IBM Corp. 2008, 2009 5 * Copyright IBM Corp. 2008, 2017
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 * 6 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com>
@@ -795,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
795 792
796 if (kvm->arch.use_cmma) { 793 if (kvm->arch.use_cmma) {
797 /* 794 /*
798 * Get the last slot. They should be sorted by base_gfn, so the 795 * Get the first slot. They are reverse sorted by base_gfn, so
799 * last slot is also the one at the end of the address space. 796 * the first slot is also the one at the end of the address
800 * We have verified above that at least one slot is present. 797 * space. We have verified above that at least one slot is
798 * present.
801 */ 799 */
802 ms = slots->memslots + slots->used_slots - 1; 800 ms = slots->memslots;
803 /* round up so we only use full longs */ 801 /* round up so we only use full longs */
804 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); 802 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
805 /* allocate enough bytes to store all the bits */ 803 /* allocate enough bytes to store all the bits */
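Since KVM keeps memslots reverse-sorted by base_gfn, slots->memslots[0] is the slot covering the top of guest memory, so it alone determines how large the CMMA migration bitmap must be; rounding up to BITS_PER_LONG keeps the bitmap an integral number of longs. The sizing arithmetic, on toy values:

    #include <stdio.h>

    #define BITS_PER_LONG   (8 * sizeof(long))

    static unsigned long roundup_ul(unsigned long x, unsigned long to)
    {
        return ((x + to - 1) / to) * to;
    }

    int main(void)
    {
        unsigned long base_gfn = 0x10000, npages = 0x12345;
        unsigned long ram_pages = roundup_ul(base_gfn + npages, BITS_PER_LONG);

        printf("ram_pages=%lu, bitmap bytes=%lu\n", ram_pages, ram_pages / 8);
        return 0;
    }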
@@ -3372,7 +3370,6 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3372int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3370int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3373{ 3371{
3374 int rc; 3372 int rc;
3375 sigset_t sigsaved;
3376 3373
3377 if (kvm_run->immediate_exit) 3374 if (kvm_run->immediate_exit)
3378 return -EINTR; 3375 return -EINTR;
@@ -3382,8 +3379,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3382 return 0; 3379 return 0;
3383 } 3380 }
3384 3381
3385 if (vcpu->sigset_active) 3382 kvm_sigset_activate(vcpu);
3386 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3387 3383
3388 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 3384 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3389 kvm_s390_vcpu_start(vcpu); 3385 kvm_s390_vcpu_start(vcpu);
@@ -3417,8 +3413,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3417 disable_cpu_timer_accounting(vcpu); 3413 disable_cpu_timer_accounting(vcpu);
3418 store_regs(vcpu, kvm_run); 3414 store_regs(vcpu, kvm_run);
3419 3415
3420 if (vcpu->sigset_active) 3416 kvm_sigset_deactivate(vcpu);
3421 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3422 3417
3423 vcpu->stat.exit_userspace++; 3418 vcpu->stat.exit_userspace++;
3424 return rc; 3419 return rc;
@@ -3811,6 +3806,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
3811 r = -EINVAL; 3806 r = -EINVAL;
3812 break; 3807 break;
3813 } 3808 }
3809 /* do not use irq_state.flags, it will break old QEMUs */
3814 r = kvm_s390_set_irq_state(vcpu, 3810 r = kvm_s390_set_irq_state(vcpu,
3815 (void __user *) irq_state.buf, 3811 (void __user *) irq_state.buf,
3816 irq_state.len); 3812 irq_state.len);
@@ -3826,6 +3822,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
3826 r = -EINVAL; 3822 r = -EINVAL;
3827 break; 3823 break;
3828 } 3824 }
3825 /* do not use irq_state.flags, it will break old QEMUs */
3829 r = kvm_s390_get_irq_state(vcpu, 3826 r = kvm_s390_get_irq_state(vcpu,
3830 (__u8 __user *) irq_state.buf, 3827 (__u8 __user *) irq_state.buf,
3831 irq_state.len); 3828 irq_state.len);
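The sigset hunks above replace the open-coded sigprocmask save/restore around the vcpu run loop with the new cross-arch kvm_sigset_activate()/kvm_sigset_deactivate() helpers. A userspace analogue of the pattern they centralize (the kernel versions operate on current->blocked rather than calling sigprocmask):

    #include <signal.h>

    static sigset_t saved;

    static void sigset_activate(const sigset_t *wanted)
    {
        sigprocmask(SIG_SETMASK, wanted, &saved);   /* install, remember old */
    }

    static void sigset_deactivate(void)
    {
        sigprocmask(SIG_SETMASK, &saved, NULL);     /* restore */
    }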
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 10d65dfbc306..5e46ba429bcb 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -1,12 +1,9 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * definition for kvm on s390 3 * definition for kvm on s390
3 * 4 *
4 * Copyright IBM Corp. 2008, 2009 5 * Copyright IBM Corp. 2008, 2009
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Christian Ehrhardt <ehrhardt@de.ibm.com> 9 * Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c954ac49eee4..0714bfa56da0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * handling privileged instructions 3 * handling privileged instructions
3 * 4 *
4 * Copyright IBM Corp. 2008, 2013 5 * Copyright IBM Corp. 2008, 2013
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */ 9 */
@@ -235,8 +232,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
235 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); 232 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
236 return -EAGAIN; 233 return -EAGAIN;
237 } 234 }
238 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
239 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
240 return 0; 235 return 0;
241} 236}
242 237
@@ -247,6 +242,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
247 int reg1, reg2; 242 int reg1, reg2;
248 int rc; 243 int rc;
249 244
245 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
246 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
247
250 rc = try_handle_skey(vcpu); 248 rc = try_handle_skey(vcpu);
251 if (rc) 249 if (rc)
252 return rc != -EAGAIN ? rc : 0; 250 return rc != -EAGAIN ? rc : 0;
@@ -276,6 +274,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
276 int reg1, reg2; 274 int reg1, reg2;
277 int rc; 275 int rc;
278 276
277 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
278 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
279
279 rc = try_handle_skey(vcpu); 280 rc = try_handle_skey(vcpu);
280 if (rc) 281 if (rc)
281 return rc != -EAGAIN ? rc : 0; 282 return rc != -EAGAIN ? rc : 0;
@@ -311,6 +312,9 @@ static int handle_sske(struct kvm_vcpu *vcpu)
311 int reg1, reg2; 312 int reg1, reg2;
312 int rc; 313 int rc;
313 314
315 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
316 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
317
314 rc = try_handle_skey(vcpu); 318 rc = try_handle_skey(vcpu);
315 if (rc) 319 if (rc)
316 return rc != -EAGAIN ? rc : 0; 320 return rc != -EAGAIN ? rc : 0;
@@ -1002,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
1002 cbrlo[entries] = gfn << PAGE_SHIFT; 1006 cbrlo[entries] = gfn << PAGE_SHIFT;
1003 } 1007 }
1004 1008
1005 if (orc) { 1009 if (orc && gfn < ms->bitmap_size) {
1006 /* increment only if we are really flipping the bit to 1 */ 1010 /* increment only if we are really flipping the bit to 1 */
1007 if (!test_and_set_bit(gfn, ms->pgste_bitmap)) 1011 if (!test_and_set_bit(gfn, ms->pgste_bitmap))
1008 atomic64_inc(&ms->dirty_pages); 1012 atomic64_inc(&ms->dirty_pages);
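Two separate fixes in priv.c: hoisting the PSW_MASK_PSTATE test out of try_handle_skey() and to the top of each handler makes the privileged-operation exception win over the "retrying storage key operation" path for problem-state guests, and the do_essa() hunk stops test_and_set_bit() from touching frames beyond the migration bitmap. A toy model of why that bounds guard matters:

    #include <stdio.h>
    #include <limits.h>

    #define BITS_PER_LONG   (CHAR_BIT * sizeof(long))

    static unsigned long bitmap[4];                 /* 4 longs of dirty bits */
    static const unsigned long bitmap_size = 4 * BITS_PER_LONG;

    static int test_and_set_bit_model(unsigned long nr, unsigned long *map)
    {
        unsigned long mask = 1UL << (nr % BITS_PER_LONG);
        unsigned long old = map[nr / BITS_PER_LONG];

        map[nr / BITS_PER_LONG] |= mask;
        return (old & mask) != 0;
    }

    int main(void)
    {
        unsigned long gfn = 300;        /* past the end of the map */

        if (gfn < bitmap_size)          /* the new guard */
            test_and_set_bit_model(gfn, bitmap);
        else
            puts("gfn outside migration bitmap, skipped");
        return 0;
    }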
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 9d592ef4104b..c1f5cde2c878 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * handling interprocessor communication 3 * handling interprocessor communication
3 * 4 *
4 * Copyright IBM Corp. 2008, 2013 5 * Copyright IBM Corp. 2008, 2013
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com> 7 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Christian Ehrhardt <ehrhardt@de.ibm.com> 9 * Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a311938b63b3..5d6ae0326d9e 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1,12 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * kvm nested virtualization support for s390x 3 * kvm nested virtualization support for s390x
3 * 4 *
4 * Copyright IBM Corp. 2016 5 * Copyright IBM Corp. 2016
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com> 7 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
11 */ 8 */
12#include <linux/vmalloc.h> 9#include <linux/vmalloc.h>
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index cae5a1e16cbd..c4f8039a35e8 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess);
89 89
90void disable_sacf_uaccess(mm_segment_t old_fs) 90void disable_sacf_uaccess(mm_segment_t old_fs)
91{ 91{
92 current->thread.mm_segment = old_fs;
92 if (old_fs == USER_DS && test_facility(27)) { 93 if (old_fs == USER_DS && test_facility(27)) {
93 __ctl_load(S390_lowcore.user_asce, 1, 1); 94 __ctl_load(S390_lowcore.user_asce, 1, 1);
94 clear_cpu_flag(CIF_ASCE_PRIMARY); 95 clear_cpu_flag(CIF_ASCE_PRIMARY);
95 } 96 }
96 current->thread.mm_segment = old_fs;
97} 97}
98EXPORT_SYMBOL(disable_sacf_uaccess); 98EXPORT_SYMBOL(disable_sacf_uaccess);
99 99
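The reordering in disable_sacf_uaccess() writes the software state before touching the hardware: once mm_segment is restored, anything that interrupts between the two steps already observes the final value, rather than a stale mm_segment paired with an already-reloaded user ASCE. The rule it enforces, as an annotated sketch with stand-in helpers:

    #define USER_DS 1

    static void set_mm_segment(int fs) { (void)fs; /* stand-in */ }
    static int  have_facility_27(void) { return 1; /* stand-in */ }
    static void load_user_asce(void)   { /* stand-in */ }

    static void disable_sacf_uaccess_model(int old_fs)
    {
        set_mm_segment(old_fs);         /* 1: software state first */

        if (old_fs == USER_DS && have_facility_27())
            load_user_asce();           /* 2: then the control register */
    }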
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 3d017171ff8f..6cf024eb2085 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Collaborative memory management interface. 3 * Collaborative memory management interface.
3 * 4 *
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index b2c140193b0a..05d459b638f5 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * KVM guest address space mapping code 3 * KVM guest address space mapping code
3 * 4 *
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 5bea139517a2..831bdcf407bb 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -1,24 +1,10 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * flexible mmap layout support 3 * flexible mmap layout support
3 * 4 *
4 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. 5 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
5 * All Rights Reserved. 6 * All Rights Reserved.
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 *
22 * Started by Ingo Molnar <mingo@elte.hu> 8 * Started by Ingo Molnar <mingo@elte.hu>
23 */ 9 */
24 10
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 434a9564917b..cb364153c43c 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -83,8 +83,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
83 83
84 /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ 84 /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
85 VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE); 85 VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
86 if (end >= TASK_SIZE_MAX)
87 return -ENOMEM;
88 rc = 0; 86 rc = 0;
89 notify = 0; 87 notify = 0;
90 while (mm->context.asce_limit < end) { 88 while (mm->context.asce_limit < end) {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ae677f814bc0..4f2b65d01a70 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright IBM Corp. 2007, 2011 3 * Copyright IBM Corp. 2007, 2011
3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 4 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index 90568c33ddb0..e0d5f245e42b 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Arch-specific network modules 3# Arch-specific network modules
3# 4#
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90..9557d8b516df 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
55#define SEEN_LITERAL 8 /* code uses literals */ 55#define SEEN_LITERAL 8 /* code uses literals */
56#define SEEN_FUNC 16 /* calls C functions */ 56#define SEEN_FUNC 16 /* calls C functions */
57#define SEEN_TAIL_CALL 32 /* code uses tail calls */ 57#define SEEN_TAIL_CALL 32 /* code uses tail calls */
58#define SEEN_SKB_CHANGE 64 /* code changes skb data */ 58#define SEEN_REG_AX 64 /* code uses constant blinding */
59#define SEEN_REG_AX 128 /* code uses constant blinding */
60#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) 59#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
61 60
62/* 61/*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
448 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, 447 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
449 REG_15, 152); 448 REG_15, 152);
450 } 449 }
451 if (jit->seen & SEEN_SKB) 450 if (jit->seen & SEEN_SKB) {
452 emit_load_skb_data_hlen(jit); 451 emit_load_skb_data_hlen(jit);
453 if (jit->seen & SEEN_SKB_CHANGE)
454 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ 452 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
455 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, 453 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
456 STK_OFF_SKBP); 454 STK_OFF_SKBP);
455 }
457} 456}
458 457
459/* 458/*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
983 EMIT2(0x0d00, REG_14, REG_W1); 982 EMIT2(0x0d00, REG_14, REG_W1);
984 /* lgr %b0,%r2: load return value into %b0 */ 983 /* lgr %b0,%r2: load return value into %b0 */
985 EMIT4(0xb9040000, BPF_REG_0, REG_2); 984 EMIT4(0xb9040000, BPF_REG_0, REG_2);
986 if (bpf_helper_changes_pkt_data((void *)func)) { 985 if ((jit->seen & SEEN_SKB) &&
987 jit->seen |= SEEN_SKB_CHANGE; 986 bpf_helper_changes_pkt_data((void *)func)) {
988 /* lg %b1,ST_OFF_SKBP(%r15) */ 987 /* lg %b1,ST_OFF_SKBP(%r15) */
989 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, 988 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
990 REG_15, STK_OFF_SKBP); 989 REG_15, STK_OFF_SKBP);
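With SEEN_SKB_CHANGE gone, the prologue spills the skb pointer whenever the program uses skb at all, and the post-call reload of the cached skb->data/hlen is gated on (jit->seen & SEEN_SKB) as well as on the helper rewriting packet data — so a program with no skb context no longer gets live state clobbered by a spurious reload. The gating in miniature:

    #include <stdio.h>

    #define SEEN_SKB        2       /* illustrative bit value */

    static void after_helper_call(unsigned int seen, int helper_changes_pkt)
    {
        if ((seen & SEEN_SKB) && helper_changes_pkt)
            puts("reload cached skb->data/hlen");
        else
            puts("no skb state, nothing to reload");
    }

    int main(void)
    {
        after_helper_call(SEEN_SKB, 1); /* skb program + rewriting helper */
        after_helper_call(0, 1);        /* non-skb program: must not reload */
        return 0;
    }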
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
index f94ecaffa71b..66c2dff74895 100644
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1obj-y += numa.o 2obj-y += numa.o
2obj-y += toptree.o 3obj-y += toptree.o
3obj-$(CONFIG_NUMA_EMU) += mode_emu.o 4obj-$(CONFIG_NUMA_EMU) += mode_emu.o
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 805d8b29193a..22d0871291ee 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# 2#
2# Makefile for the s390 PCI subsystem. 3# Makefile for the s390 PCI subsystem.
3# 4#
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 0fe649c0d542..4902fed221c0 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright IBM Corp. 2012 3 * Copyright IBM Corp. 2012
3 * 4 *
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index c2f786f0ea06..b482e95b6249 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright IBM Corp. 2012,2015 3 * Copyright IBM Corp. 2012,2015
3 * 4 *
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 0d300ee00f4e..2d15d84c20ed 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright IBM Corp. 2012 3 * Copyright IBM Corp. 2012
3 * 4 *
@@ -180,6 +181,9 @@ out_unlock:
180static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, 181static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
181 size_t size, int flags) 182 size_t size, int flags)
182{ 183{
184 unsigned long irqflags;
185 int ret;
186
183 /* 187 /*
184 * With zdev->tlb_refresh == 0, rpcit is not required to establish new 188 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
185 * translations when previously invalid translation-table entries are 189 * translations when previously invalid translation-table entries are
@@ -195,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
195 return 0; 199 return 0;
196 } 200 }
197 201
198 return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, 202 ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
199 PAGE_ALIGN(size)); 203 PAGE_ALIGN(size));
204 if (ret == -ENOMEM && !s390_iommu_strict) {
205 /* enable the hypervisor to free some resources */
206 if (zpci_refresh_global(zdev))
207 goto out;
208
209 spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
210 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
211 zdev->lazy_bitmap, zdev->iommu_pages);
212 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
213 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
214 ret = 0;
215 }
216out:
217 return ret;
200} 218}
201 219
202static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 220static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
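The new branch in __dma_purge_tlb() handles the resource-shortage case that zpci_refresh_trans() now reports as -ENOMEM (see the pci_insn.c hunk below): in lazy (non-strict) IOMMU mode it asks the hypervisor for a global refresh, then returns every lazily freed IOVA page to the allocator by clearing the lazy bits out of the main bitmap. The bitmap_andnot()/bitmap_zero() step, on toy data:

    #include <stdio.h>

    int main(void)
    {
        unsigned long iommu = 0xffUL;   /* IOVA pages currently allocated */
        unsigned long lazy  = 0x0fUL;   /* freed, TLB flush still pending */

        iommu &= ~lazy;                 /* bitmap_andnot: hand them back */
        lazy = 0;                       /* bitmap_zero: nothing pending  */

        printf("iommu=%#lx lazy=%#lx\n", iommu, lazy);
        return 0;
    }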
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 81b840bc6e4e..f069929e8211 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * s390 specific pci instructions 3 * s390 specific pci instructions
3 * 4 *
@@ -88,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
88 if (cc) 89 if (cc)
89 zpci_err_insn(cc, status, addr, range); 90 zpci_err_insn(cc, status, addr, range);
90 91
92 if (cc == 1 && (status == 4 || status == 16))
93 return -ENOMEM;
94
91 return (cc) ? -EIO : 0; 95 return (cc) ? -EIO : 0;
92} 96}
93 97
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index 01d4c5a4bfe9..357d42681cef 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -1,3 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * Generate opcode table initializers for the in-kernel disassembler. 3 * Generate opcode table initializers for the in-kernel disassembler.
3 * 4 *
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
index c94ee54210bc..81271d3af47c 100644
--- a/arch/score/include/uapi/asm/Kbuild
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
1# UAPI Header export list 1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bpf_perf_event.h
4generic-y += siginfo.h 5generic-y += siginfo.h
diff --git a/arch/sh/boards/mach-se/770x/setup.c b/arch/sh/boards/mach-se/770x/setup.c
index 77c35350ee77..412326d59e6f 100644
--- a/arch/sh/boards/mach-se/770x/setup.c
+++ b/arch/sh/boards/mach-se/770x/setup.c
@@ -9,6 +9,7 @@
9 */ 9 */
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/platform_device.h> 11#include <linux/platform_device.h>
12#include <linux/sh_eth.h>
12#include <mach-se/mach/se.h> 13#include <mach-se/mach/se.h>
13#include <mach-se/mach/mrshpc.h> 14#include <mach-se/mach/mrshpc.h>
14#include <asm/machvec.h> 15#include <asm/machvec.h>
@@ -115,13 +116,23 @@ static struct platform_device heartbeat_device = {
115#if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\ 116#if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\
116 defined(CONFIG_CPU_SUBTYPE_SH7712) 117 defined(CONFIG_CPU_SUBTYPE_SH7712)
117/* SH771X Ethernet driver */ 118/* SH771X Ethernet driver */
119static struct sh_eth_plat_data sh_eth_plat = {
120 .phy = PHY_ID,
121 .phy_interface = PHY_INTERFACE_MODE_MII,
122};
123
118static struct resource sh_eth0_resources[] = { 124static struct resource sh_eth0_resources[] = {
119 [0] = { 125 [0] = {
120 .start = SH_ETH0_BASE, 126 .start = SH_ETH0_BASE,
121 .end = SH_ETH0_BASE + 0x1B8, 127 .end = SH_ETH0_BASE + 0x1B8 - 1,
122 .flags = IORESOURCE_MEM, 128 .flags = IORESOURCE_MEM,
123 }, 129 },
124 [1] = { 130 [1] = {
131 .start = SH_TSU_BASE,
132 .end = SH_TSU_BASE + 0x200 - 1,
133 .flags = IORESOURCE_MEM,
134 },
135 [2] = {
125 .start = SH_ETH0_IRQ, 136 .start = SH_ETH0_IRQ,
126 .end = SH_ETH0_IRQ, 137 .end = SH_ETH0_IRQ,
127 .flags = IORESOURCE_IRQ, 138 .flags = IORESOURCE_IRQ,
@@ -132,7 +143,7 @@ static struct platform_device sh_eth0_device = {
132 .name = "sh771x-ether", 143 .name = "sh771x-ether",
133 .id = 0, 144 .id = 0,
134 .dev = { 145 .dev = {
135 .platform_data = PHY_ID, 146 .platform_data = &sh_eth_plat,
136 }, 147 },
137 .num_resources = ARRAY_SIZE(sh_eth0_resources), 148 .num_resources = ARRAY_SIZE(sh_eth0_resources),
138 .resource = sh_eth0_resources, 149 .resource = sh_eth0_resources,
@@ -141,10 +152,15 @@ static struct platform_device sh_eth0_device = {
141static struct resource sh_eth1_resources[] = { 152static struct resource sh_eth1_resources[] = {
142 [0] = { 153 [0] = {
143 .start = SH_ETH1_BASE, 154 .start = SH_ETH1_BASE,
144 .end = SH_ETH1_BASE + 0x1B8, 155 .end = SH_ETH1_BASE + 0x1B8 - 1,
145 .flags = IORESOURCE_MEM, 156 .flags = IORESOURCE_MEM,
146 }, 157 },
147 [1] = { 158 [1] = {
159 .start = SH_TSU_BASE,
160 .end = SH_TSU_BASE + 0x200 - 1,
161 .flags = IORESOURCE_MEM,
162 },
163 [2] = {
148 .start = SH_ETH1_IRQ, 164 .start = SH_ETH1_IRQ,
149 .end = SH_ETH1_IRQ, 165 .end = SH_ETH1_IRQ,
150 .flags = IORESOURCE_IRQ, 166 .flags = IORESOURCE_IRQ,
@@ -155,7 +171,7 @@ static struct platform_device sh_eth1_device = {
155 .name = "sh771x-ether", 171 .name = "sh771x-ether",
156 .id = 1, 172 .id = 1,
157 .dev = { 173 .dev = {
158 .platform_data = PHY_ID, 174 .platform_data = &sh_eth_plat,
159 }, 175 },
160 .num_resources = ARRAY_SIZE(sh_eth1_resources), 176 .num_resources = ARRAY_SIZE(sh_eth1_resources),
161 .resource = sh_eth1_resources, 177 .resource = sh_eth1_resources,
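Two fixes for the SH771x Ethernet devices: struct resource ranges are inclusive, so a 0x1B8-byte window starting at SH_ETH0_BASE must end at SH_ETH0_BASE + 0x1B8 - 1 (the old .end overshot by one byte), and the bare PHY_ID integer smuggled through .platform_data is replaced by a real sh_eth_plat_data plus a TSU register window, which is what the sh_eth driver actually expects. The inclusive-range arithmetic:

    #include <stdio.h>

    int main(void)
    {
        unsigned long start = 0xA7000000UL, size = 0x1B8UL;
        unsigned long end = start + size - 1;   /* inclusive end */

        printf("region [%#lx, %#lx], %lu bytes\n",
               start, end, end - start + 1);
        return 0;
    }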
diff --git a/arch/sh/include/mach-se/mach/se.h b/arch/sh/include/mach-se/mach/se.h
index 4246ef9b07a3..aa83fe1ff0b1 100644
--- a/arch/sh/include/mach-se/mach/se.h
+++ b/arch/sh/include/mach-se/mach/se.h
@@ -100,6 +100,7 @@
100/* Base address */ 100/* Base address */
101#define SH_ETH0_BASE 0xA7000000 101#define SH_ETH0_BASE 0xA7000000
102#define SH_ETH1_BASE 0xA7000400 102#define SH_ETH1_BASE 0xA7000400
103#define SH_TSU_BASE 0xA7000800
103/* PHY ID */ 104/* PHY ID */
104#if defined(CONFIG_CPU_SUBTYPE_SH7710) 105#if defined(CONFIG_CPU_SUBTYPE_SH7710)
105# define PHY_ID 0x00 106# define PHY_ID 0x00
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index e28531333efa..ba4d39cb321d 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bitsperlong.h 4generic-y += bitsperlong.h
5generic-y += bpf_perf_event.h
5generic-y += errno.h 6generic-y += errno.h
6generic-y += fcntl.h 7generic-y += fcntl.h
7generic-y += ioctl.h 8generic-y += ioctl.h
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 5a9e96be1665..9937c5ff94a9 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
715 return pte_pfn(pte); 715 return pte_pfn(pte);
716} 716}
717 717
718#define __HAVE_ARCH_PMD_WRITE 718#define pmd_write pmd_write
719static inline unsigned long pmd_write(pmd_t pmd) 719static inline unsigned long pmd_write(pmd_t pmd)
720{ 720{
721 pte_t pte = __pte(pmd_val(pmd)); 721 pte_t pte = __pte(pmd_val(pmd));
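Replacing __HAVE_ARCH_PMD_WRITE with `#define pmd_write pmd_write` (here, and dropped outright in the tile hunk further down) moves to the self-defining-macro idiom, which lets generic headers supply a fallback with a plain #ifndef instead of a second feature macro. A sketch of the generic side this enables, assuming the usual asm-generic shape:

    #ifndef pmd_write
    static inline int pmd_write(pmd_t pmd)
    {
        BUG();          /* an arch with THP must supply its own */
        return 0;
    }
    #define pmd_write pmd_write
    #endif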
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index 2178c78c7c1a..4680ba246b55 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
1# UAPI Header export list 1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bpf_perf_event.h
4generic-y += types.h 5generic-y += types.h
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 0f0f76b4f6cd..063556fe2cb1 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -19,7 +19,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
19lib-$(CONFIG_SPARC64) += multi3.o 19lib-$(CONFIG_SPARC64) += multi3.o
20lib-$(CONFIG_SPARC64) += fls.o 20lib-$(CONFIG_SPARC64) += fls.o
21lib-$(CONFIG_SPARC64) += fls64.o 21lib-$(CONFIG_SPARC64) += fls64.o
22obj-$(CONFIG_SPARC64) += NG4fls.o 22lib-$(CONFIG_SPARC64) += NG4fls.o
23 23
24lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o 24lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
25lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o 25lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
index e5547b22cd18..0ddbbb031822 100644
--- a/arch/sparc/lib/hweight.S
+++ b/arch/sparc/lib/hweight.S
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
44 .previous 44 .previous
45 45
46ENTRY(__arch_hweight64) 46ENTRY(__arch_hweight64)
47 sethi %hi(__sw_hweight16), %g1 47 sethi %hi(__sw_hweight64), %g1
48 jmpl %g1 + %lo(__sw_hweight16), %g0 48 jmpl %g1 + %lo(__sw_hweight64), %g0
49 nop 49 nop
50ENDPROC(__arch_hweight64) 50ENDPROC(__arch_hweight64)
51EXPORT_SYMBOL(__arch_hweight64) 51EXPORT_SYMBOL(__arch_hweight64)
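The __arch_hweight64 stub was tail-calling __sw_hweight16, so on CPUs taking the software fallback it returned the popcount of only the low 16 bits. The truncation, demonstrated:

    #include <stdio.h>

    static unsigned int sw_hweight16(unsigned int w)    /* low 16 bits only */
    {
        unsigned int res = w - ((w >> 1) & 0x5555);

        res = (res & 0x3333) + ((res >> 2) & 0x3333);
        res = (res + (res >> 4)) & 0x0f0f;
        return (res + (res >> 8)) & 0x00ff;
    }

    static unsigned int sw_hweight64(unsigned long long w)
    {
        unsigned int res = 0;

        while (w) {
            res += (unsigned int)(w & 1);
            w >>= 1;
        }
        return res;
    }

    int main(void)
    {
        unsigned long long v = 0x0000ffff0000ffffULL;

        printf("16-bit path: %u, correct 64-bit: %u\n",
               sw_hweight16((unsigned int)(v & 0xffff)), sw_hweight64(v));
        return 0;
    }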
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index be3136f142a9..a8103a84b4ac 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
113 if (!printk_ratelimit()) 113 if (!printk_ratelimit())
114 return; 114 return;
115 115
116 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 116 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
118 tsk->comm, task_pid_nr(tsk), address, 118 tsk->comm, task_pid_nr(tsk), address,
119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7], 119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 815c03d7a765..41363f46797b 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
154 if (!printk_ratelimit()) 154 if (!printk_ratelimit())
155 return; 155 return;
156 156
157 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 157 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
159 tsk->comm, task_pid_nr(tsk), address, 159 tsk->comm, task_pid_nr(tsk), address,
160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], 160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
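The %p to %px switches here (and in the um/kernel/trap.c hunk below) react to the pointer-hashing change: plain %p now prints an obfuscated per-boot value, while %px opts into the raw address — which is the whole point of a segfault diagnostic line. A userspace analogue; the hash below is a toy stand-in for the kernel's siphash-based scheme:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t toy_hash(uint64_t p)    /* stand-in, not the kernel's hash */
    {
        p ^= p >> 33;
        p *= 0xff51afd7ed558ccdULL;
        p ^= p >> 33;
        return p;
    }

    int main(void)
    {
        int x;
        uint64_t p = (uint64_t)(uintptr_t)&x;

        printf("%%p-style  (hashed): %#llx\n", (unsigned long long)toy_hash(p));
        printf("%%px-style (raw):    %#llx\n", (unsigned long long)p);
        return 0;
    }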
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e711f7..ff5f9cb3039a 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1245 u8 *func = ((u8 *)__bpf_call_base) + imm; 1245 u8 *func = ((u8 *)__bpf_call_base) + imm;
1246 1246
1247 ctx->saw_call = true; 1247 ctx->saw_call = true;
1248 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1249 emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
1248 1250
1249 emit_call((u32 *)func, ctx); 1251 emit_call((u32 *)func, ctx);
1250 emit_nop(ctx); 1252 emit_nop(ctx);
1251 1253
1252 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); 1254 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
1253 1255
1254 if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) 1256 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1255 load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); 1257 load_skb_regs(ctx, L7);
1256 break; 1258 break;
1257 } 1259 }
1258 1260
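Same disease as the s390 JIT hunk earlier, fixed the same way: the reload only happens for programs that actually touched skb (saw_ld_abs_ind), and the skb pointer is parked in %l7 — a callee-saved SPARC local register that survives the helper call — before the call, then used as the reload source afterwards. The emitter ordering, with stand-in helpers:

    #include <stdio.h>

    struct jit_ctx { int saw_ld_abs_ind; };

    static int  helper_changes_pkt_data(void *f) { (void)f; return 1; }
    static void emit(const char *insn)           { puts(insn); }

    static void emit_helper_call(struct jit_ctx *ctx, void *func)
    {
        int reload = ctx->saw_ld_abs_ind && helper_changes_pkt_data(func);

        if (reload)
            emit("mov  %bpf_r1, %l7   ! park skb ptr, callee-saved");
        emit("call helper");
        emit("mov  %o0, %bpf_r0   ! return value");
        if (reload)
            emit("reload skb regs from %l7");
    }

    int main(void)
    {
        struct jit_ctx skb_prog = { 1 }, plain_prog = { 0 };

        emit_helper_call(&skb_prog, (void *)0);
        emit_helper_call(&plain_prog, (void *)0);
        return 0;
    }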
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 2a26cc4fefc2..adfa21b18488 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp)
475#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) 475#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
476#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd)) 476#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd))
477#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) 477#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd)))
478#define __HAVE_ARCH_PMD_WRITE
479 478
480#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot))) 479#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot)))
481#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) 480#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index 5711de0a1b5e..cc439612bcd5 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
1# UAPI Header export list 1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += bpf_perf_event.h
4generic-y += errno.h 5generic-y += errno.h
5generic-y += fcntl.h 6generic-y += fcntl.h
6generic-y += ioctl.h 7generic-y += ioctl.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 50a32c33d729..73c57f614c9e 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,4 +1,5 @@
1generic-y += barrier.h 1generic-y += barrier.h
2generic-y += bpf_perf_event.h
2generic-y += bug.h 3generic-y += bug.h
3generic-y += clkdev.h 4generic-y += clkdev.h
4generic-y += current.h 5generic-y += current.h
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index b668e351fd6c..fca34b2177e2 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
15/* 15/*
16 * Needed since we do not use the asm-generic/mm_hooks.h: 16 * Needed since we do not use the asm-generic/mm_hooks.h:
17 */ 17 */
18static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 18static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
19{ 19{
20 uml_setup_stubs(mm); 20 uml_setup_stubs(mm);
21 return 0;
21} 22}
22extern void arch_exit_mmap(struct mm_struct *mm); 23extern void arch_exit_mmap(struct mm_struct *mm);
23static inline void arch_unmap(struct mm_struct *mm, 24static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 4e6fcb32620f..428644175956 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
150 if (!printk_ratelimit()) 150 if (!printk_ratelimit())
151 return; 151 return;
152 152
153 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x", 153 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
154 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 154 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
155 tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), 155 tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
156 (void *)UPT_IP(regs), (void *)UPT_SP(regs), 156 (void *)UPT_IP(regs), (void *)UPT_SP(regs),
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 59b06b48f27d..5c205a9cb5a6 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -81,9 +81,10 @@ do { \
81 } \ 81 } \
82} while (0) 82} while (0)
83 83
84static inline void arch_dup_mmap(struct mm_struct *oldmm, 84static inline int arch_dup_mmap(struct mm_struct *oldmm,
85 struct mm_struct *mm) 85 struct mm_struct *mm)
86{ 86{
87 return 0;
87} 88}
88 89
89static inline void arch_unmap(struct mm_struct *mm, 90static inline void arch_unmap(struct mm_struct *mm,
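Both conversions above (um and unicore32) follow the same pattern: arch_dup_mmap() now returns int so the fork path can propagate failure, and architectures with nothing fallible to do simply return 0. A sketch of the caller-side expectation this implies (assumed shape, not code from this patch):

static int dup_arch_state(struct mm_struct *oldmm, struct mm_struct *mm)
{
        int err = arch_dup_mmap(oldmm, mm);

        if (err)
                return err;     /* fork now fails instead of continuing silently */
        return 0;
}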
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 759a71411169..8611ef980554 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += bitsperlong.h 5generic-y += bitsperlong.h
6generic-y += bpf_perf_event.h
6generic-y += errno.h 7generic-y += errno.h
7generic-y += fcntl.h 8generic-y += fcntl.h
8generic-y += ioctl.h 9generic-y += ioctl.h
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 5f25b39f04d4..c4ac6043ebb0 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -298,7 +298,6 @@ void abort(void)
298 /* if that doesn't kill us, halt */ 298 /* if that doesn't kill us, halt */
299 panic("Oops failed to kill thread"); 299 panic("Oops failed to kill thread");
300} 300}
301EXPORT_SYMBOL(abort);
302 301
303void __init trap_init(void) 302void __init trap_init(void)
304{ 303{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..20da391b5f32 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,7 +55,6 @@ config X86
55 select ARCH_HAS_GCOV_PROFILE_ALL 55 select ARCH_HAS_GCOV_PROFILE_ALL
56 select ARCH_HAS_KCOV if X86_64 56 select ARCH_HAS_KCOV if X86_64
57 select ARCH_HAS_PMEM_API if X86_64 57 select ARCH_HAS_PMEM_API if X86_64
58 # Causing hangs/crashes, see the commit that added this change for details.
59 select ARCH_HAS_REFCOUNT 58 select ARCH_HAS_REFCOUNT
60 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 59 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
61 select ARCH_HAS_SET_MEMORY 60 select ARCH_HAS_SET_MEMORY
@@ -89,6 +88,7 @@ config X86
89 select GENERIC_CLOCKEVENTS_MIN_ADJUST 88 select GENERIC_CLOCKEVENTS_MIN_ADJUST
90 select GENERIC_CMOS_UPDATE 89 select GENERIC_CMOS_UPDATE
91 select GENERIC_CPU_AUTOPROBE 90 select GENERIC_CPU_AUTOPROBE
91 select GENERIC_CPU_VULNERABILITIES
92 select GENERIC_EARLY_IOREMAP 92 select GENERIC_EARLY_IOREMAP
93 select GENERIC_FIND_FIRST_BIT 93 select GENERIC_FIND_FIRST_BIT
94 select GENERIC_IOMAP 94 select GENERIC_IOMAP
@@ -429,6 +429,19 @@ config GOLDFISH
429 def_bool y 429 def_bool y
430 depends on X86_GOLDFISH 430 depends on X86_GOLDFISH
431 431
432config RETPOLINE
433 bool "Avoid speculative indirect branches in kernel"
434 default y
435 help
436 Compile kernel with the retpoline compiler options to guard against
437 kernel-to-user data leaks by avoiding speculative indirect
438 branches. Requires a compiler with -mindirect-branch=thunk-extern
439 support for full protection. The kernel may run slower.
440
441 Without compiler support, at least indirect branches in assembler
442 code are eliminated. Since this includes the syscall entry path,
443 it is not entirely pointless.
444
432config INTEL_RDT 445config INTEL_RDT
433 bool "Intel Resource Director Technology support" 446 bool "Intel Resource Director Technology support"
434 default n 447 default n
@@ -926,7 +939,8 @@ config MAXSMP
926config NR_CPUS 939config NR_CPUS
927 int "Maximum number of CPUs" if SMP && !MAXSMP 940 int "Maximum number of CPUs" if SMP && !MAXSMP
928 range 2 8 if SMP && X86_32 && !X86_BIGSMP 941 range 2 8 if SMP && X86_32 && !X86_BIGSMP
929 range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK 942 range 2 64 if SMP && X86_32 && X86_BIGSMP
943 range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
930 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 944 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
931 default "1" if !SMP 945 default "1" if !SMP
932 default "8192" if MAXSMP 946 default "8192" if MAXSMP
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a8768a91..672441c008c7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
400config UNWINDER_GUESS 400config UNWINDER_GUESS
401 bool "Guess unwinder" 401 bool "Guess unwinder"
402 depends on EXPERT 402 depends on EXPERT
403 depends on !STACKDEPOT
403 ---help--- 404 ---help---
404 This option enables the "guess" unwinder for unwinding kernel stack 405 This option enables the "guess" unwinder for unwinding kernel stack
405 traces. It scans the stack and reports every kernel text address it 406 traces. It scans the stack and reports every kernel text address it
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3e73bc255e4e..fad55160dcb9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
230# 230#
231KBUILD_CFLAGS += -fno-asynchronous-unwind-tables 231KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
232 232
233# Avoid indirect branches in kernel to deal with Spectre
234ifdef CONFIG_RETPOLINE
235 RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
236 ifneq ($(RETPOLINE_CFLAGS),)
237 KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
238 endif
239endif
240
233archscripts: scripts_basic 241archscripts: scripts_basic
234 $(Q)$(MAKE) $(build)=arch/x86/tools relocs 242 $(Q)$(MAKE) $(build)=arch/x86/tools relocs
235 243
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1e9c322e973a..f25e1530e064 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
80ifdef CONFIG_X86_64 80ifdef CONFIG_X86_64
81 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o 81 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
82 vmlinux-objs-y += $(obj)/mem_encrypt.o 82 vmlinux-objs-y += $(obj)/mem_encrypt.o
83 vmlinux-objs-y += $(obj)/pgtable_64.o
83endif 84endif
84 85
85$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone 86$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f3133..fc313e29fe2c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,10 +305,18 @@ ENTRY(startup_64)
305 leaq boot_stack_end(%rbx), %rsp 305 leaq boot_stack_end(%rbx), %rsp
306 306
307#ifdef CONFIG_X86_5LEVEL 307#ifdef CONFIG_X86_5LEVEL
308 /* Check if 5-level paging has already enabled */ 308 /*
309 movq %cr4, %rax 309 * Check if we need to enable 5-level paging.
310 testl $X86_CR4_LA57, %eax 310 * RSI holds real mode data and needs to be preserved across
311 jnz lvl5 311 * a function call.
312 */
313 pushq %rsi
314 call l5_paging_required
315 popq %rsi
316
317 /* If l5_paging_required() returned zero, we're done here. */
318 cmpq $0, %rax
319 je lvl5
312 320
313 /* 321 /*
314 * At this point we are in long mode with 4-level paging enabled, 322 * At this point we are in long mode with 4-level paging enabled,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25..98761a1576ce 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
169 } 169 }
170} 170}
171 171
172static bool l5_supported(void)
173{
174 /* Check if leaf 7 is supported. */
175 if (native_cpuid_eax(0) < 7)
176 return 0;
177
178 /* Check if la57 is supported. */
179 return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
180}
181
172#if CONFIG_X86_NEED_RELOCS 182#if CONFIG_X86_NEED_RELOCS
173static void handle_relocations(void *output, unsigned long output_len, 183static void handle_relocations(void *output, unsigned long output_len,
174 unsigned long virt_addr) 184 unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
362 console_init(); 372 console_init();
363 debug_putstr("early console in extract_kernel\n"); 373 debug_putstr("early console in extract_kernel\n");
364 374
375 if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
376 error("This linux kernel as configured requires 5-level paging\n"
377 "This CPU does not support the required 'cr4.la57' feature\n"
378 "Unable to boot - please use a kernel appropriate for your CPU\n");
379 }
380
365 free_mem_ptr = heap; /* Heap */ 381 free_mem_ptr = heap; /* Heap */
366 free_mem_end_ptr = heap + BOOT_HEAP_SIZE; 382 free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
367 383
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index d5364ca2e3f9..b5e5e02f8cde 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -23,6 +23,9 @@
23 */ 23 */
24#undef CONFIG_AMD_MEM_ENCRYPT 24#undef CONFIG_AMD_MEM_ENCRYPT
25 25
26/* No PAGE_TABLE_ISOLATION support needed either: */
27#undef CONFIG_PAGE_TABLE_ISOLATION
28
26#include "misc.h" 29#include "misc.h"
27 30
28/* These actually do the work of building the kernel identity maps. */ 31/* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000000..b4469a37e9a1
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,28 @@
1#include <asm/processor.h>
2
3/*
4 * __force_order is used by special_insns.h asm code to force instruction
5 * serialization.
6 *
7 * It is not referenced from the code, but GCC < 5 with -fPIE would fail
8 * due to an undefined symbol. Define it to make these ancient GCCs work.
9 */
10unsigned long __force_order;
11
12int l5_paging_required(void)
13{
14 /* Check if leaf 7 is supported. */
15
16 if (native_cpuid_eax(0) < 7)
17 return 0;
18
19 /* Check if la57 is supported. */
20 if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
21 return 0;
22
23 /* Check if 5-level paging has already been enabled. */
24 if (native_read_cr4() & X86_CR4_LA57)
25 return 0;
26
27 return 1;
28}
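For orientation (an assumption from the cpufeatures word layout, not stated in the patch): X86_FEATURE_LA57 lives in the CPUID.(EAX=7,ECX=0):ECX word at bit 16, so (X86_FEATURE_LA57 & 31) == 16 and the masked test above reduces to:

/* illustrative expansion of the la57 check; native_cpuid_ecx() is the
 * same helper used above, from <asm/processor.h> */
int la57_in_cpuid = !!(native_cpuid_ecx(7) & (1u << 16));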
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 49f4970f693b..6a10d52a4145 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -44,9 +44,9 @@ FDINITRD=$6
44 44
45# Make sure the files actually exist 45# Make sure the files actually exist
46verify "$FBZIMAGE" 46verify "$FBZIMAGE"
47verify "$MTOOLSRC"
48 47
49genbzdisk() { 48genbzdisk() {
49 verify "$MTOOLSRC"
50 mformat a: 50 mformat a:
51 syslinux $FIMAGE 51 syslinux $FIMAGE
52 echo "$KCMDLINE" | mcopy - a:syslinux.cfg 52 echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
57} 57}
58 58
59genfdimage144() { 59genfdimage144() {
60 verify "$MTOOLSRC"
60 dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null 61 dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
61 mformat v: 62 mformat v:
62 syslinux $FIMAGE 63 syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
68} 69}
69 70
70genfdimage288() { 71genfdimage288() {
72 verify "$MTOOLSRC"
71 dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null 73 dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
72 mformat w: 74 mformat w:
73 syslinux $FIMAGE 75 syslinux $FIMAGE
@@ -78,39 +80,43 @@ genfdimage288() {
78 mcopy $FBZIMAGE w:linux 80 mcopy $FBZIMAGE w:linux
79} 81}
80 82
81genisoimage() { 83geniso() {
82 tmp_dir=`dirname $FIMAGE`/isoimage 84 tmp_dir=`dirname $FIMAGE`/isoimage
83 rm -rf $tmp_dir 85 rm -rf $tmp_dir
84 mkdir $tmp_dir 86 mkdir $tmp_dir
85 for i in lib lib64 share end ; do 87 for i in lib lib64 share ; do
86 for j in syslinux ISOLINUX ; do 88 for j in syslinux ISOLINUX ; do
87 if [ -f /usr/$i/$j/isolinux.bin ] ; then 89 if [ -f /usr/$i/$j/isolinux.bin ] ; then
88 isolinux=/usr/$i/$j/isolinux.bin 90 isolinux=/usr/$i/$j/isolinux.bin
89 cp $isolinux $tmp_dir
90 fi 91 fi
91 done 92 done
92 for j in syslinux syslinux/modules/bios ; do 93 for j in syslinux syslinux/modules/bios ; do
93 if [ -f /usr/$i/$j/ldlinux.c32 ]; then 94 if [ -f /usr/$i/$j/ldlinux.c32 ]; then
94 ldlinux=/usr/$i/$j/ldlinux.c32 95 ldlinux=/usr/$i/$j/ldlinux.c32
95 cp $ldlinux $tmp_dir
96 fi 96 fi
97 done 97 done
98 if [ -n "$isolinux" -a -n "$ldlinux" ] ; then 98 if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
99 break 99 break
100 fi 100 fi
101 if [ $i = end -a -z "$isolinux" ] ; then
102 echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
103 exit 1
104 fi
105 done 101 done
102 if [ -z "$isolinux" ] ; then
103 echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
104 exit 1
105 fi
106 if [ -z "$ldlinux" ] ; then
107 echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
108 exit 1
109 fi
110 cp $isolinux $tmp_dir
111 cp $ldlinux $tmp_dir
106 cp $FBZIMAGE $tmp_dir/linux 112 cp $FBZIMAGE $tmp_dir/linux
107 echo "$KCMDLINE" > $tmp_dir/isolinux.cfg 113 echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
108 if [ -f "$FDINITRD" ] ; then 114 if [ -f "$FDINITRD" ] ; then
109 cp "$FDINITRD" $tmp_dir/initrd.img 115 cp "$FDINITRD" $tmp_dir/initrd.img
110 fi 116 fi
111 mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \ 117 genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
112 -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \ 118 -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
113 $tmp_dir 119 -boot-info-table $tmp_dir
114 isohybrid $FIMAGE 2>/dev/null || true 120 isohybrid $FIMAGE 2>/dev/null || true
115 rm -rf $tmp_dir 121 rm -rf $tmp_dir
116} 122}
@@ -119,6 +125,6 @@ case $1 in
119 bzdisk) genbzdisk;; 125 bzdisk) genbzdisk;;
120 fdimage144) genfdimage144;; 126 fdimage144) genfdimage144;;
121 fdimage288) genfdimage288;; 127 fdimage288) genfdimage288;;
122 isoimage) genisoimage;; 128 isoimage) geniso;;
123 *) echo 'Unknown image format'; exit 1; 129 *) echo 'Unknown image format'; exit 1;
124esac 130esac
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 16627fec80b2..3d09e3aca18d 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,7 @@
32#include <linux/linkage.h> 32#include <linux/linkage.h>
33#include <asm/inst.h> 33#include <asm/inst.h>
34#include <asm/frame.h> 34#include <asm/frame.h>
35#include <asm/nospec-branch.h>
35 36
36/* 37/*
37 * The following macros are used to move an (un)aligned 16 byte value to/from 38 * The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
2884 pxor INC, STATE4 2885 pxor INC, STATE4
2885 movdqu IV, 0x30(OUTP) 2886 movdqu IV, 0x30(OUTP)
2886 2887
2887 call *%r11 2888 CALL_NOSPEC %r11
2888 2889
2889 movdqu 0x00(OUTP), INC 2890 movdqu 0x00(OUTP), INC
2890 pxor INC, STATE1 2891 pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
2929 _aesni_gf128mul_x_ble() 2930 _aesni_gf128mul_x_ble()
2930 movups IV, (IVP) 2931 movups IV, (IVP)
2931 2932
2932 call *%r11 2933 CALL_NOSPEC %r11
2933 2934
2934 movdqu 0x40(OUTP), INC 2935 movdqu 0x40(OUTP), INC
2935 pxor INC, STATE1 2936 pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index f7c495e2863c..a14af6eb09cb 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,6 +17,7 @@
17 17
18#include <linux/linkage.h> 18#include <linux/linkage.h>
19#include <asm/frame.h> 19#include <asm/frame.h>
20#include <asm/nospec-branch.h>
20 21
21#define CAMELLIA_TABLE_BYTE_LEN 272 22#define CAMELLIA_TABLE_BYTE_LEN 272
22 23
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
1227 vpxor 14 * 16(%rax), %xmm15, %xmm14; 1228 vpxor 14 * 16(%rax), %xmm15, %xmm14;
1228 vpxor 15 * 16(%rax), %xmm15, %xmm15; 1229 vpxor 15 * 16(%rax), %xmm15, %xmm15;
1229 1230
1230 call *%r9; 1231 CALL_NOSPEC %r9;
1231 1232
1232 addq $(16 * 16), %rsp; 1233 addq $(16 * 16), %rsp;
1233 1234
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index eee5b3982cfd..b66bbfa62f50 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -12,6 +12,7 @@
12 12
13#include <linux/linkage.h> 13#include <linux/linkage.h>
14#include <asm/frame.h> 14#include <asm/frame.h>
15#include <asm/nospec-branch.h>
15 16
16#define CAMELLIA_TABLE_BYTE_LEN 272 17#define CAMELLIA_TABLE_BYTE_LEN 272
17 18
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
1343 vpxor 14 * 32(%rax), %ymm15, %ymm14; 1344 vpxor 14 * 32(%rax), %ymm15, %ymm14;
1344 vpxor 15 * 32(%rax), %ymm15, %ymm15; 1345 vpxor 15 * 32(%rax), %ymm15, %ymm15;
1345 1346
1346 call *%r9; 1347 CALL_NOSPEC %r9;
1347 1348
1348 addq $(16 * 32), %rsp; 1349 addq $(16 * 32), %rsp;
1349 1350
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 7a7de27c6f41..d9b734d0c8cc 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
45 45
46#include <asm/inst.h> 46#include <asm/inst.h>
47#include <linux/linkage.h> 47#include <linux/linkage.h>
48#include <asm/nospec-branch.h>
48 49
49## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction 50## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
50 51
@@ -172,7 +173,7 @@ continue_block:
172 movzxw (bufp, %rax, 2), len 173 movzxw (bufp, %rax, 2), len
173 lea crc_array(%rip), bufp 174 lea crc_array(%rip), bufp
174 lea (bufp, len, 1), bufp 175 lea (bufp, len, 1), bufp
175 jmp *bufp 176 JMP_NOSPEC bufp
176 177
177 ################################################################ 178 ################################################################
178 ## 2a) PROCESS FULL BLOCKS: 179 ## 2a) PROCESS FULL BLOCKS:
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 399a29d067d6..cb91a64a99e7 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
59 59
60 salsa20_ivsetup(ctx, walk.iv); 60 salsa20_ivsetup(ctx, walk.iv);
61 61
62 if (likely(walk.nbytes == nbytes))
63 {
64 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
65 walk.dst.virt.addr, nbytes);
66 return blkcipher_walk_done(desc, &walk, 0);
67 }
68
69 while (walk.nbytes >= 64) { 62 while (walk.nbytes >= 64) {
70 salsa20_encrypt_bytes(ctx, walk.src.virt.addr, 63 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
71 walk.dst.virt.addr, 64 walk.dst.virt.addr,
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3fd8bc560fae..3f48f695d5e6 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,6 +1,11 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/jump_label.h> 2#include <linux/jump_label.h>
3#include <asm/unwind_hints.h> 3#include <asm/unwind_hints.h>
4#include <asm/cpufeatures.h>
5#include <asm/page_types.h>
6#include <asm/percpu.h>
7#include <asm/asm-offsets.h>
8#include <asm/processor-flags.h>
4 9
5/* 10/*
6 11
@@ -187,6 +192,148 @@ For 32-bit we have the following conventions - kernel is built with
187#endif 192#endif
188.endm 193.endm
189 194
195#ifdef CONFIG_PAGE_TABLE_ISOLATION
196
197/*
198 * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
199 * halves:
200 */
201#define PTI_USER_PGTABLE_BIT PAGE_SHIFT
202#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT)
203#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT
204#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT)
205#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
206
207.macro SET_NOFLUSH_BIT reg:req
208 bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
209.endm
210
211.macro ADJUST_KERNEL_CR3 reg:req
212 ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
213 /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
214 andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
215.endm
216
217.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
218 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
219 mov %cr3, \scratch_reg
220 ADJUST_KERNEL_CR3 \scratch_reg
221 mov \scratch_reg, %cr3
222.Lend_\@:
223.endm
224
225#define THIS_CPU_user_pcid_flush_mask \
226 PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
227
228.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
229 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
230 mov %cr3, \scratch_reg
231
232 ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
233
234 /*
235 * Test if the ASID needs a flush.
236 */
237 movq \scratch_reg, \scratch_reg2
238 andq $(0x7FF), \scratch_reg /* mask ASID */
239 bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
240 jnc .Lnoflush_\@
241
242 /* Flush needed, clear the bit */
243 btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
244 movq \scratch_reg2, \scratch_reg
245 jmp .Lwrcr3_pcid_\@
246
247.Lnoflush_\@:
248 movq \scratch_reg2, \scratch_reg
249 SET_NOFLUSH_BIT \scratch_reg
250
251.Lwrcr3_pcid_\@:
252 /* Flip the ASID to the user version */
253 orq $(PTI_USER_PCID_MASK), \scratch_reg
254
255.Lwrcr3_\@:
256 /* Flip the PGD to the user version */
257 orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
258 mov \scratch_reg, %cr3
259.Lend_\@:
260.endm
261
262.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
263 pushq %rax
264 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
265 popq %rax
266.endm
267
268.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
269 ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
270 movq %cr3, \scratch_reg
271 movq \scratch_reg, \save_reg
272 /*
273 * Test the user pagetable bit. If set, then the user page tables
274 * are active. If clear, CR3 already has the kernel page table
275 * active.
276 */
277 bt $PTI_USER_PGTABLE_BIT, \scratch_reg
278 jnc .Ldone_\@
279
280 ADJUST_KERNEL_CR3 \scratch_reg
281 movq \scratch_reg, %cr3
282
283.Ldone_\@:
284.endm
285
286.macro RESTORE_CR3 scratch_reg:req save_reg:req
287 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
288
289 ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
290
291 /*
292 * KERNEL pages can always resume with NOFLUSH as we do
293 * explicit flushes.
294 */
295 bt $PTI_USER_PGTABLE_BIT, \save_reg
296 jnc .Lnoflush_\@
297
298 /*
299 * Check if there's a pending flush for the user ASID we're
300 * about to set.
301 */
302 movq \save_reg, \scratch_reg
303 andq $(0x7FF), \scratch_reg
304 bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
305 jnc .Lnoflush_\@
306
307 btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
308 jmp .Lwrcr3_\@
309
310.Lnoflush_\@:
311 SET_NOFLUSH_BIT \save_reg
312
313.Lwrcr3_\@:
314 /*
315 * The CR3 write could be avoided when not changing its value,
316 * but would require a CR3 read *and* a scratch register.
317 */
318 movq \save_reg, %cr3
319.Lend_\@:
320.endm
321
322#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
323
324.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
325.endm
326.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
327.endm
328.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
329.endm
330.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
331.endm
332.macro RESTORE_CR3 scratch_reg:req save_reg:req
333.endm
334
335#endif
336
190#endif /* CONFIG_X86_64 */ 337#endif /* CONFIG_X86_64 */
191 338
192/* 339/*
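A minimal C rendering of the CR3 arithmetic these macros perform, under the bit values defined above (X86_CR3_PTI_PCID_USER_BIT is assumed to be 11 in this series):

#include <linux/bitops.h>

#define PTI_USER_PGTABLE_BIT    12      /* PAGE_SHIFT: selects the second 4k PGD half */
#define PTI_USER_PCID_BIT       11      /* assumed X86_CR3_PTI_PCID_USER_BIT */
#define PTI_SWITCH_MASK         ((1UL << PTI_USER_PGTABLE_BIT) | \
                                 (1UL << PTI_USER_PCID_BIT))

static inline unsigned long pti_kernel_cr3(unsigned long cr3)
{
        return cr3 & ~PTI_SWITCH_MASK;          /* ADJUST_KERNEL_CR3 */
}

static inline unsigned long pti_user_cr3(unsigned long cr3)
{
        return cr3 | PTI_SWITCH_MASK;           /* SWITCH_TO_USER_CR3*, PCID path */
}

/* The user-CR3 switch also consults a per-CPU bitmap of user ASIDs with a
 * pending TLB flush; the bt/btr pair above is equivalent to: */
static inline bool user_asid_flush_pending(unsigned long *mask, unsigned int asid)
{
        return test_and_clear_bit(asid, mask);
}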
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6..a1f28a54f23a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,6 +44,7 @@
44#include <asm/asm.h> 44#include <asm/asm.h>
45#include <asm/smap.h> 45#include <asm/smap.h>
46#include <asm/frame.h> 46#include <asm/frame.h>
47#include <asm/nospec-branch.h>
47 48
48 .section .entry.text, "ax" 49 .section .entry.text, "ax"
49 50
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
290 291
291 /* kernel thread */ 292 /* kernel thread */
2921: movl %edi, %eax 2931: movl %edi, %eax
293 call *%ebx 294 CALL_NOSPEC %ebx
294 /* 295 /*
295 * A kernel thread is allowed to return here after successfully 296 * A kernel thread is allowed to return here after successfully
296 * calling do_execve(). Exit to userspace to complete the execve() 297 * calling do_execve(). Exit to userspace to complete the execve()
@@ -919,7 +920,7 @@ common_exception:
919 movl %ecx, %es 920 movl %ecx, %es
920 TRACE_IRQS_OFF 921 TRACE_IRQS_OFF
921 movl %esp, %eax # pt_regs pointer 922 movl %esp, %eax # pt_regs pointer
922 call *%edi 923 CALL_NOSPEC %edi
923 jmp ret_from_exception 924 jmp ret_from_exception
924END(common_exception) 925END(common_exception)
925 926
@@ -941,9 +942,10 @@ ENTRY(debug)
941 movl %esp, %eax # pt_regs pointer 942 movl %esp, %eax # pt_regs pointer
942 943
943 /* Are we currently on the SYSENTER stack? */ 944 /* Are we currently on the SYSENTER stack? */
944 PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) 945 movl PER_CPU_VAR(cpu_entry_area), %ecx
945 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ 946 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
946 cmpl $SIZEOF_SYSENTER_stack, %ecx 947 subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
948 cmpl $SIZEOF_entry_stack, %ecx
947 jb .Ldebug_from_sysenter_stack 949 jb .Ldebug_from_sysenter_stack
948 950
949 TRACE_IRQS_OFF 951 TRACE_IRQS_OFF
@@ -984,9 +986,10 @@ ENTRY(nmi)
984 movl %esp, %eax # pt_regs pointer 986 movl %esp, %eax # pt_regs pointer
985 987
986 /* Are we currently on the SYSENTER stack? */ 988 /* Are we currently on the SYSENTER stack? */
987 PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) 989 movl PER_CPU_VAR(cpu_entry_area), %ecx
988 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ 990 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
989 cmpl $SIZEOF_SYSENTER_stack, %ecx 991 subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
992 cmpl $SIZEOF_entry_stack, %ecx
990 jb .Lnmi_from_sysenter_stack 993 jb .Lnmi_from_sysenter_stack
991 994
992 /* Not on SYSENTER stack. */ 995 /* Not on SYSENTER stack. */
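The four instructions of the entry-stack test above amount to one unsigned comparison; a sketch in C (names invented for illustration):

/* True iff 'sp' lies within the 'size' bytes ending at 'top'; the
 * unsigned compare mirrors the asm's jb, so sp above 'top' wraps to a
 * huge value and fails the test. */
static int on_entry_stack(unsigned long sp, unsigned long top,
                          unsigned long size)
{
        return (top - sp) < size;
}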
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..4f8e1d35a97c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -23,7 +23,6 @@
23#include <asm/segment.h> 23#include <asm/segment.h>
24#include <asm/cache.h> 24#include <asm/cache.h>
25#include <asm/errno.h> 25#include <asm/errno.h>
26#include "calling.h"
27#include <asm/asm-offsets.h> 26#include <asm/asm-offsets.h>
28#include <asm/msr.h> 27#include <asm/msr.h>
29#include <asm/unistd.h> 28#include <asm/unistd.h>
@@ -38,8 +37,11 @@
38#include <asm/pgtable_types.h> 37#include <asm/pgtable_types.h>
39#include <asm/export.h> 38#include <asm/export.h>
40#include <asm/frame.h> 39#include <asm/frame.h>
40#include <asm/nospec-branch.h>
41#include <linux/err.h> 41#include <linux/err.h>
42 42
43#include "calling.h"
44
43.code64 45.code64
44.section .entry.text, "ax" 46.section .entry.text, "ax"
45 47
@@ -140,6 +142,67 @@ END(native_usergs_sysret64)
140 * with them due to bugs in both AMD and Intel CPUs. 142 * with them due to bugs in both AMD and Intel CPUs.
141 */ 143 */
142 144
145 .pushsection .entry_trampoline, "ax"
146
147/*
148 * The code in here gets remapped into cpu_entry_area's trampoline. This means
149 * that the assembler and linker have the wrong idea as to where this code
150 * lives (and, in fact, it's mapped more than once, so it's not even at a
151 * fixed address). So we can't reference any symbols outside the entry
152 * trampoline and expect it to work.
153 *
154 * Instead, we carefully abuse %rip-relative addressing.
155 * _entry_trampoline(%rip) refers to the start of the remapped entry
156 * trampoline. We can thus find cpu_entry_area with this macro:
157 */
158
159#define CPU_ENTRY_AREA \
160 _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
161
162/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
163#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
164 SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
165
166ENTRY(entry_SYSCALL_64_trampoline)
167 UNWIND_HINT_EMPTY
168 swapgs
169
170 /* Stash the user RSP. */
171 movq %rsp, RSP_SCRATCH
172
173 /* Note: using %rsp as a scratch reg. */
174 SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
175
176 /* Load the top of the task stack into RSP */
177 movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
178
179 /* Start building the simulated IRET frame. */
180 pushq $__USER_DS /* pt_regs->ss */
181 pushq RSP_SCRATCH /* pt_regs->sp */
182 pushq %r11 /* pt_regs->flags */
183 pushq $__USER_CS /* pt_regs->cs */
184 pushq %rcx /* pt_regs->ip */
185
186 /*
187 * x86 lacks a near absolute jump, and we can't jump to the real
188 * entry text with a relative jump. We could push the target
189 * address and then use retq, but this destroys the pipeline on
190 * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
191 * spill RDI and restore it in a second-stage trampoline.
192 */
193 pushq %rdi
194 movq $entry_SYSCALL_64_stage2, %rdi
195 JMP_NOSPEC %rdi
196END(entry_SYSCALL_64_trampoline)
197
198 .popsection
199
200ENTRY(entry_SYSCALL_64_stage2)
201 UNWIND_HINT_EMPTY
202 popq %rdi
203 jmp entry_SYSCALL_64_after_hwframe
204END(entry_SYSCALL_64_stage2)
205
143ENTRY(entry_SYSCALL_64) 206ENTRY(entry_SYSCALL_64)
144 UNWIND_HINT_EMPTY 207 UNWIND_HINT_EMPTY
145 /* 208 /*
@@ -149,6 +212,10 @@ ENTRY(entry_SYSCALL_64)
149 */ 212 */
150 213
151 swapgs 214 swapgs
215 /*
216 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
217 * is not required to switch CR3.
218 */
152 movq %rsp, PER_CPU_VAR(rsp_scratch) 219 movq %rsp, PER_CPU_VAR(rsp_scratch)
153 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 220 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
154 221
@@ -204,7 +271,12 @@ entry_SYSCALL_64_fastpath:
204 * It might end up jumping to the slow path. If it jumps, RAX 271 * It might end up jumping to the slow path. If it jumps, RAX
205 * and all argument registers are clobbered. 272 * and all argument registers are clobbered.
206 */ 273 */
274#ifdef CONFIG_RETPOLINE
275 movq sys_call_table(, %rax, 8), %rax
276 call __x86_indirect_thunk_rax
277#else
207 call *sys_call_table(, %rax, 8) 278 call *sys_call_table(, %rax, 8)
279#endif
208.Lentry_SYSCALL_64_after_fastpath_call: 280.Lentry_SYSCALL_64_after_fastpath_call:
209 281
210 movq %rax, RAX(%rsp) 282 movq %rax, RAX(%rsp)
@@ -330,8 +402,25 @@ syscall_return_via_sysret:
330 popq %rsi /* skip rcx */ 402 popq %rsi /* skip rcx */
331 popq %rdx 403 popq %rdx
332 popq %rsi 404 popq %rsi
405
406 /*
407 * Now all regs are restored except RSP and RDI.
408 * Save old stack pointer and switch to trampoline stack.
409 */
410 movq %rsp, %rdi
411 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
412
413 pushq RSP-RDI(%rdi) /* RSP */
414 pushq (%rdi) /* RDI */
415
416 /*
417 * We are on the trampoline stack. All regs except RDI are live.
418 * We can do future final exit work right here.
419 */
420 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
421
333 popq %rdi 422 popq %rdi
334 movq RSP-ORIG_RAX(%rsp), %rsp 423 popq %rsp
335 USERGS_SYSRET64 424 USERGS_SYSRET64
336END(entry_SYSCALL_64) 425END(entry_SYSCALL_64)
337 426
@@ -359,7 +448,7 @@ ENTRY(stub_ptregs_64)
359 jmp entry_SYSCALL64_slow_path 448 jmp entry_SYSCALL64_slow_path
360 449
3611: 4501:
362 jmp *%rax /* Called from C */ 451 JMP_NOSPEC %rax /* Called from C */
363END(stub_ptregs_64) 452END(stub_ptregs_64)
364 453
365.macro ptregs_stub func 454.macro ptregs_stub func
@@ -438,7 +527,7 @@ ENTRY(ret_from_fork)
4381: 5271:
439 /* kernel thread */ 528 /* kernel thread */
440 movq %r12, %rdi 529 movq %r12, %rdi
441 call *%rbx 530 CALL_NOSPEC %rbx
442 /* 531 /*
443 * A kernel thread is allowed to return here after successfully 532 * A kernel thread is allowed to return here after successfully
444 * calling do_execve(). Exit to userspace to complete the execve() 533 * calling do_execve(). Exit to userspace to complete the execve()
@@ -466,12 +555,13 @@ END(irq_entries_start)
466 555
467.macro DEBUG_ENTRY_ASSERT_IRQS_OFF 556.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
468#ifdef CONFIG_DEBUG_ENTRY 557#ifdef CONFIG_DEBUG_ENTRY
469 pushfq 558 pushq %rax
470 testl $X86_EFLAGS_IF, (%rsp) 559 SAVE_FLAGS(CLBR_RAX)
560 testl $X86_EFLAGS_IF, %eax
471 jz .Lokay_\@ 561 jz .Lokay_\@
472 ud2 562 ud2
473.Lokay_\@: 563.Lokay_\@:
474 addq $8, %rsp 564 popq %rax
475#endif 565#endif
476.endm 566.endm
477 567
@@ -563,6 +653,13 @@ END(irq_entries_start)
563/* 0(%rsp): ~(interrupt number) */ 653/* 0(%rsp): ~(interrupt number) */
564 .macro interrupt func 654 .macro interrupt func
565 cld 655 cld
656
657 testb $3, CS-ORIG_RAX(%rsp)
658 jz 1f
659 SWAPGS
660 call switch_to_thread_stack
6611:
662
566 ALLOC_PT_GPREGS_ON_STACK 663 ALLOC_PT_GPREGS_ON_STACK
567 SAVE_C_REGS 664 SAVE_C_REGS
568 SAVE_EXTRA_REGS 665 SAVE_EXTRA_REGS
@@ -572,12 +669,8 @@ END(irq_entries_start)
572 jz 1f 669 jz 1f
573 670
574 /* 671 /*
575 * IRQ from user mode. Switch to kernel gsbase and inform context 672 * IRQ from user mode.
576 * tracking that we're in kernel mode. 673 *
577 */
578 SWAPGS
579
580 /*
581 * We need to tell lockdep that IRQs are off. We can't do this until 674 * We need to tell lockdep that IRQs are off. We can't do this until
582 * we fix gsbase, and we should do it before enter_from_user_mode 675 * we fix gsbase, and we should do it before enter_from_user_mode
583 * (which can take locks). Since TRACE_IRQS_OFF is idempotent, 676
@@ -630,10 +723,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
630 ud2 723 ud2
6311: 7241:
632#endif 725#endif
633 SWAPGS
634 POP_EXTRA_REGS 726 POP_EXTRA_REGS
635 POP_C_REGS 727 popq %r11
636 addq $8, %rsp /* skip regs->orig_ax */ 728 popq %r10
729 popq %r9
730 popq %r8
731 popq %rax
732 popq %rcx
733 popq %rdx
734 popq %rsi
735
736 /*
737 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
738 * Save old stack pointer and switch to trampoline stack.
739 */
740 movq %rsp, %rdi
741 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
742
743 /* Copy the IRET frame to the trampoline stack. */
744 pushq 6*8(%rdi) /* SS */
745 pushq 5*8(%rdi) /* RSP */
746 pushq 4*8(%rdi) /* EFLAGS */
747 pushq 3*8(%rdi) /* CS */
748 pushq 2*8(%rdi) /* RIP */
749
750 /* Push user RDI on the trampoline stack. */
751 pushq (%rdi)
752
753 /*
754 * We are on the trampoline stack. All regs except RDI are live.
755 * We can do future final exit work right here.
756 */
757
758 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
759
760 /* Restore RDI. */
761 popq %rdi
762 SWAPGS
637 INTERRUPT_RETURN 763 INTERRUPT_RETURN
638 764
639 765
@@ -713,7 +839,9 @@ native_irq_return_ldt:
713 */ 839 */
714 840
715 pushq %rdi /* Stash user RDI */ 841 pushq %rdi /* Stash user RDI */
716 SWAPGS 842 SWAPGS /* to kernel GS */
843 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
844
717 movq PER_CPU_VAR(espfix_waddr), %rdi 845 movq PER_CPU_VAR(espfix_waddr), %rdi
718 movq %rax, (0*8)(%rdi) /* user RAX */ 846 movq %rax, (0*8)(%rdi) /* user RAX */
719 movq (1*8)(%rsp), %rax /* user RIP */ 847 movq (1*8)(%rsp), %rax /* user RIP */
@@ -729,7 +857,6 @@ native_irq_return_ldt:
729 /* Now RAX == RSP. */ 857 /* Now RAX == RSP. */
730 858
731 andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ 859 andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
732 popq %rdi /* Restore user RDI */
733 860
734 /* 861 /*
735 * espfix_stack[31:16] == 0. The page tables are set up such that 862 * espfix_stack[31:16] == 0. The page tables are set up such that
@@ -740,7 +867,11 @@ native_irq_return_ldt:
740 * still points to an RO alias of the ESPFIX stack. 867 * still points to an RO alias of the ESPFIX stack.
741 */ 868 */
742 orq PER_CPU_VAR(espfix_stack), %rax 869 orq PER_CPU_VAR(espfix_stack), %rax
743 SWAPGS 870
871 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
872 SWAPGS /* to user GS */
873 popq %rdi /* Restore user RDI */
874
744 movq %rax, %rsp 875 movq %rax, %rsp
745 UNWIND_HINT_IRET_REGS offset=8 876 UNWIND_HINT_IRET_REGS offset=8
746 877
@@ -829,7 +960,35 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
829/* 960/*
830 * Exception entry points. 961 * Exception entry points.
831 */ 962 */
832#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) 963#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
964
965/*
966 * Switch to the thread stack. This is called with the IRET frame and
967 * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
968 * space has not been allocated for them.)
969 */
970ENTRY(switch_to_thread_stack)
971 UNWIND_HINT_FUNC
972
973 pushq %rdi
974 /* Need to switch before accessing the thread stack. */
975 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
976 movq %rsp, %rdi
977 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
978 UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
979
980 pushq 7*8(%rdi) /* regs->ss */
981 pushq 6*8(%rdi) /* regs->rsp */
982 pushq 5*8(%rdi) /* regs->eflags */
983 pushq 4*8(%rdi) /* regs->cs */
984 pushq 3*8(%rdi) /* regs->ip */
985 pushq 2*8(%rdi) /* regs->orig_ax */
986 pushq 8(%rdi) /* return address */
987 UNWIND_HINT_FUNC
988
989 movq (%rdi), %rdi
990 ret
991END(switch_to_thread_stack)
833 992
834.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 993.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
835ENTRY(\sym) 994ENTRY(\sym)
@@ -848,11 +1007,12 @@ ENTRY(\sym)
848 1007
849 ALLOC_PT_GPREGS_ON_STACK 1008 ALLOC_PT_GPREGS_ON_STACK
850 1009
851 .if \paranoid 1010 .if \paranoid < 2
852 .if \paranoid == 1
853 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ 1011 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
854 jnz 1f 1012 jnz .Lfrom_usermode_switch_stack_\@
855 .endif 1013 .endif
1014
1015 .if \paranoid
856 call paranoid_entry 1016 call paranoid_entry
857 .else 1017 .else
858 call error_entry 1018 call error_entry
@@ -894,20 +1054,15 @@ ENTRY(\sym)
894 jmp error_exit 1054 jmp error_exit
895 .endif 1055 .endif
896 1056
897 .if \paranoid == 1 1057 .if \paranoid < 2
898 /* 1058 /*
899 * Paranoid entry from userspace. Switch stacks and treat it 1059 * Entry from userspace. Switch stacks and treat it
900 * as a normal entry. This means that paranoid handlers 1060 * as a normal entry. This means that paranoid handlers
901 * run in real process context if user_mode(regs). 1061 * run in real process context if user_mode(regs).
902 */ 1062 */
9031: 1063.Lfrom_usermode_switch_stack_\@:
904 call error_entry 1064 call error_entry
905 1065
906
907 movq %rsp, %rdi /* pt_regs pointer */
908 call sync_regs
909 movq %rax, %rsp /* switch stack */
910
911 movq %rsp, %rdi /* pt_regs pointer */ 1066 movq %rsp, %rdi /* pt_regs pointer */
912 1067
913 .if \has_error_code 1068 .if \has_error_code
@@ -1119,7 +1274,11 @@ ENTRY(paranoid_entry)
1119 js 1f /* negative -> in kernel */ 1274 js 1f /* negative -> in kernel */
1120 SWAPGS 1275 SWAPGS
1121 xorl %ebx, %ebx 1276 xorl %ebx, %ebx
11221: ret 1277
12781:
1279 SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
1280
1281 ret
1123END(paranoid_entry) 1282END(paranoid_entry)
1124 1283
1125/* 1284/*
@@ -1141,6 +1300,7 @@ ENTRY(paranoid_exit)
1141 testl %ebx, %ebx /* swapgs needed? */ 1300 testl %ebx, %ebx /* swapgs needed? */
1142 jnz .Lparanoid_exit_no_swapgs 1301 jnz .Lparanoid_exit_no_swapgs
1143 TRACE_IRQS_IRETQ 1302 TRACE_IRQS_IRETQ
1303 RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
1144 SWAPGS_UNSAFE_STACK 1304 SWAPGS_UNSAFE_STACK
1145 jmp .Lparanoid_exit_restore 1305 jmp .Lparanoid_exit_restore
1146.Lparanoid_exit_no_swapgs: 1306.Lparanoid_exit_no_swapgs:
@@ -1168,8 +1328,18 @@ ENTRY(error_entry)
1168 * from user mode due to an IRET fault. 1328 * from user mode due to an IRET fault.
1169 */ 1329 */
1170 SWAPGS 1330 SWAPGS
1331 /* We have user CR3. Change to kernel CR3. */
1332 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
1171 1333
1172.Lerror_entry_from_usermode_after_swapgs: 1334.Lerror_entry_from_usermode_after_swapgs:
1335 /* Put us onto the real thread stack. */
1336 popq %r12 /* save return addr in %r12 */
1337 movq %rsp, %rdi /* arg0 = pt_regs pointer */
1338 call sync_regs
1339 movq %rax, %rsp /* switch stack */
1340 ENCODE_FRAME_POINTER
1341 pushq %r12
1342
1173 /* 1343 /*
1174 * We need to tell lockdep that IRQs are off. We can't do this until 1344 * We need to tell lockdep that IRQs are off. We can't do this until
1175 * we fix gsbase, and we should do it before enter_from_user_mode 1345 * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1376,7 @@ ENTRY(error_entry)
1206 * .Lgs_change's error handler with kernel gsbase. 1376 * .Lgs_change's error handler with kernel gsbase.
1207 */ 1377 */
1208 SWAPGS 1378 SWAPGS
1379 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
1209 jmp .Lerror_entry_done 1380 jmp .Lerror_entry_done
1210 1381
1211.Lbstep_iret: 1382.Lbstep_iret:
@@ -1215,10 +1386,11 @@ ENTRY(error_entry)
1215 1386
1216.Lerror_bad_iret: 1387.Lerror_bad_iret:
1217 /* 1388 /*
1218 * We came from an IRET to user mode, so we have user gsbase. 1389 * We came from an IRET to user mode, so we have user
1219 * Switch to kernel gsbase: 1390 * gsbase and CR3. Switch to kernel gsbase and CR3:
1220 */ 1391 */
1221 SWAPGS 1392 SWAPGS
1393 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
1222 1394
1223 /* 1395 /*
1224 * Pretend that the exception came from user mode: set up pt_regs 1396 * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1422,10 @@ END(error_exit)
1250/* 1422/*
1251 * Runs on exception stack. Xen PV does not go through this path at all, 1423 * Runs on exception stack. Xen PV does not go through this path at all,
1252 * so we can use real assembly here. 1424 * so we can use real assembly here.
1425 *
1426 * Registers:
1427 * %r14: Used to save/restore the CR3 of the interrupted context
1428 * when PAGE_TABLE_ISOLATION is in use. Do not clobber.
1253 */ 1429 */
1254ENTRY(nmi) 1430ENTRY(nmi)
1255 UNWIND_HINT_IRET_REGS 1431 UNWIND_HINT_IRET_REGS
@@ -1313,6 +1489,7 @@ ENTRY(nmi)
1313 1489
1314 swapgs 1490 swapgs
1315 cld 1491 cld
1492 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
1316 movq %rsp, %rdx 1493 movq %rsp, %rdx
1317 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 1494 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
1318 UNWIND_HINT_IRET_REGS base=%rdx offset=8 1495 UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1742,8 @@ end_repeat_nmi:
1565 movq $-1, %rsi 1742 movq $-1, %rsi
1566 call do_nmi 1743 call do_nmi
1567 1744
1745 RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
1746
1568 testl %ebx, %ebx /* swapgs needed? */ 1747 testl %ebx, %ebx /* swapgs needed? */
1569 jnz nmi_restore 1748 jnz nmi_restore
1570nmi_swapgs: 1749nmi_swapgs:
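One way to read the CPU_ENTRY_AREA trick used by the trampoline above, sketched in C (struct and field names assumed): %rip-relative references resolve against the remapped address the code actually runs at, so subtracting the trampoline's fixed offset inside cpu_entry_area recovers the per-CPU area base.

/* runtime_trampoline: what _entry_trampoline(%rip) resolves to at run time;
 * trampoline_offset: the CPU_ENTRY_AREA_entry_trampoline asm-offsets constant */
static inline void *cpu_entry_area_base(unsigned long runtime_trampoline,
                                        unsigned long trampoline_offset)
{
        return (void *)(runtime_trampoline - trampoline_offset);
}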
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 568e130d932c..98d5358e4041 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,11 @@
48 */ 48 */
49ENTRY(entry_SYSENTER_compat) 49ENTRY(entry_SYSENTER_compat)
50 /* Interrupts are off on entry. */ 50 /* Interrupts are off on entry. */
51 SWAPGS_UNSAFE_STACK 51 SWAPGS
52
53 /* We are about to clobber %rsp anyway, clobbering here is OK */
54 SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
55
52 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 56 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
53 57
54 /* 58 /*
@@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
186 /* Interrupts are off on entry. */ 190 /* Interrupts are off on entry. */
187 swapgs 191 swapgs
188 192
189 /* Stash user ESP and switch to the kernel stack. */ 193 /* Stash user ESP */
190 movl %esp, %r8d 194 movl %esp, %r8d
195
196 /* Use %rsp as scratch reg. User ESP is stashed in r8 */
197 SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
198
199 /* Switch to the kernel stack */
191 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 200 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
192 201
193 /* Construct struct pt_regs on stack */ 202 /* Construct struct pt_regs on stack */
@@ -256,10 +265,22 @@ sysret32_from_system_call:
256 * when the system call started, which is already known to user 265 * when the system call started, which is already known to user
257 * code. We zero R8-R10 to avoid info leaks. 266 * code. We zero R8-R10 to avoid info leaks.
258 */ 267 */
268 movq RSP-ORIG_RAX(%rsp), %rsp
269
270 /*
271 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
272 * on the process stack which is not mapped to userspace and
273 * not readable after we SWITCH_TO_USER_CR3. Delay the CR3
274 * switch until after after the last reference to the process
275 * stack.
276 *
277 * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
278 */
279 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
280
259 xorq %r8, %r8 281 xorq %r8, %r8
260 xorq %r9, %r9 282 xorq %r9, %r9
261 xorq %r10, %r10 283 xorq %r10, %r10
262 movq RSP-ORIG_RAX(%rsp), %rsp
263 swapgs 284 swapgs
264 sysretl 285 sysretl
265END(entry_SYSCALL_compat) 286END(entry_SYSCALL_compat)
@@ -306,8 +327,11 @@ ENTRY(entry_INT80_compat)
306 */ 327 */
307 movl %eax, %eax 328 movl %eax, %eax
308 329
309 /* Construct struct pt_regs on stack (iret frame is already on stack) */
310 pushq %rax /* pt_regs->orig_ax */ 330 pushq %rax /* pt_regs->orig_ax */
331
332 /* switch to thread stack expects orig_ax to be pushed */
333 call switch_to_thread_stack
334
311 pushq %rdi /* pt_regs->di */ 335 pushq %rdi /* pt_regs->di */
312 pushq %rsi /* pt_regs->si */ 336 pushq %rsi /* pt_regs->si */
313 pushq %rdx /* pt_regs->dx */ 337 pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 11b13c4b43d5..f19856d95c60 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t)
324 *t = result; 324 *t = result;
325 return result; 325 return result;
326} 326}
327int time(time_t *t) 327time_t time(time_t *t)
328 __attribute__((weak, alias("__vdso_time"))); 328 __attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index f279ba2643dc..577fa8adb785 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -37,6 +37,7 @@
37#include <asm/unistd.h> 37#include <asm/unistd.h>
38#include <asm/fixmap.h> 38#include <asm/fixmap.h>
39#include <asm/traps.h> 39#include <asm/traps.h>
40#include <asm/paravirt.h>
40 41
41#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
42#include "vsyscall_trace.h" 43#include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
138 139
139 WARN_ON_ONCE(address != regs->ip); 140 WARN_ON_ONCE(address != regs->ip);
140 141
142 /* This should be unreachable in NATIVE mode. */
143 if (WARN_ON(vsyscall_mode == NATIVE))
144 return false;
145
141 if (vsyscall_mode == NONE) { 146 if (vsyscall_mode == NONE) {
142 warn_bad_vsyscall(KERN_INFO, regs, 147 warn_bad_vsyscall(KERN_INFO, regs,
143 "vsyscall attempted with vsyscall=none"); 148 "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
329 return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; 334 return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
330} 335}
331 336
337/*
338 * The VSYSCALL page is the only user-accessible page in the kernel address
339 * range. Normally, the kernel page tables can have _PAGE_USER clear, but
340 * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
341 * are enabled.
342 *
343 * Some day we may create a "minimal" vsyscall mode in which we emulate
344 * vsyscalls but leave the page not present. If so, we skip calling
345 * this.
346 */
347void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
348{
349 pgd_t *pgd;
350 p4d_t *p4d;
351 pud_t *pud;
352 pmd_t *pmd;
353
354 pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
355 set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
356 p4d = p4d_offset(pgd, VSYSCALL_ADDR);
357#if CONFIG_PGTABLE_LEVELS >= 5
358 p4d->p4d |= _PAGE_USER;
359#endif
360 pud = pud_offset(p4d, VSYSCALL_ADDR);
361 set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
362 pmd = pmd_offset(pud, VSYSCALL_ADDR);
363 set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
364}
365
332void __init map_vsyscall(void) 366void __init map_vsyscall(void)
333{ 367{
334 extern char __vsyscall_page; 368 extern char __vsyscall_page;
335 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); 369 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
336 370
337 if (vsyscall_mode != NONE) 371 if (vsyscall_mode != NONE) {
338 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, 372 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
339 vsyscall_mode == NATIVE 373 vsyscall_mode == NATIVE
340 ? PAGE_KERNEL_VSYSCALL 374 ? PAGE_KERNEL_VSYSCALL
341 : PAGE_KERNEL_VVAR); 375 : PAGE_KERNEL_VVAR);
376 set_vsyscall_pgtable_user_bits(swapper_pg_dir);
377 }
342 378
343 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != 379 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
344 (unsigned long)VSYSCALL_ADDR); 380 (unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 141e07b06216..24ffa1e88cf9 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -582,6 +582,24 @@ static __init int bts_init(void)
582 if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) 582 if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
583 return -ENODEV; 583 return -ENODEV;
584 584
585 if (boot_cpu_has(X86_FEATURE_PTI)) {
586 /*
587 * BTS hardware writes through a virtual memory map; we must
588 * either use the kernel physical map or the user mapping of
589 * the AUX buffer.
590 *
591 * However, since this driver supports per-CPU and per-task inherit,
592 * we cannot use the user mapping: it will not be available
593 * if we're not running the owning process.
594 *
595 * With PTI we can't use the kernel map either, because it's not
596 * there when we run userspace.
597 *
598 * For now, disable this driver when using PTI.
599 */
600 return -ENODEV;
601 }
602
585 bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | 603 bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
586 PERF_PMU_CAP_EXCLUSIVE; 604 PERF_PMU_CAP_EXCLUSIVE;
587 bts_pmu.task_ctx_nr = perf_sw_context; 605 bts_pmu.task_ctx_nr = perf_sw_context;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 09c26a4f139c..731153a4681e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
3847 3847
3848__init int intel_pmu_init(void) 3848__init int intel_pmu_init(void)
3849{ 3849{
3850 struct attribute **extra_attr = NULL;
3851 struct attribute **to_free = NULL;
3850 union cpuid10_edx edx; 3852 union cpuid10_edx edx;
3851 union cpuid10_eax eax; 3853 union cpuid10_eax eax;
3852 union cpuid10_ebx ebx; 3854 union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
3854 unsigned int unused; 3856 unsigned int unused;
3855 struct extra_reg *er; 3857 struct extra_reg *er;
3856 int version, i; 3858 int version, i;
3857 struct attribute **extra_attr = NULL;
3858 char *name; 3859 char *name;
3859 3860
3860 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 3861 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
4294 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 4295 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
4295 hsw_format_attr : nhm_format_attr; 4296 hsw_format_attr : nhm_format_attr;
4296 extra_attr = merge_attr(extra_attr, skl_format_attr); 4297 extra_attr = merge_attr(extra_attr, skl_format_attr);
4298 to_free = extra_attr;
4297 x86_pmu.cpu_events = get_hsw_events_attrs(); 4299 x86_pmu.cpu_events = get_hsw_events_attrs();
4298 intel_pmu_pebs_data_source_skl( 4300 intel_pmu_pebs_data_source_skl(
4299 boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); 4301 boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
4401 pr_cont("full-width counters, "); 4403 pr_cont("full-width counters, ");
4402 } 4404 }
4403 4405
4406 kfree(to_free);
4404 return 0; 4407 return 0;
4405} 4408}
4406 4409
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3674a4b6f8bd..8156e47da7ba 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -3,16 +3,19 @@
3#include <linux/types.h> 3#include <linux/types.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5 5
6#include <asm/cpu_entry_area.h>
6#include <asm/perf_event.h> 7#include <asm/perf_event.h>
8#include <asm/tlbflush.h>
7#include <asm/insn.h> 9#include <asm/insn.h>
8 10
9#include "../perf_event.h" 11#include "../perf_event.h"
10 12
13/* Waste a full page so it can be mapped into the cpu_entry_area */
14DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
15
11/* The size of a BTS record in bytes: */ 16/* The size of a BTS record in bytes: */
12#define BTS_RECORD_SIZE 24 17#define BTS_RECORD_SIZE 24
13 18
14#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
15#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
16#define PEBS_FIXUP_SIZE PAGE_SIZE 19#define PEBS_FIXUP_SIZE PAGE_SIZE
17 20
18/* 21/*
@@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu)
279 282
280static DEFINE_PER_CPU(void *, insn_buffer); 283static DEFINE_PER_CPU(void *, insn_buffer);
281 284
282static int alloc_pebs_buffer(int cpu) 285static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
283{ 286{
284 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 287 unsigned long start = (unsigned long)cea;
288 phys_addr_t pa;
289 size_t msz = 0;
290
291 pa = virt_to_phys(addr);
292
293 preempt_disable();
294 for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
295 cea_set_pte(cea, pa, prot);
296
297 /*
298 * This is a cross-CPU update of the cpu_entry_area; we must shoot down
299 * all TLB entries for it.
300 */
301 flush_tlb_kernel_range(start, start + size);
302 preempt_enable();
303}
304
305static void ds_clear_cea(void *cea, size_t size)
306{
307 unsigned long start = (unsigned long)cea;
308 size_t msz = 0;
309
310 preempt_disable();
311 for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
312 cea_set_pte(cea, 0, PAGE_NONE);
313
314 flush_tlb_kernel_range(start, start + size);
315 preempt_enable();
316}
317
318static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
319{
320 unsigned int order = get_order(size);
285 int node = cpu_to_node(cpu); 321 int node = cpu_to_node(cpu);
286 int max; 322 struct page *page;
287 void *buffer, *ibuffer; 323
324 page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
325 return page ? page_address(page) : NULL;
326}
327
328static void dsfree_pages(const void *buffer, size_t size)
329{
330 if (buffer)
331 free_pages((unsigned long)buffer, get_order(size));
332}
333
334static int alloc_pebs_buffer(int cpu)
335{
336 struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
337 struct debug_store *ds = hwev->ds;
338 size_t bsiz = x86_pmu.pebs_buffer_size;
339 int max, node = cpu_to_node(cpu);
340 void *buffer, *ibuffer, *cea;
288 341
289 if (!x86_pmu.pebs) 342 if (!x86_pmu.pebs)
290 return 0; 343 return 0;
291 344
292 buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); 345 buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
293 if (unlikely(!buffer)) 346 if (unlikely(!buffer))
294 return -ENOMEM; 347 return -ENOMEM;
295 348
@@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu)
300 if (x86_pmu.intel_cap.pebs_format < 2) { 353 if (x86_pmu.intel_cap.pebs_format < 2) {
301 ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); 354 ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
302 if (!ibuffer) { 355 if (!ibuffer) {
303 kfree(buffer); 356 dsfree_pages(buffer, bsiz);
304 return -ENOMEM; 357 return -ENOMEM;
305 } 358 }
306 per_cpu(insn_buffer, cpu) = ibuffer; 359 per_cpu(insn_buffer, cpu) = ibuffer;
307 } 360 }
308 361 hwev->ds_pebs_vaddr = buffer;
309 max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; 362 /* Update the cpu entry area mapping */
310 363 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
311 ds->pebs_buffer_base = (u64)(unsigned long)buffer; 364 ds->pebs_buffer_base = (unsigned long) cea;
365 ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
312 ds->pebs_index = ds->pebs_buffer_base; 366 ds->pebs_index = ds->pebs_buffer_base;
313 ds->pebs_absolute_maximum = ds->pebs_buffer_base + 367 max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
314 max * x86_pmu.pebs_record_size; 368 ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
315
316 return 0; 369 return 0;
317} 370}
318 371
319static void release_pebs_buffer(int cpu) 372static void release_pebs_buffer(int cpu)
320{ 373{
321 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 374 struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
375 struct debug_store *ds = hwev->ds;
376 void *cea;
322 377
323 if (!ds || !x86_pmu.pebs) 378 if (!ds || !x86_pmu.pebs)
324 return; 379 return;
@@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu)
326 kfree(per_cpu(insn_buffer, cpu)); 381 kfree(per_cpu(insn_buffer, cpu));
327 per_cpu(insn_buffer, cpu) = NULL; 382 per_cpu(insn_buffer, cpu) = NULL;
328 383
329 kfree((void *)(unsigned long)ds->pebs_buffer_base); 384 /* Clear the fixmap */
385 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
386 ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
330 ds->pebs_buffer_base = 0; 387 ds->pebs_buffer_base = 0;
388 dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
389 hwev->ds_pebs_vaddr = NULL;
331} 390}
332 391
333static int alloc_bts_buffer(int cpu) 392static int alloc_bts_buffer(int cpu)
334{ 393{
335 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 394 struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
336 int node = cpu_to_node(cpu); 395 struct debug_store *ds = hwev->ds;
337 int max, thresh; 396 void *buffer, *cea;
338 void *buffer; 397 int max;
339 398
340 if (!x86_pmu.bts) 399 if (!x86_pmu.bts)
341 return 0; 400 return 0;
342 401
343 buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); 402 buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
344 if (unlikely(!buffer)) { 403 if (unlikely(!buffer)) {
345 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); 404 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
346 return -ENOMEM; 405 return -ENOMEM;
347 } 406 }
348 407 hwev->ds_bts_vaddr = buffer;
349 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; 408 /* Update the fixmap */
350 thresh = max / 16; 409 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
351 410 ds->bts_buffer_base = (unsigned long) cea;
352 ds->bts_buffer_base = (u64)(unsigned long)buffer; 411 ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
353 ds->bts_index = ds->bts_buffer_base; 412 ds->bts_index = ds->bts_buffer_base;
354 ds->bts_absolute_maximum = ds->bts_buffer_base + 413 max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
355 max * BTS_RECORD_SIZE; 414 ds->bts_absolute_maximum = ds->bts_buffer_base + max;
356 ds->bts_interrupt_threshold = ds->bts_absolute_maximum - 415 ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
357 thresh * BTS_RECORD_SIZE;
358
359 return 0; 416 return 0;
360} 417}
361 418
362static void release_bts_buffer(int cpu) 419static void release_bts_buffer(int cpu)
363{ 420{
364 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 421 struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
422 struct debug_store *ds = hwev->ds;
423 void *cea;
365 424
366 if (!ds || !x86_pmu.bts) 425 if (!ds || !x86_pmu.bts)
367 return; 426 return;
368 427
369 kfree((void *)(unsigned long)ds->bts_buffer_base); 428 /* Clear the fixmap */
429 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
430 ds_clear_cea(cea, BTS_BUFFER_SIZE);
370 ds->bts_buffer_base = 0; 431 ds->bts_buffer_base = 0;
432 dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
433 hwev->ds_bts_vaddr = NULL;
371} 434}
372 435
373static int alloc_ds_buffer(int cpu) 436static int alloc_ds_buffer(int cpu)
374{ 437{
375 int node = cpu_to_node(cpu); 438 struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
376 struct debug_store *ds;
377
378 ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
379 if (unlikely(!ds))
380 return -ENOMEM;
381 439
440 memset(ds, 0, sizeof(*ds));
382 per_cpu(cpu_hw_events, cpu).ds = ds; 441 per_cpu(cpu_hw_events, cpu).ds = ds;
383
384 return 0; 442 return 0;
385} 443}
386 444
387static void release_ds_buffer(int cpu) 445static void release_ds_buffer(int cpu)
388{ 446{
389 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
390
391 if (!ds)
392 return;
393
394 per_cpu(cpu_hw_events, cpu).ds = NULL; 447 per_cpu(cpu_hw_events, cpu).ds = NULL;
395 kfree(ds);
396} 448}
397 449
398void release_ds_buffers(void) 450void release_ds_buffers(void)
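
The sizing expression above, max = record_size * (buffer_size / record_size), rounds the buffer end down to a whole number of records, so the hardware can never deposit a record that runs past the cpu_entry_area alias. A standalone illustration of that arithmetic (the 176-byte record size is just an example value, not a real PEBS format size):

#include <stdio.h>

int main(void)
{
	unsigned long bsiz = 65536;	/* 16 pages, like PEBS_BUFFER_SIZE */
	unsigned long rec  = 176;	/* example record size only */
	unsigned long max  = rec * (bsiz / rec);

	/* 372 whole records fit; the trailing 64 bytes stay unused. */
	printf("absolute maximum: %lu of %lu bytes\n", max, bsiz);
	return 0;
}
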
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f7aaadf9331f..8e4ea143ed96 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -14,6 +14,8 @@
14 14
15#include <linux/perf_event.h> 15#include <linux/perf_event.h>
16 16
17#include <asm/intel_ds.h>
18
17/* To enable MSR tracing please use the generic trace points. */ 19/* To enable MSR tracing please use the generic trace points. */
18 20
19/* 21/*
@@ -77,8 +79,6 @@ struct amd_nb {
77 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 79 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
78}; 80};
79 81
80/* The maximal number of PEBS events: */
81#define MAX_PEBS_EVENTS 8
82#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) 82#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
83 83
84/* 84/*
@@ -95,23 +95,6 @@ struct amd_nb {
95 PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ 95 PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
96 PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) 96 PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
97 97
98/*
99 * A debug store configuration.
100 *
101 * We only support architectures that use 64bit fields.
102 */
103struct debug_store {
104 u64 bts_buffer_base;
105 u64 bts_index;
106 u64 bts_absolute_maximum;
107 u64 bts_interrupt_threshold;
108 u64 pebs_buffer_base;
109 u64 pebs_index;
110 u64 pebs_absolute_maximum;
111 u64 pebs_interrupt_threshold;
112 u64 pebs_event_reset[MAX_PEBS_EVENTS];
113};
114
115#define PEBS_REGS \ 98#define PEBS_REGS \
116 (PERF_REG_X86_AX | \ 99 (PERF_REG_X86_AX | \
117 PERF_REG_X86_BX | \ 100 PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
216 * Intel DebugStore bits 199 * Intel DebugStore bits
217 */ 200 */
218 struct debug_store *ds; 201 struct debug_store *ds;
202 void *ds_pebs_vaddr;
203 void *ds_bts_vaddr;
219 u64 pebs_enabled; 204 u64 pebs_enabled;
220 int n_pebs; 205 int n_pebs;
221 int n_large_pebs; 206 int n_large_pebs;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index dbfd0854651f..cf5961ca8677 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
140 ".popsection\n" \ 140 ".popsection\n" \
141 ".pushsection .altinstr_replacement, \"ax\"\n" \ 141 ".pushsection .altinstr_replacement, \"ax\"\n" \
142 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ 142 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
143 ".popsection" 143 ".popsection\n"
144 144
145#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ 145#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
146 OLDINSTR_2(oldinstr, 1, 2) \ 146 OLDINSTR_2(oldinstr, 1, 2) \
@@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
151 ".pushsection .altinstr_replacement, \"ax\"\n" \ 151 ".pushsection .altinstr_replacement, \"ax\"\n" \
152 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ 152 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
153 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ 153 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
154 ".popsection" 154 ".popsection\n"
155 155
156/* 156/*
157 * Alternative instructions for different CPU types or capabilities. 157 * Alternative instructions for different CPU types or capabilities.
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index ff700d81e91e..0927cdc4f946 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -11,7 +11,32 @@
11#include <asm/pgtable.h> 11#include <asm/pgtable.h>
12#include <asm/special_insns.h> 12#include <asm/special_insns.h>
13#include <asm/preempt.h> 13#include <asm/preempt.h>
14#include <asm/asm.h>
14 15
15#ifndef CONFIG_X86_CMPXCHG64 16#ifndef CONFIG_X86_CMPXCHG64
16extern void cmpxchg8b_emu(void); 17extern void cmpxchg8b_emu(void);
17#endif 18#endif
19
20#ifdef CONFIG_RETPOLINE
21#ifdef CONFIG_X86_32
22#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
23#else
24#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
25INDIRECT_THUNK(8)
26INDIRECT_THUNK(9)
27INDIRECT_THUNK(10)
28INDIRECT_THUNK(11)
29INDIRECT_THUNK(12)
30INDIRECT_THUNK(13)
31INDIRECT_THUNK(14)
32INDIRECT_THUNK(15)
33#endif
34INDIRECT_THUNK(ax)
35INDIRECT_THUNK(bx)
36INDIRECT_THUNK(cx)
37INDIRECT_THUNK(dx)
38INDIRECT_THUNK(si)
39INDIRECT_THUNK(di)
40INDIRECT_THUNK(bp)
41INDIRECT_THUNK(sp)
42#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51df..386a6900e206 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -136,6 +136,7 @@
136#endif 136#endif
137 137
138#ifndef __ASSEMBLY__ 138#ifndef __ASSEMBLY__
139#ifndef __BPF__
139/* 140/*
140 * This output constraint should be used for any inline asm which has a "call" 141 * This output constraint should be used for any inline asm which has a "call"
141 * instruction. Otherwise the asm may be inserted before the frame pointer 142 * instruction. Otherwise the asm may be inserted before the frame pointer
@@ -145,5 +146,6 @@
145register unsigned long current_stack_pointer asm(_ASM_SP); 146register unsigned long current_stack_pointer asm(_ASM_SP);
146#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) 147#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
147#endif 148#endif
149#endif
148 150
149#endif /* _ASM_X86_ASM_H */ 151#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..4a7884b8dca5
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,81 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#ifndef _ASM_X86_CPU_ENTRY_AREA_H
4#define _ASM_X86_CPU_ENTRY_AREA_H
5
6#include <linux/percpu-defs.h>
7#include <asm/processor.h>
8#include <asm/intel_ds.h>
9
10/*
11 * cpu_entry_area is a percpu region that contains things needed by the CPU
12 * and early entry/exit code. Real types aren't used for all fields here
13 * to avoid circular header dependencies.
14 *
15 * Every field is a virtual alias of some other allocated backing store.
16 * There is no direct allocation of a struct cpu_entry_area.
17 */
18struct cpu_entry_area {
19 char gdt[PAGE_SIZE];
20
21 /*
22 * The GDT is just below entry_stack and thus serves (on x86_64) as
23 * a a read-only guard page.
24 */
25 struct entry_stack_page entry_stack_page;
26
27 /*
28 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
29 * we need task switches to work, and task switches write to the TSS.
30 */
31 struct tss_struct tss;
32
33 char entry_trampoline[PAGE_SIZE];
34
35#ifdef CONFIG_X86_64
36 /*
37 * Exception stacks used for IST entries.
38 *
39 * In the future, this should have a separate slot for each stack
40 * with guard pages between them.
41 */
42 char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
43#endif
44#ifdef CONFIG_CPU_SUP_INTEL
45 /*
46 * Per CPU debug store for Intel performance monitoring. Wastes a
47 * full page at the moment.
48 */
49 struct debug_store cpu_debug_store;
50 /*
 51	 * The actual PEBS/BTS buffers must be mapped to user space;
 52	 * reserve enough fixmap PTEs for them.
53 */
54 struct debug_store_buffers cpu_debug_buffers;
55#endif
56};
57
58#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
59#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
60
61DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
62
63extern void setup_cpu_entry_areas(void);
64extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
65
66#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
67#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
68
69#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
70
71#define CPU_ENTRY_AREA_MAP_SIZE \
72 (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
73
74extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
75
76static inline struct entry_stack *cpu_entry_stack(int cpu)
77{
78 return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
79}
80
81#endif
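
get_cpu_entry_area() is only declared here; given the layout macros above it can be pure address arithmetic. A sketch of what the mm/cpu_entry_area.c side of this series plausibly does (an assumption, not a quote from the patch):

struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;

	/* The struct must tile the VA space in whole pages. */
	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);

	return (struct cpu_entry_area *) va;
}
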
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bf6a76202a77..ea9a7dde62e5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
135 set_bit(bit, (unsigned long *)cpu_caps_set); \ 135 set_bit(bit, (unsigned long *)cpu_caps_set); \
136} while (0) 136} while (0)
137 137
138#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
139
138#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) 140#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
139/* 141/*
140 * Static testing of CPU features. Used the same as boot_cpu_has(). 142 * Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c0b0e9e8aa66..f275447862f4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,11 +197,14 @@
197#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 197#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ 198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ 199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
200#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
200 201
201#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 202#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
202#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 203#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
203#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ 204#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
204 205#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
206#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
207#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
205#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 208#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
206#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ 209#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
207#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ 210#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -266,6 +269,7 @@
266/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ 269/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
267#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ 270#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
268#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ 271#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
272#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
269 273
270/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ 274/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
271#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ 275#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -339,5 +343,8 @@
339#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ 343#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
340#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ 344#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
341#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ 345#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
346#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
347#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
348#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
342 349
343#endif /* _ASM_X86_CPUFEATURES_H */ 350#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..13c5ee878a47 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -7,6 +7,7 @@
7#include <asm/mmu.h> 7#include <asm/mmu.h>
8#include <asm/fixmap.h> 8#include <asm/fixmap.h>
9#include <asm/irq_vectors.h> 9#include <asm/irq_vectors.h>
10#include <asm/cpu_entry_area.h>
10 11
11#include <linux/smp.h> 12#include <linux/smp.h>
12#include <linux/percpu.h> 13#include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
20 21
21 desc->type = (info->read_exec_only ^ 1) << 1; 22 desc->type = (info->read_exec_only ^ 1) << 1;
22 desc->type |= info->contents << 2; 23 desc->type |= info->contents << 2;
24 /* Set the ACCESS bit so it can be mapped RO */
25 desc->type |= 1;
23 26
24 desc->s = 1; 27 desc->s = 1;
25 desc->dpl = 0x3; 28 desc->dpl = 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
60 return this_cpu_ptr(&gdt_page)->gdt; 63 return this_cpu_ptr(&gdt_page)->gdt;
61} 64}
62 65
63/* Get the fixmap index for a specific processor */
64static inline unsigned int get_cpu_gdt_ro_index(int cpu)
65{
66 return FIX_GDT_REMAP_BEGIN + cpu;
67}
68
69/* Provide the fixmap address of the remapped GDT */ 66/* Provide the fixmap address of the remapped GDT */
70static inline struct desc_struct *get_cpu_gdt_ro(int cpu) 67static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
71{ 68{
72 unsigned int idx = get_cpu_gdt_ro_index(cpu); 69 return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
73 return (struct desc_struct *)__fix_to_virt(idx);
74} 70}
75 71
76/* Provide the current read-only GDT */ 72/* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
185#endif 181#endif
186} 182}
187 183
188static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) 184static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
189{ 185{
190 struct desc_struct *d = get_cpu_gdt_rw(cpu); 186 struct desc_struct *d = get_cpu_gdt_rw(cpu);
191 tss_desc tss; 187 tss_desc tss;
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 14d6d5007314..b027633e7300 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,12 @@
50# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) 50# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
51#endif 51#endif
52 52
53#ifdef CONFIG_PAGE_TABLE_ISOLATION
54# define DISABLE_PTI 0
55#else
56# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
57#endif
58
53/* 59/*
54 * Make sure to add features to the correct mask 60 * Make sure to add features to the correct mask
55 */ 61 */
@@ -60,7 +66,7 @@
60#define DISABLED_MASK4 (DISABLE_PCID) 66#define DISABLED_MASK4 (DISABLE_PCID)
61#define DISABLED_MASK5 0 67#define DISABLED_MASK5 0
62#define DISABLED_MASK6 0 68#define DISABLED_MASK6 0
63#define DISABLED_MASK7 0 69#define DISABLED_MASK7 (DISABLE_PTI)
64#define DISABLED_MASK8 0 70#define DISABLED_MASK8 0
65#define DISABLED_MASK9 (DISABLE_MPX) 71#define DISABLED_MASK9 (DISABLE_MPX)
66#define DISABLED_MASK10 0 72#define DISABLED_MASK10 0
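
Wiring X86_FEATURE_PTI into DISABLED_MASK7 matters because cpu_feature_enabled() folds the disabled masks in at compile time; on CONFIG_PAGE_TABLE_ISOLATION=n builds a guarded branch vanishes entirely. A hedged sketch (pti_init() stands in for any PTI-only work):

static void __init maybe_setup_pti(void)
{
	/*
	 * Constant-false when DISABLE_PTI is set in DISABLED_MASK7,
	 * so the compiler drops the call and its callee reference.
	 */
	if (cpu_feature_enabled(X86_FEATURE_PTI))
		pti_init();
}
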
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 0211029076ea..6777480d8a42 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -2,7 +2,7 @@
2#ifndef _ASM_X86_ESPFIX_H 2#ifndef _ASM_X86_ESPFIX_H
3#define _ASM_X86_ESPFIX_H 3#define _ASM_X86_ESPFIX_H
4 4
5#ifdef CONFIG_X86_64 5#ifdef CONFIG_X86_ESPFIX64
6 6
7#include <asm/percpu.h> 7#include <asm/percpu.h>
8 8
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
11 11
12extern void init_espfix_bsp(void); 12extern void init_espfix_bsp(void);
13extern void init_espfix_ap(int cpu); 13extern void init_espfix_ap(int cpu);
14 14#else
15#endif /* CONFIG_X86_64 */ 15static inline void init_espfix_ap(int cpu) { }
16#endif
16 17
17#endif /* _ASM_X86_ESPFIX_H */ 18#endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0c505fe9a95..64c4a30e0d39 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
44 PAGE_SIZE) 44 PAGE_SIZE)
45#endif 45#endif
46 46
47
48/* 47/*
49 * Here we define all the compile-time 'special' virtual 48 * Here we define all the compile-time 'special' virtual
50 * addresses. The point is to have a constant address at 49 * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
84 FIX_IO_APIC_BASE_0, 83 FIX_IO_APIC_BASE_0,
85 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, 84 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
86#endif 85#endif
87 FIX_RO_IDT, /* Virtual mapping for read-only IDT */
88#ifdef CONFIG_X86_32 86#ifdef CONFIG_X86_32
89 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ 87 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
90 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, 88 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
100#ifdef CONFIG_X86_INTEL_MID 98#ifdef CONFIG_X86_INTEL_MID
101 FIX_LNW_VRTC, 99 FIX_LNW_VRTC,
102#endif 100#endif
103 /* Fixmap entries to remap the GDTs, one per processor. */
104 FIX_GDT_REMAP_BEGIN,
105 FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
106 101
107#ifdef CONFIG_ACPI_APEI_GHES 102#ifdef CONFIG_ACPI_APEI_GHES
108 /* Used for GHES mapping from assorted contexts */ 103 /* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
143extern void reserve_top_address(unsigned long reserve); 138extern void reserve_top_address(unsigned long reserve);
144 139
145#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) 140#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
146#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) 141#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
147 142
148extern int fixmaps_set; 143extern int fixmaps_set;
149 144
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 1b0a5abcd8ae..96aa6b9884dc 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
20#ifndef _ASM_X86_HYPERVISOR_H 20#ifndef _ASM_X86_HYPERVISOR_H
21#define _ASM_X86_HYPERVISOR_H 21#define _ASM_X86_HYPERVISOR_H
22 22
23#ifdef CONFIG_HYPERVISOR_GUEST 23/* x86 hypervisor types */
24
25#include <asm/kvm_para.h>
26#include <asm/x86_init.h>
27#include <asm/xen/hypervisor.h>
28
29/*
30 * x86 hypervisor information
31 */
32
33enum x86_hypervisor_type { 24enum x86_hypervisor_type {
34 X86_HYPER_NATIVE = 0, 25 X86_HYPER_NATIVE = 0,
35 X86_HYPER_VMWARE, 26 X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
39 X86_HYPER_KVM, 30 X86_HYPER_KVM,
40}; 31};
41 32
33#ifdef CONFIG_HYPERVISOR_GUEST
34
35#include <asm/kvm_para.h>
36#include <asm/x86_init.h>
37#include <asm/xen/hypervisor.h>
38
42struct hypervisor_x86 { 39struct hypervisor_x86 {
43 /* Hypervisor name */ 40 /* Hypervisor name */
44 const char *name; 41 const char *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
58 55
59extern enum x86_hypervisor_type x86_hyper_type; 56extern enum x86_hypervisor_type x86_hyper_type;
60extern void init_hypervisor_platform(void); 57extern void init_hypervisor_platform(void);
58static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
59{
60 return x86_hyper_type == type;
61}
61#else 62#else
62static inline void init_hypervisor_platform(void) { } 63static inline void init_hypervisor_platform(void) { }
64static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
65{
66 return type == X86_HYPER_NATIVE;
67}
63#endif /* CONFIG_HYPERVISOR_GUEST */ 68#endif /* CONFIG_HYPERVISOR_GUEST */
64#endif /* _ASM_X86_HYPERVISOR_H */ 69#endif /* _ASM_X86_HYPERVISOR_H */
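
hypervisor_is_type() gives call sites a single test that also behaves sensibly on !CONFIG_HYPERVISOR_GUEST builds, where only "native" matches. Minimal usage sketch:

static bool is_kvm_guest(void)
{
	/* False on bare metal and on hypervisor-less configs. */
	return hypervisor_is_type(X86_HYPER_KVM);
}
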
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
new file mode 100644
index 000000000000..62a9f4966b42
--- /dev/null
+++ b/arch/x86/include/asm/intel_ds.h
@@ -0,0 +1,36 @@
1#ifndef _ASM_INTEL_DS_H
2#define _ASM_INTEL_DS_H
3
4#include <linux/percpu-defs.h>
5
6#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
7#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
8
9/* The maximal number of PEBS events: */
10#define MAX_PEBS_EVENTS 8
11
12/*
13 * A debug store configuration.
14 *
15 * We only support architectures that use 64bit fields.
16 */
17struct debug_store {
18 u64 bts_buffer_base;
19 u64 bts_index;
20 u64 bts_absolute_maximum;
21 u64 bts_interrupt_threshold;
22 u64 pebs_buffer_base;
23 u64 pebs_index;
24 u64 pebs_absolute_maximum;
25 u64 pebs_interrupt_threshold;
26 u64 pebs_event_reset[MAX_PEBS_EVENTS];
27} __aligned(PAGE_SIZE);
28
29DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
30
31struct debug_store_buffers {
32 char bts_buffer[BTS_BUFFER_SIZE];
33 char pebs_buffer[PEBS_BUFFER_SIZE];
34};
35
36#endif
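
With debug_store page-aligned and each buffer sized at PAGE_SIZE << 4, every CPU reserves a fixed slice of entry-area address space for DS. A standalone back-of-the-envelope check (assuming 4 KiB pages):

#include <stdio.h>

#define PAGE_SIZE	 4096UL			/* assumption: x86, 4 KiB */
#define BTS_BUFFER_SIZE	 (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)

int main(void)
{
	/* One debug_store page plus the two buffers, per CPU. */
	unsigned long per_cpu = PAGE_SIZE + BTS_BUFFER_SIZE + PEBS_BUFFER_SIZE;

	printf("DS entry-area footprint: %lu KiB per CPU\n", per_cpu / 1024);
	return 0;
}
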
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644
index 000000000000..989cfa86de85
--- /dev/null
+++ b/arch/x86/include/asm/invpcid.h
@@ -0,0 +1,53 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_X86_INVPCID
3#define _ASM_X86_INVPCID
4
5static inline void __invpcid(unsigned long pcid, unsigned long addr,
6 unsigned long type)
7{
8 struct { u64 d[2]; } desc = { { pcid, addr } };
9
10 /*
11 * The memory clobber is because the whole point is to invalidate
12 * stale TLB entries and, especially if we're flushing global
13 * mappings, we don't want the compiler to reorder any subsequent
14 * memory accesses before the TLB flush.
15 *
16 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
17 * invpcid (%rcx), %rax in long mode.
18 */
19 asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
20 : : "m" (desc), "a" (type), "c" (&desc) : "memory");
21}
22
23#define INVPCID_TYPE_INDIV_ADDR 0
24#define INVPCID_TYPE_SINGLE_CTXT 1
25#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
26#define INVPCID_TYPE_ALL_NON_GLOBAL 3
27
28/* Flush all mappings for a given pcid and addr, not including globals. */
29static inline void invpcid_flush_one(unsigned long pcid,
30 unsigned long addr)
31{
32 __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
33}
34
35/* Flush all mappings for a given PCID, not including globals. */
36static inline void invpcid_flush_single_context(unsigned long pcid)
37{
38 __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
39}
40
41/* Flush all mappings, including globals, for all PCIDs. */
42static inline void invpcid_flush_all(void)
43{
44 __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
45}
46
47/* Flush all mappings for all PCIDs except globals. */
48static inline void invpcid_flush_all_nonglobals(void)
49{
50 __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
51}
52
53#endif /* _ASM_X86_INVPCID */
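
A hedged sketch of a typical caller, modelled on the global TLB flush path of this era (not quoted from the patch): only INVPCID type 2 can kill global mappings across all PCIDs in one instruction, and toggling CR4.PGE is the legacy fallback.

static void flush_tlb_everything(void)
{
	unsigned long cr4;

	if (static_cpu_has(X86_FEATURE_INVPCID)) {
		invpcid_flush_all();
		return;
	}

	/* Legacy fallback: flipping CR4.PGE flushes everything, too. */
	cr4 = native_read_cr4();
	native_write_cr4(cr4 ^ X86_CR4_PGE);
	native_write_cr4(cr4);
}
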
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index 139feef467f7..c066ffae222b 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
44extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, 44extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
45 unsigned int nr_irqs); 45 unsigned int nr_irqs);
46extern int mp_irqdomain_activate(struct irq_domain *domain, 46extern int mp_irqdomain_activate(struct irq_domain *domain,
47 struct irq_data *irq_data, bool early); 47 struct irq_data *irq_data, bool reserve);
48extern void mp_irqdomain_deactivate(struct irq_domain *domain, 48extern void mp_irqdomain_deactivate(struct irq_domain *domain,
49 struct irq_data *irq_data); 49 struct irq_data *irq_data);
50extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); 50extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c8ef23f2c28f..89f08955fff7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
142 swapgs; \ 142 swapgs; \
143 sysretl 143 sysretl
144 144
145#ifdef CONFIG_DEBUG_ENTRY
146#define SAVE_FLAGS(x) pushfq; popq %rax
147#endif
145#else 148#else
146#define INTERRUPT_RETURN iret 149#define INTERRUPT_RETURN iret
147#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit 150#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f86a8caa561e..395c9631e000 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
26extern int __must_check __die(const char *, struct pt_regs *, long); 26extern int __must_check __die(const char *, struct pt_regs *, long);
27extern void show_stack_regs(struct pt_regs *regs); 27extern void show_stack_regs(struct pt_regs *regs);
28extern void __show_regs(struct pt_regs *regs, int all); 28extern void __show_regs(struct pt_regs *regs, int all);
29extern void show_iret_regs(struct pt_regs *regs);
29extern unsigned long oops_begin(void); 30extern unsigned long oops_begin(void);
30extern void oops_end(unsigned long, struct pt_regs *, int signr); 31extern void oops_end(unsigned long, struct pt_regs *, int signr);
31 32
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/include/asm/kmemcheck.h
+++ /dev/null
@@ -1 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 034caa1a084e..b24b1c8b3979 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -214,8 +214,6 @@ struct x86_emulate_ops {
214 void (*halt)(struct x86_emulate_ctxt *ctxt); 214 void (*halt)(struct x86_emulate_ctxt *ctxt);
215 void (*wbinvd)(struct x86_emulate_ctxt *ctxt); 215 void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
216 int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); 216 int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
217 void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
218 void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
219 int (*intercept)(struct x86_emulate_ctxt *ctxt, 217 int (*intercept)(struct x86_emulate_ctxt *ctxt,
220 struct x86_instruction_info *info, 218 struct x86_instruction_info *info,
221 enum x86_intercept_stage stage); 219 enum x86_intercept_stage stage);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1bfb99770c34..516798431328 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
536 struct kvm_mmu_memory_cache mmu_page_cache; 536 struct kvm_mmu_memory_cache mmu_page_cache;
537 struct kvm_mmu_memory_cache mmu_page_header_cache; 537 struct kvm_mmu_memory_cache mmu_page_header_cache;
538 538
539 /*
540 * QEMU userspace and the guest each have their own FPU state.
541 * In vcpu_run, we switch between the user and guest FPU contexts.
542 * While running a VCPU, the VCPU thread will have the guest FPU
543 * context.
544 *
545 * Note that while the PKRU state lives inside the fpu registers,
546 * it is switched out separately at VMENTER and VMEXIT time. The
547 * "guest_fpu" state here contains the guest FPU context, with the
 548	 * host PKRU bits.
549 */
550 struct fpu user_fpu;
539 struct fpu guest_fpu; 551 struct fpu guest_fpu;
552
540 u64 xcr0; 553 u64 xcr0;
541 u64 guest_supported_xcr0; 554 u64 guest_supported_xcr0;
542 u32 guest_xstate_size; 555 u32 guest_xstate_size;
@@ -1161,7 +1174,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
1161static inline int emulate_instruction(struct kvm_vcpu *vcpu, 1174static inline int emulate_instruction(struct kvm_vcpu *vcpu,
1162 int emulation_type) 1175 int emulation_type)
1163{ 1176{
1164 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); 1177 return x86_emulate_instruction(vcpu, 0,
1178 emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
1165} 1179}
1166 1180
1167void kvm_enable_efer_bits(u64); 1181void kvm_enable_efer_bits(u64);
@@ -1434,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
1434#define put_smstate(type, buf, offset, val) \ 1448#define put_smstate(type, buf, offset, val) \
1435 *(type *)((buf) + (offset) - 0x7e00) = val 1449 *(type *)((buf) + (offset) - 0x7e00) = val
1436 1450
1451void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
1452 unsigned long start, unsigned long end);
1453
1437#endif /* _ASM_X86_KVM_HOST_H */ 1454#endif /* _ASM_X86_KVM_HOST_H */
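
The swap described above happens around vcpu_run(). A simplified sketch of the two halves (assumption: modelled on the kvm_load_guest_fpu()/kvm_put_guest_fpu() pair this series adds in x86.c; the real code additionally leaves PKRU out of the restore, since PKRU is switched at VMENTER/VMEXIT):

static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	/* Park the userspace (QEMU) FPU state in user_fpu... */
	copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
	/* ...and put the guest's registers on the CPU instead. */
	copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
	preempt_enable();
}

static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
	copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
	preempt_enable();
}
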
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9ea26f167497..5ff3e8af2c20 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
3#define _ASM_X86_MMU_H 3#define _ASM_X86_MMU_H
4 4
5#include <linux/spinlock.h> 5#include <linux/spinlock.h>
6#include <linux/rwsem.h>
6#include <linux/mutex.h> 7#include <linux/mutex.h>
7#include <linux/atomic.h> 8#include <linux/atomic.h>
8 9
@@ -27,7 +28,8 @@ typedef struct {
27 atomic64_t tlb_gen; 28 atomic64_t tlb_gen;
28 29
29#ifdef CONFIG_MODIFY_LDT_SYSCALL 30#ifdef CONFIG_MODIFY_LDT_SYSCALL
30 struct ldt_struct *ldt; 31 struct rw_semaphore ldt_usr_sem;
32 struct ldt_struct *ldt;
31#endif 33#endif
32 34
33#ifdef CONFIG_X86_64 35#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6d16d15d09a0..c931b88982a0 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -50,22 +50,53 @@ struct ldt_struct {
50 * call gates. On native, we could merge the ldt_struct and LDT 50 * call gates. On native, we could merge the ldt_struct and LDT
51 * allocations, but it's not worth trying to optimize. 51 * allocations, but it's not worth trying to optimize.
52 */ 52 */
53 struct desc_struct *entries; 53 struct desc_struct *entries;
54 unsigned int nr_entries; 54 unsigned int nr_entries;
55
56 /*
57 * If PTI is in use, then the entries array is not mapped while we're
 58	 * in user mode. The whole array will be aliased at the address
59 * given by ldt_slot_va(slot). We use two slots so that we can allocate
60 * and map, and enable a new LDT without invalidating the mapping
61 * of an older, still-in-use LDT.
62 *
63 * slot will be -1 if this LDT doesn't have an alias mapping.
64 */
65 int slot;
55}; 66};
56 67
68/* This is a multiple of PAGE_SIZE. */
69#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
70
71static inline void *ldt_slot_va(int slot)
72{
73#ifdef CONFIG_X86_64
74 return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
75#else
76 BUG();
77#endif
78}
79
57/* 80/*
58 * Used for LDT copy/destruction. 81 * Used for LDT copy/destruction.
59 */ 82 */
60int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); 83static inline void init_new_context_ldt(struct mm_struct *mm)
84{
85 mm->context.ldt = NULL;
86 init_rwsem(&mm->context.ldt_usr_sem);
87}
88int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
61void destroy_context_ldt(struct mm_struct *mm); 89void destroy_context_ldt(struct mm_struct *mm);
90void ldt_arch_exit_mmap(struct mm_struct *mm);
62#else /* CONFIG_MODIFY_LDT_SYSCALL */ 91#else /* CONFIG_MODIFY_LDT_SYSCALL */
63static inline int init_new_context_ldt(struct task_struct *tsk, 92static inline void init_new_context_ldt(struct mm_struct *mm) { }
64 struct mm_struct *mm) 93static inline int ldt_dup_context(struct mm_struct *oldmm,
94 struct mm_struct *mm)
65{ 95{
66 return 0; 96 return 0;
67} 97}
68static inline void destroy_context_ldt(struct mm_struct *mm) {} 98static inline void destroy_context_ldt(struct mm_struct *mm) { }
99static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
69#endif 100#endif
70 101
71static inline void load_mm_ldt(struct mm_struct *mm) 102static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
90 * that we can see. 121 * that we can see.
91 */ 122 */
92 123
93 if (unlikely(ldt)) 124 if (unlikely(ldt)) {
94 set_ldt(ldt->entries, ldt->nr_entries); 125 if (static_cpu_has(X86_FEATURE_PTI)) {
95 else 126 if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
127 /*
128 * Whoops -- either the new LDT isn't mapped
129 * (if slot == -1) or is mapped into a bogus
130 * slot (if slot > 1).
131 */
132 clear_LDT();
133 return;
134 }
135
136 /*
137 * If page table isolation is enabled, ldt->entries
138 * will not be mapped in the userspace pagetables.
139 * Tell the CPU to access the LDT through the alias
140 * at ldt_slot_va(ldt->slot).
141 */
142 set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
143 } else {
144 set_ldt(ldt->entries, ldt->nr_entries);
145 }
146 } else {
96 clear_LDT(); 147 clear_LDT();
148 }
97#else 149#else
98 clear_LDT(); 150 clear_LDT();
99#endif 151#endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
132static inline int init_new_context(struct task_struct *tsk, 184static inline int init_new_context(struct task_struct *tsk,
133 struct mm_struct *mm) 185 struct mm_struct *mm)
134{ 186{
187 mutex_init(&mm->context.lock);
188
135 mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); 189 mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
136 atomic64_set(&mm->context.tlb_gen, 0); 190 atomic64_set(&mm->context.tlb_gen, 0);
137 191
138 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS 192#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
139 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { 193 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
140 /* pkey 0 is the default and always allocated */ 194 /* pkey 0 is the default and always allocated */
141 mm->context.pkey_allocation_map = 0x1; 195 mm->context.pkey_allocation_map = 0x1;
142 /* -1 means unallocated or invalid */ 196 /* -1 means unallocated or invalid */
143 mm->context.execute_only_pkey = -1; 197 mm->context.execute_only_pkey = -1;
144 } 198 }
145 #endif 199#endif
146 return init_new_context_ldt(tsk, mm); 200 init_new_context_ldt(mm);
201 return 0;
147} 202}
148static inline void destroy_context(struct mm_struct *mm) 203static inline void destroy_context(struct mm_struct *mm)
149{ 204{
@@ -176,15 +231,16 @@ do { \
176} while (0) 231} while (0)
177#endif 232#endif
178 233
179static inline void arch_dup_mmap(struct mm_struct *oldmm, 234static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
180 struct mm_struct *mm)
181{ 235{
182 paravirt_arch_dup_mmap(oldmm, mm); 236 paravirt_arch_dup_mmap(oldmm, mm);
237 return ldt_dup_context(oldmm, mm);
183} 238}
184 239
185static inline void arch_exit_mmap(struct mm_struct *mm) 240static inline void arch_exit_mmap(struct mm_struct *mm)
186{ 241{
187 paravirt_arch_exit_mmap(mm); 242 paravirt_arch_exit_mmap(mm);
243 ldt_arch_exit_mmap(mm);
188} 244}
189 245
190#ifdef CONFIG_X86_64 246#ifdef CONFIG_X86_64
@@ -282,33 +338,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
282} 338}
283 339
284/* 340/*
285 * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
286 * bits. This serves two purposes. It prevents a nasty situation in
287 * which PCID-unaware code saves CR3, loads some other value (with PCID
288 * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
289 * the saved ASID was nonzero. It also means that any bugs involving
290 * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
291 * deterministically.
292 */
293
294static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
295{
296 if (static_cpu_has(X86_FEATURE_PCID)) {
297 VM_WARN_ON_ONCE(asid > 4094);
298 return __sme_pa(mm->pgd) | (asid + 1);
299 } else {
300 VM_WARN_ON_ONCE(asid != 0);
301 return __sme_pa(mm->pgd);
302 }
303}
304
305static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
306{
307 VM_WARN_ON_ONCE(asid > 4094);
308 return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
309}
310
311/*
312 * This can be used from process context to figure out what the value of 341 * This can be used from process context to figure out what the value of
313 * CR3 is without needing to do a (slow) __read_cr3(). 342 * CR3 is without needing to do a (slow) __read_cr3().
314 * 343 *
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
317 */ 346 */
318static inline unsigned long __get_current_cr3_fast(void) 347static inline unsigned long __get_current_cr3_fast(void)
319{ 348{
320 unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), 349 unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
321 this_cpu_read(cpu_tlbstate.loaded_mm_asid)); 350 this_cpu_read(cpu_tlbstate.loaded_mm_asid));
322 351
323 /* For now, be very restrictive about when this can be called. */ 352 /* For now, be very restrictive about when this can be called. */
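
__get_current_cr3_fast() now hands build_cr3() a pgd pointer because the helper moved out of this header. A sketch of the relocated function as this call site expects it (assumption: mirrors the <asm/tlbflush.h> version in this series):

static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
	if (static_cpu_has(X86_FEATURE_PCID)) {
		VM_WARN_ON_ONCE(asid > 4094);
		/* ASID+1 keeps hardware PCID 0 for the no-PCID case. */
		return __sme_pa(pgd) | (asid + 1);
	}

	VM_WARN_ON_ONCE(asid != 0);
	return __sme_pa(pgd);
}
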
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 5400add2885b..8bf450b13d9f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
7#include <linux/nmi.h> 7#include <linux/nmi.h>
8#include <asm/io.h> 8#include <asm/io.h>
9#include <asm/hyperv.h> 9#include <asm/hyperv.h>
10#include <asm/nospec-branch.h>
10 11
11/* 12/*
12 * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent 13 * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
186 return U64_MAX; 187 return U64_MAX;
187 188
188 __asm__ __volatile__("mov %4, %%r8\n" 189 __asm__ __volatile__("mov %4, %%r8\n"
189 "call *%5" 190 CALL_NOSPEC
190 : "=a" (hv_status), ASM_CALL_CONSTRAINT, 191 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
191 "+c" (control), "+d" (input_address) 192 "+c" (control), "+d" (input_address)
192 : "r" (output_address), "m" (hv_hypercall_pg) 193 : "r" (output_address),
194 THUNK_TARGET(hv_hypercall_pg)
193 : "cc", "memory", "r8", "r9", "r10", "r11"); 195 : "cc", "memory", "r8", "r9", "r10", "r11");
194#else 196#else
195 u32 input_address_hi = upper_32_bits(input_address); 197 u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
200 if (!hv_hypercall_pg) 202 if (!hv_hypercall_pg)
201 return U64_MAX; 203 return U64_MAX;
202 204
203 __asm__ __volatile__("call *%7" 205 __asm__ __volatile__(CALL_NOSPEC
204 : "=A" (hv_status), 206 : "=A" (hv_status),
205 "+c" (input_address_lo), ASM_CALL_CONSTRAINT 207 "+c" (input_address_lo), ASM_CALL_CONSTRAINT
206 : "A" (control), 208 : "A" (control),
207 "b" (input_address_hi), 209 "b" (input_address_hi),
208 "D"(output_address_hi), "S"(output_address_lo), 210 "D"(output_address_hi), "S"(output_address_lo),
209 "m" (hv_hypercall_pg) 211 THUNK_TARGET(hv_hypercall_pg)
210 : "cc", "memory"); 212 : "cc", "memory");
211#endif /* !x86_64 */ 213#endif /* !x86_64 */
212 return hv_status; 214 return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
227 229
228#ifdef CONFIG_X86_64 230#ifdef CONFIG_X86_64
229 { 231 {
230 __asm__ __volatile__("call *%4" 232 __asm__ __volatile__(CALL_NOSPEC
231 : "=a" (hv_status), ASM_CALL_CONSTRAINT, 233 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
232 "+c" (control), "+d" (input1) 234 "+c" (control), "+d" (input1)
233 : "m" (hv_hypercall_pg) 235 : THUNK_TARGET(hv_hypercall_pg)
234 : "cc", "r8", "r9", "r10", "r11"); 236 : "cc", "r8", "r9", "r10", "r11");
235 } 237 }
236#else 238#else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
238 u32 input1_hi = upper_32_bits(input1); 240 u32 input1_hi = upper_32_bits(input1);
239 u32 input1_lo = lower_32_bits(input1); 241 u32 input1_lo = lower_32_bits(input1);
240 242
241 __asm__ __volatile__ ("call *%5" 243 __asm__ __volatile__ (CALL_NOSPEC
242 : "=A"(hv_status), 244 : "=A"(hv_status),
243 "+c"(input1_lo), 245 "+c"(input1_lo),
244 ASM_CALL_CONSTRAINT 246 ASM_CALL_CONSTRAINT
245 : "A" (control), 247 : "A" (control),
246 "b" (input1_hi), 248 "b" (input1_hi),
247 "m" (hv_hypercall_pg) 249 THUNK_TARGET(hv_hypercall_pg)
248 : "cc", "edi", "esi"); 250 : "cc", "edi", "esi");
249 } 251 }
250#endif 252#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 34c4922bbc3f..e7b983a35506 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -355,6 +355,9 @@
355#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL 355#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
356#define FAM10H_MMIO_CONF_BASE_SHIFT 20 356#define FAM10H_MMIO_CONF_BASE_SHIFT 20
357#define MSR_FAM10H_NODE_ID 0xc001100c 357#define MSR_FAM10H_NODE_ID 0xc001100c
358#define MSR_F10H_DECFG 0xc0011029
359#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
360#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
358 361
359/* K8 MSRs */ 362/* K8 MSRs */
360#define MSR_K8_TOP_MEM1 0xc001001a 363#define MSR_K8_TOP_MEM1 0xc001001a
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 000000000000..402a11c803c3
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,214 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#ifndef __NOSPEC_BRANCH_H__
4#define __NOSPEC_BRANCH_H__
5
6#include <asm/alternative.h>
7#include <asm/alternative-asm.h>
8#include <asm/cpufeatures.h>
9
10/*
11 * Fill the CPU return stack buffer.
12 *
13 * Each entry in the RSB, if used for a speculative 'ret', contains an
14 * infinite 'pause; jmp' loop to capture speculative execution.
15 *
16 * This is required in various cases for retpoline and IBRS-based
17 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
18 * eliminate potentially bogus entries from the RSB, and sometimes
19 * purely to ensure that it doesn't get empty, which on some CPUs would
20 * allow predictions from other (unwanted!) sources to be used.
21 *
22 * We define a CPP macro such that it can be used from both .S files and
23 * inline assembly. It's possible to do a .macro and then include that
24 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
25 */
26
27#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
28#define RSB_FILL_LOOPS 16 /* To avoid underflow */
29
30/*
31 * Google experimented with loop-unrolling and this turned out to be
32 * the optimal version — two calls, each with their own speculation
33 * trap should their return address end up getting used, in a loop.
34 */
35#define __FILL_RETURN_BUFFER(reg, nr, sp) \
36 mov $(nr/2), reg; \
37771: \
38 call 772f; \
39773: /* speculation trap */ \
40 pause; \
41 jmp 773b; \
42772: \
43 call 774f; \
44775: /* speculation trap */ \
45 pause; \
46 jmp 775b; \
47774: \
48 dec reg; \
49 jnz 771b; \
50 add $(BITS_PER_LONG/8) * nr, sp;
51
52#ifdef __ASSEMBLY__
53
54/*
55 * This should be used immediately before a retpoline alternative. It tells
56 * objtool where the retpolines are so that it can make sense of the control
57 * flow by just reading the original instruction(s) and ignoring the
58 * alternatives.
59 */
60.macro ANNOTATE_NOSPEC_ALTERNATIVE
61 .Lannotate_\@:
62 .pushsection .discard.nospec
63 .long .Lannotate_\@ - .
64 .popsection
65.endm
66
67/*
68 * These are the bare retpoline primitives for indirect jmp and call.
69 * Do not use these directly; they only exist to make the ALTERNATIVE
70 * invocation below less ugly.
71 */
72.macro RETPOLINE_JMP reg:req
73 call .Ldo_rop_\@
74.Lspec_trap_\@:
75 pause
76 jmp .Lspec_trap_\@
77.Ldo_rop_\@:
78 mov \reg, (%_ASM_SP)
79 ret
80.endm
81
82/*
83 * This is a wrapper around RETPOLINE_JMP so the called function in reg
84 * returns to the instruction after the macro.
85 */
86.macro RETPOLINE_CALL reg:req
87 jmp .Ldo_call_\@
88.Ldo_retpoline_jmp_\@:
89 RETPOLINE_JMP \reg
90.Ldo_call_\@:
91 call .Ldo_retpoline_jmp_\@
92.endm
93
94/*
95 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
96 * indirect jmp/call which may be susceptible to the Spectre variant 2
97 * attack.
98 */
99.macro JMP_NOSPEC reg:req
100#ifdef CONFIG_RETPOLINE
101 ANNOTATE_NOSPEC_ALTERNATIVE
102 ALTERNATIVE_2 __stringify(jmp *\reg), \
103 __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
104 __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
105#else
106 jmp *\reg
107#endif
108.endm
109
110.macro CALL_NOSPEC reg:req
111#ifdef CONFIG_RETPOLINE
112 ANNOTATE_NOSPEC_ALTERNATIVE
113 ALTERNATIVE_2 __stringify(call *\reg), \
114 __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
115 __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
116#else
117 call *\reg
118#endif
119.endm
120
121 /*
 122 * A simpler FILL_RETURN_BUFFER macro, so nobody has to use the CPP
 123 * monstrosity above by hand.
124 */
125.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
126#ifdef CONFIG_RETPOLINE
127 ANNOTATE_NOSPEC_ALTERNATIVE
128 ALTERNATIVE "jmp .Lskip_rsb_\@", \
129 __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
130 \ftr
131.Lskip_rsb_\@:
132#endif
133.endm
134
135#else /* __ASSEMBLY__ */
136
137#define ANNOTATE_NOSPEC_ALTERNATIVE \
138 "999:\n\t" \
139 ".pushsection .discard.nospec\n\t" \
140 ".long 999b - .\n\t" \
141 ".popsection\n\t"
142
143#if defined(CONFIG_X86_64) && defined(RETPOLINE)
144
145/*
146 * Since the inline asm uses the %V modifier which is only in newer GCC,
147 * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
148 */
149# define CALL_NOSPEC \
150 ANNOTATE_NOSPEC_ALTERNATIVE \
151 ALTERNATIVE( \
152 "call *%[thunk_target]\n", \
153 "call __x86_indirect_thunk_%V[thunk_target]\n", \
154 X86_FEATURE_RETPOLINE)
155# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
156
157#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
158/*
159 * For i386 we use the original ret-equivalent retpoline, because
160 * otherwise we'll run out of registers. We don't care about CET
161 * here, anyway.
162 */
163# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
164 " jmp 904f;\n" \
165 " .align 16\n" \
166 "901: call 903f;\n" \
167 "902: pause;\n" \
168 " jmp 902b;\n" \
169 " .align 16\n" \
170 "903: addl $4, %%esp;\n" \
171 " pushl %[thunk_target];\n" \
172 " ret;\n" \
173 " .align 16\n" \
174 "904: call 901b;\n", \
175 X86_FEATURE_RETPOLINE)
176
177# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
178#else /* No retpoline for C / inline asm */
179# define CALL_NOSPEC "call *%[thunk_target]\n"
180# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
181#endif
182
183/* The Spectre V2 mitigation variants */
184enum spectre_v2_mitigation {
185 SPECTRE_V2_NONE,
186 SPECTRE_V2_RETPOLINE_MINIMAL,
187 SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
188 SPECTRE_V2_RETPOLINE_GENERIC,
189 SPECTRE_V2_RETPOLINE_AMD,
190 SPECTRE_V2_IBRS,
191};
192
193/*
194 * On VMEXIT we must ensure that no RSB predictions learned in the guest
195 * can be followed in the host, by overwriting the RSB completely. Both
196 * retpoline and IBRS mitigations for Spectre v2 need this; only on future
197 * CPUs with IBRS_ATT *might* it be avoided.
198 */
199static inline void vmexit_fill_RSB(void)
200{
201#ifdef CONFIG_RETPOLINE
202 unsigned long loops = RSB_CLEAR_LOOPS / 2;
203
204 asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
205 ALTERNATIVE("jmp 910f",
206 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
207 X86_FEATURE_RETPOLINE)
208 "910:"
209 : "=&r" (loops), ASM_CALL_CONSTRAINT
210 : "r" (loops) : "memory" );
211#endif
212}
213#endif /* __ASSEMBLY__ */
214#endif /* __NOSPEC_BRANCH_H__ */
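
Tying the C-side macros together: a call site looks like the hv_do_hypercall() conversions earlier in this patch. A stripped-down 64-bit sketch (illustrative only; a real site must list every caller-saved register the callee may clobber):

static inline unsigned long call_fn_nospec(void *fn, unsigned long arg)
{
	unsigned long ret;

	/* 'arg' travels in %rdi per the SysV ABI; return lands in %rax. */
	asm volatile(CALL_NOSPEC
		     : "=a" (ret), ASM_CALL_CONSTRAINT, "+D" (arg)
		     : THUNK_TARGET(fn)
		     : "memory", "cc");
	return ret;
}
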
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8af..892df375b615 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
928 CLBR_NONE, \ 928 CLBR_NONE, \
929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
930
931#ifdef CONFIG_DEBUG_ENTRY
932#define SAVE_FLAGS(clobbers) \
933 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
934 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
935 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
936 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
937#endif
938
930#endif /* CONFIG_X86_32 */ 939#endif /* CONFIG_X86_32 */
931 940
932#endif /* __ASSEMBLY__ */ 941#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 7a5d6695abd3..eb66fa9cd0fc 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -38,6 +38,7 @@ do { \
38#define PCI_NOASSIGN_ROMS 0x80000 38#define PCI_NOASSIGN_ROMS 0x80000
39#define PCI_ROOT_NO_CRS 0x100000 39#define PCI_ROOT_NO_CRS 0x100000
40#define PCI_NOASSIGN_BARS 0x200000 40#define PCI_NOASSIGN_BARS 0x200000
41#define PCI_BIG_ROOT_WINDOW 0x400000
41 42
42extern unsigned int pci_probe; 43extern unsigned int pci_probe;
43extern unsigned long pirq_table_addr; 44extern unsigned long pirq_table_addr;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 4b5e1eafada7..aff42e1da6ee 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
30 */ 30 */
31extern gfp_t __userpte_alloc_gfp; 31extern gfp_t __userpte_alloc_gfp;
32 32
33#ifdef CONFIG_PAGE_TABLE_ISOLATION
34/*
35 * Instead of one PGD, we acquire two PGDs. Being order-1, it is
36 * both 8k in size and 8k-aligned. That lets us just flip bit 12
37 * in a pointer to swap between the two 4k halves.
38 */
39#define PGD_ALLOCATION_ORDER 1
40#else
41#define PGD_ALLOCATION_ORDER 0
42#endif
43
33/* 44/*
34 * Allocate and free page tables. 45 * Allocate and free page tables.
35 */ 46 */
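
Flipping bit 12, concretely: the order-1 allocation is 8k-aligned, so bit PAGE_SHIFT of the kernel half's address is always clear and one OR reaches the user half. A sketch matching the kernel_to_user_pgdp()/user_to_kernel_pgdp() helpers added elsewhere in this series (an assumption, not a quote):

#define PTI_PGTABLE_SWITCH_BIT	PAGE_SHIFT	/* bit 12 with 4 KiB pages */

static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
{
	/* 8k alignment guarantees this bit is clear in the kernel copy. */
	return (pgd_t *)((unsigned long)pgdp | BIT(PTI_PGTABLE_SWITCH_BIT));
}

static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
{
	return (pgd_t *)((unsigned long)pgdp & ~BIT(PTI_PGTABLE_SWITCH_BIT));
}
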
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 09f9e1e00e3b..e42b8943cb1a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
28int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); 28int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
29 29
30void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); 30void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
31void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
31void ptdump_walk_pgd_level_checkwx(void); 32void ptdump_walk_pgd_level_checkwx(void);
32 33
33#ifdef CONFIG_DEBUG_WX 34#ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
841 842
842static inline int p4d_bad(p4d_t p4d) 843static inline int p4d_bad(p4d_t p4d)
843{ 844{
844 return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; 845 unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
846
847 if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
848 ignore_flags |= _PAGE_NX;
849
850 return (p4d_flags(p4d) & ~ignore_flags) != 0;
845} 851}
846#endif /* CONFIG_PGTABLE_LEVELS > 3 */ 852#endif /* CONFIG_PGTABLE_LEVELS > 3 */
847 853
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 
 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	unsigned long ignore_flags = _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }
 
 static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
  * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
@@ -1061,7 +1076,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
 				  unsigned long address, pmd_t *pmdp);
 
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_RW;
@@ -1088,6 +1103,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 	clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
 }
 
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+	return pud_flags(pud) & _PAGE_RW;
+}
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
@@ -1100,7 +1121,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
  */
 static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
 	memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+	/* Clone the user space pgd as well */
+	memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+	       count * sizeof(pgd_t));
+#endif
 }
 
 #define PTE_SHIFT ilog2(PTRS_PER_PTE)
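For reference, a standalone sketch of what pgd_offset_pgd() ends up computing on 4-level x86-64 (PGDIR_SHIFT == 39 and 512 pgd entries are assumed here rather than taken from the header):

#include <stdint.h>
#include <stdio.h>

#define PGDIR_SHIFT	39
#define PTRS_PER_PGD	512

static unsigned int pgd_index(uint64_t address)
{
	return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

int main(void)
{
	/* a low user address lands in the bottom half of the PGD page */
	printf("pgd_index(0x0000000000400000) = %u\n",
	       pgd_index(0x0000000000400000ULL));	/* 0 */
	/* the classic direct-map base lands in the kernel half */
	printf("pgd_index(0xffff880000000000) = %u\n",
	       pgd_index(0xffff880000000000ULL));	/* 272 */
	return 0;
}

The split at entry 256 is what pgdp_maps_userspace() in pgtable_64.h keys off.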
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index f2ca9b28fd68..ce245b0cdfca 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #define LAST_PKMAP 1024
 #endif
 
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))	\
-		    & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE				\
+	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE		\
+	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
 #else
-# define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END	(CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
 #endif
 
 #define MODULES_VADDR	VMALLOC_START
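As a worked example of the layout above, the following sketch plugs in assumed values (FIXADDR_START and NR_CPUS vary by configuration; 2 MiB PMDs as with PAE are assumed), so the numbers are illustrative only:

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define PMD_MASK		(~(2UL * 1024 * 1024 - 1))
#define NR_CPUS			8UL		/* assumed */
#define FIXADDR_START		0xfff15000UL	/* assumed */

#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
#define CPU_ENTRY_AREA_BASE \
	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
#define PKMAP_BASE		((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)

int main(void)
{
	printf("CPU_ENTRY_AREA_BASE = %#lx\n", CPU_ENTRY_AREA_BASE);	/* 0xffc00000 */
	printf("PKMAP_BASE          = %#lx\n", PKMAP_BASE);		/* 0xffa00000 */
	return 0;
}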
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e9f05331e732..81462e9a34f6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
+ * the user one is in the last 4k.  To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT	PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr |= BIT(bit);
+	return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr &= ~BIT(bit);
+	return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+	return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+	return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+	return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+	return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+	unsigned long ptr = (unsigned long)__ptr;
+
+	return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return pgd;
+	return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	return pgd;
+}
+#endif
+
 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+	p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
 	*p4dp = p4d;
+#endif
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	*pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
 	*pgdp = pgd;
+#endif
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
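The pointer games above reduce to two facts: bit 12 (PAGE_SHIFT) selects which 4k half of the order-1 PGD page you are looking at, and the offset within a 4k page says whether a pgd slot covers user or kernel addresses. A standalone sketch (illustrative only; the sample pointer value is assumed):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static uintptr_t kernel_to_user(uintptr_t pgdp)
{
	return pgdp | (1UL << PAGE_SHIFT);	/* what ptr_set_bit() does */
}

static bool maps_userspace(uintptr_t ptr)
{
	/* first 256 of 512 slots (the low 2k of the 4k page) are user */
	return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
}

int main(void)
{
	uintptr_t kernel_pgd = 0x100042000UL;	/* assumed 8k-aligned */

	printf("user pgd:             %#lx\n",
	       (unsigned long)kernel_to_user(kernel_pgd));
	printf("slot 0 maps user:     %d\n", maps_userspace(kernel_pgd));
	printf("slot 256 maps kernel: %d\n", !maps_userspace(kernel_pgd + 256 * 8));
	return 0;
}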
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6d5f45dcd4a1..6b8f73dcbc2c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -75,33 +75,52 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+/*
+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ *
+ * Be very careful vs. KASLR when changing anything here. The KASLR address
+ * range must not overlap with anything except the KASAN shadow area, which
+ * is correct as KASAN disables KASLR.
+ */
+#define MAXMEM			_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+
 #ifdef CONFIG_X86_5LEVEL
-#define VMALLOC_SIZE_TB _AC(16384, UL)
-#define __VMALLOC_BASE	_AC(0xff92000000000000, UL)
-#define __VMEMMAP_BASE	_AC(0xffd4000000000000, UL)
+# define VMALLOC_SIZE_TB	_AC(12800, UL)
+# define __VMALLOC_BASE		_AC(0xffa0000000000000, UL)
+# define __VMEMMAP_BASE		_AC(0xffd4000000000000, UL)
+# define LDT_PGD_ENTRY		_AC(-112, UL)
+# define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #else
-#define VMALLOC_SIZE_TB	_AC(32, UL)
-#define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
-#define __VMEMMAP_BASE	_AC(0xffffea0000000000, UL)
+# define VMALLOC_SIZE_TB	_AC(32, UL)
+# define __VMALLOC_BASE		_AC(0xffffc90000000000, UL)
+# define __VMEMMAP_BASE		_AC(0xffffea0000000000, UL)
+# define LDT_PGD_ENTRY		_AC(-3, UL)
+# define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #endif
+
 #ifdef CONFIG_RANDOMIZE_MEMORY
-#define VMALLOC_START	vmalloc_base
-#define VMEMMAP_START	vmemmap_base
+# define VMALLOC_START		vmalloc_base
+# define VMEMMAP_START		vmemmap_base
 #else
-#define VMALLOC_START	__VMALLOC_BASE
-#define VMEMMAP_START	__VMEMMAP_BASE
+# define VMALLOC_START		__VMALLOC_BASE
+# define VMEMMAP_START		__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
-#define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR	(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+
+#define VMALLOC_END		(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+
+#define MODULES_VADDR		(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module section ends with the start of the fixmap */
-#define MODULES_END	__fix_to_virt(__end_of_fixed_addresses + 1)
-#define MODULES_LEN	(MODULES_END - MODULES_VADDR)
-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define EFI_VA_START	( -4 * (_AC(1, UL) << 30))
-#define EFI_VA_END	(-68 * (_AC(1, UL) << 30))
+#define MODULES_END		_AC(0xffffffffff000000, UL)
+#define MODULES_LEN		(MODULES_END - MODULES_VADDR)
+
+#define ESPFIX_PGD_ENTRY	_AC(-2, UL)
+#define ESPFIX_BASE_ADDR	(ESPFIX_PGD_ENTRY << P4D_SHIFT)
+
+#define CPU_ENTRY_AREA_PGD	_AC(-4, UL)
+#define CPU_ENTRY_AREA_BASE	(CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+
+#define EFI_VA_START		( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END		(-68 * (_AC(1, UL) << 30))
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
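The negative LDT_PGD_ENTRY / CPU_ENTRY_AREA_PGD values index from the top of the address space. With 4-level paging (PGDIR_SHIFT == P4D_SHIFT == 39, assumed here) the resulting bases can be checked with a few lines of C (a quick sanity sketch, not taken from the kernel):

#include <stdio.h>

int main(void)
{
	/* -n << 39 puts the base n PGD slots below the top of memory */
	printf("ESPFIX_BASE_ADDR    = %#lx\n", -2UL << 39);	/* 0xffffff0000000000 */
	printf("LDT_BASE_ADDR       = %#lx\n", -3UL << 39);	/* 0xfffffe8000000000 */
	printf("CPU_ENTRY_AREA_BASE = %#lx\n", -4UL << 39);	/* 0xfffffe0000000000 */
	return 0;
}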
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 43212a43ee69..625a52a5594f 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -38,6 +38,11 @@
 #define CR3_ADDR_MASK	__sme_clr(0x7FFFFFFFFFFFF000ull)
 #define CR3_PCID_MASK	0xFFFull
 #define CR3_NOFLUSH	BIT_ULL(63)
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define X86_CR3_PTI_PCID_USER_BIT	11
+#endif
+
 #else
 /*
  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e..d3a67fba200a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86	boot_cpu_data;
 extern struct cpuinfo_x86	new_cpu_data;
 
-extern struct tss_struct	doublefault_tss;
-extern __u32			cpu_caps_cleared[NCAPINTS];
-extern __u32			cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss	doublefault_tss;
+extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
 	write_cr3(__sme_pa(pgdir));
 }
 
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
 #ifdef CONFIG_X86_32
 /* This is the TSS defined by the hardware. */
 struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
 struct x86_hw_tss {
 	u32			reserved1;
 	u64			sp0;
+
+	/*
+	 * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+	 * Linux does not use ring 1, so sp1 is not otherwise needed.
+	 */
 	u64			sp1;
+
 	u64			sp2;
 	u64			reserved2;
 	u64			ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
 #define IO_BITMAP_BITS			65536
 #define IO_BITMAP_BYTES			(IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS			(IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET		offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET		(offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 #define INVALID_IO_BITMAP_OFFSET	0x8000
 
+struct entry_stack {
+	unsigned long		words[64];
+};
+
+struct entry_stack_page {
+	struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
 struct tss_struct {
 	/*
-	 * The hardware state:
+	 * The fixed hardware portion.  This must not cross a page boundary
+	 * at risk of violating the SDM's advice and potentially triggering
+	 * errata.
 	 */
 	struct x86_hw_tss	x86_tss;
 
@@ -339,18 +360,9 @@ struct tss_struct {
  * be within the limit.
  */
 	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
 
-#ifdef CONFIG_X86_32
-	/*
-	 * Space for the temporary SYSENTER stack.
-	 */
-	unsigned long		SYSENTER_stack_canary;
-	unsigned long		SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 
 /*
  * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 #endif
 
 /*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 static inline void
 native_load_sp0(unsigned long sp0)
 {
-	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
 
 static inline unsigned long current_top_of_stack(void)
 {
-#ifdef CONFIG_X86_64
-	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
-	/* sp0 on x86_32 is special in and around vm86 mode. */
+	/*
+	 * We can't read directly from tss.sp0: sp0 on x86_32 is special in
+	 * and around vm86 mode and sp0 on x86_64 is special because of the
+	 * entry trampoline.
+	 */
 	return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
 }
 
 static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
 
 #else
 /*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
+ * User space process size.  This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen.  This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
  */
 #define TASK_SIZE_MAX	((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 
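A quick numeric check of TASK_SIZE_MAX on a 4-level build (__VIRTUAL_MASK_SHIFT == 47 and 4k pages are assumed here):

#include <stdio.h>

int main(void)
{
	/* the last canonical user page stays unmapped as the guard */
	unsigned long task_size_max = (1UL << 47) - 4096;

	printf("TASK_SIZE_MAX = %#lx\n", task_size_max);	/* 0x7ffffffff000 */
	return 0;
}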
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
new file mode 100644
index 000000000000..0b5ef05b2d2d
--- /dev/null
+++ b/arch/x86/include/asm/pti.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _ASM_X86_PTI_H
+#define _ASM_X86_PTI_H
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern void pti_init(void);
+extern void pti_check_boottime_disable(void);
+#else
+static inline void pti_check_boottime_disable(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PTI_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index b20f9d623f9c..8f09012b92e7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -236,11 +236,23 @@
  */
 #define EARLY_IDT_HANDLER_SIZE 9
 
+/*
+ * xen_early_idt_handler_array is for Xen pv guests: for each entry in
+ * early_idt_handler_array it contains a prequel in the form of
+ * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
+ * max 8 bytes.
+ */
+#define XEN_EARLY_IDT_HANDLER_SIZE 8
+
 #ifndef __ASSEMBLY__
 
 extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
 extern void early_ignore_irq(void);
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
+#endif
+
 /*
  * Load a segment.  Fall back on loading the zero segment if something goes
  * wrong.  This variant assumes that loading zero fully clears the segment.
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 8da111b3c342..f73706878772 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
 	STACK_TYPE_TASK,
 	STACK_TYPE_IRQ,
 	STACK_TYPE_SOFTIRQ,
+	STACK_TYPE_ENTRY,
 	STACK_TYPE_EXCEPTION,
 	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
 };
@@ -28,6 +29,8 @@ struct stack_info {
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info);
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info, unsigned long *visit_mask);
 
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad33..8be6afb58471 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
 
 /* image of the saved processor state */
 struct saved_context {
-	u16 es, fs, gs, ss;
+	/*
+	 * On x86_32, all segment registers, with the possible exception of
+	 * gs, are saved at kernel entry in pt_regs.
+	 */
+#ifdef CONFIG_X86_32_LAZY_GS
+	u16 gs;
+#endif
 	unsigned long cr0, cr2, cr3, cr4;
 	u64 misc_enable;
 	bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee..a7af9f53c0cb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
  */
 struct saved_context {
 	struct pt_regs regs;
-	u16 ds, es, fs, gs, ss;
-	unsigned long gs_base, gs_kernel_base, fs_base;
+
+	/*
+	 * User CS and SS are saved in current_pt_regs().  The rest of the
+	 * segment selectors need to be saved and restored here.
+	 */
+	u16 ds, es, fs, gs;
+
+	/*
+	 * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+	 * so we save them separately.  We save the kernelmode GSBASE to
+	 * restore percpu access after resume.
+	 */
+	unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
 	unsigned long cr0, cr2, cr3, cr4, cr8;
 	u64 misc_enable;
 	bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
 	u16 gdt_pad;			/* Unused */
 	struct desc_ptr gdt_desc;
 	u16 idt_pad;
-	u16 idt_limit;
-	unsigned long idt_base;
+	struct desc_ptr idt;
 	u16 ldt;
 	u16 tss;
 	unsigned long tr;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9..eb5f7999a893 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 			   struct tss_struct *tss);
 
 /* This runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *prev,
-				     struct task_struct *next)
+static inline void prepare_switch_to(struct task_struct *next)
 {
 #ifdef CONFIG_VMAP_STACK
 	/*
@@ -70,7 +69,7 @@ struct fork_frame {
 
 #define switch_to(prev, next, last)					\
 do {									\
-	prepare_switch_to(prev, next);					\
+	prepare_switch_to(next);					\
 									\
 	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
@@ -79,10 +78,10 @@ do { \
 static inline void refresh_sysenter_cs(struct thread_struct *thread)
 {
 	/* Only happens when SEP is enabled, no need to test "SEP"arately: */
-	if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+	if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
 		return;
 
-	this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+	this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
 	wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 }
 #endif
@@ -90,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
 /* This is used when switching tasks or entering/exiting vm86 mode. */
 static inline void update_sp0(struct task_struct *task)
 {
+	/* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
 #ifdef CONFIG_X86_32
 	load_sp0(task->thread.sp0);
 #else
-	load_sp0(task_top_of_stack(task));
+	if (static_cpu_has(X86_FEATURE_XENPV))
+		load_sp0(task_top_of_stack(task));
 #endif
 }
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc5..00223333821a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
 #endif
 
 #endif
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 509046cfa5ce..d33e4a26dc7e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -9,70 +9,130 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 #include <asm/smp.h>
+#include <asm/invpcid.h>
+#include <asm/pti.h>
+#include <asm/processor-flags.h>
 
-static inline void __invpcid(unsigned long pcid, unsigned long addr,
-			     unsigned long type)
-{
-	struct { u64 d[2]; } desc = { { pcid, addr } };
+/*
+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
+ * to what is traditionally called ASID on the RISC processors.
+ *
+ * We don't use the traditional ASID implementation, where each process/mm gets
+ * its own ASID and flush/restart when we run out of ASID space.
+ *
+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
+ * that came by on this CPU, allowing cheaper switch_mm between processes on
+ * this CPU.
+ *
+ * We end up with different spaces for different things. To avoid confusion we
+ * use different names for each of them:
+ *
+ * ASID  - [0, TLB_NR_DYN_ASIDS-1]
+ *         the canonical identifier for an mm
+ *
+ * kPCID - [1, TLB_NR_DYN_ASIDS]
+ *         the value we write into the PCID part of CR3; corresponds to the
+ *         ASID+1, because PCID 0 is special.
+ *
+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ *         for KPTI each mm has two address spaces and thus needs two
+ *         PCID values, but we can still do with a single ASID denomination
+ *         for each mm. Corresponds to kPCID + 2048.
+ *
+ */
 
-	/*
-	 * The memory clobber is because the whole point is to invalidate
-	 * stale TLB entries and, especially if we're flushing global
-	 * mappings, we don't want the compiler to reorder any subsequent
-	 * memory accesses before the TLB flush.
-	 *
-	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
-	 * invpcid (%rcx), %rax in long mode.
-	 */
-	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
-		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
-}
+/* There are 12 bits of space for ASIDS in CR3 */
+#define CR3_HW_ASID_BITS		12
 
-#define INVPCID_TYPE_INDIV_ADDR		0
-#define INVPCID_TYPE_SINGLE_CTXT	1
-#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
-#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+/*
+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * user/kernel switches
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define PTI_CONSUMED_PCID_BITS	1
+#else
+# define PTI_CONSUMED_PCID_BITS	0
+#endif
 
-/* Flush all mappings for a given pcid and addr, not including globals. */
-static inline void invpcid_flush_one(unsigned long pcid,
-				     unsigned long addr)
-{
-	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-}
+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+
+/*
+ * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
+ * for them being zero-based.  Another -1 is because PCID 0 is reserved for
+ * use by non-PCID-aware users.
+ */
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
 
-/* Flush all mappings for a given PCID, not including globals. */
-static inline void invpcid_flush_single_context(unsigned long pcid)
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
+ * lines.
+ */
+#define TLB_NR_DYN_ASIDS	6
+
+/*
+ * Given @asid, compute kPCID
+ */
+static inline u16 kern_pcid(u16 asid)
 {
-	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * Make sure that the dynamic ASID space does not conflict with the
+	 * bit we are using to switch between user and kernel ASIDs.
+	 */
+	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
+
+	/*
+	 * The ASID being passed in here should have respected the
+	 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
+	 */
+	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
+#endif
+	/*
+	 * The dynamically-assigned ASIDs that get passed in are small
+	 * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
+	 * so do not bother to clear it.
+	 *
+	 * If PCID is on, ASID-aware code paths put the ASID+1 into the
+	 * PCID bits.  This serves two purposes.  It prevents a nasty
+	 * situation in which PCID-unaware code saves CR3, loads some other
+	 * value (with PCID == 0), and then restores CR3, thus corrupting
+	 * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
+	 * that any bugs involving loading a PCID-enabled CR3 with
+	 * CR4.PCIDE off will trigger deterministically.
+	 */
+	return asid + 1;
 }
 
-/* Flush all mappings, including globals, for all PCIDs. */
-static inline void invpcid_flush_all(void)
+/*
+ * Given @asid, compute uPCID
+ */
+static inline u16 user_pcid(u16 asid)
 {
-	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+	u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
+#endif
+	return ret;
 }
 
-/* Flush all mappings for all PCIDs except globals. */
-static inline void invpcid_flush_all_nonglobals(void)
+struct pgd_t;
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
-	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		return __sme_pa(pgd) | kern_pcid(asid);
+	} else {
+		VM_WARN_ON_ONCE(asid != 0);
+		return __sme_pa(pgd);
+	}
 }
 
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 {
-	u64 new_tlb_gen;
-
-	/*
-	 * Bump the generation count. This also serves as a full barrier
-	 * that synchronizes with switch_mm(): callers are required to order
-	 * their read of mm_cpumask after their writes to the paging
-	 * structures.
-	 */
-	smp_mb__before_atomic();
-	new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
-	smp_mb__after_atomic();
-
-	return new_tlb_gen;
+	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+	VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
 }
 
 #ifdef CONFIG_PARAVIRT
@@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
 	return !static_cpu_has(X86_FEATURE_PCID);
 }
 
-/*
- * 6 because 6 should be plenty and struct tlb_state will fit in
- * two cache lines.
- */
-#define TLB_NR_DYN_ASIDS	6
-
 struct tlb_context {
 	u64 ctx_id;
 	u64 tlb_gen;
@@ -139,6 +193,24 @@ struct tlb_state {
 	bool is_lazy;
 
 	/*
+	 * If set we changed the page tables in such a way that we
+	 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
+	 * This tells us to go invalidate all the non-loaded ctxs[]
+	 * on the next context switch.
+	 *
+	 * The current ctx was kept up-to-date as it ran and does not
+	 * need to be invalidated.
+	 */
+	bool invalidate_other;
+
+	/*
+	 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
+	 * the corresponding user PCID needs a flush next time we
+	 * switch to it; see SWITCH_TO_USER_CR3.
+	 */
+	unsigned short user_pcid_flush_mask;
+
+	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
 	 * disabling interrupts when modifying either one.
 	 */
@@ -173,40 +245,43 @@ static inline void cr4_init_shadow(void)
 	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
 }
 
+static inline void __cr4_set(unsigned long cr4)
+{
+	lockdep_assert_irqs_disabled();
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+	__write_cr4(cr4);
+}
+
 /* Set in this cpu's CR4. */
 static inline void cr4_set_bits(unsigned long mask)
 {
-	unsigned long cr4;
+	unsigned long cr4, flags;
 
+	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 | mask) != cr4) {
-		cr4 |= mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
-	}
+	if ((cr4 | mask) != cr4)
+		__cr4_set(cr4 | mask);
+	local_irq_restore(flags);
 }
 
 /* Clear in this cpu's CR4. */
 static inline void cr4_clear_bits(unsigned long mask)
 {
-	unsigned long cr4;
+	unsigned long cr4, flags;
 
+	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	if ((cr4 & ~mask) != cr4) {
-		cr4 &= ~mask;
-		this_cpu_write(cpu_tlbstate.cr4, cr4);
-		__write_cr4(cr4);
-	}
+	if ((cr4 & ~mask) != cr4)
+		__cr4_set(cr4 & ~mask);
+	local_irq_restore(flags);
 }
 
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
 {
 	unsigned long cr4;
 
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	cr4 ^= mask;
-	this_cpu_write(cpu_tlbstate.cr4, cr4);
-	__write_cr4(cr4);
+	__cr4_set(cr4 ^ mask);
 }
 
 /* Read the CR4 shadow. */
@@ -216,6 +291,14 @@ static inline unsigned long cr4_read_shadow(void)
 }
 
 /*
+ * Mark all other ASIDs as invalid, preserves the current.
+ */
+static inline void invalidate_other_asid(void)
+{
+	this_cpu_write(cpu_tlbstate.invalidate_other, true);
+}
+
+/*
  * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
  * enable and PPro Global page enable), so that any CPU's that boot
  * up after us can get the correct flags.  This should only be used
@@ -234,37 +317,63 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 extern void initialize_tlbstate_and_flush(void);
 
-static inline void __native_flush_tlb(void)
+/*
+ * Given an ASID, flush the corresponding user ASID.  We can delay this
+ * until the next time we switch to it.
+ *
+ * See SWITCH_TO_USER_CR3.
+ */
+static inline void invalidate_user_asid(u16 asid)
 {
+	/* There is no user ASID if address space separation is off */
+	if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		return;
+
 	/*
-	 * If current->mm == NULL then we borrow a mm which may change during a
-	 * task switch and therefore we must not be preempted while we write CR3
-	 * back:
+	 * We only have a single ASID if PCID is off and the CR3
+	 * write will have flushed it.
 	 */
-	preempt_disable();
-	native_write_cr3(__native_read_cr3());
-	preempt_enable();
+	if (!cpu_feature_enabled(X86_FEATURE_PCID))
+		return;
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	__set_bit(kern_pcid(asid),
+		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
 }
 
-static inline void __native_flush_tlb_global_irq_disabled(void)
+/*
+ * flush the entire current user mapping
+ */
+static inline void __native_flush_tlb(void)
 {
-	unsigned long cr4;
+	/*
+	 * Preemption or interrupts must be disabled to protect the access
+	 * to the per CPU variable and to prevent being preempted between
+	 * read_cr3() and write_cr3().
+	 */
+	WARN_ON_ONCE(preemptible());
 
-	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	/* clear PGE */
-	native_write_cr4(cr4 & ~X86_CR4_PGE);
-	/* write old PGE again and flush TLBs */
-	native_write_cr4(cr4);
+	invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+
+	/* If current->mm == NULL then the read_cr3() "borrows" an mm */
+	native_write_cr3(__native_read_cr3());
 }
 
+/*
+ * flush everything
+ */
 static inline void __native_flush_tlb_global(void)
 {
-	unsigned long flags;
+	unsigned long cr4, flags;
 
 	if (static_cpu_has(X86_FEATURE_INVPCID)) {
 		/*
 		 * Using INVPCID is considerably faster than a pair of writes
 		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 *
+		 * Note, this works with CR4.PCIDE=0 or 1.
 		 */
 		invpcid_flush_all();
 		return;
@@ -277,36 +386,69 @@ static inline void __native_flush_tlb_global(void)
 	 */
 	raw_local_irq_save(flags);
 
-	__native_flush_tlb_global_irq_disabled();
+	cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	/* toggle PGE */
+	native_write_cr4(cr4 ^ X86_CR4_PGE);
+	/* write old PGE again and flush TLBs */
+	native_write_cr4(cr4);
 
 	raw_local_irq_restore(flags);
 }
 
+/*
+ * flush one page in the user mapping
+ */
 static inline void __native_flush_tlb_single(unsigned long addr)
 {
+	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	/*
+	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+	 * Just use invalidate_user_asid() in case we are called early.
+	 */
+	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+		invalidate_user_asid(loaded_mm_asid);
+	else
+		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
 }
 
+/*
+ * flush everything
+ */
 static inline void __flush_tlb_all(void)
 {
-	if (boot_cpu_has(X86_FEATURE_PGE))
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		__flush_tlb_global();
-	else
+	} else {
+		/*
+		 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+		 */
 		__flush_tlb();
-
-	/*
-	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
-	 * we'd end up flushing kernel translations for the current ASID but
-	 * we might fail to flush kernel translations for other cached ASIDs.
-	 *
-	 * To avoid this issue, we force PCID off if PGE is off.
-	 */
+	}
 }
 
+/*
+ * flush one page in the kernel mapping
+ */
 static inline void __flush_tlb_one(unsigned long addr)
 {
 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
 	__flush_tlb_single(addr);
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	/*
+	 * __flush_tlb_single() will have cleared the TLB entry for this ASID,
+	 * but since kernel space is replicated across all, we must also
+	 * invalidate all others.
+	 */
+	invalidate_other_asid();
 }
 
 #define TLB_FLUSH_ALL	-1UL
@@ -367,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     const struct flush_tlb_info *info);
 
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+	/*
+	 * Bump the generation count.  This also serves as a full barrier
+	 * that synchronizes with switch_mm(): callers are required to order
+	 * their read of mm_cpumask after their writes to the paging
+	 * structures.
+	 */
+	return atomic64_inc_return(&mm->context.tlb_gen);
+}
+
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
 					struct mm_struct *mm)
 {
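To make the ASID/kPCID/uPCID naming concrete, here is a standalone sketch of the mapping and of the CR3 values build_cr3() and build_cr3_noflush() produce (illustrative only: PTI is assumed enabled, the user bit is 11 as defined above, and pgd_pa stands in for __sme_pa(pgd)):

#include <stdint.h>
#include <stdio.h>

#define X86_CR3_PTI_PCID_USER_BIT	11
#define CR3_NOFLUSH			(1ULL << 63)

static uint16_t kern_pcid(uint16_t asid)
{
	return asid + 1;			/* PCID 0 is reserved */
}

static uint16_t user_pcid(uint16_t asid)
{
	return kern_pcid(asid) | (1 << X86_CR3_PTI_PCID_USER_BIT);
}

int main(void)
{
	uint64_t pgd_pa = 0x1234000;		/* assumed page-aligned PGD */
	uint16_t asid = 3;

	printf("kPCID(%u) = %u\n", asid, kern_pcid(asid));	/* 4 */
	printf("uPCID(%u) = %u\n", asid, user_pcid(asid));	/* 2052 */
	printf("cr3         = %#llx\n",
	       (unsigned long long)(pgd_pa | kern_pcid(asid)));
	printf("cr3 noflush = %#llx\n",
	       (unsigned long long)(pgd_pa | kern_pcid(asid) | CR3_NOFLUSH));
	return 0;
}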
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 84b9ec0c1bc0..22647a642e98 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
 DECLARE_EVENT_CLASS(vector_activate,
 
 	TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
-		 bool early),
+		 bool reserve),
 
-	TP_ARGS(irq, is_managed, can_reserve, early),
+	TP_ARGS(irq, is_managed, can_reserve, reserve),
 
 	TP_STRUCT__entry(
 		__field(	unsigned int,	irq		)
 		__field(	bool,		is_managed	)
 		__field(	bool,		can_reserve	)
-		__field(	bool,		early		)
+		__field(	bool,		reserve		)
 	),
 
 	TP_fast_assign(
 		__entry->irq		= irq;
 		__entry->is_managed	= is_managed;
 		__entry->can_reserve	= can_reserve;
-		__entry->early		= early;
+		__entry->reserve	= reserve;
 	),
 
-	TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
+	TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
 		  __entry->irq, __entry->is_managed, __entry->can_reserve,
-		  __entry->early)
+		  __entry->reserve)
 );
 
 #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name)				\
 DEFINE_EVENT_FN(vector_activate, name,					\
 	TP_PROTO(unsigned int irq, bool is_managed,			\
-		 bool can_reserve, bool early),				\
-	TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL);	\
+		 bool can_reserve, bool reserve),			\
+	TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL);	\
 
 DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
 DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff68..31051f35cbb7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9cc6fe1fc6f..1f86e1b0a5cd 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
+#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
+
 struct unwind_state {
 	struct stack_info stack_info;
 	unsigned long stack_mask;
@@ -52,15 +55,28 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 }
 
 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+/*
+ * If 'partial' returns true, only the iret frame registers are valid.
+ */
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+						    bool *partial)
 {
 	if (unwind_done(state))
 		return NULL;
 
+	if (partial) {
+#ifdef CONFIG_UNWINDER_ORC
+		*partial = !state->full_regs;
+#else
+		*partial = false;
+#endif
+	}
+
 	return state->regs;
 }
 #else
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+						    bool *partial)
 {
 	return NULL;
 }
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d9a7c659009c..b986b2ca688a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 
 /*
  * Called on instruction fetch fault in vsyscall page.
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 7cb282e9e587..bfd882617613 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/smap.h>
+#include <asm/nospec-branch.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
 	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
 
 	stac();
-	asm volatile("call *%[call]"
+	asm volatile(CALL_NOSPEC
 		     : __HYPERCALL_5PARAM
-		     : [call] "a" (&hypercall_page[call])
+		     : [thunk_target] "a" (&hypercall_page[call])
 		     : __HYPERCALL_CLOBBER5);
 	clac();
 
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index da1489cb64dc..1e901e421f2d 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bpf_perf_event.h
 generated-y += unistd_32.h
 generated-y += unistd_64.h
 generated-y += unistd_x32.h
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 7e1e730396ae..bcba3c643e63 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -78,7 +78,12 @@
 #define X86_CR3_PWT		_BITUL(X86_CR3_PWT_BIT)
 #define X86_CR3_PCD_BIT		4 /* Page Cache Disable */
 #define X86_CR3_PCD		_BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK	_AC(0x00000fff,UL) /* PCID Mask */
+
+#define X86_CR3_PCID_BITS	12
+#define X86_CR3_PCID_MASK	(_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
 
 /*
  * Intel CPU features in CR4
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index dbaf14d69ebd..4817d743c263 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -344,9 +344,12 @@ done:
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
 	unsigned long flags;
+	int i;
 
-	if (instr[0] != 0x90)
-		return;
+	for (i = 0; i < a->padlen; i++) {
+		if (instr[i] != 0x90)
+			return;
+	}
 
 	local_irq_save(flags);
 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e272f3ea984..880441f24146 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg)
 		apic_verbosity = APIC_DEBUG;
 	else if (strcmp("verbose", arg) == 0)
 		apic_verbosity = APIC_VERBOSE;
+#ifdef CONFIG_X86_64
 	else {
 		pr_warning("APIC Verbosity level %s not recognised"
 			   " use apic=verbose or apic=debug\n", arg);
 		return -EINVAL;
 	}
+#endif
 
 	return 0;
 }
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index aa85690e9b64..25a87028cb3f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = {
 	.apic_id_valid			= default_apic_id_valid,
 	.apic_id_registered		= flat_apic_id_registered,
 
-	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 1, /* logical */
 
 	.disable_esr			= 0,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 7b659c4480c9..5078b5ce63a7 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = {
 	.apic_id_valid			= default_apic_id_valid,
 	.apic_id_registered		= noop_apic_id_registered,
 
-	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_delivery_mode		= dest_Fixed,
 	/* logical delivery broadcast to all CPUs: */
 	.irq_dest_mode			= 1,
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 201579dc5242..8a7963421460 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 }
 
 int mp_irqdomain_activate(struct irq_domain *domain,
-			  struct irq_data *irq_data, bool early)
+			  struct irq_data *irq_data, bool reserve)
 {
 	unsigned long flags;
 
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 9b18be764422..ce503c99f5c4 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 		((apic->irq_dest_mode == 0) ?
 			MSI_ADDR_DEST_MODE_PHYSICAL :
 			MSI_ADDR_DEST_MODE_LOGICAL) |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_ADDR_REDIRECTION_CPU :
-			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_REDIRECTION_CPU |
 		MSI_ADDR_DEST_ID(cfg->dest_apicid);
 
 	msg->data =
 		MSI_DATA_TRIGGER_EDGE |
 		MSI_DATA_LEVEL_ASSERT |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_DATA_DELIVERY_FIXED :
-			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_DELIVERY_FIXED |
 		MSI_DATA_VECTOR(cfg->vector);
 }
 
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fa22017de806..02e8acb134f8 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = {
 	.apic_id_valid			= default_apic_id_valid,
 	.apic_id_registered		= default_apic_id_registered,
 
-	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_delivery_mode		= dest_Fixed,
 	/* logical delivery broadcast to all CPUs: */
 	.irq_dest_mode			= 1,
 
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6a823a25eaff..f8b03bb8e725 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
184 irq_matrix_reserve(vector_matrix); 184 irq_matrix_reserve(vector_matrix);
185 apicd->can_reserve = true; 185 apicd->can_reserve = true;
186 apicd->has_reserved = true; 186 apicd->has_reserved = true;
187 irqd_set_can_reserve(irqd);
187 trace_vector_reserve(irqd->irq, 0); 188 trace_vector_reserve(irqd->irq, 0);
188 vector_assign_managed_shutdown(irqd); 189 vector_assign_managed_shutdown(irqd);
189} 190}
@@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
368 int ret; 369 int ret;
369 370
370 ret = assign_irq_vector_any_locked(irqd); 371 ret = assign_irq_vector_any_locked(irqd);
371 if (!ret) 372 if (!ret) {
372 apicd->has_reserved = false; 373 apicd->has_reserved = false;
374 /*
375 * Core might have disabled reservation mode after
376 * allocating the irq descriptor. Ideally this should
377 * happen before allocation time, but that would require
378 * completely convoluted ways of transporting that
379 * information.
380 */
381 if (!irqd_can_reserve(irqd))
382 apicd->can_reserve = false;
383 }
373 return ret; 384 return ret;
374} 385}
375 386
@@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
398} 409}
399 410
400static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, 411static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
401 bool early) 412 bool reserve)
402{ 413{
403 struct apic_chip_data *apicd = apic_chip_data(irqd); 414 struct apic_chip_data *apicd = apic_chip_data(irqd);
404 unsigned long flags; 415 unsigned long flags;
405 int ret = 0; 416 int ret = 0;
406 417
407 trace_vector_activate(irqd->irq, apicd->is_managed, 418 trace_vector_activate(irqd->irq, apicd->is_managed,
408 apicd->can_reserve, early); 419 apicd->can_reserve, reserve);
409 420
410 /* Nothing to do for fixed assigned vectors */ 421 /* Nothing to do for fixed assigned vectors */
411 if (!apicd->can_reserve && !apicd->is_managed) 422 if (!apicd->can_reserve && !apicd->is_managed)
412 return 0; 423 return 0;
413 424
414 raw_spin_lock_irqsave(&vector_lock, flags); 425 raw_spin_lock_irqsave(&vector_lock, flags);
415 if (early || irqd_is_managed_and_shutdown(irqd)) 426 if (reserve || irqd_is_managed_and_shutdown(irqd))
416 vector_assign_managed_shutdown(irqd); 427 vector_assign_managed_shutdown(irqd);
417 else if (apicd->is_managed) 428 else if (apicd->is_managed)
418 ret = activate_managed(irqd); 429 ret = activate_managed(irqd);
@@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
478 } else { 489 } else {
479 /* Release the vector */ 490 /* Release the vector */
480 apicd->can_reserve = true; 491 apicd->can_reserve = true;
492 irqd_set_can_reserve(irqd);
481 clear_irq_vector(irqd); 493 clear_irq_vector(irqd);
482 realloc = true; 494 realloc = true;
483 } 495 }
@@ -542,8 +554,8 @@ error:
542} 554}
543 555
544#ifdef CONFIG_GENERIC_IRQ_DEBUGFS 556#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
545void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, 557static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
546 struct irq_data *irqd, int ind) 558 struct irq_data *irqd, int ind)
547{ 559{
548 unsigned int cpu, vector, prev_cpu, prev_vector; 560 unsigned int cpu, vector, prev_cpu, prev_vector;
549 struct apic_chip_data *apicd; 561 struct apic_chip_data *apicd;
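
The irqd_set_can_reserve() calls added above keep the generic irqdata flag
and apicd->can_reserve in sync; activate_reserved() then mirrors the final
state back, since the core may have turned reservation mode off after the
descriptor was allocated. Condensed from the hunk above:

	/* After a successful late assignment, honour a reservation-mode
	 * change made by the core in the meantime. */
	if (!irqd_can_reserve(irqd))
		apicd->can_reserve = false;
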
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 622f13ca8a94..8b04234e010b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
184 .apic_id_valid = x2apic_apic_id_valid, 184 .apic_id_valid = x2apic_apic_id_valid,
185 .apic_id_registered = x2apic_apic_id_registered, 185 .apic_id_registered = x2apic_apic_id_registered,
186 186
187 .irq_delivery_mode = dest_LowestPrio, 187 .irq_delivery_mode = dest_Fixed,
188 .irq_dest_mode = 1, /* logical */ 188 .irq_dest_mode = 1, /* logical */
189 189
190 .disable_esr = 0, 190 .disable_esr = 0,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8ea78275480d..76417a9aab73 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -17,6 +17,7 @@
17#include <asm/sigframe.h> 17#include <asm/sigframe.h>
18#include <asm/bootparam.h> 18#include <asm/bootparam.h>
19#include <asm/suspend.h> 19#include <asm/suspend.h>
20#include <asm/tlbflush.h>
20 21
21#ifdef CONFIG_XEN 22#ifdef CONFIG_XEN
22#include <xen/interface/xen.h> 23#include <xen/interface/xen.h>
@@ -93,4 +94,13 @@ void common(void) {
93 94
94 BLANK(); 95 BLANK();
95 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); 96 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
97
98 /* TLB state for the entry code */
99 OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
100
101 /* Layout info for cpu_entry_area */
102 OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
103 OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
104 OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
105 DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
96} 106}
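
For readers unfamiliar with asm-offsets.c: OFFSET()/DEFINE() emit no code.
They expand to .ascii markers that Kbuild scrapes out of the compiled file
into asm-offsets.h, making the constants visible to assembly. Simplified
from include/linux/kbuild.h (trimmed, not verbatim):

	#define DEFINE(sym, val) \
		asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
	#define OFFSET(sym, str, mem) \
		DEFINE(sym, offsetof(struct str, mem))
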
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dedf428b20b6..fa1261eefa16 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
47 BLANK(); 47 BLANK();
48 48
49 /* Offset from the sysenter stack to tss.sp0 */ 49 /* Offset from the sysenter stack to tss.sp0 */
50 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - 50 DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
51 offsetofend(struct tss_struct, SYSENTER_stack)); 51 offsetofend(struct cpu_entry_area, entry_stack_page.stack));
52
53 /* Offset from cpu_tss to SYSENTER_stack */
54 OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
55 /* Size of SYSENTER_stack */
56 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
57 52
58#ifdef CONFIG_CC_STACKPROTECTOR 53#ifdef CONFIG_CC_STACKPROTECTOR
59 BLANK(); 54 BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9d..bf51e51d808d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
23#ifdef CONFIG_PARAVIRT 23#ifdef CONFIG_PARAVIRT
24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); 24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
26#ifdef CONFIG_DEBUG_ENTRY
27 OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
28#endif
26 BLANK(); 29 BLANK();
27#endif 30#endif
28 31
@@ -63,6 +66,7 @@ int main(void)
63 66
64 OFFSET(TSS_ist, tss_struct, x86_tss.ist); 67 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
65 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); 68 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
69 OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
66 BLANK(); 70 BLANK();
67 71
68#ifdef CONFIG_CC_STACKPROTECTOR 72#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..ea831c858195 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
804 case 0x17: init_amd_zn(c); break; 804 case 0x17: init_amd_zn(c); break;
805 } 805 }
806 806
807 /* Enable workaround for FXSAVE leak */ 807 /*
808 if (c->x86 >= 6) 808 * Enable workaround for FXSAVE leak on CPUs
 809 * without an XSaveErPtr feature
810 */
811 if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
809 set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); 812 set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
810 813
811 cpu_detect_cache_sizes(c); 814 cpu_detect_cache_sizes(c);
@@ -826,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
826 set_cpu_cap(c, X86_FEATURE_K8); 829 set_cpu_cap(c, X86_FEATURE_K8);
827 830
828 if (cpu_has(c, X86_FEATURE_XMM2)) { 831 if (cpu_has(c, X86_FEATURE_XMM2)) {
829 /* MFENCE stops RDTSC speculation */ 832 unsigned long long val;
830 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); 833 int ret;
834
835 /*
836 * A serializing LFENCE has less overhead than MFENCE, so
837 * use it for execution serialization. On families which
838 * don't have that MSR, LFENCE is already serializing.
839 * msr_set_bit() uses the safe accessors, too, even if the MSR
840 * is not present.
841 */
842 msr_set_bit(MSR_F10H_DECFG,
843 MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
844
845 /*
846 * Verify that the MSR write was successful (could be running
847 * under a hypervisor) and only then assume that LFENCE is
848 * serializing.
849 */
850 ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
851 if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
852 /* A serializing LFENCE stops RDTSC speculation */
853 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
854 } else {
855 /* MFENCE stops RDTSC speculation */
856 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
857 }
831 } 858 }
832 859
833 /* 860 /*
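
The set-then-verify sequence above guards against hypervisors that silently
discard the MSR write. Boiled down (same MSR and bit names as the hunk,
surrounding logic elided):

	unsigned long long val;

	msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
	/* Trust LFENCE only if the bit actually stuck. */
	if (!rdmsrl_safe(MSR_F10H_DECFG, &val) &&
	    (val & MSR_F10H_DECFG_LFENCE_SERIALIZE))
		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
	else
		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
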
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ba0b2424c9b0..e4dc26185aa7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,10 @@
10 */ 10 */
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/utsname.h> 12#include <linux/utsname.h>
13#include <linux/cpu.h>
14
15#include <asm/nospec-branch.h>
16#include <asm/cmdline.h>
13#include <asm/bugs.h> 17#include <asm/bugs.h>
14#include <asm/processor.h> 18#include <asm/processor.h>
15#include <asm/processor-flags.h> 19#include <asm/processor-flags.h>
@@ -20,6 +24,8 @@
20#include <asm/pgtable.h> 24#include <asm/pgtable.h>
21#include <asm/set_memory.h> 25#include <asm/set_memory.h>
22 26
27static void __init spectre_v2_select_mitigation(void);
28
23void __init check_bugs(void) 29void __init check_bugs(void)
24{ 30{
25 identify_boot_cpu(); 31 identify_boot_cpu();
@@ -29,6 +35,9 @@ void __init check_bugs(void)
29 print_cpu_info(&boot_cpu_data); 35 print_cpu_info(&boot_cpu_data);
30 } 36 }
31 37
38 /* Select the proper spectre mitigation before patching alternatives */
39 spectre_v2_select_mitigation();
40
32#ifdef CONFIG_X86_32 41#ifdef CONFIG_X86_32
33 /* 42 /*
34 * Check whether we are able to run this kernel safely on SMP. 43 * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +69,179 @@ void __init check_bugs(void)
60 set_memory_4k((unsigned long)__va(0), 1); 69 set_memory_4k((unsigned long)__va(0), 1);
61#endif 70#endif
62} 71}
72
73/* The kernel command line selection */
74enum spectre_v2_mitigation_cmd {
75 SPECTRE_V2_CMD_NONE,
76 SPECTRE_V2_CMD_AUTO,
77 SPECTRE_V2_CMD_FORCE,
78 SPECTRE_V2_CMD_RETPOLINE,
79 SPECTRE_V2_CMD_RETPOLINE_GENERIC,
80 SPECTRE_V2_CMD_RETPOLINE_AMD,
81};
82
83static const char *spectre_v2_strings[] = {
84 [SPECTRE_V2_NONE] = "Vulnerable",
85 [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
86 [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
87 [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
88 [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
89};
90
91#undef pr_fmt
92#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
93
94static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
95
96static void __init spec2_print_if_insecure(const char *reason)
97{
98 if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
99 pr_info("%s\n", reason);
100}
101
102static void __init spec2_print_if_secure(const char *reason)
103{
104 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
105 pr_info("%s\n", reason);
106}
107
108static inline bool retp_compiler(void)
109{
110 return __is_defined(RETPOLINE);
111}
112
113static inline bool match_option(const char *arg, int arglen, const char *opt)
114{
115 int len = strlen(opt);
116
117 return len == arglen && !strncmp(arg, opt, len);
118}
119
120static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
121{
122 char arg[20];
123 int ret;
124
125 ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
126 sizeof(arg));
127 if (ret > 0) {
128 if (match_option(arg, ret, "off")) {
129 goto disable;
130 } else if (match_option(arg, ret, "on")) {
131 spec2_print_if_secure("force enabled on command line.");
132 return SPECTRE_V2_CMD_FORCE;
133 } else if (match_option(arg, ret, "retpoline")) {
134 spec2_print_if_insecure("retpoline selected on command line.");
135 return SPECTRE_V2_CMD_RETPOLINE;
136 } else if (match_option(arg, ret, "retpoline,amd")) {
137 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
138 pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
139 return SPECTRE_V2_CMD_AUTO;
140 }
141 spec2_print_if_insecure("AMD retpoline selected on command line.");
142 return SPECTRE_V2_CMD_RETPOLINE_AMD;
143 } else if (match_option(arg, ret, "retpoline,generic")) {
144 spec2_print_if_insecure("generic retpoline selected on command line.");
145 return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
146 } else if (match_option(arg, ret, "auto")) {
147 return SPECTRE_V2_CMD_AUTO;
148 }
149 }
150
151 if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
152 return SPECTRE_V2_CMD_AUTO;
153disable:
154 spec2_print_if_insecure("disabled on command line.");
155 return SPECTRE_V2_CMD_NONE;
156}
157
158static void __init spectre_v2_select_mitigation(void)
159{
160 enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
161 enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
162
163 /*
164 * If the CPU is not affected and the command line mode is NONE or AUTO
165 * then nothing to do.
166 */
167 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
168 (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
169 return;
170
171 switch (cmd) {
172 case SPECTRE_V2_CMD_NONE:
173 return;
174
175 case SPECTRE_V2_CMD_FORCE:
 176 /* FALLTHRU */
177 case SPECTRE_V2_CMD_AUTO:
178 goto retpoline_auto;
179
180 case SPECTRE_V2_CMD_RETPOLINE_AMD:
181 if (IS_ENABLED(CONFIG_RETPOLINE))
182 goto retpoline_amd;
183 break;
184 case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
185 if (IS_ENABLED(CONFIG_RETPOLINE))
186 goto retpoline_generic;
187 break;
188 case SPECTRE_V2_CMD_RETPOLINE:
189 if (IS_ENABLED(CONFIG_RETPOLINE))
190 goto retpoline_auto;
191 break;
192 }
193 pr_err("kernel not compiled with retpoline; no mitigation available!");
194 return;
195
196retpoline_auto:
197 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
198 retpoline_amd:
199 if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
200 pr_err("LFENCE not serializing. Switching to generic retpoline\n");
201 goto retpoline_generic;
202 }
203 mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
204 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
205 setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
206 setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
207 } else {
208 retpoline_generic:
209 mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
210 SPECTRE_V2_RETPOLINE_MINIMAL;
211 setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
212 }
213
214 spectre_v2_enabled = mode;
215 pr_info("%s\n", spectre_v2_strings[mode]);
216}
217
218#undef pr_fmt
219
220#ifdef CONFIG_SYSFS
221ssize_t cpu_show_meltdown(struct device *dev,
222 struct device_attribute *attr, char *buf)
223{
224 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
225 return sprintf(buf, "Not affected\n");
226 if (boot_cpu_has(X86_FEATURE_PTI))
227 return sprintf(buf, "Mitigation: PTI\n");
228 return sprintf(buf, "Vulnerable\n");
229}
230
231ssize_t cpu_show_spectre_v1(struct device *dev,
232 struct device_attribute *attr, char *buf)
233{
234 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
235 return sprintf(buf, "Not affected\n");
236 return sprintf(buf, "Vulnerable\n");
237}
238
239ssize_t cpu_show_spectre_v2(struct device *dev,
240 struct device_attribute *attr, char *buf)
241{
242 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
243 return sprintf(buf, "Not affected\n");
244
245 return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
246}
247#endif
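
cmdline_find_option() copies the option's value into the buffer and returns
its length, so match_option() has to compare lengths before strncmp() to
avoid prefix matches. A minimal usage sketch (names from the hunk above):

	char arg[20];
	int ret = cmdline_find_option(boot_command_line, "spectre_v2",
				      arg, sizeof(arg));

	/* ret > 0: arg holds ret bytes of the value, not NUL-terminated. */
	if (ret > 0 && match_option(arg, ret, "retpoline")) {
		/* exact match only; "retpoline,amd" is longer and fails */
	}
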
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..ef29ad001991 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
476 return NULL; /* Not found */ 476 return NULL; /* Not found */
477} 477}
478 478
479__u32 cpu_caps_cleared[NCAPINTS]; 479__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
480__u32 cpu_caps_set[NCAPINTS]; 480__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
481 481
482void load_percpu_segment(int cpu) 482void load_percpu_segment(int cpu)
483{ 483{
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
490 load_stack_canary_segment(); 490 load_stack_canary_segment();
491} 491}
492 492
493/* Setup the fixmap mapping only once per-processor */ 493#ifdef CONFIG_X86_32
494static inline void setup_fixmap_gdt(int cpu) 494/* The 32-bit entry code needs to find cpu_entry_area. */
495{ 495DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
496#ifdef CONFIG_X86_64
497 /* On 64-bit systems, we use a read-only fixmap GDT. */
498 pgprot_t prot = PAGE_KERNEL_RO;
499#else
500 /*
501 * On native 32-bit systems, the GDT cannot be read-only because
502 * our double fault handler uses a task gate, and entering through
503 * a task gate needs to change an available TSS to busy. If the GDT
504 * is read-only, that will triple fault.
505 *
506 * On Xen PV, the GDT must be read-only because the hypervisor requires
507 * it.
508 */
509 pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
510 PAGE_KERNEL_RO : PAGE_KERNEL;
511#endif 496#endif
512 497
513 __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); 498#ifdef CONFIG_X86_64
514} 499/*
500 * Special IST stacks which the CPU switches to when it calls
501 * an IST-marked descriptor entry. Up to 7 stacks (hardware
502 * limit), all of them are 4K, except the debug stack which
503 * is 8K.
504 */
505static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
506 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
507 [DEBUG_STACK - 1] = DEBUG_STKSZ
508};
509#endif
515 510
516/* Load the original GDT from the per-cpu structure */ 511/* Load the original GDT from the per-cpu structure */
517void load_direct_gdt(int cpu) 512void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
747{ 742{
748 int i; 743 int i;
749 744
750 for (i = 0; i < NCAPINTS; i++) { 745 for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
751 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 746 c->x86_capability[i] &= ~cpu_caps_cleared[i];
752 c->x86_capability[i] |= cpu_caps_set[i]; 747 c->x86_capability[i] |= cpu_caps_set[i];
753 } 748 }
@@ -927,6 +922,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
927 } 922 }
928 923
929 setup_force_cpu_cap(X86_FEATURE_ALWAYS); 924 setup_force_cpu_cap(X86_FEATURE_ALWAYS);
925
926 if (c->x86_vendor != X86_VENDOR_AMD)
927 setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
928
929 setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
930 setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
931
930 fpu__init_system(c); 932 fpu__init_system(c);
931 933
932#ifdef CONFIG_X86_32 934#ifdef CONFIG_X86_32
@@ -1250,7 +1252,7 @@ void enable_sep_cpu(void)
1250 return; 1252 return;
1251 1253
1252 cpu = get_cpu(); 1254 cpu = get_cpu();
1253 tss = &per_cpu(cpu_tss, cpu); 1255 tss = &per_cpu(cpu_tss_rw, cpu);
1254 1256
1255 /* 1257 /*
1256 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- 1258 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1261,7 @@ void enable_sep_cpu(void)
1259 1261
1260 tss->x86_tss.ss1 = __KERNEL_CS; 1262 tss->x86_tss.ss1 = __KERNEL_CS;
1261 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); 1263 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
1262 1264 wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
1263 wrmsr(MSR_IA32_SYSENTER_ESP,
1264 (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
1265 0);
1266
1267 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); 1265 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
1268 1266
1269 put_cpu(); 1267 put_cpu();
@@ -1357,25 +1355,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
1357DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 1355DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1358EXPORT_PER_CPU_SYMBOL(__preempt_count); 1356EXPORT_PER_CPU_SYMBOL(__preempt_count);
1359 1357
1360/*
1361 * Special IST stacks which the CPU switches to when it calls
1362 * an IST-marked descriptor entry. Up to 7 stacks (hardware
1363 * limit), all of them are 4K, except the debug stack which
1364 * is 8K.
1365 */
1366static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1367 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1368 [DEBUG_STACK - 1] = DEBUG_STKSZ
1369};
1370
1371static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1372 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
1373
1374/* May not be marked __init: used by software suspend */ 1358/* May not be marked __init: used by software suspend */
1375void syscall_init(void) 1359void syscall_init(void)
1376{ 1360{
1361 extern char _entry_trampoline[];
1362 extern char entry_SYSCALL_64_trampoline[];
1363
1364 int cpu = smp_processor_id();
1365 unsigned long SYSCALL64_entry_trampoline =
1366 (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
1367 (entry_SYSCALL_64_trampoline - _entry_trampoline);
1368
1377 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); 1369 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1378 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); 1370 if (static_cpu_has(X86_FEATURE_PTI))
1371 wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
1372 else
1373 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
1379 1374
1380#ifdef CONFIG_IA32_EMULATION 1375#ifdef CONFIG_IA32_EMULATION
1381 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); 1376 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1381,7 @@ void syscall_init(void)
1386 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 1381 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
1387 */ 1382 */
1388 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 1383 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
1389 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); 1384 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
1390 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); 1385 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
1391#else 1386#else
1392 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); 1387 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1525,7 @@ void cpu_init(void)
1530 if (cpu) 1525 if (cpu)
1531 load_ucode_ap(); 1526 load_ucode_ap();
1532 1527
1533 t = &per_cpu(cpu_tss, cpu); 1528 t = &per_cpu(cpu_tss_rw, cpu);
1534 oist = &per_cpu(orig_ist, cpu); 1529 oist = &per_cpu(orig_ist, cpu);
1535 1530
1536#ifdef CONFIG_NUMA 1531#ifdef CONFIG_NUMA
@@ -1569,7 +1564,7 @@ void cpu_init(void)
1569 * set up and load the per-CPU TSS 1564 * set up and load the per-CPU TSS
1570 */ 1565 */
1571 if (!oist->ist[0]) { 1566 if (!oist->ist[0]) {
1572 char *estacks = per_cpu(exception_stacks, cpu); 1567 char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
1573 1568
1574 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1569 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1575 estacks += exception_stack_sizes[v]; 1570 estacks += exception_stack_sizes[v];
@@ -1580,7 +1575,7 @@ void cpu_init(void)
1580 } 1575 }
1581 } 1576 }
1582 1577
1583 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1578 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1584 1579
1585 /* 1580 /*
1586 * <= is required because the CPU will access up to 1581 * <= is required because the CPU will access up to
@@ -1596,11 +1591,12 @@ void cpu_init(void)
1596 enter_lazy_tlb(&init_mm, me); 1591 enter_lazy_tlb(&init_mm, me);
1597 1592
1598 /* 1593 /*
1599 * Initialize the TSS. Don't bother initializing sp0, as the initial 1594 * Initialize the TSS. sp0 points to the entry trampoline stack
1600 * task never enters user mode. 1595 * regardless of what task is running.
1601 */ 1596 */
1602 set_tss_desc(cpu, t); 1597 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1603 load_TR_desc(); 1598 load_TR_desc();
1599 load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
1604 1600
1605 load_mm_ldt(&init_mm); 1601 load_mm_ldt(&init_mm);
1606 1602
@@ -1612,7 +1608,6 @@ void cpu_init(void)
1612 if (is_uv_system()) 1608 if (is_uv_system())
1613 uv_cpu_init(); 1609 uv_cpu_init();
1614 1610
1615 setup_fixmap_gdt(cpu);
1616 load_fixmap_gdt(cpu); 1611 load_fixmap_gdt(cpu);
1617} 1612}
1618 1613
@@ -1622,7 +1617,7 @@ void cpu_init(void)
1622{ 1617{
1623 int cpu = smp_processor_id(); 1618 int cpu = smp_processor_id();
1624 struct task_struct *curr = current; 1619 struct task_struct *curr = current;
1625 struct tss_struct *t = &per_cpu(cpu_tss, cpu); 1620 struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
1626 1621
1627 wait_for_master_cpu(cpu); 1622 wait_for_master_cpu(cpu);
1628 1623
@@ -1657,12 +1652,12 @@ void cpu_init(void)
1657 * Initialize the TSS. Don't bother initializing sp0, as the initial 1652 * Initialize the TSS. Don't bother initializing sp0, as the initial
1658 * task never enters user mode. 1653 * task never enters user mode.
1659 */ 1654 */
1660 set_tss_desc(cpu, t); 1655 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1661 load_TR_desc(); 1656 load_TR_desc();
1662 1657
1663 load_mm_ldt(&init_mm); 1658 load_mm_ldt(&init_mm);
1664 1659
1665 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1660 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1666 1661
1667#ifdef CONFIG_DOUBLEFAULT 1662#ifdef CONFIG_DOUBLEFAULT
1668 /* Set up doublefault TSS pointer in the GDT */ 1663 /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1669,6 @@ void cpu_init(void)
1674 1669
1675 fpu__init_cpu(); 1670 fpu__init_cpu();
1676 1671
1677 setup_fixmap_gdt(cpu);
1678 load_fixmap_gdt(cpu); 1672 load_fixmap_gdt(cpu);
1679} 1673}
1680#endif 1674#endif
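
Under PTI the LSTAR MSR must point at an address that stays mapped with the
user page tables, so the entry point's offset within the trampoline page is
rebased onto its cpu_entry_area alias. The arithmetic, spelled out with the
names from the hunk above:

	/* Offset of the SYSCALL entry inside the trampoline page ... */
	unsigned long off = entry_SYSCALL_64_trampoline - _entry_trampoline;
	/* ... applied to the per-CPU alias mapped in both page tables. */
	unsigned long lstar =
		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + off;
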
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
470#define F14H_MPB_MAX_SIZE 1824 470#define F14H_MPB_MAX_SIZE 1824
471#define F15H_MPB_MAX_SIZE 4096 471#define F15H_MPB_MAX_SIZE 4096
472#define F16H_MPB_MAX_SIZE 3458 472#define F16H_MPB_MAX_SIZE 3458
473#define F17H_MPB_MAX_SIZE 3200
473 474
474 switch (family) { 475 switch (family) {
475 case 0x14: 476 case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
481 case 0x16: 482 case 0x16:
482 max_size = F16H_MPB_MAX_SIZE; 483 max_size = F16H_MPB_MAX_SIZE;
483 break; 484 break;
485 case 0x17:
486 max_size = F17H_MPB_MAX_SIZE;
487 break;
484 default: 488 default:
485 max_size = F1XH_MPB_MAX_SIZE; 489 max_size = F1XH_MPB_MAX_SIZE;
486 break; 490 break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..d9e460fc7a3b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
565} 565}
566#else 566#else
567 567
568/*
569 * Flush global tlb. We only do this in x86_64 where paging has been enabled
570 * already and PGE should be enabled as well.
571 */
572static inline void flush_tlb_early(void)
573{
574 __native_flush_tlb_global_irq_disabled();
575}
576
577static inline void print_ucode(struct ucode_cpu_info *uci) 568static inline void print_ucode(struct ucode_cpu_info *uci)
578{ 569{
579 struct microcode_intel *mc; 570 struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
602 if (rev != mc->hdr.rev) 593 if (rev != mc->hdr.rev)
603 return -1; 594 return -1;
604 595
605#ifdef CONFIG_X86_64
606 /* Flush global tlb. This is precaution. */
607 flush_tlb_early();
608#endif
609 uci->cpu_sig.rev = rev; 596 uci->cpu_sig.rev = rev;
610 597
611 if (early) 598 if (early)
@@ -923,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
923{ 910{
924 struct cpuinfo_x86 *c = &cpu_data(cpu); 911 struct cpuinfo_x86 *c = &cpu_data(cpu);
925 912
926 if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { 913 /*
927 pr_err_once("late loading on model 79 is disabled.\n"); 914 * Late loading on model 79 with microcode revision less than 0x0b000021
915 * may result in a system hang. This behavior is documented in item
916 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
917 */
918 if (c->x86 == 6 &&
919 c->x86_model == INTEL_FAM6_BROADWELL_X &&
920 c->x86_mask == 0x01 &&
921 c->microcode < 0x0b000021) {
922 pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
923 pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
928 return true; 924 return true;
929 } 925 }
930 926
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0e662c55ae90..0b8cedb20d6d 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
50 cpu_relax(); 50 cpu_relax();
51} 51}
52 52
53struct tss_struct doublefault_tss __cacheline_aligned = { 53struct x86_hw_tss doublefault_tss __cacheline_aligned = {
54 .x86_tss = { 54 .sp0 = STACK_START,
55 .sp0 = STACK_START, 55 .ss0 = __KERNEL_DS,
56 .ss0 = __KERNEL_DS, 56 .ldt = 0,
57 .ldt = 0, 57 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
58 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, 58
59 59 .ip = (unsigned long) doublefault_fn,
60 .ip = (unsigned long) doublefault_fn, 60 /* 0x2 bit is always set */
61 /* 0x2 bit is always set */ 61 .flags = X86_EFLAGS_SF | 0x2,
62 .flags = X86_EFLAGS_SF | 0x2, 62 .sp = STACK_START,
63 .sp = STACK_START, 63 .es = __USER_DS,
64 .es = __USER_DS, 64 .cs = __KERNEL_CS,
65 .cs = __KERNEL_CS, 65 .ss = __KERNEL_DS,
66 .ss = __KERNEL_DS, 66 .ds = __USER_DS,
67 .ds = __USER_DS, 67 .fs = __KERNEL_PERCPU,
68 .fs = __KERNEL_PERCPU, 68
69 69 .__cr3 = __pa_nodebug(swapper_pg_dir),
70 .__cr3 = __pa_nodebug(swapper_pg_dir),
71 }
72}; 70};
73 71
74/* dummy for do_double_fault() call */ 72/* dummy for do_double_fault() call */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f13b4c00a5de..afbecff161d1 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,6 +18,7 @@
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/sysfs.h> 19#include <linux/sysfs.h>
20 20
21#include <asm/cpu_entry_area.h>
21#include <asm/stacktrace.h> 22#include <asm/stacktrace.h>
22#include <asm/unwind.h> 23#include <asm/unwind.h>
23 24
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
43 return true; 44 return true;
44} 45}
45 46
47bool in_entry_stack(unsigned long *stack, struct stack_info *info)
48{
49 struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
50
51 void *begin = ss;
52 void *end = ss + 1;
53
54 if ((void *)stack < begin || (void *)stack >= end)
55 return false;
56
57 info->type = STACK_TYPE_ENTRY;
58 info->begin = begin;
59 info->end = end;
60 info->next_sp = NULL;
61
62 return true;
63}
64
46static void printk_stack_address(unsigned long address, int reliable, 65static void printk_stack_address(unsigned long address, int reliable,
47 char *log_lvl) 66 char *log_lvl)
48{ 67{
@@ -50,6 +69,39 @@ static void printk_stack_address(unsigned long address, int reliable,
50 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); 69 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
51} 70}
52 71
72void show_iret_regs(struct pt_regs *regs)
73{
74 printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
75 printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
76 regs->sp, regs->flags);
77}
78
79static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
80 bool partial)
81{
82 /*
83 * These on_stack() checks aren't strictly necessary: the unwind code
84 * has already validated the 'regs' pointer. The checks are done for
85 * ordering reasons: if the registers are on the next stack, we don't
86 * want to print them out yet. Otherwise they'll be shown as part of
87 * the wrong stack. Later, when show_trace_log_lvl() switches to the
88 * next stack, this function will be called again with the same regs so
89 * they can be printed in the right context.
90 */
91 if (!partial && on_stack(info, regs, sizeof(*regs))) {
92 __show_regs(regs, 0);
93
94 } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
95 IRET_FRAME_SIZE)) {
96 /*
97 * When an interrupt or exception occurs in entry code, the
98 * full pt_regs might not have been saved yet. In that case
99 * just print the iret frame.
100 */
101 show_iret_regs(regs);
102 }
103}
104
53void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 105void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
54 unsigned long *stack, char *log_lvl) 106 unsigned long *stack, char *log_lvl)
55{ 107{
@@ -57,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
57 struct stack_info stack_info = {0}; 109 struct stack_info stack_info = {0};
58 unsigned long visit_mask = 0; 110 unsigned long visit_mask = 0;
59 int graph_idx = 0; 111 int graph_idx = 0;
112 bool partial;
60 113
61 printk("%sCall Trace:\n", log_lvl); 114 printk("%sCall Trace:\n", log_lvl);
62 115
63 unwind_start(&state, task, regs, stack); 116 unwind_start(&state, task, regs, stack);
64 stack = stack ? : get_stack_pointer(task, regs); 117 stack = stack ? : get_stack_pointer(task, regs);
118 regs = unwind_get_entry_regs(&state, &partial);
65 119
66 /* 120 /*
67 * Iterate through the stacks, starting with the current stack pointer. 121 * Iterate through the stacks, starting with the current stack pointer.
@@ -71,31 +125,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
71 * - task stack 125 * - task stack
72 * - interrupt stack 126 * - interrupt stack
73 * - HW exception stacks (double fault, nmi, debug, mce) 127 * - HW exception stacks (double fault, nmi, debug, mce)
128 * - entry stack
74 * 129 *
75 * x86-32 can have up to three stacks: 130 * x86-32 can have up to four stacks:
76 * - task stack 131 * - task stack
77 * - softirq stack 132 * - softirq stack
78 * - hardirq stack 133 * - hardirq stack
134 * - entry stack
79 */ 135 */
80 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { 136 for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
81 const char *stack_name; 137 const char *stack_name;
82 138
83 /* 139 if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
84 * If we overflowed the task stack into a guard page, jump back 140 /*
85 * to the bottom of the usable stack. 141 * We weren't on a valid stack. It's possible that
86 */ 142 * we overflowed a valid stack into a guard page.
87 if (task_stack_page(task) - (void *)stack < PAGE_SIZE) 143 * See if the next page up is valid so that we can
88 stack = task_stack_page(task); 144 * generate some kind of backtrace if this happens.
89 145 */
90 if (get_stack_info(stack, task, &stack_info, &visit_mask)) 146 stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
91 break; 147 if (get_stack_info(stack, task, &stack_info, &visit_mask))
148 break;
149 }
92 150
93 stack_name = stack_type_name(stack_info.type); 151 stack_name = stack_type_name(stack_info.type);
94 if (stack_name) 152 if (stack_name)
95 printk("%s <%s>\n", log_lvl, stack_name); 153 printk("%s <%s>\n", log_lvl, stack_name);
96 154
97 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 155 if (regs)
98 __show_regs(regs, 0); 156 show_regs_if_on_stack(&stack_info, regs, partial);
99 157
100 /* 158 /*
101 * Scan the stack, printing any text addresses we find. At the 159 * Scan the stack, printing any text addresses we find. At the
@@ -119,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
119 177
120 /* 178 /*
121 * Don't print regs->ip again if it was already printed 179 * Don't print regs->ip again if it was already printed
122 * by __show_regs() below. 180 * by show_regs_if_on_stack().
123 */ 181 */
124 if (regs && stack == &regs->ip) 182 if (regs && stack == &regs->ip)
125 goto next; 183 goto next;
@@ -154,9 +212,9 @@ next:
154 unwind_next_frame(&state); 212 unwind_next_frame(&state);
155 213
156 /* if the frame has entry regs, print them */ 214 /* if the frame has entry regs, print them */
157 regs = unwind_get_entry_regs(&state); 215 regs = unwind_get_entry_regs(&state, &partial);
158 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 216 if (regs)
159 __show_regs(regs, 0); 217 show_regs_if_on_stack(&stack_info, regs, partial);
160 } 218 }
161 219
162 if (stack_name) 220 if (stack_name)
@@ -252,11 +310,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
252 unsigned long sp; 310 unsigned long sp;
253#endif 311#endif
254 printk(KERN_DEFAULT 312 printk(KERN_DEFAULT
255 "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, 313 "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
256 IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", 314 IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
257 IS_ENABLED(CONFIG_SMP) ? " SMP" : "", 315 IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
258 debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", 316 debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
259 IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); 317 IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
318 IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
319 (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
260 320
261 if (notify_die(DIE_OOPS, str, regs, err, 321 if (notify_die(DIE_OOPS, str, regs, err,
262 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 322 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
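
in_entry_stack() bounds the region with plain struct-pointer arithmetic:
"ss + 1" is one element past the struct, i.e. (void *)ss + sizeof(*ss).
Spelled out:

	struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
	void *begin = ss;	/* first byte of the entry stack          */
	void *end   = ss + 1;	/* one past the end: begin + sizeof(*ss)  */
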
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index daefae83a3aa..04170f63e3a1 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
26 if (type == STACK_TYPE_SOFTIRQ) 26 if (type == STACK_TYPE_SOFTIRQ)
27 return "SOFTIRQ"; 27 return "SOFTIRQ";
28 28
29 if (type == STACK_TYPE_ENTRY)
30 return "ENTRY_TRAMPOLINE";
31
29 return NULL; 32 return NULL;
30} 33}
31 34
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
93 if (task != current) 96 if (task != current)
94 goto unknown; 97 goto unknown;
95 98
99 if (in_entry_stack(stack, info))
100 goto recursion_check;
101
96 if (in_hardirq_stack(stack, info)) 102 if (in_hardirq_stack(stack, info))
97 goto recursion_check; 103 goto recursion_check;
98 104
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 88ce2ffdb110..563e28d14f2c 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
37 if (type == STACK_TYPE_IRQ) 37 if (type == STACK_TYPE_IRQ)
38 return "IRQ"; 38 return "IRQ";
39 39
40 if (type == STACK_TYPE_ENTRY) {
41 /*
42 * On 64-bit, we have a generic entry stack that we
43 * use for all the kernel entry points, including
44 * SYSENTER.
45 */
46 return "ENTRY_TRAMPOLINE";
47 }
48
40 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) 49 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
41 return exception_stack_names[type - STACK_TYPE_EXCEPTION]; 50 return exception_stack_names[type - STACK_TYPE_EXCEPTION];
42 51
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
115 if (in_irq_stack(stack, info)) 124 if (in_irq_stack(stack, info))
116 goto recursion_check; 125 goto recursion_check;
117 126
127 if (in_entry_stack(stack, info))
128 goto recursion_check;
129
118 goto unknown; 130 goto unknown;
119 131
120recursion_check: 132recursion_check:
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index b6c6468e10bc..4c8440de3355 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -8,6 +8,7 @@
8#include <asm/segment.h> 8#include <asm/segment.h>
9#include <asm/export.h> 9#include <asm/export.h>
10#include <asm/ftrace.h> 10#include <asm/ftrace.h>
11#include <asm/nospec-branch.h>
11 12
12#ifdef CC_USING_FENTRY 13#ifdef CC_USING_FENTRY
13# define function_hook __fentry__ 14# define function_hook __fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
197 movl 0x4(%ebp), %edx 198 movl 0x4(%ebp), %edx
198 subl $MCOUNT_INSN_SIZE, %eax 199 subl $MCOUNT_INSN_SIZE, %eax
199 200
200 call *ftrace_trace_function 201 movl ftrace_trace_function, %ecx
202 CALL_NOSPEC %ecx
201 203
202 popl %edx 204 popl %edx
203 popl %ecx 205 popl %ecx
@@ -241,5 +243,5 @@ return_to_handler:
241 movl %eax, %ecx 243 movl %eax, %ecx
242 popl %edx 244 popl %edx
243 popl %eax 245 popl %eax
244 jmp *%ecx 246 JMP_NOSPEC %ecx
245#endif 247#endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index c832291d948a..7cb8ba08beb9 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -7,7 +7,7 @@
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8#include <asm/ftrace.h> 8#include <asm/ftrace.h>
9#include <asm/export.h> 9#include <asm/export.h>
10 10#include <asm/nospec-branch.h>
11 11
12 .code64 12 .code64
13 .section .entry.text, "ax" 13 .section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
286 * ip and parent ip are used and the list function is called when 286 * ip and parent ip are used and the list function is called when
287 * function tracing is enabled. 287 * function tracing is enabled.
288 */ 288 */
289 call *ftrace_trace_function 289 movq ftrace_trace_function, %r8
290 290 CALL_NOSPEC %r8
291 restore_mcount_regs 291 restore_mcount_regs
292 292
293 jmp fgraph_trace 293 jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
329 movq 8(%rsp), %rdx 329 movq 8(%rsp), %rdx
330 movq (%rsp), %rax 330 movq (%rsp), %rax
331 addq $24, %rsp 331 addq $24, %rsp
332 jmp *%rdi 332 JMP_NOSPEC %rdi
333#endif 333#endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7dca675fe78d..04a625f0fcda 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
341 .balign PAGE_SIZE; \ 341 .balign PAGE_SIZE; \
342GLOBAL(name) 342GLOBAL(name)
343 343
344#ifdef CONFIG_PAGE_TABLE_ISOLATION
345/*
346 * Each PGD needs to be 8k long and 8k aligned. We do not
347 * ever go out to userspace with these, so we do not
348 * strictly *need* the second page, but this allows us to
349 * have a single set_pgd() implementation that does not
350 * need to worry about whether it has 4k or 8k to work
351 * with.
352 *
353 * This ensures PGDs are 8k long:
354 */
355#define PTI_USER_PGD_FILL 512
356/* This ensures they are 8k-aligned: */
357#define NEXT_PGD_PAGE(name) \
358 .balign 2 * PAGE_SIZE; \
359GLOBAL(name)
360#else
361#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
362#define PTI_USER_PGD_FILL 0
363#endif
364
344/* Automate the creation of 1 to 1 mapping pmd entries */ 365/* Automate the creation of 1 to 1 mapping pmd entries */
345#define PMDS(START, PERM, COUNT) \ 366#define PMDS(START, PERM, COUNT) \
346 i = 0 ; \ 367 i = 0 ; \
@@ -350,13 +371,14 @@ GLOBAL(name)
350 .endr 371 .endr
351 372
352 __INITDATA 373 __INITDATA
353NEXT_PAGE(early_top_pgt) 374NEXT_PGD_PAGE(early_top_pgt)
354 .fill 511,8,0 375 .fill 511,8,0
355#ifdef CONFIG_X86_5LEVEL 376#ifdef CONFIG_X86_5LEVEL
356 .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 377 .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
357#else 378#else
358 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 379 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
359#endif 380#endif
381 .fill PTI_USER_PGD_FILL,8,0
360 382
361NEXT_PAGE(early_dynamic_pgts) 383NEXT_PAGE(early_dynamic_pgts)
362 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 384 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
364 .data 386 .data
365 387
366#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) 388#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
367NEXT_PAGE(init_top_pgt) 389NEXT_PGD_PAGE(init_top_pgt)
368 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 390 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
369 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 391 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
370 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 392 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
371 .org init_top_pgt + PGD_START_KERNEL*8, 0 393 .org init_top_pgt + PGD_START_KERNEL*8, 0
372 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 394 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
373 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 395 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
396 .fill PTI_USER_PGD_FILL,8,0
374 397
375NEXT_PAGE(level3_ident_pgt) 398NEXT_PAGE(level3_ident_pgt)
376 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 399 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
381 */ 404 */
382 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) 405 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
383#else 406#else
384NEXT_PAGE(init_top_pgt) 407NEXT_PGD_PAGE(init_top_pgt)
385 .fill 512,8,0 408 .fill 512,8,0
409 .fill PTI_USER_PGD_FILL,8,0
386#endif 410#endif
387 411
388#ifdef CONFIG_X86_5LEVEL 412#ifdef CONFIG_X86_5LEVEL
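
Because NEXT_PGD_PAGE() forces 8k alignment and PTI_USER_PGD_FILL pads each
PGD to 8k, the user-mode half of every top-level table sits exactly one page
above the kernel half. That is what lets the PTI code switch halves with
pointer arithmetic; a sketch of the idea (the kernel's actual helper is
kernel_to_user_pgdp(), whose implementation may differ in detail):

	/* Kernel PGD at an 8k-aligned address; user copy one page up. */
	static inline pgd_t *user_half(pgd_t *kernel_pgd)
	{
		return (pgd_t *)((unsigned long)kernel_pgd + PAGE_SIZE);
	}
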
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 3feb648781c4..2f723301eb58 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
67 * because the ->io_bitmap_max value must match the bitmap 67 * because the ->io_bitmap_max value must match the bitmap
68 * contents: 68 * contents:
69 */ 69 */
70 tss = &per_cpu(cpu_tss, get_cpu()); 70 tss = &per_cpu(cpu_tss_rw, get_cpu());
71 71
72 if (turn_on) 72 if (turn_on)
73 bitmap_clear(t->io_bitmap_ptr, from, num); 73 bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 49cfd9fe7589..68e1867cca80 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
219 /* high bit used in ret_from_ code */ 219 /* high bit used in ret_from_ code */
220 unsigned vector = ~regs->orig_ax; 220 unsigned vector = ~regs->orig_ax;
221 221
222 /*
223 * NB: Unlike exception entries, IRQ entries do not reliably
224 * handle context tracking in the low-level entry code. This is
225 * because syscall entries execute briefly with IRQs on before
226 * updating context tracking state, so we can take an IRQ from
227 * kernel mode with CONTEXT_USER. The low-level entry code only
228 * updates the context if we came from user mode, so we won't
229 * switch to CONTEXT_KERNEL. We'll fix that once the syscall
230 * code is cleaned up enough that we can cleanly defer enabling
231 * IRQs.
232 */
233
234 entering_irq(); 222 entering_irq();
235 223
236 /* entering_irq() tells RCU that we're not quiescent. Check it. */ 224 /* entering_irq() tells RCU that we're not quiescent. Check it. */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a83b3346a0e1..c1bdbd3d3232 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -20,6 +20,7 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21 21
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/nospec-branch.h>
23 24
24#ifdef CONFIG_DEBUG_STACKOVERFLOW 25#ifdef CONFIG_DEBUG_STACKOVERFLOW
25 26
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
55static void call_on_stack(void *func, void *stack) 56static void call_on_stack(void *func, void *stack)
56{ 57{
57 asm volatile("xchgl %%ebx,%%esp \n" 58 asm volatile("xchgl %%ebx,%%esp \n"
58 "call *%%edi \n" 59 CALL_NOSPEC
59 "movl %%ebx,%%esp \n" 60 "movl %%ebx,%%esp \n"
60 : "=b" (stack) 61 : "=b" (stack)
61 : "0" (stack), 62 : "0" (stack),
62 "D"(func) 63 [thunk_target] "D"(func)
63 : "memory", "cc", "edx", "ecx", "eax"); 64 : "memory", "cc", "edx", "ecx", "eax");
64} 65}
65 66
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
95 call_on_stack(print_stack_overflow, isp); 96 call_on_stack(print_stack_overflow, isp);
96 97
97 asm volatile("xchgl %%ebx,%%esp \n" 98 asm volatile("xchgl %%ebx,%%esp \n"
98 "call *%%edi \n" 99 CALL_NOSPEC
99 "movl %%ebx,%%esp \n" 100 "movl %%ebx,%%esp \n"
100 : "=a" (arg1), "=b" (isp) 101 : "=a" (arg1), "=b" (isp)
101 : "0" (desc), "1" (isp), 102 : "0" (desc), "1" (isp),
102 "D" (desc->handle_irq) 103 [thunk_target] "D" (desc->handle_irq)
103 : "memory", "cc", "ecx"); 104 : "memory", "cc", "ecx");
104 return 1; 105 return 1;
105} 106}
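
CALL_NOSPEC requires the call target in an asm operand named thunk_target,
which is why the plain "D"(func) constraints above grew the symbolic name.
A minimal sketch of converting an indirect call to the retpoline-safe form,
assuming <asm/nospec-branch.h> (some_handler is a hypothetical target):

	void (*func)(void) = some_handler;	/* hypothetical */

	asm volatile(CALL_NOSPEC
		     : /* no outputs */
		     : [thunk_target] "r" (func)
		     : "memory");
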
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 020efbf5786b..d86e344f5b3d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
57 if (regs->sp >= estack_top && regs->sp <= estack_bottom) 57 if (regs->sp >= estack_top && regs->sp <= estack_bottom)
58 return; 58 return;
59 59
60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", 60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
61 current->comm, curbase, regs->sp, 61 current->comm, curbase, regs->sp,
62 irq_stack_top, irq_stack_bottom, 62 irq_stack_top, irq_stack_bottom,
63 estack_top, estack_bottom); 63 estack_top, estack_bottom, (void *)regs->ip);
64 64
65 if (sysctl_panic_on_stackoverflow) 65 if (sysctl_panic_on_stackoverflow)
66 panic("low stack detected by irq handler - check messages\n"); 66 panic("low stack detected by irq handler - check messages\n");
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 1c1eae961340..26d713ecad34 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
5 * Copyright (C) 2002 Andi Kleen 5 * Copyright (C) 2002 Andi Kleen
6 * 6 *
7 * This handles calls from both 32bit and 64bit mode. 7 * This handles calls from both 32bit and 64bit mode.
8 *
9 * Lock order:
 10 * context.ldt_usr_sem
11 * mmap_sem
12 * context.lock
8 */ 13 */
9 14
10#include <linux/errno.h> 15#include <linux/errno.h>
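
The lock-order comment added above is load-bearing: any path that takes more
than one of these locks must nest them outermost-first. Sketched as code
(which locks a given path actually needs varies; this shows only the
required nesting, an assumption drawn from the comment rather than from any
one call site):

	down_write(&mm->context.ldt_usr_sem);	/* 1: modify_ldt() access   */
	down_read(&mm->mmap_sem);		/* 2: page-table allocation */
	mutex_lock(&mm->context.lock);		/* 3: LDT install / flush   */
	/* ... critical section ... */
	mutex_unlock(&mm->context.lock);
	up_read(&mm->mmap_sem);
	up_write(&mm->context.ldt_usr_sem);
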
@@ -19,6 +24,7 @@
19#include <linux/uaccess.h> 24#include <linux/uaccess.h>
20 25
21#include <asm/ldt.h> 26#include <asm/ldt.h>
27#include <asm/tlb.h>
22#include <asm/desc.h> 28#include <asm/desc.h>
23#include <asm/mmu_context.h> 29#include <asm/mmu_context.h>
24#include <asm/syscalls.h> 30#include <asm/syscalls.h>
@@ -42,17 +48,15 @@ static void refresh_ldt_segments(void)
42#endif 48#endif
43} 49}
44 50
45/* context.lock is held for us, so we don't need any locking. */ 51/* context.lock is held by the task which issued the smp function call */
46static void flush_ldt(void *__mm) 52static void flush_ldt(void *__mm)
47{ 53{
48 struct mm_struct *mm = __mm; 54 struct mm_struct *mm = __mm;
49 mm_context_t *pc;
50 55
51 if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) 56 if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
52 return; 57 return;
53 58
54 pc = &mm->context; 59 load_mm_ldt(mm);
55 set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
56 60
57 refresh_ldt_segments(); 61 refresh_ldt_segments();
58} 62}
@@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
89 return NULL; 93 return NULL;
90 } 94 }
91 95
96 /* The new LDT isn't aliased for PTI yet. */
97 new_ldt->slot = -1;
98
92 new_ldt->nr_entries = num_entries; 99 new_ldt->nr_entries = num_entries;
93 return new_ldt; 100 return new_ldt;
94} 101}
95 102
103/*
104 * If PTI is enabled, this maps the LDT into the kernelmode and
105 * usermode tables for the given mm.
106 *
107 * There is no corresponding unmap function. Even if the LDT is freed, we
108 * leave the PTEs around until the slot is reused or the mm is destroyed.
109 * This is harmless: the LDT is always in ordinary memory, and no one will
110 * access the freed slot.
111 *
112 * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
113 * it useful, and the flush would slow down modify_ldt().
114 */
115static int
116map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
117{
118#ifdef CONFIG_PAGE_TABLE_ISOLATION
119 bool is_vmalloc, had_top_level_entry;
120 unsigned long va;
121 spinlock_t *ptl;
122 pgd_t *pgd;
123 int i;
124
125 if (!static_cpu_has(X86_FEATURE_PTI))
126 return 0;
127
128 /*
129 * Any given ldt_struct should have map_ldt_struct() called at most
130 * once.
131 */
132 WARN_ON(ldt->slot != -1);
133
134 /*
135 * Did we already have the top level entry allocated? We can't
 136 * use pgd_none() for this because it doesn't do anything on
137 * 4-level page table kernels.
138 */
139 pgd = pgd_offset(mm, LDT_BASE_ADDR);
140 had_top_level_entry = (pgd->pgd != 0);
141
142 is_vmalloc = is_vmalloc_addr(ldt->entries);
143
144 for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
145 unsigned long offset = i << PAGE_SHIFT;
146 const void *src = (char *)ldt->entries + offset;
147 unsigned long pfn;
148 pte_t pte, *ptep;
149
150 va = (unsigned long)ldt_slot_va(slot) + offset;
151 pfn = is_vmalloc ? vmalloc_to_pfn(src) :
152 page_to_pfn(virt_to_page(src));
153 /*
154 * Treat the PTI LDT range as a *userspace* range.
155 * get_locked_pte() will allocate all needed pagetables
156 * and account for them in this mm.
157 */
158 ptep = get_locked_pte(mm, va, &ptl);
159 if (!ptep)
160 return -ENOMEM;
161 /*
162 * Map it RO so the easy to find address is not a primary
163 * target via some kernel interface which misses a
164 * permission check.
165 */
166 pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
167 set_pte_at(mm, va, ptep, pte);
168 pte_unmap_unlock(ptep, ptl);
169 }
170
171 if (mm->context.ldt) {
172 /*
173 * We already had an LDT. The top-level entry should already
174 * have been allocated and synchronized with the usermode
175 * tables.
176 */
177 WARN_ON(!had_top_level_entry);
178 if (static_cpu_has(X86_FEATURE_PTI))
179 WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
180 } else {
181 /*
182 * This is the first time we're mapping an LDT for this process.
183 * Sync the pgd to the usermode tables.
184 */
185 WARN_ON(had_top_level_entry);
186 if (static_cpu_has(X86_FEATURE_PTI)) {
187 WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
188 set_pgd(kernel_to_user_pgdp(pgd), *pgd);
189 }
190 }
191
192 va = (unsigned long)ldt_slot_va(slot);
193 flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
194
195 ldt->slot = slot;
196#endif
197 return 0;
198}
199
200static void free_ldt_pgtables(struct mm_struct *mm)
201{
202#ifdef CONFIG_PAGE_TABLE_ISOLATION
203 struct mmu_gather tlb;
204 unsigned long start = LDT_BASE_ADDR;
205 unsigned long end = start + (1UL << PGDIR_SHIFT);
206
207 if (!static_cpu_has(X86_FEATURE_PTI))
208 return;
209
210 tlb_gather_mmu(&tlb, mm, start, end);
211 free_pgd_range(&tlb, start, end, start, end);
212 tlb_finish_mmu(&tlb, start, end);
213#endif
214}
215
96/* After calling this, the LDT is immutable. */ 216/* After calling this, the LDT is immutable. */
97static void finalize_ldt_struct(struct ldt_struct *ldt) 217static void finalize_ldt_struct(struct ldt_struct *ldt)
98{ 218{
99 paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); 219 paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
100} 220}
101 221
102/* context.lock is held */ 222static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
103static void install_ldt(struct mm_struct *current_mm,
104 struct ldt_struct *ldt)
105{ 223{
224 mutex_lock(&mm->context.lock);
225
106 /* Synchronizes with READ_ONCE in load_mm_ldt. */ 226 /* Synchronizes with READ_ONCE in load_mm_ldt. */
107 smp_store_release(&current_mm->context.ldt, ldt); 227 smp_store_release(&mm->context.ldt, ldt);
108 228
 109 /* Activate the LDT for all CPUs using current_mm. */ 229 /* Activate the LDT for all CPUs using current's mm. */
110 on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); 230 on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
231
232 mutex_unlock(&mm->context.lock);
111} 233}
112 234
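
For orientation, the reader that the smp_store_release() above pairs with looks roughly like the sketch below; the real load_mm_ldt() lives in mmu_context.h and, under PTI, loads the user-mapped slot alias rather than ldt->entries directly.

static void load_mm_ldt_sketch(struct mm_struct *mm)
{
	/* Pairs with the smp_store_release() in install_ldt(). */
	struct ldt_struct *ldt = READ_ONCE(mm->context.ldt);

	if (ldt)
		set_ldt(ldt->entries, ldt->nr_entries);
	else
		clear_LDT();
}
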
113static void free_ldt_struct(struct ldt_struct *ldt) 235static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
124} 246}
125 247
126/* 248/*
127 * we do not have to muck with descriptors here, that is 249 * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
128 * done in switch_mm() as needed. 250 * the new task is not running, so nothing can be installed.
129 */ 251 */
130int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) 252int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
131{ 253{
132 struct ldt_struct *new_ldt; 254 struct ldt_struct *new_ldt;
133 struct mm_struct *old_mm;
134 int retval = 0; 255 int retval = 0;
135 256
136 mutex_init(&mm->context.lock); 257 if (!old_mm)
137 old_mm = current->mm;
138 if (!old_mm) {
139 mm->context.ldt = NULL;
140 return 0; 258 return 0;
141 }
142 259
143 mutex_lock(&old_mm->context.lock); 260 mutex_lock(&old_mm->context.lock);
144 if (!old_mm->context.ldt) { 261 if (!old_mm->context.ldt)
145 mm->context.ldt = NULL;
146 goto out_unlock; 262 goto out_unlock;
147 }
148 263
149 new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); 264 new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
150 if (!new_ldt) { 265 if (!new_ldt) {
@@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
156 new_ldt->nr_entries * LDT_ENTRY_SIZE); 271 new_ldt->nr_entries * LDT_ENTRY_SIZE);
157 finalize_ldt_struct(new_ldt); 272 finalize_ldt_struct(new_ldt);
158 273
274 retval = map_ldt_struct(mm, new_ldt, 0);
275 if (retval) {
276 free_ldt_pgtables(mm);
277 free_ldt_struct(new_ldt);
278 goto out_unlock;
279 }
159 mm->context.ldt = new_ldt; 280 mm->context.ldt = new_ldt;
160 281
161out_unlock: 282out_unlock:
@@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm)
174 mm->context.ldt = NULL; 295 mm->context.ldt = NULL;
175} 296}
176 297
298void ldt_arch_exit_mmap(struct mm_struct *mm)
299{
300 free_ldt_pgtables(mm);
301}
302
177static int read_ldt(void __user *ptr, unsigned long bytecount) 303static int read_ldt(void __user *ptr, unsigned long bytecount)
178{ 304{
179 struct mm_struct *mm = current->mm; 305 struct mm_struct *mm = current->mm;
180 unsigned long entries_size; 306 unsigned long entries_size;
181 int retval; 307 int retval;
182 308
183 mutex_lock(&mm->context.lock); 309 down_read(&mm->context.ldt_usr_sem);
184 310
185 if (!mm->context.ldt) { 311 if (!mm->context.ldt) {
186 retval = 0; 312 retval = 0;
@@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
209 retval = bytecount; 335 retval = bytecount;
210 336
211out_unlock: 337out_unlock:
212 mutex_unlock(&mm->context.lock); 338 up_read(&mm->context.ldt_usr_sem);
213 return retval; 339 return retval;
214} 340}
215 341
@@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
269 ldt.avl = 0; 395 ldt.avl = 0;
270 } 396 }
271 397
272 mutex_lock(&mm->context.lock); 398 if (down_write_killable(&mm->context.ldt_usr_sem))
399 return -EINTR;
273 400
274 old_ldt = mm->context.ldt; 401 old_ldt = mm->context.ldt;
275 old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; 402 old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -286,12 +413,31 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
286 new_ldt->entries[ldt_info.entry_number] = ldt; 413 new_ldt->entries[ldt_info.entry_number] = ldt;
287 finalize_ldt_struct(new_ldt); 414 finalize_ldt_struct(new_ldt);
288 415
416 /*
417 * If we are using PTI, map the new LDT into the userspace pagetables.
418 * If there is already an LDT, use the other slot so that other CPUs
419 * will continue to use the old LDT until install_ldt() switches
420 * them over to the new LDT.
421 */
422 error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
423 if (error) {
424 /*
 425 * This can only fail for the first LDT setup. If an LDT is
 426 * already installed then the PTE page is already
 427 * populated. Mop up a half-populated page table.
428 */
429 if (!WARN_ON_ONCE(old_ldt))
430 free_ldt_pgtables(mm);
431 free_ldt_struct(new_ldt);
432 goto out_unlock;
433 }
434
289 install_ldt(mm, new_ldt); 435 install_ldt(mm, new_ldt);
290 free_ldt_struct(old_ldt); 436 free_ldt_struct(old_ldt);
291 error = 0; 437 error = 0;
292 438
293out_unlock: 439out_unlock:
294 mutex_unlock(&mm->context.lock); 440 up_write(&mm->context.ldt_usr_sem);
295out: 441out:
296 return error; 442 return error;
297} 443}
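
The slot passed to map_ldt_struct() in write_ldt() ping-pongs between the two per-mm mappings. Factored out purely for illustration (not part of the patch):

static int next_ldt_slot(const struct ldt_struct *old_ldt)
{
	/* The first LDT goes into slot 0; afterwards alternate 0 <-> 1,
	 * so other CPUs keep a valid mapping until install_ldt() runs. */
	return old_ldt ? !old_ldt->slot : 0;
}
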
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 00bc751c861c..edfede768688 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -48,8 +48,6 @@ static void load_segments(void)
48 "\tmovl $"STR(__KERNEL_DS)",%%eax\n" 48 "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
49 "\tmovl %%eax,%%ds\n" 49 "\tmovl %%eax,%%ds\n"
50 "\tmovl %%eax,%%es\n" 50 "\tmovl %%eax,%%es\n"
51 "\tmovl %%eax,%%fs\n"
52 "\tmovl %%eax,%%gs\n"
53 "\tmovl %%eax,%%ss\n" 51 "\tmovl %%eax,%%ss\n"
54 : : : "eax", "memory"); 52 : : : "eax", "memory");
55#undef STR 53#undef STR
@@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
232 * The gdt & idt are now invalid. 230 * The gdt & idt are now invalid.
233 * If you want to load them you must set up your own idt & gdt. 231 * If you want to load them you must set up your own idt & gdt.
234 */ 232 */
235 set_gdt(phys_to_virt(0), 0);
236 idt_invalidate(phys_to_virt(0)); 233 idt_invalidate(phys_to_virt(0));
234 set_gdt(phys_to_virt(0), 0);
237 235
238 /* now call it */ 236 /* now call it */
239 image->start = relocate_kernel_ptr((unsigned long)image->head, 237 image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index ac0be8283325..9edadabf04f6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); 10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); 11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); 12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
13DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
14DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); 13DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
15 14
16DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); 15DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
60 PATCH_SITE(pv_mmu_ops, read_cr2); 59 PATCH_SITE(pv_mmu_ops, read_cr2);
61 PATCH_SITE(pv_mmu_ops, read_cr3); 60 PATCH_SITE(pv_mmu_ops, read_cr3);
62 PATCH_SITE(pv_mmu_ops, write_cr3); 61 PATCH_SITE(pv_mmu_ops, write_cr3);
63 PATCH_SITE(pv_mmu_ops, flush_tlb_single);
64 PATCH_SITE(pv_cpu_ops, wbinvd); 62 PATCH_SITE(pv_cpu_ops, wbinvd);
65#if defined(CONFIG_PARAVIRT_SPINLOCKS) 63#if defined(CONFIG_PARAVIRT_SPINLOCKS)
66 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): 64 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97fb3e5737f5..832a6acd730f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
47 * section. Since TSS's are completely CPU-local, we want them 47 * section. Since TSS's are completely CPU-local, we want them
48 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 48 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
49 */ 49 */
50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { 50__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
51 .x86_tss = { 51 .x86_tss = {
52 /* 52 /*
53 * .sp0 is only used when entering ring 0 from a lower 53 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
56 * Poison it. 56 * Poison it.
57 */ 57 */
58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, 58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
59
60#ifdef CONFIG_X86_64
61 /*
62 * .sp1 is cpu_current_top_of_stack. The init task never
63 * runs user code, but cpu_current_top_of_stack should still
64 * be well defined before the first context switch.
65 */
66 .sp1 = TOP_OF_INIT_STACK,
67#endif
68
59#ifdef CONFIG_X86_32 69#ifdef CONFIG_X86_32
60 .ss0 = __KERNEL_DS, 70 .ss0 = __KERNEL_DS,
61 .ss1 = __KERNEL_CS, 71 .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
71 */ 81 */
72 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, 82 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
73#endif 83#endif
74#ifdef CONFIG_X86_32
75 .SYSENTER_stack_canary = STACK_END_MAGIC,
76#endif
77}; 84};
78EXPORT_PER_CPU_SYMBOL(cpu_tss); 85EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
79 86
80DEFINE_PER_CPU(bool, __tss_limit_invalid); 87DEFINE_PER_CPU(bool, __tss_limit_invalid);
81EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); 88EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
104 struct fpu *fpu = &t->fpu; 111 struct fpu *fpu = &t->fpu;
105 112
106 if (bp) { 113 if (bp) {
107 struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); 114 struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
108 115
109 t->io_bitmap_ptr = NULL; 116 t->io_bitmap_ptr = NULL;
110 clear_thread_flag(TIF_IO_BITMAP); 117 clear_thread_flag(TIF_IO_BITMAP);
@@ -299,7 +306,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
299 } 306 }
300 307
301 if ((tifp ^ tifn) & _TIF_NOTSC) 308 if ((tifp ^ tifn) & _TIF_NOTSC)
302 cr4_toggle_bits(X86_CR4_TSD); 309 cr4_toggle_bits_irqsoff(X86_CR4_TSD);
303 310
304 if ((tifp ^ tifn) & _TIF_NOCPUID) 311 if ((tifp ^ tifn) & _TIF_NOCPUID)
305 set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); 312 set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 45bf0c5f93e1..5224c6099184 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
234 struct fpu *prev_fpu = &prev->fpu; 234 struct fpu *prev_fpu = &prev->fpu;
235 struct fpu *next_fpu = &next->fpu; 235 struct fpu *next_fpu = &next->fpu;
236 int cpu = smp_processor_id(); 236 int cpu = smp_processor_id();
237 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 237 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
238 238
239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
240 240
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c25..c75466232016 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
69 unsigned int fsindex, gsindex; 69 unsigned int fsindex, gsindex;
70 unsigned int ds, cs, es; 70 unsigned int ds, cs, es;
71 71
72 printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); 72 show_iret_regs(regs);
73 printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, 73
74 regs->sp, regs->flags);
75 if (regs->orig_ax != -1) 74 if (regs->orig_ax != -1)
76 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); 75 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
77 else 76 else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
88 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", 87 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
89 regs->r13, regs->r14, regs->r15); 88 regs->r13, regs->r14, regs->r15);
90 89
90 if (!all)
91 return;
92
91 asm("movl %%ds,%0" : "=r" (ds)); 93 asm("movl %%ds,%0" : "=r" (ds));
92 asm("movl %%cs,%0" : "=r" (cs)); 94 asm("movl %%cs,%0" : "=r" (cs));
93 asm("movl %%es,%0" : "=r" (es)); 95 asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
98 rdmsrl(MSR_GS_BASE, gs); 100 rdmsrl(MSR_GS_BASE, gs);
99 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 101 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
100 102
101 if (!all)
102 return;
103
104 cr0 = read_cr0(); 103 cr0 = read_cr0();
105 cr2 = read_cr2(); 104 cr2 = read_cr2();
106 cr3 = __read_cr3(); 105 cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
400 struct fpu *prev_fpu = &prev->fpu; 399 struct fpu *prev_fpu = &prev->fpu;
401 struct fpu *next_fpu = &next->fpu; 400 struct fpu *next_fpu = &next->fpu;
402 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
403 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 402 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
404 403
405 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && 404 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
406 this_cpu_read(irq_count) != -1); 405 this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
462 * Switch the PDA and FPU contexts. 461 * Switch the PDA and FPU contexts.
463 */ 462 */
464 this_cpu_write(current_task, next_p); 463 this_cpu_write(current_task, next_p);
464 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
465 465
466 /* Reload sp0. */ 466 /* Reload sp0. */
467 update_sp0(next_p); 467 update_sp0(next_p);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 8af2e8d0c0a1..145810b0edf6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -906,9 +906,6 @@ void __init setup_arch(char **cmdline_p)
906 set_bit(EFI_BOOT, &efi.flags); 906 set_bit(EFI_BOOT, &efi.flags);
907 set_bit(EFI_64BIT, &efi.flags); 907 set_bit(EFI_64BIT, &efi.flags);
908 } 908 }
909
910 if (efi_enabled(EFI_BOOT))
911 efi_memblock_x86_reserve_range();
912#endif 909#endif
913 910
914 x86_init.oem.arch_setup(); 911 x86_init.oem.arch_setup();
@@ -962,6 +959,8 @@ void __init setup_arch(char **cmdline_p)
962 959
963 parse_early_param(); 960 parse_early_param();
964 961
962 if (efi_enabled(EFI_BOOT))
963 efi_memblock_x86_reserve_range();
965#ifdef CONFIG_MEMORY_HOTPLUG 964#ifdef CONFIG_MEMORY_HOTPLUG
966 /* 965 /*
967 * Memory used by the kernel cannot be hot-removed because Linux 966 * Memory used by the kernel cannot be hot-removed because Linux
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3d01df7d7cf6..ed556d50d7ed 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
106static unsigned int logical_packages __read_mostly; 106static unsigned int logical_packages __read_mostly;
107 107
108/* Maximum number of SMT threads on any online core */ 108/* Maximum number of SMT threads on any online core */
109int __max_smt_threads __read_mostly; 109int __read_mostly __max_smt_threads = 1;
110 110
111/* Flag to indicate if a complete sched domain rebuild is required */ 111/* Flag to indicate if a complete sched domain rebuild is required */
112bool x86_topology_update; 112bool x86_topology_update;
@@ -126,14 +126,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
126 spin_lock_irqsave(&rtc_lock, flags); 126 spin_lock_irqsave(&rtc_lock, flags);
127 CMOS_WRITE(0xa, 0xf); 127 CMOS_WRITE(0xa, 0xf);
128 spin_unlock_irqrestore(&rtc_lock, flags); 128 spin_unlock_irqrestore(&rtc_lock, flags);
129 local_flush_tlb();
130 pr_debug("1.\n");
131 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = 129 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
132 start_eip >> 4; 130 start_eip >> 4;
133 pr_debug("2.\n");
134 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 131 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
135 start_eip & 0xf; 132 start_eip & 0xf;
136 pr_debug("3.\n");
137} 133}
138 134
139static inline void smpboot_restore_warm_reset_vector(void) 135static inline void smpboot_restore_warm_reset_vector(void)
@@ -141,11 +137,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
141 unsigned long flags; 137 unsigned long flags;
142 138
143 /* 139 /*
144 * Install writable page 0 entry to set BIOS data area.
145 */
146 local_flush_tlb();
147
148 /*
149 * Paranoid: Set warm reset code and vector here back 140 * Paranoid: Set warm reset code and vector here back
150 * to default values. 141 * to default values.
151 */ 142 */
@@ -237,7 +228,7 @@ static void notrace start_secondary(void *unused)
237 load_cr3(swapper_pg_dir); 228 load_cr3(swapper_pg_dir);
238 __flush_tlb_all(); 229 __flush_tlb_all();
239#endif 230#endif
240 231 load_current_idt();
241 cpu_init(); 232 cpu_init();
242 x86_cpuinit.early_percpu_clock_init(); 233 x86_cpuinit.early_percpu_clock_init();
243 preempt_disable(); 234 preempt_disable();
@@ -932,12 +923,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
932 initial_code = (unsigned long)start_secondary; 923 initial_code = (unsigned long)start_secondary;
933 initial_stack = idle->thread.sp; 924 initial_stack = idle->thread.sp;
934 925
935 /* 926 /* Enable the espfix hack for this CPU */
936 * Enable the espfix hack for this CPU
937 */
938#ifdef CONFIG_X86_ESPFIX64
939 init_espfix_ap(cpu); 927 init_espfix_ap(cpu);
940#endif
941 928
942 /* So we see what's up */ 929 /* So we see what's up */
943 announce_cpu(cpu, apicid); 930 announce_cpu(cpu, apicid);
@@ -1304,7 +1291,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 1304 * Today neither Intel nor AMD supports heterogeneous systems so 1291 * Today neither Intel nor AMD supports heterogeneous systems so
1305 * extrapolate the boot cpu's data to all packages. 1292 * extrapolate the boot cpu's data to all packages.
1306 */ 1293 */
1307 ncpus = cpu_data(0).booted_cores * smp_num_siblings; 1294 ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
1308 __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); 1295 __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
1309 pr_info("Max logical packages: %u\n", __max_logical_packages); 1296 pr_info("Max logical packages: %u\n", __max_logical_packages);
1310 1297
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 77835bc021c7..093f2ea5dd56 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
102 for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); 102 for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
103 unwind_next_frame(&state)) { 103 unwind_next_frame(&state)) {
104 104
105 regs = unwind_get_entry_regs(&state); 105 regs = unwind_get_entry_regs(&state, NULL);
106 if (regs) { 106 if (regs) {
107 /* 107 /*
108 * Kernel mode registers on the stack indicate an 108 * Kernel mode registers on the stack indicate an
@@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk,
164{ 164{
165 int ret; 165 int ret;
166 166
167 /*
168 * If the task doesn't have a stack (e.g., a zombie), the stack is
169 * "reliably" empty.
170 */
167 if (!try_get_task_stack(tsk)) 171 if (!try_get_task_stack(tsk))
168 return -EINVAL; 172 return 0;
169 173
170 ret = __save_stack_trace_reliable(trace, tsk); 174 ret = __save_stack_trace_reliable(trace, tsk);
171 175
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index a4eb27918ceb..a2486f444073 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
138 return -1; 138 return -1;
139 set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); 139 set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
140 pte_unmap(pte); 140 pte_unmap(pte);
141
142 /*
143 * PTI poisons low addresses in the kernel page tables in the
144 * name of making them unusable for userspace. To execute
145 * code at such a low address, the poison must be cleared.
146 *
147 * Note: 'pgd' actually gets set in p4d_alloc() _or_
148 * pud_alloc() depending on 4/5-level paging.
149 */
150 pgd->pgd &= ~_PAGE_NX;
151
141 return 0; 152 return 0;
142} 153}
143 154
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 9a9c9b076955..a5b802a12212 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
93 cpu = get_cpu(); 93 cpu = get_cpu();
94 94
95 while (n-- > 0) { 95 while (n-- > 0) {
96 if (LDT_empty(info) || LDT_zero(info)) { 96 if (LDT_empty(info) || LDT_zero(info))
97 memset(desc, 0, sizeof(*desc)); 97 memset(desc, 0, sizeof(*desc));
98 } else { 98 else
99 fill_ldt(desc, info); 99 fill_ldt(desc, info);
100
101 /*
102 * Always set the accessed bit so that the CPU
103 * doesn't try to write to the (read-only) GDT.
104 */
105 desc->type |= 1;
106 }
107 ++info; 100 ++info;
108 ++desc; 101 ++desc;
109 } 102 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55..446c9ef8cfc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
51#include <asm/traps.h> 51#include <asm/traps.h>
52#include <asm/desc.h> 52#include <asm/desc.h>
53#include <asm/fpu/internal.h> 53#include <asm/fpu/internal.h>
54#include <asm/cpu_entry_area.h>
54#include <asm/mce.h> 55#include <asm/mce.h>
55#include <asm/fixmap.h> 56#include <asm/fixmap.h>
56#include <asm/mach_traps.h> 57#include <asm/mach_traps.h>
@@ -348,23 +349,42 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
348 349
349 /* 350 /*
350 * If IRET takes a non-IST fault on the espfix64 stack, then we 351 * If IRET takes a non-IST fault on the espfix64 stack, then we
351 * end up promoting it to a doublefault. In that case, modify 352 * end up promoting it to a doublefault. In that case, take
352 * the stack to make it look like we just entered the #GP 353 * advantage of the fact that we're not using the normal (TSS.sp0)
353 * handler from user space, similar to bad_iret. 354 * stack right now. We can write a fake #GP(0) frame at TSS.sp0
355 * and then modify our own IRET frame so that, when we return,
356 * we land directly at the #GP(0) vector with the stack already
357 * set up according to its expectations.
358 *
359 * The net result is that our #GP handler will think that we
360 * entered from usermode with the bad user context.
354 * 361 *
355 * No need for ist_enter here because we don't use RCU. 362 * No need for ist_enter here because we don't use RCU.
356 */ 363 */
357 if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && 364 if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
358 regs->cs == __KERNEL_CS && 365 regs->cs == __KERNEL_CS &&
359 regs->ip == (unsigned long)native_irq_return_iret) 366 regs->ip == (unsigned long)native_irq_return_iret)
360 { 367 {
361 struct pt_regs *normal_regs = task_pt_regs(current); 368 struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
362 369
363 /* Fake a #GP(0) from userspace. */ 370 /*
364 memmove(&normal_regs->ip, (void *)regs->sp, 5*8); 371 * regs->sp points to the failing IRET frame on the
365 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ 372 * ESPFIX64 stack. Copy it to the entry stack. This fills
373 * in gpregs->ss through gpregs->ip.
374 *
375 */
376 memmove(&gpregs->ip, (void *)regs->sp, 5*8);
377 gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
378
379 /*
380 * Adjust our frame so that we return straight to the #GP
381 * vector with the expected RSP value. This is safe because
 382 * we won't enable interrupts or schedule before we invoke
383 * general_protection, so nothing will clobber the stack
384 * frame we just set up.
385 */
366 regs->ip = (unsigned long)general_protection; 386 regs->ip = (unsigned long)general_protection;
367 regs->sp = (unsigned long)&normal_regs->orig_ax; 387 regs->sp = (unsigned long)&gpregs->orig_ax;
368 388
369 return; 389 return;
370 } 390 }
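
The 5*8 bytes moved by the memmove() above are the hardware IRET frame, which coincides with the last five words of struct pt_regs; schematically:

struct iret_frame_sketch {	/* tail of struct pt_regs on x86-64 */
	unsigned long ip;	/* RIP    */
	unsigned long cs;	/* CS     */
	unsigned long flags;	/* RFLAGS */
	unsigned long sp;	/* RSP    */
	unsigned long ss;	/* SS     */
};				/* 5 * 8 == 40 bytes */
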
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
389 * 409 *
390 * Processors update CR2 whenever a page fault is detected. If a 410 * Processors update CR2 whenever a page fault is detected. If a
391 * second page fault occurs while an earlier page fault is being 411 * second page fault occurs while an earlier page fault is being
392 * deliv- ered, the faulting linear address of the second fault will 412 * delivered, the faulting linear address of the second fault will
393 * overwrite the contents of CR2 (replacing the previous 413 * overwrite the contents of CR2 (replacing the previous
394 * address). These updates to CR2 occur even if the page fault 414 * address). These updates to CR2 occur even if the page fault
395 * results in a double fault or occurs during the delivery of a 415 * results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
605 625
606#ifdef CONFIG_X86_64 626#ifdef CONFIG_X86_64
607/* 627/*
 608 * Help handler running on IST stack to switch off the IST stack if the 628 * Help a handler running on a per-cpu (IST or entry trampoline) stack
609 * interrupted code was in user mode. The actual stack switch is done in 629 * to switch to the normal thread stack if the interrupted code was in
610 * entry_64.S 630 * user mode. The actual stack switch is done in entry_64.S
611 */ 631 */
612asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) 632asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
613{ 633{
614 struct pt_regs *regs = task_pt_regs(current); 634 struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
615 *regs = *eregs; 635 if (regs != eregs)
636 *regs = *eregs;
616 return regs; 637 return regs;
617} 638}
618NOKPROBE_SYMBOL(sync_regs); 639NOKPROBE_SYMBOL(sync_regs);
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
628 /* 649 /*
629 * This is called from entry_64.S early in handling a fault 650 * This is called from entry_64.S early in handling a fault
630 * caused by a bad iret to user mode. To handle the fault 651 * caused by a bad iret to user mode. To handle the fault
631 * correctly, we want move our stack frame to task_pt_regs 652 * correctly, we want to move our stack frame to where it would
632 * and we want to pretend that the exception came from the 653 * be had we entered directly on the entry stack (rather than
633 * iret target. 654 * just below the IRET frame) and we want to pretend that the
655 * exception came from the IRET target.
634 */ 656 */
635 struct bad_iret_stack *new_stack = 657 struct bad_iret_stack *new_stack =
636 container_of(task_pt_regs(current), 658 (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
637 struct bad_iret_stack, regs);
638 659
639 /* Copy the IRET target to the new stack. */ 660 /* Copy the IRET target to the new stack. */
640 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); 661 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
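
For context, the container being assembled at the top of the entry stack is the bad_iret_stack defined earlier in traps.c; presumably along the lines of:

struct bad_iret_stack {
	void *error_entry_ret;	/* return address back into error_entry */
	struct pt_regs regs;
};
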
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
795 debug_stack_usage_dec(); 816 debug_stack_usage_dec();
796 817
797exit: 818exit:
798#if defined(CONFIG_X86_32)
799 /*
800 * This is the most likely code path that involves non-trivial use
801 * of the SYSENTER stack. Check that we haven't overrun it.
802 */
803 WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
804 "Overran or corrupted SYSENTER stack\n");
805#endif
806 ist_exit(regs); 819 ist_exit(regs);
807} 820}
808NOKPROBE_SYMBOL(do_debug); 821NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
929 942
930void __init trap_init(void) 943void __init trap_init(void)
931{ 944{
945 /* Init cpu_entry_area before IST entries are set up */
946 setup_cpu_entry_areas();
947
932 idt_setup_traps(); 948 idt_setup_traps();
933 949
934 /* 950 /*
@@ -936,8 +952,9 @@ void __init trap_init(void)
936 * "sidt" instruction will not leak the location of the kernel, and 952 * "sidt" instruction will not leak the location of the kernel, and
937 * to defend the IDT against arbitrary memory write vulnerabilities. 953 * to defend the IDT against arbitrary memory write vulnerabilities.
938 * It will be reloaded in cpu_init() */ 954 * It will be reloaded in cpu_init() */
939 __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); 955 cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
940 idt_descr.address = fix_to_virt(FIX_RO_IDT); 956 PAGE_KERNEL_RO);
957 idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
941 958
942 /* 959 /*
943 * Should be a barrier for any external CPU state: 960 * Should be a barrier for any external CPU state:
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a3f973b2c97a..be86a865087a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
253 return NULL; 253 return NULL;
254} 254}
255 255
256static bool stack_access_ok(struct unwind_state *state, unsigned long addr, 256static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
257 size_t len) 257 size_t len)
258{ 258{
259 struct stack_info *info = &state->stack_info; 259 struct stack_info *info = &state->stack_info;
260 void *addr = (void *)_addr;
260 261
261 /* 262 if (!on_stack(info, addr, len) &&
262 * If the address isn't on the current stack, switch to the next one. 263 (get_stack_info(addr, state->task, info, &state->stack_mask)))
263 * 264 return false;
264 * We may have to traverse multiple stacks to deal with the possibility
265 * that info->next_sp could point to an empty stack and the address
266 * could be on a subsequent stack.
267 */
268 while (!on_stack(info, (void *)addr, len))
269 if (get_stack_info(info->next_sp, state->task, info,
270 &state->stack_mask))
271 return false;
272 265
273 return true; 266 return true;
274} 267}
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
283 return true; 276 return true;
284} 277}
285 278
286#define REGS_SIZE (sizeof(struct pt_regs))
287#define SP_OFFSET (offsetof(struct pt_regs, sp))
288#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
289#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
290
291static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, 279static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
292 unsigned long *ip, unsigned long *sp, bool full) 280 unsigned long *ip, unsigned long *sp)
293{ 281{
294 size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; 282 struct pt_regs *regs = (struct pt_regs *)addr;
295 size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
296 struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
297
298 if (IS_ENABLED(CONFIG_X86_64)) {
299 if (!stack_access_ok(state, addr, regs_size))
300 return false;
301 283
302 *ip = regs->ip; 284 /* x86-32 support will be more complicated due to the &regs->sp hack */
303 *sp = regs->sp; 285 BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
304 286
305 return true; 287 if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
306 }
307
308 if (!stack_access_ok(state, addr, sp_offset))
309 return false; 288 return false;
310 289
311 *ip = regs->ip; 290 *ip = regs->ip;
291 *sp = regs->sp;
292 return true;
293}
312 294
313 if (user_mode(regs)) { 295static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
314 if (!stack_access_ok(state, addr + sp_offset, 296 unsigned long *ip, unsigned long *sp)
315 REGS_SIZE - SP_OFFSET)) 297{
316 return false; 298 struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
317 299
318 *sp = regs->sp; 300 if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
319 } else 301 return false;
320 *sp = (unsigned long)&regs->sp;
321 302
303 *ip = regs->ip;
304 *sp = regs->sp;
322 return true; 305 return true;
323} 306}
324 307
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
327 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; 310 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
328 enum stack_type prev_type = state->stack_info.type; 311 enum stack_type prev_type = state->stack_info.type;
329 struct orc_entry *orc; 312 struct orc_entry *orc;
330 struct pt_regs *ptregs;
331 bool indirect = false; 313 bool indirect = false;
332 314
333 if (unwind_done(state)) 315 if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
435 break; 417 break;
436 418
437 case ORC_TYPE_REGS: 419 case ORC_TYPE_REGS:
438 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { 420 if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
439 orc_warn("can't dereference registers at %p for ip %pB\n", 421 orc_warn("can't dereference registers at %p for ip %pB\n",
440 (void *)sp, (void *)orig_ip); 422 (void *)sp, (void *)orig_ip);
441 goto done; 423 goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
447 break; 429 break;
448 430
449 case ORC_TYPE_REGS_IRET: 431 case ORC_TYPE_REGS_IRET:
450 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { 432 if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
451 orc_warn("can't dereference iret registers at %p for ip %pB\n", 433 orc_warn("can't dereference iret registers at %p for ip %pB\n",
452 (void *)sp, (void *)orig_ip); 434 (void *)sp, (void *)orig_ip);
453 goto done; 435 goto done;
454 } 436 }
455 437
456 ptregs = container_of((void *)sp, struct pt_regs, ip); 438 state->regs = (void *)sp - IRET_FRAME_OFFSET;
457 if ((unsigned long)ptregs >= prev_sp && 439 state->full_regs = false;
458 on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
459 state->regs = ptregs;
460 state->full_regs = false;
461 } else
462 state->regs = NULL;
463
464 state->signal = true; 440 state->signal = true;
465 break; 441 break;
466 442
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
553 } 529 }
554 530
555 if (get_stack_info((unsigned long *)state->sp, state->task, 531 if (get_stack_info((unsigned long *)state->sp, state->task,
556 &state->stack_info, &state->stack_mask)) 532 &state->stack_info, &state->stack_mask)) {
557 return; 533 /*
534 * We weren't on a valid stack. It's possible that
535 * we overflowed a valid stack into a guard page.
536 * See if the next page up is valid so that we can
537 * generate some kind of backtrace if this happens.
538 */
539 void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
540 if (get_stack_info(next_page, state->task, &state->stack_info,
541 &state->stack_mask))
542 return;
543 }
558 544
559 /* 545 /*
560 * The caller can provide the address of the first frame directly 546 * The caller can provide the address of the first frame directly
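
A worked example of the guard-page probe, assuming 4 KiB pages:

/*
 * A stack occupies [0x9000, 0xa000) with its guard page at
 * [0x8000, 0x9000).  An overflowed sp of 0x8ff8 fails the first
 * get_stack_info(), but PAGE_ALIGN(0x8ff8) == 0x9000 lands on the
 * valid stack, so the unwinder can still produce a backtrace.
 */
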
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a4009fb9be87..1e413a9326aa 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
61 . = ALIGN(HPAGE_SIZE); \ 61 . = ALIGN(HPAGE_SIZE); \
62 __end_rodata_hpage_align = .; 62 __end_rodata_hpage_align = .;
63 63
64#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
65#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
66
64#else 67#else
65 68
66#define X64_ALIGN_RODATA_BEGIN 69#define X64_ALIGN_RODATA_BEGIN
67#define X64_ALIGN_RODATA_END 70#define X64_ALIGN_RODATA_END
68 71
72#define ALIGN_ENTRY_TEXT_BEGIN
73#define ALIGN_ENTRY_TEXT_END
74
69#endif 75#endif
70 76
71PHDRS { 77PHDRS {
@@ -102,11 +108,22 @@ SECTIONS
102 CPUIDLE_TEXT 108 CPUIDLE_TEXT
103 LOCK_TEXT 109 LOCK_TEXT
104 KPROBES_TEXT 110 KPROBES_TEXT
111 ALIGN_ENTRY_TEXT_BEGIN
105 ENTRY_TEXT 112 ENTRY_TEXT
106 IRQENTRY_TEXT 113 IRQENTRY_TEXT
114 ALIGN_ENTRY_TEXT_END
107 SOFTIRQENTRY_TEXT 115 SOFTIRQENTRY_TEXT
108 *(.fixup) 116 *(.fixup)
109 *(.gnu.warning) 117 *(.gnu.warning)
118
119#ifdef CONFIG_X86_64
120 . = ALIGN(PAGE_SIZE);
121 _entry_trampoline = .;
122 *(.entry_trampoline)
123 . = ALIGN(PAGE_SIZE);
124 ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
125#endif
126
110 /* End of text section */ 127 /* End of text section */
111 _etext = .; 128 _etext = .;
112 } :text = 0x9090 129 } :text = 0x9090
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index cdc70a3a6583..c2cea6651279 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
44 [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, 44 [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
45 [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, 45 [CPUID_1_ECX] = { 1, 0, CPUID_ECX},
46 [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, 46 [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
47 [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, 47 [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
48 [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, 48 [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
49 [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, 49 [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
50 [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, 50 [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
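
The corrected leaf matters because reverse_cpuid[] drives guest feature lookups; for instance, X86_FEATURE_SVM lives in the CPUID_8000_0001_ECX word, so before the fix it was resolved against the wrong leaf:

/*
 * guest_cpuid_has(vcpu, X86_FEATURE_SVM)
 *   -> reverse_cpuid[CPUID_8000_0001_ECX]
 *   -> before: leaf 0xc0000001 (Centaur), so the bit never matched
 *   -> after:  leaf 0x80000001 ECX, where SVM is actually reported
 */
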
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8079d141792a..b514b2b2845a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op)
1046 1046
1047static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) 1047static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
1048{ 1048{
1049 ctxt->ops->get_fpu(ctxt);
1050 switch (reg) { 1049 switch (reg) {
1051 case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; 1050 case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
1052 case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; 1051 case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
1068#endif 1067#endif
1069 default: BUG(); 1068 default: BUG();
1070 } 1069 }
1071 ctxt->ops->put_fpu(ctxt);
1072} 1070}
1073 1071
1074static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, 1072static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
1075 int reg) 1073 int reg)
1076{ 1074{
1077 ctxt->ops->get_fpu(ctxt);
1078 switch (reg) { 1075 switch (reg) {
1079 case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; 1076 case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
1080 case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; 1077 case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
1096#endif 1093#endif
1097 default: BUG(); 1094 default: BUG();
1098 } 1095 }
1099 ctxt->ops->put_fpu(ctxt);
1100} 1096}
1101 1097
1102static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) 1098static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1103{ 1099{
1104 ctxt->ops->get_fpu(ctxt);
1105 switch (reg) { 1100 switch (reg) {
1106 case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; 1101 case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
1107 case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; 1102 case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1113 case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; 1108 case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
1114 default: BUG(); 1109 default: BUG();
1115 } 1110 }
1116 ctxt->ops->put_fpu(ctxt);
1117} 1111}
1118 1112
1119static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) 1113static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1120{ 1114{
1121 ctxt->ops->get_fpu(ctxt);
1122 switch (reg) { 1115 switch (reg) {
1123 case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; 1116 case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
1124 case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; 1117 case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1130 case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; 1123 case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
1131 default: BUG(); 1124 default: BUG();
1132 } 1125 }
1133 ctxt->ops->put_fpu(ctxt);
1134} 1126}
1135 1127
1136static int em_fninit(struct x86_emulate_ctxt *ctxt) 1128static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
1138 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) 1130 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1139 return emulate_nm(ctxt); 1131 return emulate_nm(ctxt);
1140 1132
1141 ctxt->ops->get_fpu(ctxt);
1142 asm volatile("fninit"); 1133 asm volatile("fninit");
1143 ctxt->ops->put_fpu(ctxt);
1144 return X86EMUL_CONTINUE; 1134 return X86EMUL_CONTINUE;
1145} 1135}
1146 1136
@@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1151 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) 1141 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1152 return emulate_nm(ctxt); 1142 return emulate_nm(ctxt);
1153 1143
1154 ctxt->ops->get_fpu(ctxt);
1155 asm volatile("fnstcw %0": "+m"(fcw)); 1144 asm volatile("fnstcw %0": "+m"(fcw));
1156 ctxt->ops->put_fpu(ctxt);
1157 1145
1158 ctxt->dst.val = fcw; 1146 ctxt->dst.val = fcw;
1159 1147
@@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1167 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) 1155 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1168 return emulate_nm(ctxt); 1156 return emulate_nm(ctxt);
1169 1157
1170 ctxt->ops->get_fpu(ctxt);
1171 asm volatile("fnstsw %0": "+m"(fsw)); 1158 asm volatile("fnstsw %0": "+m"(fsw));
1172 ctxt->ops->put_fpu(ctxt);
1173 1159
1174 ctxt->dst.val = fsw; 1160 ctxt->dst.val = fsw;
1175 1161
@@ -2404,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2404} 2390}
2405 2391
2406static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, 2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2407 u64 cr0, u64 cr4) 2393 u64 cr0, u64 cr3, u64 cr4)
2408{ 2394{
2409 int bad; 2395 int bad;
2396 u64 pcid;
2397
2398 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2399 pcid = 0;
2400 if (cr4 & X86_CR4_PCIDE) {
2401 pcid = cr3 & 0xfff;
2402 cr3 &= ~0xfff;
2403 }
2404
2405 bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2406 if (bad)
2407 return X86EMUL_UNHANDLEABLE;
2410 2408
2411 /* 2409 /*
2412 * First enable PAE, long mode needs it before CR0.PG = 1 is set. 2410 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2425,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2425 bad = ctxt->ops->set_cr(ctxt, 4, cr4); 2423 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2426 if (bad) 2424 if (bad)
2427 return X86EMUL_UNHANDLEABLE; 2425 return X86EMUL_UNHANDLEABLE;
2426 if (pcid) {
2427 bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2428 if (bad)
2429 return X86EMUL_UNHANDLEABLE;
2430 }
2431
2428 } 2432 }
2429 2433
2430 return X86EMUL_CONTINUE; 2434 return X86EMUL_CONTINUE;
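
A minimal sketch of the CR3/PCID split performed above, given that CR4.PCIDE may only be enabled while CR3[11:0] is zero:

static inline u64 split_cr3_pcid(u64 cr3, u64 *pcid)
{
	*pcid = cr3 & 0xfff;	/* e.g. 0x123 for cr3 == 0x12345678123 */
	return cr3 & ~0xfffULL;	/* base, written before PCIDE is set */
}
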
@@ -2435,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2435 struct desc_struct desc; 2439 struct desc_struct desc;
2436 struct desc_ptr dt; 2440 struct desc_ptr dt;
2437 u16 selector; 2441 u16 selector;
2438 u32 val, cr0, cr4; 2442 u32 val, cr0, cr3, cr4;
2439 int i; 2443 int i;
2440 2444
2441 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); 2445 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2442 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); 2446 cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
2443 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; 2447 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2444 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); 2448 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2445 2449
@@ -2481,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2481 2485
2482 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); 2486 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2483 2487
2484 return rsm_enter_protected_mode(ctxt, cr0, cr4); 2488 return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2485} 2489}
2486 2490
2487static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) 2491static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2488{ 2492{
2489 struct desc_struct desc; 2493 struct desc_struct desc;
2490 struct desc_ptr dt; 2494 struct desc_ptr dt;
2491 u64 val, cr0, cr4; 2495 u64 val, cr0, cr3, cr4;
2492 u32 base3; 2496 u32 base3;
2493 u16 selector; 2497 u16 selector;
2494 int i, r; 2498 int i, r;
@@ -2505,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2505 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); 2509 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2506 2510
2507 cr0 = GET_SMSTATE(u64, smbase, 0x7f58); 2511 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2508 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); 2512 cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
2509 cr4 = GET_SMSTATE(u64, smbase, 0x7f48); 2513 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2510 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); 2514 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2511 val = GET_SMSTATE(u64, smbase, 0x7ed0); 2515 val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2533,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2533 dt.address = GET_SMSTATE(u64, smbase, 0x7e68); 2537 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2534 ctxt->ops->set_gdt(ctxt, &dt); 2538 ctxt->ops->set_gdt(ctxt, &dt);
2535 2539
2536 r = rsm_enter_protected_mode(ctxt, cr0, cr4); 2540 r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2537 if (r != X86EMUL_CONTINUE) 2541 if (r != X86EMUL_CONTINUE)
2538 return r; 2542 return r;
2539 2543
@@ -4001,12 +4005,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
4001 if (rc != X86EMUL_CONTINUE) 4005 if (rc != X86EMUL_CONTINUE)
4002 return rc; 4006 return rc;
4003 4007
4004 ctxt->ops->get_fpu(ctxt);
4005
4006 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); 4008 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
4007 4009
4008 ctxt->ops->put_fpu(ctxt);
4009
4010 if (rc != X86EMUL_CONTINUE) 4010 if (rc != X86EMUL_CONTINUE)
4011 return rc; 4011 return rc;
4012 4012
@@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
4014 fxstate_size(ctxt)); 4014 fxstate_size(ctxt));
4015} 4015}
4016 4016
4017/*
4018 * FXRSTOR might restore XMM registers not provided by the guest. Fill
 4019 * in the host's register values (via FXSAVE) instead, so those
 4020 * registers won't be modified (preemption has to stay disabled until FXRSTOR).
4021 *
4022 * Use noinline to keep the stack for other functions called by callers small.
4023 */
4024static noinline int fxregs_fixup(struct fxregs_state *fx_state,
4025 const size_t used_size)
4026{
4027 struct fxregs_state fx_tmp;
4028 int rc;
4029
4030 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
4031 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
4032 __fxstate_size(16) - used_size);
4033
4034 return rc;
4035}
4036
4017static int em_fxrstor(struct x86_emulate_ctxt *ctxt) 4037static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
4018{ 4038{
4019 struct fxregs_state fx_state; 4039 struct fxregs_state fx_state;
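
The used_size split matters because a 32-bit guest image only covers XMM0-7. Assuming __fxstate_size(n) is the 160-byte legacy area plus n * 16 bytes of XMM space, the sizes work out as:

/*
 *   32-bit guest image: __fxstate_size(8)  == 160 + 128 == 288 bytes
 *   64-bit guest image: __fxstate_size(16) == 160 + 256 == 416 bytes
 *
 * fxregs_fixup() copies bytes [used_size, 416) from the host's own
 * FXSAVE image, so the subsequent FXRSTOR leaves XMM8-15 untouched.
 */
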
@@ -4024,19 +4044,17 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
4024 if (rc != X86EMUL_CONTINUE) 4044 if (rc != X86EMUL_CONTINUE)
4025 return rc; 4045 return rc;
4026 4046
4027 ctxt->ops->get_fpu(ctxt);
4028
4029 size = fxstate_size(ctxt); 4047 size = fxstate_size(ctxt);
4048 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
4049 if (rc != X86EMUL_CONTINUE)
4050 return rc;
4051
4030 if (size < __fxstate_size(16)) { 4052 if (size < __fxstate_size(16)) {
4031 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); 4053 rc = fxregs_fixup(&fx_state, size);
4032 if (rc != X86EMUL_CONTINUE) 4054 if (rc != X86EMUL_CONTINUE)
4033 goto out; 4055 goto out;
4034 } 4056 }
4035 4057
4036 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
4037 if (rc != X86EMUL_CONTINUE)
4038 goto out;
4039
4040 if (fx_state.mxcsr >> 16) { 4058 if (fx_state.mxcsr >> 16) {
4041 rc = emulate_gp(ctxt, 0); 4059 rc = emulate_gp(ctxt, 0);
4042 goto out; 4060 goto out;
@@ -4046,8 +4064,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
4046 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); 4064 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
4047 4065
4048out: 4066out:
4049 ctxt->ops->put_fpu(ctxt);
4050
4051 return rc; 4067 return rc;
4052} 4068}
4053 4069
@@ -5000,6 +5016,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
5000 bool op_prefix = false; 5016 bool op_prefix = false;
5001 bool has_seg_override = false; 5017 bool has_seg_override = false;
5002 struct opcode opcode; 5018 struct opcode opcode;
5019 u16 dummy;
5020 struct desc_struct desc;
5003 5021
5004 ctxt->memop.type = OP_NONE; 5022 ctxt->memop.type = OP_NONE;
5005 ctxt->memopp = NULL; 5023 ctxt->memopp = NULL;
@@ -5018,6 +5036,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
5018 switch (mode) { 5036 switch (mode) {
5019 case X86EMUL_MODE_REAL: 5037 case X86EMUL_MODE_REAL:
5020 case X86EMUL_MODE_VM86: 5038 case X86EMUL_MODE_VM86:
5039 def_op_bytes = def_ad_bytes = 2;
5040 ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
5041 if (desc.d)
5042 def_op_bytes = def_ad_bytes = 4;
5043 break;
5021 case X86EMUL_MODE_PROT16: 5044 case X86EMUL_MODE_PROT16:
5022 def_op_bytes = def_ad_bytes = 2; 5045 def_op_bytes = def_ad_bytes = 2;
5023 break; 5046 break;
@@ -5290,9 +5313,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5290{ 5313{
5291 int rc; 5314 int rc;
5292 5315
5293 ctxt->ops->get_fpu(ctxt);
5294 rc = asm_safe("fwait"); 5316 rc = asm_safe("fwait");
5295 ctxt->ops->put_fpu(ctxt);
5296 5317
5297 if (unlikely(rc != X86EMUL_CONTINUE)) 5318 if (unlikely(rc != X86EMUL_CONTINUE))
5298 return emulate_exception(ctxt, MF_VECTOR, 0, false); 5319 return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index bdff437acbcb..4e822ad363f3 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
209 209
210 old_irr = ioapic->irr; 210 old_irr = ioapic->irr;
211 ioapic->irr |= mask; 211 ioapic->irr |= mask;
212 if (edge) 212 if (edge) {
213 ioapic->irr_delivered &= ~mask; 213 ioapic->irr_delivered &= ~mask;
214 if ((edge && old_irr == ioapic->irr) || 214 if (old_irr == ioapic->irr) {
215 (!edge && entry.fields.remote_irr)) { 215 ret = 0;
216 ret = 0; 216 goto out;
217 goto out; 217 }
218 } 218 }
219 219
220 ret = ioapic_service(ioapic, irq, line_status); 220 ret = ioapic_service(ioapic, irq, line_status);
@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
257 index == RTC_GSI) { 257 index == RTC_GSI) {
258 if (kvm_apic_match_dest(vcpu, NULL, 0, 258 if (kvm_apic_match_dest(vcpu, NULL, 0,
259 e->fields.dest_id, e->fields.dest_mode) || 259 e->fields.dest_id, e->fields.dest_mode) ||
260 (e->fields.trig_mode == IOAPIC_EDGE_TRIG && 260 kvm_apic_pending_eoi(vcpu, e->fields.vector))
261 kvm_apic_pending_eoi(vcpu, e->fields.vector)))
262 __set_bit(e->fields.vector, 261 __set_bit(e->fields.vector,
263 ioapic_handled_vectors); 262 ioapic_handled_vectors);
264 } 263 }
@@ -277,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
277{ 276{
278 unsigned index; 277 unsigned index;
279 bool mask_before, mask_after; 278 bool mask_before, mask_after;
279 int old_remote_irr, old_delivery_status;
280 union kvm_ioapic_redirect_entry *e; 280 union kvm_ioapic_redirect_entry *e;
281 281
282 switch (ioapic->ioregsel) { 282 switch (ioapic->ioregsel) {
@@ -299,14 +299,28 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
299 return; 299 return;
300 e = &ioapic->redirtbl[index]; 300 e = &ioapic->redirtbl[index];
301 mask_before = e->fields.mask; 301 mask_before = e->fields.mask;
302 /* Preserve read-only fields */
303 old_remote_irr = e->fields.remote_irr;
304 old_delivery_status = e->fields.delivery_status;
302 if (ioapic->ioregsel & 1) { 305 if (ioapic->ioregsel & 1) {
303 e->bits &= 0xffffffff; 306 e->bits &= 0xffffffff;
304 e->bits |= (u64) val << 32; 307 e->bits |= (u64) val << 32;
305 } else { 308 } else {
306 e->bits &= ~0xffffffffULL; 309 e->bits &= ~0xffffffffULL;
307 e->bits |= (u32) val; 310 e->bits |= (u32) val;
308 e->fields.remote_irr = 0;
309 } 311 }
312 e->fields.remote_irr = old_remote_irr;
313 e->fields.delivery_status = old_delivery_status;
314
315 /*
 316 * Some OSes (Linux, Xen) assume that the Remote IRR bit will
317 * be cleared by IOAPIC hardware when the entry is configured
318 * as edge-triggered. This behavior is used to simulate an
319 * explicit EOI on IOAPICs that don't have the EOI register.
320 */
321 if (e->fields.trig_mode == IOAPIC_EDGE_TRIG)
322 e->fields.remote_irr = 0;
323
310 mask_after = e->fields.mask; 324 mask_after = e->fields.mask;
311 if (mask_before != mask_after) 325 if (mask_before != mask_after)
312 kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); 326 kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
@@ -324,7 +338,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
324 struct kvm_lapic_irq irqe; 338 struct kvm_lapic_irq irqe;
325 int ret; 339 int ret;
326 340
327 if (entry->fields.mask) 341 if (entry->fields.mask ||
342 (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
343 entry->fields.remote_irr))
328 return -1; 344 return -1;
329 345
330 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " 346 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 943acbf00c69..e2c1fb8d35ce 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
266 recalculate_apic_map(apic->vcpu->kvm); 266 recalculate_apic_map(apic->vcpu->kvm);
267} 267}
268 268
269static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
270{
271 return ((id >> 4) << 16) | (1 << (id & 0xf));
272}
273
269static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) 274static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
270{ 275{
271 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); 276 u32 ldr = kvm_apic_calc_x2apic_ldr(id);
272 277
273 WARN_ON_ONCE(id != apic->vcpu->vcpu_id); 278 WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
274 279
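
The derived LDR packs a cluster id and an in-cluster bit; a worked example for id 0x23:

/*
 * kvm_apic_calc_x2apic_ldr(0x23):
 *   (0x23 >> 4) << 16 == 0x20000   (cluster 2)
 *   1 << (0x23 & 0xf) == 0x00008   (logical id bit 3)
 *   LDR               == 0x20008
 */
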
@@ -2245,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2245{ 2250{
2246 if (apic_x2apic_mode(vcpu->arch.apic)) { 2251 if (apic_x2apic_mode(vcpu->arch.apic)) {
2247 u32 *id = (u32 *)(s->regs + APIC_ID); 2252 u32 *id = (u32 *)(s->regs + APIC_ID);
2253 u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2248 2254
2249 if (vcpu->kvm->arch.x2apic_format) { 2255 if (vcpu->kvm->arch.x2apic_format) {
2250 if (*id != vcpu->vcpu_id) 2256 if (*id != vcpu->vcpu_id)
@@ -2255,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2255 else 2261 else
2256 *id <<= 24; 2262 *id <<= 24;
2257 } 2263 }
2264
2265 /* In x2APIC mode, the LDR is fixed and based on the id */
2266 if (set)
2267 *ldr = kvm_apic_calc_x2apic_ldr(*id);
2258 } 2268 }
2259 2269
2260 return 0; 2270 return 0;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c6640..2b8eb4da4d08 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3395 spin_lock(&vcpu->kvm->mmu_lock); 3395 spin_lock(&vcpu->kvm->mmu_lock);
3396 if(make_mmu_pages_available(vcpu) < 0) { 3396 if(make_mmu_pages_available(vcpu) < 0) {
3397 spin_unlock(&vcpu->kvm->mmu_lock); 3397 spin_unlock(&vcpu->kvm->mmu_lock);
3398 return 1; 3398 return -ENOSPC;
3399 } 3399 }
3400 sp = kvm_mmu_get_page(vcpu, 0, 0, 3400 sp = kvm_mmu_get_page(vcpu, 0, 0,
3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); 3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3410 spin_lock(&vcpu->kvm->mmu_lock); 3410 spin_lock(&vcpu->kvm->mmu_lock);
3411 if (make_mmu_pages_available(vcpu) < 0) { 3411 if (make_mmu_pages_available(vcpu) < 0) {
3412 spin_unlock(&vcpu->kvm->mmu_lock); 3412 spin_unlock(&vcpu->kvm->mmu_lock);
3413 return 1; 3413 return -ENOSPC;
3414 } 3414 }
3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), 3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); 3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3450 spin_lock(&vcpu->kvm->mmu_lock); 3450 spin_lock(&vcpu->kvm->mmu_lock);
3451 if (make_mmu_pages_available(vcpu) < 0) { 3451 if (make_mmu_pages_available(vcpu) < 0) {
3452 spin_unlock(&vcpu->kvm->mmu_lock); 3452 spin_unlock(&vcpu->kvm->mmu_lock);
3453 return 1; 3453 return -ENOSPC;
3454 } 3454 }
3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); 3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3487 spin_lock(&vcpu->kvm->mmu_lock); 3487 spin_lock(&vcpu->kvm->mmu_lock);
3488 if (make_mmu_pages_available(vcpu) < 0) { 3488 if (make_mmu_pages_available(vcpu) < 0) {
3489 spin_unlock(&vcpu->kvm->mmu_lock); 3489 spin_unlock(&vcpu->kvm->mmu_lock);
3490 return 1; 3490 return -ENOSPC;
3491 } 3491 }
3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
3493 0, ACC_ALL); 3493 0, ACC_ALL);
@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
3781bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) 3781bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
3782{ 3782{
3783 if (unlikely(!lapic_in_kernel(vcpu) || 3783 if (unlikely(!lapic_in_kernel(vcpu) ||
3784 kvm_event_needs_reinjection(vcpu))) 3784 kvm_event_needs_reinjection(vcpu) ||
3785 vcpu->arch.exception.pending))
3785 return false; 3786 return false;
3786 3787
3787 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) 3788 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
5465 5466
5466int kvm_mmu_module_init(void) 5467int kvm_mmu_module_init(void)
5467{ 5468{
5469 int ret = -ENOMEM;
5470
5468 kvm_mmu_clear_all_pte_masks(); 5471 kvm_mmu_clear_all_pte_masks();
5469 5472
5470 pte_list_desc_cache = kmem_cache_create("pte_list_desc", 5473 pte_list_desc_cache = kmem_cache_create("pte_list_desc",
5471 sizeof(struct pte_list_desc), 5474 sizeof(struct pte_list_desc),
5472 0, SLAB_ACCOUNT, NULL); 5475 0, SLAB_ACCOUNT, NULL);
5473 if (!pte_list_desc_cache) 5476 if (!pte_list_desc_cache)
5474 goto nomem; 5477 goto out;
5475 5478
5476 mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", 5479 mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
5477 sizeof(struct kvm_mmu_page), 5480 sizeof(struct kvm_mmu_page),
5478 0, SLAB_ACCOUNT, NULL); 5481 0, SLAB_ACCOUNT, NULL);
5479 if (!mmu_page_header_cache) 5482 if (!mmu_page_header_cache)
5480 goto nomem; 5483 goto out;
5481 5484
5482 if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) 5485 if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
5483 goto nomem; 5486 goto out;
5484 5487
5485 register_shrinker(&mmu_shrinker); 5488 ret = register_shrinker(&mmu_shrinker);
5489 if (ret)
5490 goto out;
5486 5491
5487 return 0; 5492 return 0;
5488 5493
5489nomem: 5494out:
5490 mmu_destroy_caches(); 5495 mmu_destroy_caches();
5491 return -ENOMEM; 5496 return ret;
5492} 5497}
5493 5498
5494/* 5499/*
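
The kvm_mmu_module_init() hunk converts a fixed -ENOMEM failure path into the usual single-exit cleanup shape and starts honouring register_shrinker()'s return value now that it can fail. A minimal user-space sketch of that shape (all helper names below are stand-ins, not kernel APIs):

    #include <errno.h>
    #include <stddef.h>

    /* Stand-ins for the two slab caches and the shrinker registration. */
    static void *cache_a, *cache_b;
    static void *alloc_cache(void)     { static int x; return &x; }
    static int   register_stub(void)   { return 0; }
    static void  destroy_caches(void)  { cache_a = cache_b = NULL; }

    static int module_init_sketch(void)
    {
        int ret = -ENOMEM;              /* default for the allocation legs */

        cache_a = alloc_cache();
        if (!cache_a)
            goto out;

        cache_b = alloc_cache();
        if (!cache_b)
            goto out;

        ret = register_stub();          /* propagate its own error code */
        if (ret)
            goto out;

        return 0;
    out:
        destroy_caches();               /* must tolerate partial setup */
        return ret;
    }

    int main(void) { return module_init_sketch(); }
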
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 59e13a79c2e3..f40d0da1f1d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -45,6 +45,7 @@
45#include <asm/debugreg.h> 45#include <asm/debugreg.h>
46#include <asm/kvm_para.h> 46#include <asm/kvm_para.h>
47#include <asm/irq_remapping.h> 47#include <asm/irq_remapping.h>
48#include <asm/nospec-branch.h>
48 49
49#include <asm/virtext.h> 50#include <asm/virtext.h>
50#include "trace.h" 51#include "trace.h"
@@ -2197,6 +2198,8 @@ static int ud_interception(struct vcpu_svm *svm)
2197 int er; 2198 int er;
2198 2199
2199 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); 2200 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
2201 if (er == EMULATE_USER_EXIT)
2202 return 0;
2200 if (er != EMULATE_DONE) 2203 if (er != EMULATE_DONE)
2201 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2204 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2202 return 1; 2205 return 1;
@@ -4977,6 +4980,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
4977 "mov %%r14, %c[r14](%[svm]) \n\t" 4980 "mov %%r14, %c[r14](%[svm]) \n\t"
4978 "mov %%r15, %c[r15](%[svm]) \n\t" 4981 "mov %%r15, %c[r15](%[svm]) \n\t"
4979#endif 4982#endif
4983 /*
4984 * Clear host registers marked as clobbered to prevent
4985 * speculative use.
4986 */
4987 "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
4988 "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
4989 "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
4990 "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
4991 "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
4992#ifdef CONFIG_X86_64
4993 "xor %%r8, %%r8 \n\t"
4994 "xor %%r9, %%r9 \n\t"
4995 "xor %%r10, %%r10 \n\t"
4996 "xor %%r11, %%r11 \n\t"
4997 "xor %%r12, %%r12 \n\t"
4998 "xor %%r13, %%r13 \n\t"
4999 "xor %%r14, %%r14 \n\t"
5000 "xor %%r15, %%r15 \n\t"
5001#endif
4980 "pop %%" _ASM_BP 5002 "pop %%" _ASM_BP
4981 : 5003 :
4982 : [svm]"a"(svm), 5004 : [svm]"a"(svm),
@@ -5006,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5006#endif 5028#endif
5007 ); 5029 );
5008 5030
5031 /* Eliminate branch target predictions from guest mode */
5032 vmexit_fill_RSB();
5033
5009#ifdef CONFIG_X86_64 5034#ifdef CONFIG_X86_64
5010 wrmsrl(MSR_GS_BASE, svm->host.gs_base); 5035 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
5011#else 5036#else
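
Two mitigations land in svm_vcpu_run(): the xor block above clears every guest-controlled GPR that is not live after VMEXIT, so stale values cannot feed speculative gadgets, and vmexit_fill_RSB() overwrites the Return Stack Buffer entries the guest may have planted. A conceptual, hedged sketch of the RSB-stuffing idea (not the kernel's actual macro, and not safe to drop into arbitrary code since it briefly moves the stack pointer):

    /* 32 CALLs plant benign return predictions; each pushed address
     * points at a pause/lfence trap, and the stack is repaired after. */
    static void rsb_stuff_sketch(void)
    {
        asm volatile(
            ".rept 32\n\t"
            "call 1f\n\t"              /* one RSB entry -> the trap below */
            "2: pause\n\t"
            "lfence\n\t"
            "jmp 2b\n\t"               /* contain any speculative return */
            "1:\n\t"
            ".endr\n\t"
            "add $256, %%rsp\n\t"      /* drop the 32 * 8-byte return addrs */
            ::: "memory", "cc");
    }

    int main(void)
    {
        rsb_stuff_sketch();
        return 0;
    }
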
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 714a0673ec3c..c829d89e2e63 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,7 @@
50#include <asm/apic.h> 50#include <asm/apic.h>
51#include <asm/irq_remapping.h> 51#include <asm/irq_remapping.h>
52#include <asm/mmu_context.h> 52#include <asm/mmu_context.h>
53#include <asm/nospec-branch.h>
53 54
54#include "trace.h" 55#include "trace.h"
55#include "pmu.h" 56#include "pmu.h"
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
899{ 900{
900 BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); 901 BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
901 902
902 if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || 903 if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
903 vmcs_field_to_offset_table[field] == 0) 904 return -ENOENT;
905
906 /*
907 * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
908 * generic mechanism.
909 */
910 asm("lfence");
911
912 if (vmcs_field_to_offset_table[field] == 0)
904 return -ENOENT; 913 return -ENOENT;
905 914
906 return vmcs_field_to_offset_table[field]; 915 return vmcs_field_to_offset_table[field];
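
The lfence sits between the bounds check and the dependent table load, which is the canonical Spectre-v1 sequence: the fence keeps an attacker-trained branch predictor from speculatively indexing the array with an out-of-range field value. A minimal stand-alone illustration of the pattern (GCC/Clang on x86):

    #include <stdio.h>

    static short table_lookup(const short *table, unsigned long nent,
                              unsigned long idx)
    {
        if (idx >= nent)
            return -1;

        /* No speculative load of table[idx] may pass the bounds check. */
        asm volatile("lfence" ::: "memory");

        return table[idx];
    }

    int main(void)
    {
        short t[4] = { 10, 20, 30, 40 };
        printf("%d %d\n", table_lookup(t, 4, 2), table_lookup(t, 4, 99));
        return 0;
    }
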
@@ -2300,7 +2309,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2300 * processors. See 22.2.4. 2309 * processors. See 22.2.4.
2301 */ 2310 */
2302 vmcs_writel(HOST_TR_BASE, 2311 vmcs_writel(HOST_TR_BASE,
2303 (unsigned long)this_cpu_ptr(&cpu_tss)); 2312 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2304 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ 2313 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2305 2314
2306 /* 2315 /*
@@ -5600,7 +5609,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5600 vmcs_write64(GUEST_IA32_DEBUGCTL, 0); 5609 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
5601 } 5610 }
5602 5611
5603 vmcs_writel(GUEST_RFLAGS, 0x02); 5612 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
5604 kvm_rip_write(vcpu, 0xfff0); 5613 kvm_rip_write(vcpu, 0xfff0);
5605 5614
5606 vmcs_writel(GUEST_GDTR_BASE, 0); 5615 vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -5915,11 +5924,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
5915 return 1; /* already handled by vmx_vcpu_run() */ 5924 return 1; /* already handled by vmx_vcpu_run() */
5916 5925
5917 if (is_invalid_opcode(intr_info)) { 5926 if (is_invalid_opcode(intr_info)) {
5918 if (is_guest_mode(vcpu)) {
5919 kvm_queue_exception(vcpu, UD_VECTOR);
5920 return 1;
5921 }
5922 er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); 5927 er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
5928 if (er == EMULATE_USER_EXIT)
5929 return 0;
5923 if (er != EMULATE_DONE) 5930 if (er != EMULATE_DONE)
5924 kvm_queue_exception(vcpu, UD_VECTOR); 5931 kvm_queue_exception(vcpu, UD_VECTOR);
5925 return 1; 5932 return 1;
@@ -6602,7 +6609,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
6602 if (kvm_test_request(KVM_REQ_EVENT, vcpu)) 6609 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
6603 return 1; 6610 return 1;
6604 6611
6605 err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); 6612 err = emulate_instruction(vcpu, 0);
6606 6613
6607 if (err == EMULATE_USER_EXIT) { 6614 if (err == EMULATE_USER_EXIT) {
6608 ++vcpu->stat.mmio_exits; 6615 ++vcpu->stat.mmio_exits;
@@ -6750,16 +6757,10 @@ static __init int hardware_setup(void)
6750 goto out; 6757 goto out;
6751 } 6758 }
6752 6759
6753 vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
6754 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); 6760 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
6755 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); 6761 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
6756 6762
6757 /*
6758 * Allow direct access to the PC debug port (it is often used for I/O
6759 * delays, but the vmexits simply slow things down).
6760 */
6761 memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); 6763 memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
6762 clear_bit(0x80, vmx_io_bitmap_a);
6763 6764
6764 memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); 6765 memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
6765 6766
@@ -7414,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
7414 */ 7415 */
7415static void free_nested(struct vcpu_vmx *vmx) 7416static void free_nested(struct vcpu_vmx *vmx)
7416{ 7417{
7417 if (!vmx->nested.vmxon) 7418 if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
7418 return; 7419 return;
7419 7420
7420 vmx->nested.vmxon = false; 7421 vmx->nested.vmxon = false;
7422 vmx->nested.smm.vmxon = false;
7421 free_vpid(vmx->nested.vpid02); 7423 free_vpid(vmx->nested.vpid02);
7422 vmx->nested.posted_intr_nv = -1; 7424 vmx->nested.posted_intr_nv = -1;
7423 vmx->nested.current_vmptr = -1ull; 7425 vmx->nested.current_vmptr = -1ull;
@@ -9419,6 +9421,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9419 /* Save guest registers, load host registers, keep flags */ 9421 /* Save guest registers, load host registers, keep flags */
9420 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" 9422 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
9421 "pop %0 \n\t" 9423 "pop %0 \n\t"
9424 "setbe %c[fail](%0)\n\t"
9422 "mov %%" _ASM_AX ", %c[rax](%0) \n\t" 9425 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
9423 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" 9426 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
9424 __ASM_SIZE(pop) " %c[rcx](%0) \n\t" 9427 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -9435,12 +9438,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9435 "mov %%r13, %c[r13](%0) \n\t" 9438 "mov %%r13, %c[r13](%0) \n\t"
9436 "mov %%r14, %c[r14](%0) \n\t" 9439 "mov %%r14, %c[r14](%0) \n\t"
9437 "mov %%r15, %c[r15](%0) \n\t" 9440 "mov %%r15, %c[r15](%0) \n\t"
9441 "xor %%r8d, %%r8d \n\t"
9442 "xor %%r9d, %%r9d \n\t"
9443 "xor %%r10d, %%r10d \n\t"
9444 "xor %%r11d, %%r11d \n\t"
9445 "xor %%r12d, %%r12d \n\t"
9446 "xor %%r13d, %%r13d \n\t"
9447 "xor %%r14d, %%r14d \n\t"
9448 "xor %%r15d, %%r15d \n\t"
9438#endif 9449#endif
9439 "mov %%cr2, %%" _ASM_AX " \n\t" 9450 "mov %%cr2, %%" _ASM_AX " \n\t"
9440 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" 9451 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
9441 9452
9453 "xor %%eax, %%eax \n\t"
9454 "xor %%ebx, %%ebx \n\t"
9455 "xor %%esi, %%esi \n\t"
9456 "xor %%edi, %%edi \n\t"
9442 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" 9457 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
9443 "setbe %c[fail](%0) \n\t"
9444 ".pushsection .rodata \n\t" 9458 ".pushsection .rodata \n\t"
9445 ".global vmx_return \n\t" 9459 ".global vmx_return \n\t"
9446 "vmx_return: " _ASM_PTR " 2b \n\t" 9460 "vmx_return: " _ASM_PTR " 2b \n\t"
@@ -9477,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9477#endif 9491#endif
9478 ); 9492 );
9479 9493
9494 /* Eliminate branch target predictions from guest mode */
9495 vmexit_fill_RSB();
9496
9480 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ 9497 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
9481 if (debugctlmsr) 9498 if (debugctlmsr)
9482 update_debugctlmsr(debugctlmsr); 9499 update_debugctlmsr(debugctlmsr);
@@ -9800,8 +9817,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
9800 cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); 9817 cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
9801 cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); 9818 cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
9802 cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); 9819 cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
9803 /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */ 9820 cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
9804 cr4_fixed1_update(bit(11), ecx, bit(2));
9805 9821
9806#undef cr4_fixed1_update 9822#undef cr4_fixed1_update
9807} 9823}
@@ -10875,6 +10891,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10875 return 1; 10891 return 1;
10876 } 10892 }
10877 10893
10894 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
10895 (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
10896 (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
10897 return 1;
10898
10878 return 0; 10899 return 0;
10879} 10900}
10880 10901
@@ -11099,13 +11120,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
11099{ 11120{
11100 struct vcpu_vmx *vmx = to_vmx(vcpu); 11121 struct vcpu_vmx *vmx = to_vmx(vcpu);
11101 unsigned long exit_qual; 11122 unsigned long exit_qual;
11102 11123 bool block_nested_events =
11103 if (kvm_event_needs_reinjection(vcpu)) 11124 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
11104 return -EBUSY;
11105 11125
11106 if (vcpu->arch.exception.pending && 11126 if (vcpu->arch.exception.pending &&
11107 nested_vmx_check_exception(vcpu, &exit_qual)) { 11127 nested_vmx_check_exception(vcpu, &exit_qual)) {
11108 if (vmx->nested.nested_run_pending) 11128 if (block_nested_events)
11109 return -EBUSY; 11129 return -EBUSY;
11110 nested_vmx_inject_exception_vmexit(vcpu, exit_qual); 11130 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
11111 vcpu->arch.exception.pending = false; 11131 vcpu->arch.exception.pending = false;
@@ -11114,14 +11134,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
11114 11134
11115 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && 11135 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
11116 vmx->nested.preemption_timer_expired) { 11136 vmx->nested.preemption_timer_expired) {
11117 if (vmx->nested.nested_run_pending) 11137 if (block_nested_events)
11118 return -EBUSY; 11138 return -EBUSY;
11119 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); 11139 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
11120 return 0; 11140 return 0;
11121 } 11141 }
11122 11142
11123 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { 11143 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
11124 if (vmx->nested.nested_run_pending) 11144 if (block_nested_events)
11125 return -EBUSY; 11145 return -EBUSY;
11126 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 11146 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
11127 NMI_VECTOR | INTR_TYPE_NMI_INTR | 11147 NMI_VECTOR | INTR_TYPE_NMI_INTR |
@@ -11137,7 +11157,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
11137 11157
11138 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && 11158 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
11139 nested_exit_on_intr(vcpu)) { 11159 nested_exit_on_intr(vcpu)) {
11140 if (vmx->nested.nested_run_pending) 11160 if (block_nested_events)
11141 return -EBUSY; 11161 return -EBUSY;
11142 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); 11162 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
11143 return 0; 11163 return 0;
@@ -11324,6 +11344,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
11324 kvm_clear_interrupt_queue(vcpu); 11344 kvm_clear_interrupt_queue(vcpu);
11325} 11345}
11326 11346
11347static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
11348 struct vmcs12 *vmcs12)
11349{
11350 u32 entry_failure_code;
11351
11352 nested_ept_uninit_mmu_context(vcpu);
11353
11354 /*
11355 * Only PDPTE load can fail as the value of cr3 was checked on entry and
11356 * couldn't have changed.
11357 */
11358 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
11359 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
11360
11361 if (!enable_ept)
11362 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
11363}
11364
11327/* 11365/*
11328 * A part of what we need to do when the nested L2 guest exits and we want to 11366
11329 * run its L1 parent, is to reset L1's guest state to the host state specified 11367 * run its L1 parent, is to reset L1's guest state to the host state specified
@@ -11337,7 +11375,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
11337 struct vmcs12 *vmcs12) 11375 struct vmcs12 *vmcs12)
11338{ 11376{
11339 struct kvm_segment seg; 11377 struct kvm_segment seg;
11340 u32 entry_failure_code;
11341 11378
11342 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) 11379 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
11343 vcpu->arch.efer = vmcs12->host_ia32_efer; 11380 vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -11364,17 +11401,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
11364 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); 11401 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
11365 vmx_set_cr4(vcpu, vmcs12->host_cr4); 11402 vmx_set_cr4(vcpu, vmcs12->host_cr4);
11366 11403
11367 nested_ept_uninit_mmu_context(vcpu); 11404 load_vmcs12_mmu_host_state(vcpu, vmcs12);
11368
11369 /*
11370 * Only PDPTE load can fail as the value of cr3 was checked on entry and
11371 * couldn't have changed.
11372 */
11373 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
11374 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
11375
11376 if (!enable_ept)
11377 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
11378 11405
11379 if (enable_vpid) { 11406 if (enable_vpid) {
11380 /* 11407 /*
@@ -11604,6 +11631,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
11604 * accordingly. 11631 * accordingly.
11605 */ 11632 */
11606 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 11633 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
11634
11635 load_vmcs12_mmu_host_state(vcpu, vmcs12);
11636
11607 /* 11637 /*
11608 * The emulated instruction was already skipped in 11638 * The emulated instruction was already skipped in
11609 * nested_vmx_run, but the updated RIP was never 11639 * nested_vmx_run, but the updated RIP was never
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c85aa2e2d1..1cec2c62a0b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
107static bool __read_mostly ignore_msrs = 0; 107static bool __read_mostly ignore_msrs = 0;
108module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); 108module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
109 109
110static bool __read_mostly report_ignored_msrs = true;
111module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
112
110unsigned int min_timer_period_us = 500; 113unsigned int min_timer_period_us = 500;
111module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); 114module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
112 115
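
Since the parameter is created with S_IWUSR it should be adjustable at runtime; assuming the usual sysfs layout for kvm module parameters, writing 0 to /sys/module/kvm/parameters/report_ignored_msrs would silence the "ignored wrmsr/rdmsr" messages that the hunks below make conditional on it.
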
@@ -1795,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm)
1795 /* both __this_cpu_read() and rdtsc() should be on the same cpu */ 1798 /* both __this_cpu_read() and rdtsc() should be on the same cpu */
1796 get_cpu(); 1799 get_cpu();
1797 1800
1798 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, 1801 if (__this_cpu_read(cpu_tsc_khz)) {
1799 &hv_clock.tsc_shift, 1802 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
1800 &hv_clock.tsc_to_system_mul); 1803 &hv_clock.tsc_shift,
1801 ret = __pvclock_read_cycles(&hv_clock, rdtsc()); 1804 &hv_clock.tsc_to_system_mul);
1805 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
1806 } else
1807 ret = ktime_get_boot_ns() + ka->kvmclock_offset;
1802 1808
1803 put_cpu(); 1809 put_cpu();
1804 1810
@@ -1830,6 +1836,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
1830 */ 1836 */
1831 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); 1837 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
1832 1838
1839 if (guest_hv_clock.version & 1)
1840 ++guest_hv_clock.version; /* first time write, random junk */
1841
1833 vcpu->hv_clock.version = guest_hv_clock.version + 1; 1842 vcpu->hv_clock.version = guest_hv_clock.version + 1;
1834 kvm_write_guest_cached(v->kvm, &vcpu->pv_time, 1843 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1835 &vcpu->hv_clock, 1844 &vcpu->hv_clock,
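
The version field follows the pvclock/seqcount convention: even means the contents are stable, odd means an update is in flight. If the guest page happened to contain odd junk on first use, the unconditional +1 would publish an even version while the update was still in progress; bumping an odd initial value first keeps the sequence honest. A toy reader showing what the guest side relies on (illustrative names; GCC builtins stand in for the barriers):

    #include <stdio.h>

    struct toy_pvclock {
        volatile unsigned int version;        /* odd: update in flight */
        volatile unsigned long long system_time;
    };

    static unsigned long long read_stable(struct toy_pvclock *c)
    {
        unsigned int ver;
        unsigned long long t;

        do {
            ver = c->version;                 /* retry while odd... */
            __sync_synchronize();
            t = c->system_time;
            __sync_synchronize();
        } while ((ver & 1) || ver != c->version);  /* ...or changed */

        return t;
    }

    int main(void)
    {
        struct toy_pvclock c = { .version = 2, .system_time = 12345 };
        printf("%llu\n", read_stable(&c));
        return 0;
    }
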
@@ -2322,7 +2331,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2322 /* Drop writes to this legacy MSR -- see rdmsr 2331 /* Drop writes to this legacy MSR -- see rdmsr
2323 * counterpart for further detail. 2332 * counterpart for further detail.
2324 */ 2333 */
2325 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); 2334 if (report_ignored_msrs)
2335 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
2336 msr, data);
2326 break; 2337 break;
2327 case MSR_AMD64_OSVW_ID_LENGTH: 2338 case MSR_AMD64_OSVW_ID_LENGTH:
2328 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) 2339 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@ -2359,8 +2370,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2359 msr, data); 2370 msr, data);
2360 return 1; 2371 return 1;
2361 } else { 2372 } else {
2362 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", 2373 if (report_ignored_msrs)
2363 msr, data); 2374 vcpu_unimpl(vcpu,
2375 "ignored wrmsr: 0x%x data 0x%llx\n",
2376 msr, data);
2364 break; 2377 break;
2365 } 2378 }
2366 } 2379 }
@@ -2578,7 +2591,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2578 msr_info->index); 2591 msr_info->index);
2579 return 1; 2592 return 1;
2580 } else { 2593 } else {
2581 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index); 2594 if (report_ignored_msrs)
2595 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
2596 msr_info->index);
2582 msr_info->data = 0; 2597 msr_info->data = 0;
2583 } 2598 }
2584 break; 2599 break;
@@ -2922,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2922 srcu_read_unlock(&vcpu->kvm->srcu, idx); 2937 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2923 pagefault_enable(); 2938 pagefault_enable();
2924 kvm_x86_ops->vcpu_put(vcpu); 2939 kvm_x86_ops->vcpu_put(vcpu);
2925 kvm_put_guest_fpu(vcpu);
2926 vcpu->arch.last_host_tsc = rdtsc(); 2940 vcpu->arch.last_host_tsc = rdtsc();
2927} 2941}
2928 2942
@@ -4370,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4370 addr, n, v)) 4384 addr, n, v))
4371 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) 4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
4372 break; 4386 break;
4373 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); 4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
4374 handled += n; 4388 handled += n;
4375 addr += n; 4389 addr += n;
4376 len -= n; 4390 len -= n;
@@ -4629,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4629{ 4643{
4630 if (vcpu->mmio_read_completed) { 4644 if (vcpu->mmio_read_completed) {
4631 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, 4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4632 vcpu->mmio_fragments[0].gpa, *(u64 *)val); 4646 vcpu->mmio_fragments[0].gpa, val);
4633 vcpu->mmio_read_completed = 0; 4647 vcpu->mmio_read_completed = 0;
4634 return 1; 4648 return 1;
4635 } 4649 }
@@ -4651,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4651 4665
4652static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) 4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4653{ 4667{
4654 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); 4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
4655 return vcpu_mmio_write(vcpu, gpa, bytes, val); 4669 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4656} 4670}
4657 4671
4658static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, 4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4659 void *val, int bytes) 4673 void *val, int bytes)
4660{ 4674{
4661 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
4662 return X86EMUL_IO_NEEDED; 4676 return X86EMUL_IO_NEEDED;
4663} 4677}
4664 4678
@@ -5237,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
5237 emul_to_vcpu(ctxt)->arch.halt_request = 1; 5251 emul_to_vcpu(ctxt)->arch.halt_request = 1;
5238} 5252}
5239 5253
5240static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
5241{
5242 preempt_disable();
5243 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
5244}
5245
5246static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
5247{
5248 preempt_enable();
5249}
5250
5251static int emulator_intercept(struct x86_emulate_ctxt *ctxt, 5254static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
5252 struct x86_instruction_info *info, 5255 struct x86_instruction_info *info,
5253 enum x86_intercept_stage stage) 5256 enum x86_intercept_stage stage)
@@ -5325,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = {
5325 .halt = emulator_halt, 5328 .halt = emulator_halt,
5326 .wbinvd = emulator_wbinvd, 5329 .wbinvd = emulator_wbinvd,
5327 .fix_hypercall = emulator_fix_hypercall, 5330 .fix_hypercall = emulator_fix_hypercall,
5328 .get_fpu = emulator_get_fpu,
5329 .put_fpu = emulator_put_fpu,
5330 .intercept = emulator_intercept, 5331 .intercept = emulator_intercept,
5331 .get_cpuid = emulator_get_cpuid, 5332 .get_cpuid = emulator_get_cpuid,
5332 .set_nmi_mask = emulator_set_nmi_mask, 5333 .set_nmi_mask = emulator_set_nmi_mask,
@@ -5430,7 +5431,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
5430 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 5431 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5431 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 5432 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
5432 vcpu->run->internal.ndata = 0; 5433 vcpu->run->internal.ndata = 0;
5433 r = EMULATE_FAIL; 5434 r = EMULATE_USER_EXIT;
5434 } 5435 }
5435 kvm_queue_exception(vcpu, UD_VECTOR); 5436 kvm_queue_exception(vcpu, UD_VECTOR);
5436 5437
@@ -5722,6 +5723,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5722 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, 5723 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5723 emulation_type)) 5724 emulation_type))
5724 return EMULATE_DONE; 5725 return EMULATE_DONE;
5726 if (ctxt->have_exception && inject_emulated_exception(vcpu))
5727 return EMULATE_DONE;
5725 if (emulation_type & EMULTYPE_SKIP) 5728 if (emulation_type & EMULTYPE_SKIP)
5726 return EMULATE_FAIL; 5729 return EMULATE_FAIL;
5727 return handle_emulation_failure(vcpu); 5730 return handle_emulation_failure(vcpu);
@@ -6761,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
6761 kvm_x86_ops->tlb_flush(vcpu); 6764 kvm_x86_ops->tlb_flush(vcpu);
6762} 6765}
6763 6766
6767void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
6768 unsigned long start, unsigned long end)
6769{
6770 unsigned long apic_address;
6771
6772 /*
6773 * The physical address of the APIC access page is stored in the VMCS.
6774 * Update it when it becomes invalid.
6775 */
6776 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
6777 if (start <= apic_address && apic_address < end)
6778 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
6779}
6780
6764void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) 6781void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
6765{ 6782{
6766 struct page *page = NULL; 6783 struct page *page = NULL;
@@ -6935,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6935 preempt_disable(); 6952 preempt_disable();
6936 6953
6937 kvm_x86_ops->prepare_guest_switch(vcpu); 6954 kvm_x86_ops->prepare_guest_switch(vcpu);
6938 kvm_load_guest_fpu(vcpu);
6939 6955
6940 /* 6956 /*
6941 * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt 6957 * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
@@ -7248,14 +7264,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
7248 7264
7249int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7250{ 7266{
7251 struct fpu *fpu = &current->thread.fpu;
7252 int r; 7267 int r;
7253 sigset_t sigsaved;
7254 7268
7255 fpu__initialize(fpu); 7269 kvm_sigset_activate(vcpu);
7256 7270
7257 if (vcpu->sigset_active) 7271 kvm_load_guest_fpu(vcpu);
7258 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
7259 7272
7260 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 7273 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
7261 if (kvm_run->immediate_exit) { 7274 if (kvm_run->immediate_exit) {
@@ -7297,9 +7310,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7297 r = vcpu_run(vcpu); 7310 r = vcpu_run(vcpu);
7298 7311
7299out: 7312out:
7313 kvm_put_guest_fpu(vcpu);
7300 post_kvm_run_save(vcpu); 7314 post_kvm_run_save(vcpu);
7301 if (vcpu->sigset_active) 7315 kvm_sigset_deactivate(vcpu);
7302 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
7303 7316
7304 return r; 7317 return r;
7305} 7318}
@@ -7367,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7367#endif 7380#endif
7368 7381
7369 kvm_rip_write(vcpu, regs->rip); 7382 kvm_rip_write(vcpu, regs->rip);
7370 kvm_set_rflags(vcpu, regs->rflags); 7383 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
7371 7384
7372 vcpu->arch.exception.pending = false; 7385 vcpu->arch.exception.pending = false;
7373 7386
@@ -7481,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
7481} 7494}
7482EXPORT_SYMBOL_GPL(kvm_task_switch); 7495EXPORT_SYMBOL_GPL(kvm_task_switch);
7483 7496
7497int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
7498{
7499 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
7500 /*
7501 * When EFER.LME and CR0.PG are set, the processor is in
7502 * 64-bit mode (though maybe in a 32-bit code segment).
7503 * CR4.PAE and EFER.LMA must be set.
7504 */
7505 if (!(sregs->cr4 & X86_CR4_PAE_BIT)
7506 || !(sregs->efer & EFER_LMA))
7507 return -EINVAL;
7508 } else {
7509 /*
7510 * Not in 64-bit mode: EFER.LMA is clear and the code
7511 * segment cannot be 64-bit.
7512 */
7513 if (sregs->efer & EFER_LMA || sregs->cs.l)
7514 return -EINVAL;
7515 }
7516
7517 return 0;
7518}
7519
7484int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 7520int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7485 struct kvm_sregs *sregs) 7521 struct kvm_sregs *sregs)
7486{ 7522{
@@ -7493,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7493 (sregs->cr4 & X86_CR4_OSXSAVE)) 7529 (sregs->cr4 & X86_CR4_OSXSAVE))
7494 return -EINVAL; 7530 return -EINVAL;
7495 7531
7532 if (kvm_valid_sregs(vcpu, sregs))
7533 return -EINVAL;
7534
7496 apic_base_msr.data = sregs->apic_base; 7535 apic_base_msr.data = sregs->apic_base;
7497 apic_base_msr.host_initiated = true; 7536 apic_base_msr.host_initiated = true;
7498 if (kvm_set_apic_base(vcpu, &apic_base_msr)) 7537 if (kvm_set_apic_base(vcpu, &apic_base_msr))
@@ -7690,32 +7729,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
7690 vcpu->arch.cr0 |= X86_CR0_ET; 7729 vcpu->arch.cr0 |= X86_CR0_ET;
7691} 7730}
7692 7731
7732/* Swap (qemu) user FPU context for the guest FPU context. */
7693void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 7733void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
7694{ 7734{
7695 if (vcpu->guest_fpu_loaded) 7735 preempt_disable();
7696 return; 7736 copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
7697
7698 /*
7699 * Restore all possible states in the guest,
7700 * and assume host would use all available bits.
7701 * Guest xcr0 would be loaded later.
7702 */
7703 vcpu->guest_fpu_loaded = 1;
7704 __kernel_fpu_begin();
7705 /* PKRU is separately restored in kvm_x86_ops->run. */ 7737 /* PKRU is separately restored in kvm_x86_ops->run. */
7706 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, 7738 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
7707 ~XFEATURE_MASK_PKRU); 7739 ~XFEATURE_MASK_PKRU);
7740 preempt_enable();
7708 trace_kvm_fpu(1); 7741 trace_kvm_fpu(1);
7709} 7742}
7710 7743
7744/* When vcpu_run ends, restore user space FPU context. */
7711void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 7745void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
7712{ 7746{
7713 if (!vcpu->guest_fpu_loaded) 7747 preempt_disable();
7714 return;
7715
7716 vcpu->guest_fpu_loaded = 0;
7717 copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); 7748 copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
7718 __kernel_fpu_end(); 7749 copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
7750 preempt_enable();
7719 ++vcpu->stat.fpu_reload; 7751 ++vcpu->stat.fpu_reload;
7720 trace_kvm_fpu(0); 7752 trace_kvm_fpu(0);
7721} 7753}
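
With emulator_get_fpu/put_fpu gone and the load moved out of vcpu_enter_guest(), the guest FPU is now swapped in exactly once per KVM_RUN: kvm_load_guest_fpu() stashes the user (qemu) state and loads the guest state, and kvm_put_guest_fpu() reverses the swap when the ioctl returns. A toy user-space model of that ownership hand-off (deliberately simplified; no preemption or XSAVE details):

    #include <stdio.h>
    #include <string.h>

    struct fpu_state { char regs[64]; };
    static struct fpu_state cpu_fpu;          /* stand-in for the FPU */

    struct vcpu {
        struct fpu_state user_fpu;            /* userspace (qemu) state */
        struct fpu_state guest_fpu;           /* guest state */
    };

    static void load_guest_fpu(struct vcpu *v)
    {
        v->user_fpu = cpu_fpu;                /* stash the user state */
        cpu_fpu = v->guest_fpu;               /* expose the guest state */
    }

    static void put_guest_fpu(struct vcpu *v)
    {
        v->guest_fpu = cpu_fpu;               /* stash the guest state */
        cpu_fpu = v->user_fpu;                /* restore the user state */
    }

    int main(void)
    {
        static struct vcpu v;

        strcpy(cpu_fpu.regs, "user");
        strcpy(v.guest_fpu.regs, "guest");

        load_guest_fpu(&v);
        /* ... vcpu_run(): many VM entries/exits, no FPU swaps ... */
        put_guest_fpu(&v);

        printf("%s\n", cpu_fpu.regs);         /* prints "user" again */
        return 0;
    }
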
@@ -7832,7 +7864,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 7832 * To avoid having the INIT path from kvm_apic_has_events() be 7864
 7833 * called with a loaded FPU, which does not let userspace fix the state. 7865
7834 */ 7866 */
7835 kvm_put_guest_fpu(vcpu); 7867 if (init_event)
7868 kvm_put_guest_fpu(vcpu);
7836 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, 7869 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
7837 XFEATURE_MASK_BNDREGS); 7870 XFEATURE_MASK_BNDREGS);
7838 if (mpx_state_buffer) 7871 if (mpx_state_buffer)
@@ -7841,6 +7874,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
7841 XFEATURE_MASK_BNDCSR); 7874 XFEATURE_MASK_BNDCSR);
7842 if (mpx_state_buffer) 7875 if (mpx_state_buffer)
7843 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); 7876 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
7877 if (init_event)
7878 kvm_load_guest_fpu(vcpu);
7844 } 7879 }
7845 7880
7846 if (!init_event) { 7881 if (!init_event) {
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 7b181b61170e..f23934bbaf4e 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
26lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o 26lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
27lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o 27lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
29lib-$(CONFIG_RETPOLINE) += retpoline.o
29 30
30obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o 31obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
31 32
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4d34bb548b41..46e71a74e612 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -29,7 +29,8 @@
29#include <asm/errno.h> 29#include <asm/errno.h>
30#include <asm/asm.h> 30#include <asm/asm.h>
31#include <asm/export.h> 31#include <asm/export.h>
32 32#include <asm/nospec-branch.h>
33
33/* 34/*
34 * computes a partial checksum, e.g. for TCP/UDP fragments 35 * computes a partial checksum, e.g. for TCP/UDP fragments
35 */ 36 */
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
156 negl %ebx 157 negl %ebx
157 lea 45f(%ebx,%ebx,2), %ebx 158 lea 45f(%ebx,%ebx,2), %ebx
158 testl %esi, %esi 159 testl %esi, %esi
159 jmp *%ebx 160 JMP_NOSPEC %ebx
160 161
161 # Handle 2-byte-aligned regions 162 # Handle 2-byte-aligned regions
16220: addw (%esi), %ax 16320: addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
439 andl $-32,%edx 440 andl $-32,%edx
440 lea 3f(%ebx,%ebx), %ebx 441 lea 3f(%ebx,%ebx), %ebx
441 testl %esi, %esi 442 testl %esi, %esi
442 jmp *%ebx 443 JMP_NOSPEC %ebx
4431: addl $64,%esi 4441: addl $64,%esi
444 addl $64,%edi 445 addl $64,%edi
445 SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) 446 SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd23cc4..4846eff7e4c8 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops); 107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
108 108
109 /* 109 /*
 110 * Use cpu_tss as a cacheline-aligned, seldom 110 * Use cpu_tss_rw as a cacheline-aligned, seldom
111 * accessed per-cpu variable as the monitor target. 111 * accessed per-cpu variable as the monitor target.
112 */ 112 */
113 __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); 113 __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
114 114
115 /* 115 /*
116 * AMD, like Intel, supports the EAX hint and EAX=0xf 116 * AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 000000000000..cb45c6cb465f
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,48 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#include <linux/stringify.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6#include <asm/cpufeatures.h>
7#include <asm/alternative-asm.h>
8#include <asm/export.h>
9#include <asm/nospec-branch.h>
10
11.macro THUNK reg
12 .section .text.__x86.indirect_thunk.\reg
13
14ENTRY(__x86_indirect_thunk_\reg)
15 CFI_STARTPROC
16 JMP_NOSPEC %\reg
17 CFI_ENDPROC
18ENDPROC(__x86_indirect_thunk_\reg)
19.endm
20
21/*
22 * Despite being an assembler file we can't just use .irp here
23 * because __KSYM_DEPS__ only uses the C preprocessor and would
24 * only see one instance of "__x86_indirect_thunk_\reg" rather
25 * than one per register with the correct names. So we do it
26 * the simple and nasty way...
27 */
28#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
29#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
30
31GENERATE_THUNK(_ASM_AX)
32GENERATE_THUNK(_ASM_BX)
33GENERATE_THUNK(_ASM_CX)
34GENERATE_THUNK(_ASM_DX)
35GENERATE_THUNK(_ASM_SI)
36GENERATE_THUNK(_ASM_DI)
37GENERATE_THUNK(_ASM_BP)
38GENERATE_THUNK(_ASM_SP)
39#ifdef CONFIG_64BIT
40GENERATE_THUNK(r8)
41GENERATE_THUNK(r9)
42GENERATE_THUNK(r10)
43GENERATE_THUNK(r11)
44GENERATE_THUNK(r12)
45GENERATE_THUNK(r13)
46GENERATE_THUNK(r14)
47GENERATE_THUNK(r15)
48#endif
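
Each generated thunk replaces an indirect jmp/call through a register with a RET whose speculative target is pinned to a pause/lfence capture loop; only the architectural path reaches the real target. A hedged user-space restatement of the rax thunk (x86-64; __attribute__((naked)) needs GCC 8+ or Clang):

    __attribute__((naked)) void indirect_thunk_rax(void)
    {
        asm("call 1f\n\t"               /* predicted return: the trap */
            "2: pause\n\t"
            "lfence\n\t"                /* speculation is contained here */
            "jmp 2b\n\t"
            "1: mov %rax, (%rsp)\n\t"   /* swap in the real target */
            "ret");                     /* architectural jump to *%rax */
    }

Jumping to this sequence with the target in %rax behaves like jmp *%rax, but the branch predictor can no longer steer the indirect branch to attacker-chosen code.
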
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index c4d55919fac1..e0b85930dd77 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
607fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) 607fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
608fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) 608fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
609fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) 609fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
610ff: 610ff: UD0
611EndTable 611EndTable
612 612
613Table: 3-byte opcode 1 (0x0f 0x38) 613Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7177e: vpermt2d/q Vx,Hx,Wx (66),(ev) 7177e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
71980: INVEPT Gy,Mdq (66) 71980: INVEPT Gy,Mdq (66)
72081: INVPID Gy,Mdq (66) 72081: INVVPID Gy,Mdq (66)
72182: INVPCID Gy,Mdq (66) 72182: INVPCID Gy,Mdq (66)
72283: vpmultishiftqb Vx,Hx,Wx (66),(ev) 72283: vpmultishiftqb Vx,Hx,Wx (66),(ev)
72388: vexpandps/d Vpd,Wpd (66),(ev) 72388: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
970EndTable 970EndTable
971 971
972GrpTable: Grp10 972GrpTable: Grp10
973# all are UD1
9740: UD1
9751: UD1
9762: UD1
9773: UD1
9784: UD1
9795: UD1
9806: UD1
9817: UD1
973EndTable 982EndTable
974 983
975# Grp11A and Grp11B are expressed as Grp11 in Intel SDM 984# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8e13b8cc6bed..27e9e90a8d35 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
10endif 10endif
11 11
12obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 12obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
13 pat.o pgtable.o physaddr.o setup_nx.o tlb.o 13 pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
14 14
15# Make sure __phys_addr has no stackprotector 15# Make sure __phys_addr has no stackprotector
16nostackp := $(call cc-option, -fno-stack-protector) 16nostackp := $(call cc-option, -fno-stack-protector)
@@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
41obj-$(CONFIG_ACPI_NUMA) += srat.o 41obj-$(CONFIG_ACPI_NUMA) += srat.o
42obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 42obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
43 43
44obj-$(CONFIG_X86_INTEL_MPX) += mpx.o 44obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
45obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o 45obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
46obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o 46obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
47obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
47 48
48obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o 49obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
49obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o 50obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..b9283cc27622
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,166 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/spinlock.h>
4#include <linux/percpu.h>
5
6#include <asm/cpu_entry_area.h>
7#include <asm/pgtable.h>
8#include <asm/fixmap.h>
9#include <asm/desc.h>
10
11static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
12
13#ifdef CONFIG_X86_64
14static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
15 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
16#endif
17
18struct cpu_entry_area *get_cpu_entry_area(int cpu)
19{
20 unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
21 BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
22
23 return (struct cpu_entry_area *) va;
24}
25EXPORT_SYMBOL(get_cpu_entry_area);
26
27void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
28{
29 unsigned long va = (unsigned long) cea_vaddr;
30
31 set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
32}
33
34static void __init
35cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
36{
37 for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
38 cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
39}
40
41static void percpu_setup_debug_store(int cpu)
42{
43#ifdef CONFIG_CPU_SUP_INTEL
44 int npages;
45 void *cea;
46
47 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
48 return;
49
50 cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
51 npages = sizeof(struct debug_store) / PAGE_SIZE;
52 BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
53 cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
54 PAGE_KERNEL);
55
56 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
57 /*
58 * Force the population of PMDs for not yet allocated per cpu
59 * memory like debug store buffers.
60 */
61 npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
62 for (; npages; npages--, cea += PAGE_SIZE)
63 cea_set_pte(cea, 0, PAGE_NONE);
64#endif
65}
66
67/* Setup the fixmap mappings only once per-processor */
68static void __init setup_cpu_entry_area(int cpu)
69{
70#ifdef CONFIG_X86_64
71 extern char _entry_trampoline[];
72
73 /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
74 pgprot_t gdt_prot = PAGE_KERNEL_RO;
75 pgprot_t tss_prot = PAGE_KERNEL_RO;
76#else
77 /*
78 * On native 32-bit systems, the GDT cannot be read-only because
79 * our double fault handler uses a task gate, and entering through
80 * a task gate needs to change an available TSS to busy. If the
81 * GDT is read-only, that will triple fault. The TSS cannot be
82 * read-only because the CPU writes to it on task switches.
83 *
84 * On Xen PV, the GDT must be read-only because the hypervisor
85 * requires it.
86 */
87 pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
88 PAGE_KERNEL_RO : PAGE_KERNEL;
89 pgprot_t tss_prot = PAGE_KERNEL;
90#endif
91
92 cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
93 gdt_prot);
94
95 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
96 per_cpu_ptr(&entry_stack_storage, cpu), 1,
97 PAGE_KERNEL);
98
99 /*
100 * The Intel SDM says (Volume 3, 7.2.1):
101 *
102 * Avoid placing a page boundary in the part of the TSS that the
103 * processor reads during a task switch (the first 104 bytes). The
104 * processor may not correctly perform address translations if a
105 * boundary occurs in this area. During a task switch, the processor
106 * reads and writes into the first 104 bytes of each TSS (using
107 * contiguous physical addresses beginning with the physical address
108 * of the first byte of the TSS). So, after TSS access begins, if
109 * part of the 104 bytes is not physically contiguous, the processor
110 * will access incorrect information without generating a page-fault
111 * exception.
112 *
113 * There are also a lot of errata involving the TSS spanning a page
114 * boundary. Assert that we're not doing that.
115 */
116 BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
117 offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
118 BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
119 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
120 &per_cpu(cpu_tss_rw, cpu),
121 sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
122
123#ifdef CONFIG_X86_32
124 per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
125#endif
126
127#ifdef CONFIG_X86_64
128 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
129 BUILD_BUG_ON(sizeof(exception_stacks) !=
130 sizeof(((struct cpu_entry_area *)0)->exception_stacks));
131 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
132 &per_cpu(exception_stacks, cpu),
133 sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
134
135 cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
136 __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
137#endif
138 percpu_setup_debug_store(cpu);
139}
140
141static __init void setup_cpu_entry_area_ptes(void)
142{
143#ifdef CONFIG_X86_32
144 unsigned long start, end;
145
146 BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
147 BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
148
149 start = CPU_ENTRY_AREA_BASE;
150 end = start + CPU_ENTRY_AREA_MAP_SIZE;
151
152 /* Careful here: start + PMD_SIZE might wrap around */
153 for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
154 populate_extra_pte(start);
155#endif
156}
157
158void __init setup_cpu_entry_areas(void)
159{
160 unsigned int cpu;
161
162 setup_cpu_entry_area_ptes();
163
164 for_each_possible_cpu(cpu)
165 setup_cpu_entry_area(cpu);
166}
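
The BUILD_BUG_ON() guarding the TSS mapping uses a compact test worth spelling out: two offsets share a 4 KiB page exactly when their page numbers agree, i.e. when their XOR has no bits set at or above PAGE_SHIFT, so ((start ^ end) & PAGE_MASK) == 0 asserts the SDM's no-page-boundary rule. A stand-alone restatement:

    #include <assert.h>

    #define PAGE_SHIFT 12
    #define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

    /* Two offsets are on the same page iff their page numbers agree. */
    static int same_page(unsigned long a, unsigned long b)
    {
        return ((a ^ b) & PAGE_MASK) == 0;
    }

    int main(void)
    {
        assert(same_page(0x1000, 0x1fff));    /* both on page 1 */
        assert(!same_page(0x1fff, 0x2000));   /* crosses a boundary */
        return 0;
    }
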
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index bfcffdf6c577..421f2664ffa0 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -5,7 +5,7 @@
5 5
6static int ptdump_show(struct seq_file *m, void *v) 6static int ptdump_show(struct seq_file *m, void *v)
7{ 7{
8 ptdump_walk_pgd_level(m, NULL); 8 ptdump_walk_pgd_level_debugfs(m, NULL, false);
9 return 0; 9 return 0;
10} 10}
11 11
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
22 .release = single_release, 22 .release = single_release,
23}; 23};
24 24
25static struct dentry *pe; 25static int ptdump_show_curknl(struct seq_file *m, void *v)
26{
27 if (current->mm->pgd) {
28 down_read(&current->mm->mmap_sem);
29 ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
30 up_read(&current->mm->mmap_sem);
31 }
32 return 0;
33}
34
35static int ptdump_open_curknl(struct inode *inode, struct file *filp)
36{
37 return single_open(filp, ptdump_show_curknl, NULL);
38}
39
40static const struct file_operations ptdump_curknl_fops = {
41 .owner = THIS_MODULE,
42 .open = ptdump_open_curknl,
43 .read = seq_read,
44 .llseek = seq_lseek,
45 .release = single_release,
46};
47
48#ifdef CONFIG_PAGE_TABLE_ISOLATION
49static struct dentry *pe_curusr;
50
51static int ptdump_show_curusr(struct seq_file *m, void *v)
52{
53 if (current->mm->pgd) {
54 down_read(&current->mm->mmap_sem);
55 ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
56 up_read(&current->mm->mmap_sem);
57 }
58 return 0;
59}
60
61static int ptdump_open_curusr(struct inode *inode, struct file *filp)
62{
63 return single_open(filp, ptdump_show_curusr, NULL);
64}
65
66static const struct file_operations ptdump_curusr_fops = {
67 .owner = THIS_MODULE,
68 .open = ptdump_open_curusr,
69 .read = seq_read,
70 .llseek = seq_lseek,
71 .release = single_release,
72};
73#endif
74
75static struct dentry *dir, *pe_knl, *pe_curknl;
26 76
27static int __init pt_dump_debug_init(void) 77static int __init pt_dump_debug_init(void)
28{ 78{
29 pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL, 79 dir = debugfs_create_dir("page_tables", NULL);
30 &ptdump_fops); 80 if (!dir)
31 if (!pe)
32 return -ENOMEM; 81 return -ENOMEM;
33 82
83 pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
84 &ptdump_fops);
85 if (!pe_knl)
86 goto err;
87
88 pe_curknl = debugfs_create_file("current_kernel", 0400,
89 dir, NULL, &ptdump_curknl_fops);
90 if (!pe_curknl)
91 goto err;
92
93#ifdef CONFIG_PAGE_TABLE_ISOLATION
94 pe_curusr = debugfs_create_file("current_user", 0400,
95 dir, NULL, &ptdump_curusr_fops);
96 if (!pe_curusr)
97 goto err;
98#endif
34 return 0; 99 return 0;
100err:
101 debugfs_remove_recursive(dir);
102 return -ENOMEM;
35} 103}
36 104
37static void __exit pt_dump_debug_exit(void) 105static void __exit pt_dump_debug_exit(void)
38{ 106{
39 debugfs_remove_recursive(pe); 107 debugfs_remove_recursive(dir);
40} 108}
41 109
42module_init(pt_dump_debug_init); 110module_init(pt_dump_debug_init);
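
After this change the single kernel_page_tables file becomes a directory: assuming debugfs is mounted at /sys/kernel/debug, the kernel mapping dump moves to /sys/kernel/debug/page_tables/kernel, the current process's kernel-side tables appear as current_kernel, and with CONFIG_PAGE_TABLE_ISOLATION the shadow user-space copy as current_user; all three are root-only (mode 0400).
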
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e3ac6fe6c9e..2a4849e92831 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -44,68 +44,97 @@ struct addr_marker {
44 unsigned long max_lines; 44 unsigned long max_lines;
45}; 45};
46 46
47/* indices for address_markers; keep sync'd w/ address_markers below */ 47/* Address space markers hints */
48
49#ifdef CONFIG_X86_64
50
48enum address_markers_idx { 51enum address_markers_idx {
49 USER_SPACE_NR = 0, 52 USER_SPACE_NR = 0,
50#ifdef CONFIG_X86_64
51 KERNEL_SPACE_NR, 53 KERNEL_SPACE_NR,
52 LOW_KERNEL_NR, 54 LOW_KERNEL_NR,
55#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
56 LDT_NR,
57#endif
53 VMALLOC_START_NR, 58 VMALLOC_START_NR,
54 VMEMMAP_START_NR, 59 VMEMMAP_START_NR,
55#ifdef CONFIG_KASAN 60#ifdef CONFIG_KASAN
56 KASAN_SHADOW_START_NR, 61 KASAN_SHADOW_START_NR,
57 KASAN_SHADOW_END_NR, 62 KASAN_SHADOW_END_NR,
58#endif 63#endif
59# ifdef CONFIG_X86_ESPFIX64 64 CPU_ENTRY_AREA_NR,
65#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
66 LDT_NR,
67#endif
68#ifdef CONFIG_X86_ESPFIX64
60 ESPFIX_START_NR, 69 ESPFIX_START_NR,
61# endif 70#endif
71#ifdef CONFIG_EFI
72 EFI_END_NR,
73#endif
62 HIGH_KERNEL_NR, 74 HIGH_KERNEL_NR,
63 MODULES_VADDR_NR, 75 MODULES_VADDR_NR,
64 MODULES_END_NR, 76 MODULES_END_NR,
65#else 77 FIXADDR_START_NR,
78 END_OF_SPACE_NR,
79};
80
81static struct addr_marker address_markers[] = {
82 [USER_SPACE_NR] = { 0, "User Space" },
83 [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
84 [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
85 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
86 [VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
87#ifdef CONFIG_KASAN
88 [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
89 [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
90#endif
91#ifdef CONFIG_MODIFY_LDT_SYSCALL
92 [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
93#endif
94 [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
95#ifdef CONFIG_X86_ESPFIX64
96 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
97#endif
98#ifdef CONFIG_EFI
99 [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
100#endif
101 [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
102 [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
103 [MODULES_END_NR] = { MODULES_END, "End Modules" },
104 [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
105 [END_OF_SPACE_NR] = { -1, NULL }
106};
107
108#else /* CONFIG_X86_64 */
109
110enum address_markers_idx {
111 USER_SPACE_NR = 0,
66 KERNEL_SPACE_NR, 112 KERNEL_SPACE_NR,
67 VMALLOC_START_NR, 113 VMALLOC_START_NR,
68 VMALLOC_END_NR, 114 VMALLOC_END_NR,
69# ifdef CONFIG_HIGHMEM 115#ifdef CONFIG_HIGHMEM
70 PKMAP_BASE_NR, 116 PKMAP_BASE_NR,
71# endif
72 FIXADDR_START_NR,
73#endif 117#endif
118 CPU_ENTRY_AREA_NR,
119 FIXADDR_START_NR,
120 END_OF_SPACE_NR,
74}; 121};
75 122
76/* Address space markers hints */
77static struct addr_marker address_markers[] = { 123static struct addr_marker address_markers[] = {
78 { 0, "User Space" }, 124 [USER_SPACE_NR] = { 0, "User Space" },
79#ifdef CONFIG_X86_64 125 [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
80 { 0x8000000000000000UL, "Kernel Space" }, 126 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
81 { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, 127 [VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
82 { 0/* VMALLOC_START */, "vmalloc() Area" }, 128#ifdef CONFIG_HIGHMEM
83 { 0/* VMEMMAP_START */, "Vmemmap" }, 129 [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
84#ifdef CONFIG_KASAN
85 { KASAN_SHADOW_START, "KASAN shadow" },
86 { KASAN_SHADOW_END, "KASAN shadow end" },
87#endif 130#endif
88# ifdef CONFIG_X86_ESPFIX64 131 [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
89 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, 132 [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
90# endif 133 [END_OF_SPACE_NR] = { -1, NULL }
91# ifdef CONFIG_EFI
92 { EFI_VA_END, "EFI Runtime Services" },
93# endif
94 { __START_KERNEL_map, "High Kernel Mapping" },
95 { MODULES_VADDR, "Modules" },
96 { MODULES_END, "End Modules" },
97#else
98 { PAGE_OFFSET, "Kernel Mapping" },
99 { 0/* VMALLOC_START */, "vmalloc() Area" },
100 { 0/*VMALLOC_END*/, "vmalloc() End" },
101# ifdef CONFIG_HIGHMEM
102 { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
103# endif
104 { 0/*FIXADDR_START*/, "Fixmap Area" },
105#endif
106 { -1, NULL } /* End of list */
107}; 134};
108 135
136#endif /* !CONFIG_X86_64 */
137
109/* Multipliers for offsets within the PTEs */ 138/* Multipliers for offsets within the PTEs */
110#define PTE_LEVEL_MULT (PAGE_SIZE) 139#define PTE_LEVEL_MULT (PAGE_SIZE)
111#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) 140#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
140 static const char * const level_name[] = 169 static const char * const level_name[] =
141 { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; 170 { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
142 171
143 if (!pgprot_val(prot)) { 172 if (!(pr & _PAGE_PRESENT)) {
144 /* Not present */ 173 /* Not present */
145 pt_dump_cont_printf(m, dmsg, " "); 174 pt_dump_cont_printf(m, dmsg, " ");
146 } else { 175 } else {
@@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
447} 476}
448 477
449static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, 478static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
450 bool checkwx) 479 bool checkwx, bool dmesg)
451{ 480{
452#ifdef CONFIG_X86_64 481#ifdef CONFIG_X86_64
453 pgd_t *start = (pgd_t *) &init_top_pgt; 482 pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
460 489
461 if (pgd) { 490 if (pgd) {
462 start = pgd; 491 start = pgd;
463 st.to_dmesg = true; 492 st.to_dmesg = dmesg;
464 } 493 }
465 494
466 st.check_wx = checkwx; 495 st.check_wx = checkwx;
@@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
498 527
499void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) 528void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
500{ 529{
501 ptdump_walk_pgd_level_core(m, pgd, false); 530 ptdump_walk_pgd_level_core(m, pgd, false, true);
531}
532
533void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
534{
535#ifdef CONFIG_PAGE_TABLE_ISOLATION
536 if (user && static_cpu_has(X86_FEATURE_PTI))
537 pgd = kernel_to_user_pgdp(pgd);
538#endif
539 ptdump_walk_pgd_level_core(m, pgd, false, false);
540}
541EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
542
543static void ptdump_walk_user_pgd_level_checkwx(void)
544{
545#ifdef CONFIG_PAGE_TABLE_ISOLATION
546 pgd_t *pgd = (pgd_t *) &init_top_pgt;
547
548 if (!static_cpu_has(X86_FEATURE_PTI))
549 return;
550
551 pr_info("x86/mm: Checking user space page tables\n");
552 pgd = kernel_to_user_pgdp(pgd);
553 ptdump_walk_pgd_level_core(NULL, pgd, true, false);
554#endif
502} 555}
503EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
504 556
505void ptdump_walk_pgd_level_checkwx(void) 557void ptdump_walk_pgd_level_checkwx(void)
506{ 558{
507 ptdump_walk_pgd_level_core(NULL, NULL, true); 559 ptdump_walk_pgd_level_core(NULL, NULL, true, false);
560 ptdump_walk_user_pgd_level_checkwx();
508} 561}
509 562
510static int __init pt_dump_init(void) 563static int __init pt_dump_init(void)
@@ -525,8 +578,8 @@ static int __init pt_dump_init(void)
525 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; 578 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
526# endif 579# endif
527 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; 580 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
581 address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
528#endif 582#endif
529
530 return 0; 583 return 0;
531} 584}
532__initcall(pt_dump_init); 585__initcall(pt_dump_init);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3321b446b66c..9fe656c42aa5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
1#include <linux/extable.h> 1#include <linux/extable.h>
2#include <linux/uaccess.h> 2#include <linux/uaccess.h>
3#include <linux/sched/debug.h> 3#include <linux/sched/debug.h>
4#include <xen/xen.h>
4 5
5#include <asm/fpu/internal.h> 6#include <asm/fpu/internal.h>
6#include <asm/traps.h> 7#include <asm/traps.h>
@@ -82,7 +83,7 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
82 83
83 return true; 84 return true;
84} 85}
85EXPORT_SYMBOL_GPL(ex_handler_refcount); 86EXPORT_SYMBOL(ex_handler_refcount);
86 87
87/* 88/*
88 * Handler for when we fail to restore a task's FPU state. We should never get 89 * Handler for when we fail to restore a task's FPU state. We should never get
@@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
212 * Old CPUs leave the high bits of CS on the stack 213 * Old CPUs leave the high bits of CS on the stack
213 * undefined. I'm not sure which CPUs do this, but at least 214 * undefined. I'm not sure which CPUs do this, but at least
214 * the 486 DX works this way. 215 * the 486 DX works this way.
216 * Xen pv domains are not using the default __KERNEL_CS.
215 */ 217 */
216 if (regs->cs != __KERNEL_CS) 218 if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
217 goto fail; 219 goto fail;
218 220
219 /* 221 /*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 78ca9a8ee454..06fe3d51d385 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -701,7 +701,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
701 else 701 else
702 printk(KERN_CONT "paging request"); 702 printk(KERN_CONT "paging request");
703 703
704 printk(KERN_CONT " at %p\n", (void *) address); 704 printk(KERN_CONT " at %px\n", (void *) address);
705 printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip); 705 printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
706 706
707 dump_pagetable(address); 707 dump_pagetable(address);
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
860 if (!printk_ratelimit()) 860 if (!printk_ratelimit())
861 return; 861 return;
862 862
863 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", 863 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
865 tsk->comm, task_pid_nr(tsk), address, 865 tsk->comm, task_pid_nr(tsk), address,
866 (void *)regs->ip, (void *)regs->sp, error_code); 866 (void *)regs->ip, (void *)regs->sp, error_code);
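The %p to %px conversions above matter because modern printk hashes %p output to keep kernel addresses out of dmesg, while %px deliberately prints the raw value, which a fault report needs to be debuggable. A minimal sketch of the distinction, assuming standard printk semantics (sketch_show_fault_address is a hypothetical helper, not part of this patch):

#include <linux/printk.h>

/* Hedged sketch: %p is hashed before printing so dmesg does not
 * leak kernel layout; %px opts out and emits the raw pointer. */
static void sketch_show_fault_address(unsigned long address)
{
	pr_alert("fault at %px, hashed form: %p\n",
		 (void *)address, (void *)address);
}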
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6fdf91ef130a..82f5252c723a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -20,6 +20,7 @@
20#include <asm/kaslr.h> 20#include <asm/kaslr.h>
21#include <asm/hypervisor.h> 21#include <asm/hypervisor.h>
22#include <asm/cpufeature.h> 22#include <asm/cpufeature.h>
23#include <asm/pti.h>
23 24
24/* 25/*
25 * We need to define the tracepoints somewhere, and tlb.c 26 * We need to define the tracepoints somewhere, and tlb.c
@@ -160,6 +161,12 @@ struct map_range {
160 161
161static int page_size_mask; 162static int page_size_mask;
162 163
164static void enable_global_pages(void)
165{
166 if (!static_cpu_has(X86_FEATURE_PTI))
167 __supported_pte_mask |= _PAGE_GLOBAL;
168}
169
163static void __init probe_page_size_mask(void) 170static void __init probe_page_size_mask(void)
164{ 171{
165 /* 172 /*
@@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void)
177 cr4_set_bits_and_update_boot(X86_CR4_PSE); 184 cr4_set_bits_and_update_boot(X86_CR4_PSE);
178 185
179 /* Enable PGE if available */ 186 /* Enable PGE if available */
187 __supported_pte_mask &= ~_PAGE_GLOBAL;
180 if (boot_cpu_has(X86_FEATURE_PGE)) { 188 if (boot_cpu_has(X86_FEATURE_PGE)) {
181 cr4_set_bits_and_update_boot(X86_CR4_PGE); 189 cr4_set_bits_and_update_boot(X86_CR4_PGE);
182 __supported_pte_mask |= _PAGE_GLOBAL; 190 enable_global_pages();
183 } else 191 }
184 __supported_pte_mask &= ~_PAGE_GLOBAL;
185 192
186 /* Enable 1 GB linear kernel mappings if available: */ 193 /* Enable 1 GB linear kernel mappings if available: */
187 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { 194 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void)
194 201
195static void setup_pcid(void) 202static void setup_pcid(void)
196{ 203{
197#ifdef CONFIG_X86_64 204 if (!IS_ENABLED(CONFIG_X86_64))
198 if (boot_cpu_has(X86_FEATURE_PCID)) { 205 return;
199 if (boot_cpu_has(X86_FEATURE_PGE)) { 206
200 /* 207 if (!boot_cpu_has(X86_FEATURE_PCID))
201 * This can't be cr4_set_bits_and_update_boot() -- 208 return;
202 * the trampoline code can't handle CR4.PCIDE and 209
203 * it wouldn't do any good anyway. Despite the name, 210 if (boot_cpu_has(X86_FEATURE_PGE)) {
204 * cr4_set_bits_and_update_boot() doesn't actually 211 /*
205 * cause the bits in question to remain set all the 212 * This can't be cr4_set_bits_and_update_boot() -- the
206 * way through the secondary boot asm. 213 * trampoline code can't handle CR4.PCIDE and it wouldn't
207 * 214 * do any good anyway. Despite the name,
208 * Instead, we brute-force it and set CR4.PCIDE 215 * cr4_set_bits_and_update_boot() doesn't actually cause
209 * manually in start_secondary(). 216 * the bits in question to remain set all the way through
210 */ 217 * the secondary boot asm.
211 cr4_set_bits(X86_CR4_PCIDE); 218 *
212 } else { 219 * Instead, we brute-force it and set CR4.PCIDE manually in
213 /* 220 * start_secondary().
214 * flush_tlb_all(), as currently implemented, won't 221 */
215 * work if PCID is on but PGE is not. Since that 222 cr4_set_bits(X86_CR4_PCIDE);
216 * combination doesn't exist on real hardware, there's 223
217 * no reason to try to fully support it, but it's 224 /*
218 * polite to avoid corrupting data if we're on 225 * INVPCID's single-context modes (2/3) only work if we set
219 * an improperly configured VM. 226 * X86_CR4_PCIDE, *and* we INVPCID support. It's unusable
220 */ 227 * on systems that have X86_CR4_PCIDE clear, or that have
221 setup_clear_cpu_cap(X86_FEATURE_PCID); 228 * no INVPCID support at all.
222 } 229 */
230 if (boot_cpu_has(X86_FEATURE_INVPCID))
231 setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
232 } else {
233 /*
234 * flush_tlb_all(), as currently implemented, won't work if
235 * PCID is on but PGE is not. Since that combination
236 * doesn't exist on real hardware, there's no reason to try
237 * to fully support it, but it's polite to avoid corrupting
238 * data if we're on an improperly configured VM.
239 */
240 setup_clear_cpu_cap(X86_FEATURE_PCID);
223 } 241 }
224#endif
225} 242}
226 243
227#ifdef CONFIG_X86_32 244#ifdef CONFIG_X86_32
@@ -622,6 +639,7 @@ void __init init_mem_mapping(void)
622{ 639{
623 unsigned long end; 640 unsigned long end;
624 641
642 pti_check_boottime_disable();
625 probe_page_size_mask(); 643 probe_page_size_mask();
626 setup_pcid(); 644 setup_pcid();
627 645
@@ -845,12 +863,12 @@ void __init zone_sizes_init(void)
845 free_area_init_nodes(max_zone_pfns); 863 free_area_init_nodes(max_zone_pfns);
846} 864}
847 865
848DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { 866__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
849 .loaded_mm = &init_mm, 867 .loaded_mm = &init_mm,
850 .next_asid = 1, 868 .next_asid = 1,
851 .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ 869 .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
852}; 870};
853EXPORT_SYMBOL_GPL(cpu_tlbstate); 871EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
854 872
855void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) 873void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
856{ 874{
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8a64a6f2848d..135c9a7898c7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/set_memory.h> 51#include <asm/set_memory.h>
52#include <asm/page_types.h> 52#include <asm/page_types.h>
53#include <asm/cpu_entry_area.h>
53#include <asm/init.h> 54#include <asm/init.h>
54 55
55#include "mm_internal.h" 56#include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
766 mem_init_print_info(NULL); 767 mem_init_print_info(NULL);
767 printk(KERN_INFO "virtual kernel memory layout:\n" 768 printk(KERN_INFO "virtual kernel memory layout:\n"
768 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 769 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
770 " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
769#ifdef CONFIG_HIGHMEM 771#ifdef CONFIG_HIGHMEM
770 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 772 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
771#endif 773#endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
777 FIXADDR_START, FIXADDR_TOP, 779 FIXADDR_START, FIXADDR_TOP,
778 (FIXADDR_TOP - FIXADDR_START) >> 10, 780 (FIXADDR_TOP - FIXADDR_START) >> 10,
779 781
782 CPU_ENTRY_AREA_BASE,
783 CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
784 CPU_ENTRY_AREA_MAP_SIZE >> 10,
785
780#ifdef CONFIG_HIGHMEM 786#ifdef CONFIG_HIGHMEM
781 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, 787 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
782 (LAST_PKMAP*PAGE_SIZE) >> 10, 788 (LAST_PKMAP*PAGE_SIZE) >> 10,
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6e4573b1da34..c45b6ec5357b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
404 return; 404 return;
405 } 405 }
406 406
407 mmiotrace_iounmap(addr);
408
407 addr = (volatile void __iomem *) 409 addr = (volatile void __iomem *)
408 (PAGE_MASK & (unsigned long __force)addr); 410 (PAGE_MASK & (unsigned long __force)addr);
409 411
410 mmiotrace_iounmap(addr);
411
412 /* Use the vm area unlocked, assuming the caller 412 /* Use the vm area unlocked, assuming the caller
413 ensures there isn't another iounmap for the same address 413 ensures there isn't another iounmap for the same address
414 in parallel. Reuse of the virtual address is prevented by 414 in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 99dfed6dfef8..47388f0c0e59 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,7 @@
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/sections.h> 16#include <asm/sections.h>
17#include <asm/pgtable.h> 17#include <asm/pgtable.h>
18#include <asm/cpu_entry_area.h>
18 19
19extern struct range pfn_mapped[E820_MAX_ENTRIES]; 20extern struct range pfn_mapped[E820_MAX_ENTRIES];
20 21
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
277void __init kasan_init(void) 278void __init kasan_init(void)
278{ 279{
279 int i; 280 int i;
281 void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
280 282
281#ifdef CONFIG_KASAN_INLINE 283#ifdef CONFIG_KASAN_INLINE
282 register_die_notifier(&kasan_die_notifier); 284 register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
321 map_range(&pfn_mapped[i]); 323 map_range(&pfn_mapped[i]);
322 } 324 }
323 325
326 shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
327 shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
328 shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
329 PAGE_SIZE);
330
331 shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
332 CPU_ENTRY_AREA_MAP_SIZE);
333 shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
334 shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
335 PAGE_SIZE);
336
324 kasan_populate_zero_shadow( 337 kasan_populate_zero_shadow(
325 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), 338 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
326 kasan_mem_to_shadow((void *)__START_KERNEL_map)); 339 shadow_cpu_entry_begin);
340
341 kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
342 (unsigned long)shadow_cpu_entry_end, 0);
343
344 kasan_populate_zero_shadow(shadow_cpu_entry_end,
345 kasan_mem_to_shadow((void *)__START_KERNEL_map));
327 346
328 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), 347 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
329 (unsigned long)kasan_mem_to_shadow(_end), 348 (unsigned long)kasan_mem_to_shadow(_end),
330 early_pfn_to_nid(__pa(_stext))); 349 early_pfn_to_nid(__pa(_stext)));
331 350
332 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), 351 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
333 (void *)KASAN_SHADOW_END); 352 (void *)KASAN_SHADOW_END);
334 353
335 load_cr3(init_top_pgt); 354 load_cr3(init_top_pgt);
336 __flush_tlb_all(); 355 __flush_tlb_all();
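The explicit round_down()/round_up() above is needed because the shadow mapping divides addresses by eight: boundaries that are page-aligned in the normal address space are generally not page-aligned once translated. A sketch of the translation this relies on, assuming the x86-64 KASAN scale shift of 3 (sketch_mem_to_shadow stands in for kasan_mem_to_shadow()):

/* Hedged sketch: one shadow byte tracks 8 bytes of address space,
 * so shadow = (addr >> 3) + KASAN_SHADOW_OFFSET (the offset comes
 * from Kconfig in the real kernel). The >> 3 is why the shadow of
 * CPU_ENTRY_AREA_BASE needs re-aligning to PAGE_SIZE above. */
static unsigned long sketch_mem_to_shadow(unsigned long addr)
{
	return (addr >> 3) + KASAN_SHADOW_OFFSET;
}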
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 879ef930e2c2..aedebd2ebf1e 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,25 +34,14 @@
34#define TB_SHIFT 40 34#define TB_SHIFT 40
35 35
36/* 36/*
37 * Virtual address start and end range for randomization. The end changes base 37 * Virtual address start and end range for randomization.
38 * on configuration to have the highest amount of space for randomization.
39 * It increases the possible random position for each randomized region.
40 * 38 *
41 * You need to add an if/def entry if you introduce a new memory region 39 * The end address could depend on more configuration options to make the
42 * compatible with KASLR. Your entry must be in logical order with memory 40 * highest amount of space for randomization available, but that's too hard
43 * layout. For example, ESPFIX is before EFI because its virtual address is 41 * to keep straight and caused issues already.
44 * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
45 * ensure that this order is correct and won't be changed.
46 */ 42 */
47static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; 43static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
48 44static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
49#if defined(CONFIG_X86_ESPFIX64)
50static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
51#elif defined(CONFIG_EFI)
52static const unsigned long vaddr_end = EFI_VA_END;
53#else
54static const unsigned long vaddr_end = __START_KERNEL_map;
55#endif
56 45
57/* Default values */ 46/* Default values */
58unsigned long page_offset_base = __PAGE_OFFSET_BASE; 47unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void)
101 unsigned long remain_entropy; 90 unsigned long remain_entropy;
102 91
103 /* 92 /*
104 * All these BUILD_BUG_ON checks ensures the memory layout is 93 * These BUILD_BUG_ON checks ensure the memory layout is consistent
105 * consistent with the vaddr_start/vaddr_end variables. 94 * with the vaddr_start/vaddr_end variables. These checks are very
95 * limited....
106 */ 96 */
107 BUILD_BUG_ON(vaddr_start >= vaddr_end); 97 BUILD_BUG_ON(vaddr_start >= vaddr_end);
108 BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && 98 BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
109 vaddr_end >= EFI_VA_END);
110 BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
111 IS_ENABLED(CONFIG_EFI)) &&
112 vaddr_end >= __START_KERNEL_map);
113 BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); 99 BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
114 100
115 if (!kaslr_memory_enabled()) 101 if (!kaslr_memory_enabled())
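For orientation, the bases randomized between vaddr_start and the new CPU_ENTRY_AREA_BASE end are held in a kaslr_regions[] table elsewhere in this file (not visible in this hunk); a hedged sketch of its shape, with the entries as this series is assumed to leave them:

/* Hedged sketch of kaslr_regions[]: each base is placed, in order,
 * inside [vaddr_start, vaddr_end), so everything must now fit
 * below CPU_ENTRY_AREA_BASE. */
static struct sketch_kaslr_region {
	unsigned long *base;
	unsigned long size_tb;
} sketch_regions[] = {
	{ &page_offset_base, 0 },		/* direct mapping, sized at boot */
	{ &vmalloc_base, VMALLOC_SIZE_TB },	/* vmalloc/ioremap space */
	{ &vmemmap_base, 1 },			/* struct page array */
};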
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/error.c
+++ /dev/null
@@ -1 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/error.h
+++ /dev/null
@@ -1 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ /dev/null
@@ -1 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/opcode.h
+++ /dev/null
@@ -1 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/pte.c
+++ /dev/null
@@ -1 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/pte.h
+++ /dev/null
@@ -1 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/selftest.c
+++ /dev/null
@@ -1 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/selftest.h
+++ /dev/null
@@ -1 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ /dev/null
@@ -1 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index c21c2ed04612..58477ec3d66d 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
435 unsigned long flags; 435 unsigned long flags;
436 int ret = 0; 436 int ret = 0;
437 unsigned long size = 0; 437 unsigned long size = 0;
438 unsigned long addr = p->addr & PAGE_MASK;
438 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); 439 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
439 unsigned int l; 440 unsigned int l;
440 pte_t *pte; 441 pte_t *pte;
441 442
442 spin_lock_irqsave(&kmmio_lock, flags); 443 spin_lock_irqsave(&kmmio_lock, flags);
443 if (get_kmmio_probe(p->addr)) { 444 if (get_kmmio_probe(addr)) {
444 ret = -EEXIST; 445 ret = -EEXIST;
445 goto out; 446 goto out;
446 } 447 }
447 448
448 pte = lookup_address(p->addr, &l); 449 pte = lookup_address(addr, &l);
449 if (!pte) { 450 if (!pte) {
450 ret = -EINVAL; 451 ret = -EINVAL;
451 goto out; 452 goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
454 kmmio_count++; 455 kmmio_count++;
455 list_add_rcu(&p->list, &kmmio_probes); 456 list_add_rcu(&p->list, &kmmio_probes);
456 while (size < size_lim) { 457 while (size < size_lim) {
457 if (add_kmmio_fault_page(p->addr + size)) 458 if (add_kmmio_fault_page(addr + size))
458 pr_err("Unable to set page fault.\n"); 459 pr_err("Unable to set page fault.\n");
459 size += page_level_size(l); 460 size += page_level_size(l);
460 } 461 }
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
528{ 529{
529 unsigned long flags; 530 unsigned long flags;
530 unsigned long size = 0; 531 unsigned long size = 0;
532 unsigned long addr = p->addr & PAGE_MASK;
531 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); 533 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
532 struct kmmio_fault_page *release_list = NULL; 534 struct kmmio_fault_page *release_list = NULL;
533 struct kmmio_delayed_release *drelease; 535 struct kmmio_delayed_release *drelease;
534 unsigned int l; 536 unsigned int l;
535 pte_t *pte; 537 pte_t *pte;
536 538
537 pte = lookup_address(p->addr, &l); 539 pte = lookup_address(addr, &l);
538 if (!pte) 540 if (!pte)
539 return; 541 return;
540 542
541 spin_lock_irqsave(&kmmio_lock, flags); 543 spin_lock_irqsave(&kmmio_lock, flags);
542 while (size < size_lim) { 544 while (size < size_lim) {
543 release_kmmio_fault_page(p->addr + size, &release_list); 545 release_kmmio_fault_page(addr + size, &release_list);
544 size += page_level_size(l); 546 size += page_level_size(l);
545 } 547 }
546 list_del_rcu(&p->list); 548 list_del_rcu(&p->list);
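Masking p->addr once up front, as the hunks above do, guarantees that registration, lookup, and release all walk the same page-aligned range even when the probe address points into the middle of a page; size_lim compensates by adding the in-page offset back. A small, runnable illustration of the arithmetic (the values are made up for the example):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long probe_addr = 0x1234, len = 0x10;
	unsigned long addr = probe_addr & PAGE_MASK;		  /* 0x1000 */
	unsigned long size_lim = len + (probe_addr & ~PAGE_MASK); /* 0x244 */

	printf("addr=%#lx size_lim=%#lx\n", addr, size_lim);
	return 0;
}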
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d9a9e9fc75dd..391b13402e40 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -405,13 +405,13 @@ bool sme_active(void)
405{ 405{
406 return sme_me_mask && !sev_enabled; 406 return sme_me_mask && !sev_enabled;
407} 407}
408EXPORT_SYMBOL_GPL(sme_active); 408EXPORT_SYMBOL(sme_active);
409 409
410bool sev_active(void) 410bool sev_active(void)
411{ 411{
412 return sme_me_mask && sev_enabled; 412 return sme_me_mask && sev_enabled;
413} 413}
414EXPORT_SYMBOL_GPL(sev_active); 414EXPORT_SYMBOL(sev_active);
415 415
416static const struct dma_map_ops sev_dma_ops = { 416static const struct dma_map_ops sev_dma_ops = {
417 .alloc = sev_alloc, 417 .alloc = sev_alloc,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 96d456a94b03..004abf9ebf12 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
355 kmem_cache_free(pgd_cache, pgd); 355 kmem_cache_free(pgd_cache, pgd);
356} 356}
357#else 357#else
358
358static inline pgd_t *_pgd_alloc(void) 359static inline pgd_t *_pgd_alloc(void)
359{ 360{
360 return (pgd_t *)__get_free_page(PGALLOC_GFP); 361 return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
361} 362}
362 363
363static inline void _pgd_free(pgd_t *pgd) 364static inline void _pgd_free(pgd_t *pgd)
364{ 365{
365 free_page((unsigned long)pgd); 366 free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
366} 367}
367#endif /* CONFIG_X86_PAE */ 368#endif /* CONFIG_X86_PAE */
368 369
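Switching from __get_free_page() to __get_free_pages(..., PGD_ALLOCATION_ORDER) follows from PTI keeping two top-level tables per mm: PGD_ALLOCATION_ORDER is 1 with page-table isolation configured and 0 otherwise. A sketch of the layout this assumes (sketch_kernel_to_user_pgdp is a stand-in for the real helper):

/* Hedged sketch: an order-1 allocation yields two adjacent 4 KiB
 * pages, the kernel PGD in the low page and the user PGD in the
 * high one, so converting a kernel pgd pointer into its user
 * counterpart is a single bit set on the address. */
static inline pgd_t *sketch_kernel_to_user_pgdp(pgd_t *pgdp)
{
	return (pgd_t *)((unsigned long)pgdp | (1UL << PAGE_SHIFT));
}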
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6b9bf023a700..c3c5274410a9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,6 +10,7 @@
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12 12
13#include <asm/cpu_entry_area.h>
13#include <asm/pgtable.h> 14#include <asm/pgtable.h>
14#include <asm/pgalloc.h> 15#include <asm/pgalloc.h>
15#include <asm/fixmap.h> 16#include <asm/fixmap.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
new file mode 100644
index 000000000000..ce38f165489b
--- /dev/null
+++ b/arch/x86/mm/pti.c
@@ -0,0 +1,368 @@
1/*
2 * Copyright(c) 2017 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * This code is based in part on work published here:
14 *
15 * https://github.com/IAIK/KAISER
16 *
 17 * The original work was written by, and signed off for the Linux
 18 * kernel by:
19 *
20 * Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
21 * Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
22 * Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
23 * Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
24 *
25 * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
26 * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
27 * Andy Lutomirsky <luto@amacapital.net>
28 */
29#include <linux/kernel.h>
30#include <linux/errno.h>
31#include <linux/string.h>
32#include <linux/types.h>
33#include <linux/bug.h>
34#include <linux/init.h>
35#include <linux/spinlock.h>
36#include <linux/mm.h>
37#include <linux/uaccess.h>
38
39#include <asm/cpufeature.h>
40#include <asm/hypervisor.h>
41#include <asm/vsyscall.h>
42#include <asm/cmdline.h>
43#include <asm/pti.h>
44#include <asm/pgtable.h>
45#include <asm/pgalloc.h>
46#include <asm/tlbflush.h>
47#include <asm/desc.h>
48
49#undef pr_fmt
50#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
51
52/* Backporting helper */
53#ifndef __GFP_NOTRACK
54#define __GFP_NOTRACK 0
55#endif
56
57static void __init pti_print_if_insecure(const char *reason)
58{
59 if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
60 pr_info("%s\n", reason);
61}
62
63static void __init pti_print_if_secure(const char *reason)
64{
65 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
66 pr_info("%s\n", reason);
67}
68
69void __init pti_check_boottime_disable(void)
70{
71 char arg[5];
72 int ret;
73
74 if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
75 pti_print_if_insecure("disabled on XEN PV.");
76 return;
77 }
78
79 ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
80 if (ret > 0) {
81 if (ret == 3 && !strncmp(arg, "off", 3)) {
82 pti_print_if_insecure("disabled on command line.");
83 return;
84 }
85 if (ret == 2 && !strncmp(arg, "on", 2)) {
86 pti_print_if_secure("force enabled on command line.");
87 goto enable;
88 }
89 if (ret == 4 && !strncmp(arg, "auto", 4))
90 goto autosel;
91 }
92
93 if (cmdline_find_option_bool(boot_command_line, "nopti")) {
94 pti_print_if_insecure("disabled on command line.");
95 return;
96 }
97
98autosel:
99 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
100 return;
101enable:
102 setup_force_cpu_cap(X86_FEATURE_PTI);
103}
104
105pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
106{
107 /*
108 * Changes to the high (kernel) portion of the kernelmode page
109 * tables are not automatically propagated to the usermode tables.
110 *
111 * Users should keep in mind that, unlike the kernelmode tables,
112 * there is no vmalloc_fault equivalent for the usermode tables.
113 * Top-level entries added to init_mm's usermode pgd after boot
114 * will not be automatically propagated to other mms.
115 */
116 if (!pgdp_maps_userspace(pgdp))
117 return pgd;
118
119 /*
120 * The user page tables get the full PGD, accessible from
121 * userspace:
122 */
123 kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
124
125 /*
126 * If this is normal user memory, make it NX in the kernel
127 * pagetables so that, if we somehow screw up and return to
128 * usermode with the kernel CR3 loaded, we'll get a page fault
129 * instead of allowing user code to execute with the wrong CR3.
130 *
131 * As exceptions, we don't set NX if:
132 * - _PAGE_USER is not set. This could be an executable
133 * EFI runtime mapping or something similar, and the kernel
134 * may execute from it
135 * - we don't have NX support
136 * - we're clearing the PGD (i.e. the new pgd is not present).
137 */
138 if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
139 (__supported_pte_mask & _PAGE_NX))
140 pgd.pgd |= _PAGE_NX;
141
142 /* return the copy of the PGD we want the kernel to use: */
143 return pgd;
144}
145
146/*
147 * Walk the user copy of the page tables (optionally) trying to allocate
148 * page table pages on the way down.
149 *
150 * Returns a pointer to a P4D on success, or NULL on failure.
151 */
152static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
153{
154 pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
155 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
156
157 if (address < PAGE_OFFSET) {
158 WARN_ONCE(1, "attempt to walk user address\n");
159 return NULL;
160 }
161
162 if (pgd_none(*pgd)) {
163 unsigned long new_p4d_page = __get_free_page(gfp);
164 if (!new_p4d_page)
165 return NULL;
166
167 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
168 }
169 BUILD_BUG_ON(pgd_large(*pgd) != 0);
170
171 return p4d_offset(pgd, address);
172}
173
174/*
175 * Walk the user copy of the page tables (optionally) trying to allocate
176 * page table pages on the way down.
177 *
178 * Returns a pointer to a PMD on success, or NULL on failure.
179 */
180static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
181{
182 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
183 p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
184 pud_t *pud;
185
186 BUILD_BUG_ON(p4d_large(*p4d) != 0);
187 if (p4d_none(*p4d)) {
188 unsigned long new_pud_page = __get_free_page(gfp);
189 if (!new_pud_page)
190 return NULL;
191
192 set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
193 }
194
195 pud = pud_offset(p4d, address);
196 /* The user page tables do not use large mappings: */
197 if (pud_large(*pud)) {
198 WARN_ON(1);
199 return NULL;
200 }
201 if (pud_none(*pud)) {
202 unsigned long new_pmd_page = __get_free_page(gfp);
203 if (!new_pmd_page)
204 return NULL;
205
206 set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
207 }
208
209 return pmd_offset(pud, address);
210}
211
212#ifdef CONFIG_X86_VSYSCALL_EMULATION
213/*
214 * Walk the shadow copy of the page tables (optionally) trying to allocate
215 * page table pages on the way down. Does not support large pages.
216 *
217 * Note: this is only used when mapping *new* kernel data into the
218 * user/shadow page tables. It is never used for userspace data.
219 *
220 * Returns a pointer to a PTE on success, or NULL on failure.
221 */
222static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
223{
224 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
225 pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
226 pte_t *pte;
227
228 /* We can't do anything sensible if we hit a large mapping. */
229 if (pmd_large(*pmd)) {
230 WARN_ON(1);
231 return NULL;
232 }
233
234 if (pmd_none(*pmd)) {
235 unsigned long new_pte_page = __get_free_page(gfp);
236 if (!new_pte_page)
237 return NULL;
238
239 set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
240 }
241
242 pte = pte_offset_kernel(pmd, address);
243 if (pte_flags(*pte) & _PAGE_USER) {
244 WARN_ONCE(1, "attempt to walk to user pte\n");
245 return NULL;
246 }
247 return pte;
248}
249
250static void __init pti_setup_vsyscall(void)
251{
252 pte_t *pte, *target_pte;
253 unsigned int level;
254
255 pte = lookup_address(VSYSCALL_ADDR, &level);
256 if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
257 return;
258
259 target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
260 if (WARN_ON(!target_pte))
261 return;
262
263 *target_pte = *pte;
264 set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
265}
266#else
267static void __init pti_setup_vsyscall(void) { }
268#endif
269
270static void __init
271pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
272{
273 unsigned long addr;
274
275 /*
276 * Clone the populated PMDs which cover start to end. These PMD areas
277 * can have holes.
278 */
279 for (addr = start; addr < end; addr += PMD_SIZE) {
280 pmd_t *pmd, *target_pmd;
281 pgd_t *pgd;
282 p4d_t *p4d;
283 pud_t *pud;
284
285 pgd = pgd_offset_k(addr);
286 if (WARN_ON(pgd_none(*pgd)))
287 return;
288 p4d = p4d_offset(pgd, addr);
289 if (WARN_ON(p4d_none(*p4d)))
290 return;
291 pud = pud_offset(p4d, addr);
292 if (pud_none(*pud))
293 continue;
294 pmd = pmd_offset(pud, addr);
295 if (pmd_none(*pmd))
296 continue;
297
298 target_pmd = pti_user_pagetable_walk_pmd(addr);
299 if (WARN_ON(!target_pmd))
300 return;
301
302 /*
303 * Copy the PMD. That is, the kernelmode and usermode
304 * tables will share the last-level page tables of this
305 * address range
306 */
307 *target_pmd = pmd_clear_flags(*pmd, clear);
308 }
309}
310
311/*
312 * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
 313 * next-level entry on 5-level systems).
314 */
315static void __init pti_clone_p4d(unsigned long addr)
316{
317 p4d_t *kernel_p4d, *user_p4d;
318 pgd_t *kernel_pgd;
319
320 user_p4d = pti_user_pagetable_walk_p4d(addr);
321 kernel_pgd = pgd_offset_k(addr);
322 kernel_p4d = p4d_offset(kernel_pgd, addr);
323 *user_p4d = *kernel_p4d;
324}
325
326/*
327 * Clone the CPU_ENTRY_AREA into the user space visible page table.
328 */
329static void __init pti_clone_user_shared(void)
330{
331 pti_clone_p4d(CPU_ENTRY_AREA_BASE);
332}
333
334/*
 335 * Clone the ESPFIX P4D into the user space visible page table
336 */
337static void __init pti_setup_espfix64(void)
338{
339#ifdef CONFIG_X86_ESPFIX64
340 pti_clone_p4d(ESPFIX_BASE_ADDR);
341#endif
342}
343
344/*
345 * Clone the populated PMDs of the entry and irqentry text and force it RO.
346 */
347static void __init pti_clone_entry_text(void)
348{
349 pti_clone_pmds((unsigned long) __entry_text_start,
350 (unsigned long) __irqentry_text_end,
351 _PAGE_RW | _PAGE_GLOBAL);
352}
353
354/*
355 * Initialize kernel page table isolation
356 */
357void __init pti_init(void)
358{
359 if (!static_cpu_has(X86_FEATURE_PTI))
360 return;
361
362 pr_info("enabled\n");
363
364 pti_clone_user_shared();
365 pti_clone_entry_text();
366 pti_setup_espfix64();
367 pti_setup_vsyscall();
368}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3118392cdf75..a1561957dccb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,38 @@
28 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi 28 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
29 */ 29 */
30 30
31/*
32 * We get here when we do something requiring a TLB invalidation
 33 * but cannot invalidate all of the contexts. We do the
34 * necessary invalidation by clearing out the 'ctx_id' which
35 * forces a TLB flush when the context is loaded.
36 */
37void clear_asid_other(void)
38{
39 u16 asid;
40
41 /*
42 * This is only expected to be set if we have disabled
43 * kernel _PAGE_GLOBAL pages.
44 */
45 if (!static_cpu_has(X86_FEATURE_PTI)) {
46 WARN_ON_ONCE(1);
47 return;
48 }
49
50 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
51 /* Do not need to flush the current asid */
52 if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
53 continue;
54 /*
55 * Make sure the next time we go to switch to
56 * this asid, we do a flush:
57 */
58 this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
59 }
60 this_cpu_write(cpu_tlbstate.invalidate_other, false);
61}
62
31atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); 63atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
32 64
33 65
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
42 return; 74 return;
43 } 75 }
44 76
77 if (this_cpu_read(cpu_tlbstate.invalidate_other))
78 clear_asid_other();
79
45 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { 80 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
46 if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != 81 if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
47 next->context.ctx_id) 82 next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
65 *need_flush = true; 100 *need_flush = true;
66} 101}
67 102
103static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
104{
105 unsigned long new_mm_cr3;
106
107 if (need_flush) {
108 invalidate_user_asid(new_asid);
109 new_mm_cr3 = build_cr3(pgdir, new_asid);
110 } else {
111 new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
112 }
113
114 /*
115 * Caution: many callers of this function expect
116 * that load_cr3() is serializing and orders TLB
117 * fills with respect to the mm_cpumask writes.
118 */
119 write_cr3(new_mm_cr3);
120}
121
68void leave_mm(int cpu) 122void leave_mm(int cpu)
69{ 123{
70 struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); 124 struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
128 * isn't free. 182 * isn't free.
129 */ 183 */
130#ifdef CONFIG_DEBUG_VM 184#ifdef CONFIG_DEBUG_VM
131 if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { 185 if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
132 /* 186 /*
133 * If we were to BUG here, we'd be very likely to kill 187 * If we were to BUG here, we'd be very likely to kill
134 * the system so hard that we don't see the call trace. 188 * the system so hard that we don't see the call trace.
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
195 if (need_flush) { 249 if (need_flush) {
196 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); 250 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
197 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); 251 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
198 write_cr3(build_cr3(next, new_asid)); 252 load_new_mm_cr3(next->pgd, new_asid, true);
199 253
200 /* 254 /*
201 * NB: This gets called via leave_mm() in the idle path 255 * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
208 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); 262 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
209 } else { 263 } else {
210 /* The new ASID is already up to date. */ 264 /* The new ASID is already up to date. */
211 write_cr3(build_cr3_noflush(next, new_asid)); 265 load_new_mm_cr3(next->pgd, new_asid, false);
212 266
213 /* See above wrt _rcuidle. */ 267 /* See above wrt _rcuidle. */
214 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); 268 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void)
288 !(cr4_read_shadow() & X86_CR4_PCIDE)); 342 !(cr4_read_shadow() & X86_CR4_PCIDE));
289 343
290 /* Force ASID 0 and force a TLB flush. */ 344 /* Force ASID 0 and force a TLB flush. */
291 write_cr3(build_cr3(mm, 0)); 345 write_cr3(build_cr3(mm->pgd, 0));
292 346
293 /* Reinitialize tlbstate. */ 347 /* Reinitialize tlbstate. */
294 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); 348 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info)
551 605
552 /* flush range by one by one 'invlpg' */ 606 /* flush range by one by one 'invlpg' */
553 for (addr = f->start; addr < f->end; addr += PAGE_SIZE) 607 for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
554 __flush_tlb_single(addr); 608 __flush_tlb_one(addr);
555} 609}
556 610
557void flush_tlb_kernel_range(unsigned long start, unsigned long end) 611void flush_tlb_kernel_range(unsigned long start, unsigned long end)
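load_new_mm_cr3() above leans on build_cr3()/build_cr3_noflush(), which are not part of this hunk. A self-contained sketch of how this series is assumed to compose CR3 values, with the hardware PCID derived from the kernel ASID (kernel PCID = asid + 1, keeping PCID 0 reserved) and bit 63 suppressing the implicit flush a CR3 write would otherwise perform:

#include <stdio.h>
#include <stdint.h>

#define SKETCH_CR3_NOFLUSH (1ULL << 63)

static uint64_t sketch_build_cr3(uint64_t pgd_pa, uint16_t asid)
{
	/* physical PGD address plus hardware PCID */
	return pgd_pa | (uint64_t)(asid + 1);
}

static uint64_t sketch_build_cr3_noflush(uint64_t pgd_pa, uint16_t asid)
{
	/* bit 63 set: the CR3 write does not flush the TLB */
	return sketch_build_cr3(pgd_pa, asid) | SKETCH_CR3_NOFLUSH;
}

int main(void)
{
	printf("%#llx\n",
	       (unsigned long long)sketch_build_cr3_noflush(0x1000, 0));
	return 0;
}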
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01ab..526536c81ddc 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
97 * We should get host bridge information from ACPI unless the BIOS 97 * We should get host bridge information from ACPI unless the BIOS
98 * doesn't support it. 98 * doesn't support it.
99 */ 99 */
100 if (acpi_os_get_root_pointer()) 100 if (!acpi_disabled && acpi_os_get_root_pointer())
101 return 0; 101 return 0;
102#endif 102#endif
103 103
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 7a5350d08cef..563049c483a1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str)
594 } else if (!strcmp(str, "nocrs")) { 594 } else if (!strcmp(str, "nocrs")) {
595 pci_probe |= PCI_ROOT_NO_CRS; 595 pci_probe |= PCI_ROOT_NO_CRS;
596 return NULL; 596 return NULL;
597#ifdef CONFIG_PHYS_ADDR_T_64BIT
598 } else if (!strcmp(str, "big_root_window")) {
599 pci_probe |= PCI_BIG_ROOT_WINDOW;
600 return NULL;
601#endif
597 } else if (!strcmp(str, "earlydump")) { 602 } else if (!strcmp(str, "earlydump")) {
598 pci_early_dump_regs = 1; 603 pci_early_dump_regs = 1;
599 return NULL; 604 return NULL;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 1e996df687a3..f6a26e3cb476 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -662,9 +662,23 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
662 */ 662 */
663static void pci_amd_enable_64bit_bar(struct pci_dev *dev) 663static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
664{ 664{
665 unsigned i;
666 u32 base, limit, high; 665 u32 base, limit, high;
667 struct resource *res, *conflict; 666 struct pci_dev *other;
667 struct resource *res;
668 unsigned i;
669 int r;
670
671 if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
672 return;
673
674 /* Check that we are the only device of that type */
675 other = pci_get_device(dev->vendor, dev->device, NULL);
676 if (other != dev ||
677 (other = pci_get_device(dev->vendor, dev->device, other))) {
678 /* This is a multi-socket system, don't touch it for now */
679 pci_dev_put(other);
680 return;
681 }
668 682
669 for (i = 0; i < 8; i++) { 683 for (i = 0; i < 8; i++) {
670 pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base); 684 pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base);
@@ -689,17 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
689 if (!res) 703 if (!res)
690 return; 704 return;
691 705
706 /*
707 * Allocate a 256GB window directly below the 0xfd00000000 hardware
708 * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
709 */
692 res->name = "PCI Bus 0000:00"; 710 res->name = "PCI Bus 0000:00";
693 res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM | 711 res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
694 IORESOURCE_MEM_64 | IORESOURCE_WINDOW; 712 IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
695 res->start = 0x100000000ull; 713 res->start = 0xbd00000000ull;
696 res->end = 0xfd00000000ull - 1; 714 res->end = 0xfd00000000ull - 1;
697 715
698 /* Just grab the free area behind system memory for this */ 716 r = request_resource(&iomem_resource, res);
699 while ((conflict = request_resource_conflict(&iomem_resource, res))) 717 if (r) {
700 res->start = conflict->end + 1; 718 kfree(res);
719 return;
720 }
701 721
702 dev_info(&dev->dev, "adding root bus resource %pR\n", res); 722 dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
723 res);
724 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
703 725
704 base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | 726 base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
705 AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK; 727 AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
@@ -714,10 +736,10 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
714 736
715 pci_bus_add_resource(dev->bus, res, 0); 737 pci_bus_add_resource(dev->bus, res, 0);
716} 738}
717DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); 739DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
718DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); 740DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
719DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); 741DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
720DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); 742DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
721DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); 743DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
722 744
723#endif 745#endif
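The fixed 256GB window replaces the old first-fit search behind system memory; the arithmetic behind the constants in the hunk is simple enough to check. A runnable worked example (values taken from the hunk itself):

#include <stdio.h>

int main(void)
{
	unsigned long long limit = 0xfd00000000ULL;	/* BKDG hardware limit */
	unsigned long long size  = 256ULL << 30;	/* 256 GiB = 0x4000000000 */

	/* prints start=0xbd00000000 end=0xfcffffffff */
	printf("start=%#llx end=%#llx\n", limit - size, limit - 1);
	return 0;
}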
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6a151ce70e86..2dd15e967c3f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void)
135 pud[j] = *pud_offset(p4d_k, vaddr); 135 pud[j] = *pud_offset(p4d_k, vaddr);
136 } 136 }
137 } 137 }
138 pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
138 } 139 }
140
139out: 141out:
140 __flush_tlb_all(); 142 __flush_tlb_all();
141 143
@@ -196,6 +198,9 @@ static pgd_t *efi_pgd;
196 * because we want to avoid inserting EFI region mappings (EFI_VA_END 198 * because we want to avoid inserting EFI region mappings (EFI_VA_END
197 * to EFI_VA_START) into the standard kernel page tables. Everything 199 * to EFI_VA_START) into the standard kernel page tables. Everything
198 * else can be shared, see efi_sync_low_kernel_mappings(). 200 * else can be shared, see efi_sync_low_kernel_mappings().
201 *
202 * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
203 * allocation.
199 */ 204 */
200int __init efi_alloc_page_tables(void) 205int __init efi_alloc_page_tables(void)
201{ 206{
@@ -208,7 +213,7 @@ int __init efi_alloc_page_tables(void)
208 return 0; 213 return 0;
209 214
210 gfp_mask = GFP_KERNEL | __GFP_ZERO; 215 gfp_mask = GFP_KERNEL | __GFP_ZERO;
211 efi_pgd = (pgd_t *)__get_free_page(gfp_mask); 216 efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
212 if (!efi_pgd) 217 if (!efi_pgd)
213 return -ENOMEM; 218 return -ENOMEM;
214 219
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 8a99a2e96537..5b513ccffde4 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
592 /* 592 /*
593 * Update the first page pointer to skip over the CSH header. 593 * Update the first page pointer to skip over the CSH header.
594 */ 594 */
595 cap_info->pages[0] += csh->headersize; 595 cap_info->phys[0] += csh->headersize;
596
597 /*
598 * cap_info->capsule should point at a virtual mapping of the entire
599 * capsule, starting at the capsule header. Our image has the Quark
600 * security header prepended, so we cannot rely on the default vmap()
601 * mapping created by the generic capsule code.
602 * Given that the Quark firmware does not appear to care about the
603 * virtual mapping, let's just point cap_info->capsule at our copy
604 * of the capsule header.
605 */
606 cap_info->capsule = &cap_info->header;
596 607
597 return 1; 608 return 1;
598} 609}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index dc036e511f48..5a0483e7bf66 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
60 return 0; 60 return 0;
61} 61}
62 62
63static const struct bt_sfi_data tng_bt_sfi_data __initdata = { 63static struct bt_sfi_data tng_bt_sfi_data __initdata = {
64 .setup = tng_bt_sfi_setup, 64 .setup = tng_bt_sfi_setup,
65}; 65};
66 66
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index f44c0bc95aa2..8538a6723171 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
299 local_flush_tlb(); 299 local_flush_tlb();
300 stat->d_alltlb++; 300 stat->d_alltlb++;
301 } else { 301 } else {
302 __flush_tlb_one(msg->address); 302 __flush_tlb_single(msg->address);
303 stat->d_onetlb++; 303 stat->d_onetlb++;
304 } 304 }
305 stat->d_requestee++; 305 stat->d_requestee++;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 5f6fd860820a..e4cb9f4cde8a 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
128 * on the specified blade to allow the sending of MSIs to the specified CPU. 128 * on the specified blade to allow the sending of MSIs to the specified CPU.
129 */ 129 */
130static int uv_domain_activate(struct irq_domain *domain, 130static int uv_domain_activate(struct irq_domain *domain,
131 struct irq_data *irq_data, bool early) 131 struct irq_data *irq_data, bool reserve)
132{ 132{
133 uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); 133 uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
134 return 0; 134 return 0;
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index c34bd8233f7c..5f64f30873e2 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
905/* 905/*
906 * UV NMI handler 906 * UV NMI handler
907 */ 907 */
908int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) 908static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
909{ 909{
910 struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi; 910 struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
911 int cpu = smp_processor_id(); 911 int cpu = smp_processor_id();
@@ -1013,7 +1013,7 @@ void uv_nmi_init(void)
1013} 1013}
1014 1014
1015/* Setup HUB NMI info */ 1015/* Setup HUB NMI info */
1016void __init uv_nmi_setup_common(bool hubbed) 1016static void __init uv_nmi_setup_common(bool hubbed)
1017{ 1017{
1018 int size = sizeof(void *) * (1 << NODES_SHIFT); 1018 int size = sizeof(void *) * (1 << NODES_SHIFT);
1019 int cpu; 1019 int cpu;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 84fcfde53f8f..a7d966964c6f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
82 /* 82 /*
83 * descriptor tables 83 * descriptor tables
84 */ 84 */
85#ifdef CONFIG_X86_32
86 store_idt(&ctxt->idt); 85 store_idt(&ctxt->idt);
87#else 86
88/* CONFIG_X86_64 */
89 store_idt((struct desc_ptr *)&ctxt->idt_limit);
90#endif
91 /* 87 /*
92 * We save it here, but restore it only in the hibernate case. 88 * We save it here, but restore it only in the hibernate case.
93 * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit 89 * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
103 /* 99 /*
104 * segment registers 100 * segment registers
105 */ 101 */
106#ifdef CONFIG_X86_32 102#ifdef CONFIG_X86_32_LAZY_GS
107 savesegment(es, ctxt->es);
108 savesegment(fs, ctxt->fs);
109 savesegment(gs, ctxt->gs); 103 savesegment(gs, ctxt->gs);
110 savesegment(ss, ctxt->ss); 104#endif
111#else 105#ifdef CONFIG_X86_64
112/* CONFIG_X86_64 */ 106 savesegment(gs, ctxt->gs);
113 asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); 107 savesegment(fs, ctxt->fs);
114 asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); 108 savesegment(ds, ctxt->ds);
115 asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); 109 savesegment(es, ctxt->es);
116 asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
117 asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
118 110
119 rdmsrl(MSR_FS_BASE, ctxt->fs_base); 111 rdmsrl(MSR_FS_BASE, ctxt->fs_base);
120 rdmsrl(MSR_GS_BASE, ctxt->gs_base); 112 rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
121 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 113 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
122 mtrr_save_fixed_ranges(NULL); 114 mtrr_save_fixed_ranges(NULL);
123 115
124 rdmsrl(MSR_EFER, ctxt->efer); 116 rdmsrl(MSR_EFER, ctxt->efer);
@@ -160,17 +152,19 @@ static void do_fpu_end(void)
160static void fix_processor_context(void) 152static void fix_processor_context(void)
161{ 153{
162 int cpu = smp_processor_id(); 154 int cpu = smp_processor_id();
163 struct tss_struct *t = &per_cpu(cpu_tss, cpu);
164#ifdef CONFIG_X86_64 155#ifdef CONFIG_X86_64
165 struct desc_struct *desc = get_cpu_gdt_rw(cpu); 156 struct desc_struct *desc = get_cpu_gdt_rw(cpu);
166 tss_desc tss; 157 tss_desc tss;
167#endif 158#endif
168 set_tss_desc(cpu, t); /* 159
169 * This just modifies memory; should not be 160 /*
170 * necessary. But... This is necessary, because 161 * We need to reload TR, which requires that we change the
171 * 386 hardware has concept of busy TSS or some 162 * GDT entry to indicate "available" first.
172 * similar stupidity. 163 *
173 */ 164 * XXX: This could probably all be replaced by a call to
165 * force_reload_TR().
166 */
167 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
174 168
175#ifdef CONFIG_X86_64 169#ifdef CONFIG_X86_64
176 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); 170 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -178,6 +172,9 @@ static void fix_processor_context(void)
178 write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS); 172 write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
179 173
180 syscall_init(); /* This sets MSR_*STAR and related */ 174 syscall_init(); /* This sets MSR_*STAR and related */
175#else
176 if (boot_cpu_has(X86_FEATURE_SEP))
177 enable_sep_cpu();
181#endif 178#endif
182 load_TR_desc(); /* This does ltr */ 179 load_TR_desc(); /* This does ltr */
183 load_mm_ldt(current->active_mm); /* This does lldt */ 180 load_mm_ldt(current->active_mm); /* This does lldt */
@@ -190,9 +187,12 @@ static void fix_processor_context(void)
190} 187}
191 188
192/** 189/**
193 * __restore_processor_state - restore the contents of CPU registers saved 190 * __restore_processor_state - restore the contents of CPU registers saved
194 * by __save_processor_state() 191 * by __save_processor_state()
195 * @ctxt - structure to load the registers' contents from 192 * @ctxt - structure to load the registers' contents from
193 *
194 * The asm code that gets us here will have restored a usable GDT, although
195 * it will be pointing to the wrong alias.
196 */ 196 */
197static void notrace __restore_processor_state(struct saved_context *ctxt) 197static void notrace __restore_processor_state(struct saved_context *ctxt)
198{ 198{
@@ -215,46 +215,52 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	write_cr2(ctxt->cr2);
 	write_cr0(ctxt->cr0);
 
+	/* Restore the IDT. */
+	load_idt(&ctxt->idt);
+
 	/*
-	 * now restore the descriptor tables to their proper values
-	 * ltr is done i fix_processor_context().
+	 * Just in case the asm code got us here with the SS, DS, or ES
+	 * out of sync with the GDT, update them.
 	 */
-#ifdef CONFIG_X86_32
-	load_idt(&ctxt->idt);
+	loadsegment(ss, __KERNEL_DS);
+	loadsegment(ds, __USER_DS);
+	loadsegment(es, __USER_DS);
+
+	/*
+	 * Restore percpu access. Percpu access can happen in exception
+	 * handlers or in complicated helpers like load_gs_index().
+	 */
+#ifdef CONFIG_X86_64
+	wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
 #else
-/* CONFIG_X86_64 */
-	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+	loadsegment(fs, __KERNEL_PERCPU);
+	loadsegment(gs, __KERNEL_STACK_CANARY);
 #endif
 
+	/* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
+	fix_processor_context();
+
 	/*
-	 * segment registers
+	 * Now that we have descriptor tables fully restored and working
+	 * exception handling, restore the usermode segments.
 	 */
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+	loadsegment(ds, ctxt->es);
 	loadsegment(es, ctxt->es);
 	loadsegment(fs, ctxt->fs);
-	loadsegment(gs, ctxt->gs);
-	loadsegment(ss, ctxt->ss);
+	load_gs_index(ctxt->gs);
 
 	/*
-	 * sysenter MSRs
+	 * Restore FSBASE and GSBASE after restoring the selectors, since
+	 * restoring the selectors clobbers the bases. Keep in mind
+	 * that MSR_KERNEL_GS_BASE is horribly misnamed.
 	 */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
-	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
-	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
-	load_gs_index(ctxt->gs);
-	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
-
 	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
-	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+	loadsegment(gs, ctxt->gs);
 #endif
 
-	fix_processor_context();
-
 	do_fpu_end();
 	tsc_verify_tsc_adjust(true);
 	x86_platform.restore_sched_clock_state();
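The rewritten __restore_processor_state() is all about ordering: the IDT must be usable before anything can fault, the percpu base must be restored before helpers such as load_gs_index() run, fix_processor_context() needs both, and the FS/GS base MSRs must be written after the selector loads that clobber the cached bases. A compile-only sketch of that dependency chain; every function below is an empty stub named after the corresponding step, not a kernel API:

#include <stdint.h>

/* Stubs standing in for the kernel helpers; bodies intentionally empty. */
static void restore_idt(void)            { }	/* load_idt(&ctxt->idt) */
static void resync_kernel_segments(void) { }	/* SS/DS/ES <- kernel/user data */
static void restore_percpu_base(void)    { }	/* MSR_GS_BASE (fs/gs on 32-bit) */
static void fix_processor_context(void)  { }	/* TSS, RO GDT, LDT, TR */
static void restore_user_selectors(void) { }	/* ds/es/fs, load_gs_index() */
static void restore_segment_bases(void)  { }	/* MSR_FS_BASE, MSR_KERNEL_GS_BASE */

static void restore_order(void)
{
	restore_idt();			/* 1: exceptions work from here on */
	resync_kernel_segments();	/* 2: segments consistent with the GDT */
	restore_percpu_base();		/* 3: percpu access for later helpers */
	fix_processor_context();	/* 4: descriptor tables and TR reload */
	restore_user_selectors();	/* 5: selectors first ... */
	restore_segment_bases();	/* 6: ... bases last, because selector
					 *    loads clobber the cached bases */
}

int main(void)
{
	restore_order();
	return 0;
}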
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 6b830d4cb4c8..de58533d3664 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -57,7 +57,7 @@ static u32 xen_apic_read(u32 reg)
 		return 0;
 
 	if (reg == APIC_LVR)
-		return 0x10;
+		return 0x14;
 #ifdef CONFIG_X86_32
 	if (reg == APIC_LDR)
 		return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d669e9d89001..c9081c6671f0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
 #include <linux/cpu.h>
 #include <linux/kexec.h>
 
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/interface/memory.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+	struct xen_memory_map memmap;
+	int rc;
+	unsigned int i, last_guest_ram;
+	phys_addr_t max_addr = PFN_PHYS(max_pfn);
+	struct e820_table *xen_e820_table;
+	const struct e820_entry *entry;
+	struct resource *res;
+
+	if (!xen_initial_domain())
+		return;
+
+	xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+	if (!xen_e820_table)
+		return;
+
+	memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+	set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+	rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+	if (rc) {
+		pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+		goto out;
+	}
+
+	last_guest_ram = 0;
+	for (i = 0; i < memmap.nr_entries; i++) {
+		if (xen_e820_table->entries[i].addr >= max_addr)
+			break;
+		if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+			last_guest_ram = i;
+	}
+
+	entry = &xen_e820_table->entries[last_guest_ram];
+	if (max_addr >= entry->addr + entry->size)
+		goto out; /* No unallocated host RAM. */
+
+	hostmem_resource->start = max_addr;
+	hostmem_resource->end = entry->addr + entry->size;
+
+	/*
+	 * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+	 * as unavailable. The rest of that region can be used for hotplug-based
+	 * ballooning.
+	 */
+	for (; i < memmap.nr_entries; i++) {
+		entry = &xen_e820_table->entries[i];
+
+		if (entry->type == E820_TYPE_RAM)
+			continue;
+
+		if (entry->addr >= hostmem_resource->end)
+			break;
+
+		res = kzalloc(sizeof(*res), GFP_KERNEL);
+		if (!res)
+			goto out;
+
+		res->name = "Unavailable host RAM";
+		res->start = entry->addr;
+		res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+			    entry->addr + entry->size : hostmem_resource->end;
+		rc = insert_resource(hostmem_resource, res);
+		if (rc) {
+			pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+				__func__, res->start, res->end, rc);
+			kfree(res);
+			goto out;
+		}
+	}
+
+ out:
+	kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
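The new arch_xen_balloon_init() walks the host e820 twice: first to find the last host RAM entry below dom0's own max_pfn, then to mark the non-RAM holes above that point unavailable so only genuine host RAM is offered for hotplug-based ballooning. A minimal user-space sketch of the window computation, using hypothetical simplified types (struct entry, TYPE_RAM) in place of the kernel's e820 structures:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-ins for struct e820_entry and its type field. */
enum { TYPE_RAM = 1, TYPE_RESERVED = 2 };
struct entry { uint64_t addr, size; int type; };

/*
 * Find the last RAM entry below max_addr; anything between max_addr and
 * the end of that host RAM region is the candidate ballooning window.
 */
static int balloon_window(const struct entry *map, unsigned int n,
			  uint64_t max_addr, uint64_t *start, uint64_t *end)
{
	unsigned int i, last_ram = 0;

	for (i = 0; i < n; i++) {
		if (map[i].addr >= max_addr)
			break;
		if (map[i].type == TYPE_RAM)
			last_ram = i;
	}

	if (max_addr >= map[last_ram].addr + map[last_ram].size)
		return -1;	/* no unallocated host RAM */

	*start = max_addr;
	*end = map[last_ram].addr + map[last_ram].size;
	return 0;
}

int main(void)
{
	const struct entry map[] = {
		{ 0x0,        0x80000000, TYPE_RAM },	/* 2 GiB host RAM */
		{ 0x80000000, 0x1000,     TYPE_RESERVED },
	};
	uint64_t s, e;

	/* Pretend dom0 was only given the first 1 GiB. */
	if (!balloon_window(map, 2, 0x40000000, &s, &e))
		printf("balloon window: [%#llx, %#llx)\n",
		       (unsigned long long)s, (unsigned long long)e);
	return 0;
}

With dom0 capped at 1 GiB of a 2 GiB host RAM region, the sketch reports the window [0x40000000, 0x80000000).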
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b2b3f3f6531..c047f42552e1 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,8 @@
88#include "multicalls.h" 88#include "multicalls.h"
89#include "pmu.h" 89#include "pmu.h"
90 90
91#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
92
91void *xen_initial_gdt; 93void *xen_initial_gdt;
92 94
93static int xen_cpu_up_prepare_pv(unsigned int cpu); 95static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -622,7 +624,7 @@ static struct trap_array_entry trap_array[] = {
 	{ simd_coprocessor_error, xen_simd_coprocessor_error, false },
 };
 
-static bool get_trap_addr(void **addr, unsigned int ist)
+static bool __ref get_trap_addr(void **addr, unsigned int ist)
 {
 	unsigned int nr;
 	bool ist_okay = false;
@@ -644,6 +646,14 @@ static bool get_trap_addr(void **addr, unsigned int ist)
 		}
 	}
 
+	if (nr == ARRAY_SIZE(trap_array) &&
+	    *addr >= (void *)early_idt_handler_array[0] &&
+	    *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
+		nr = (*addr - (void *)early_idt_handler_array[0]) /
+		     EARLY_IDT_HANDLER_SIZE;
+		*addr = (void *)xen_early_idt_handler_array[nr];
+	}
+
 	if (WARN_ON(ist != 0 && !ist_okay))
 		return false;
 
@@ -818,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
 	mcs = xen_mc_entry(0);
 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
-	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)
@@ -1250,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
 	/* Work out if we support NX */
+	get_cpu_cap(&boot_cpu_data);
 	x86_configure_nx();
 
 	/* Get mfn list */
@@ -1262,6 +1273,21 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_setup_gdt(0);
 
 	xen_init_irq_ops();
+
+	/* Let's presume PV guests always boot on vCPU with id 0. */
+	per_cpu(xen_vcpu_id, 0) = 0;
+
+	/*
+	 * Setup xen_vcpu early because idt_setup_early_handler needs it for
+	 * local_irq_disable(), irqs_disabled().
+	 *
+	 * Don't do the full vcpu_info placement stuff until we have
+	 * the cpu_possible_mask and a non-dummy shared_info.
+	 */
+	xen_vcpu_info_reset(0);
+
+	idt_setup_early_handler();
+
 	xen_init_capabilities();
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -1295,18 +1321,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	acpi_numa = -1;
 #endif
-	/* Let's presume PV guests always boot on vCPU with id 0. */
-	per_cpu(xen_vcpu_id, 0) = 0;
-
-	/*
-	 * Setup xen_vcpu early because start_kernel needs it for
-	 * local_irq_disable(), irqs_disabled().
-	 *
-	 * Don't do the full vcpu_info placement stuff until we have
-	 * the cpu_possible_mask and a non-dummy shared_info.
-	 */
-	xen_vcpu_info_reset(0);
-
 	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
 
 	local_irq_disable();
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index fc048ec686e7..d85076223a69 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 {
 	struct {
 		struct mmuext_op op;
-#ifdef CONFIG_SMP
-		DECLARE_BITMAP(mask, num_processors);
-#else
 		DECLARE_BITMAP(mask, NR_CPUS);
-#endif
 	} *args;
 	struct multicall_space mcs;
+	const size_t mc_entry_size = sizeof(args->op) +
+		sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
 
 	trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
 
 	if (cpumask_empty(cpus))
 		return;		/* nothing to do */
 
-	mcs = xen_mc_entry(sizeof(*args));
+	mcs = xen_mc_entry(mc_entry_size);
 	args = mcs.args;
 	args->op.arg2.vcpumask = to_cpumask(args->mask);
 
@@ -1902,6 +1900,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Graft it onto L4[511][510] */
 	copy_page(level2_kernel_pgt, l2);
 
+	/*
+	 * Zap execute permission from the ident map. Due to the sharing of
+	 * L1 entries we need to do this in the L2.
+	 */
+	if (__supported_pte_mask & _PAGE_NX) {
+		for (i = 0; i < PTRS_PER_PMD; ++i) {
+			if (pmd_none(level2_ident_pgt[i]))
+				continue;
+			level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+		}
+	}
+
 	/* Copy the initial P->M table mappings if necessary. */
 	i = pgd_index(xen_start_info->mfn_list);
 	if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 	switch (idx) {
 	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-	case FIX_RO_IDT:
 #ifdef CONFIG_X86_32
 	case FIX_WP_TEST:
 # ifdef CONFIG_HIGHMEM
@@ -2272,7 +2281,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
 	case FIX_TEXT_POKE0:
 	case FIX_TEXT_POKE1:
-	case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
 		/* All local page mappings */
 		pte = pfn_pte(phys, prot);
 		break;
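The first mmu_pv.c hunk above stops reserving a worst-case NR_CPUS-sized vcpumask in every flush multicall and sizes the argument from num_possible_cpus() instead. The saving is plain BITS_TO_LONGS() arithmetic; a stand-alone sketch with the kernel macros redefined locally (the CPU counts are made-up examples):

#include <stdio.h>
#include <limits.h>

/* Same arithmetic as the kernel macros, redefined here for illustration. */
#define BITS_PER_LONG		(sizeof(long) * CHAR_BIT)
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)

int main(void)
{
	unsigned int nr_cpus = 8192;	/* a large NR_CPUS configuration */
	unsigned int possible = 8;	/* CPUs actually possible at boot */

	/* Old: multicall argument sized for the compile-time worst case. */
	printf("NR_CPUS bitmap: %zu bytes\n",
	       BITS_TO_LONGS(nr_cpus) * sizeof(long));

	/* New: sized for num_possible_cpus() only. */
	printf("possible-CPUs bitmap: %zu bytes\n",
	       BITS_TO_LONGS(possible) * sizeof(long));
	return 0;
}

On a distro kernel built with NR_CPUS=8192 but booted in an 8-vCPU guest, the per-multicall mask shrinks from 1024 bytes to 8.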
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c114ca767b3b..6e0d2086eacb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
 	addr = xen_e820_table.entries[0].addr;
 	size = xen_e820_table.entries[0].size;
 	while (i < xen_e820_table.nr_entries) {
-		bool discard = false;
 
 		chunk_size = size;
 		type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
 				xen_add_extra_mem(pfn_s, n_pfns);
 				xen_max_p2m_pfn = pfn_s + n_pfns;
 			} else
-				discard = true;
+				type = E820_TYPE_UNUSABLE;
 		}
 
-		if (!discard)
-			xen_align_and_add_e820_region(addr, chunk_size, type);
+		xen_align_and_add_e820_region(addr, chunk_size, type);
 
 		addr += chunk_size;
 		size -= chunk_size;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 8a10c9a9e2b5..417b339e5c8e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
 
 #include <xen/interface/xen.h>
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 
 .macro xen_pv_trap name
@@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat
 #endif
 xen_pv_trap hypervisor_callback
 
+	__INIT
+ENTRY(xen_early_idt_handler_array)
+	i = 0
+	.rept NUM_EXCEPTION_VECTORS
+	pop %rcx
+	pop %r11
+	jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+	i = i + 1
+	.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+	.endr
+END(xen_early_idt_handler_array)
+	__FINIT
+
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 /*
  * Xen64 iret frame:
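Each xen_early_idt_handler_array entry pops the two extra words a Xen exception frame carries (%rcx, %r11) and jumps to the matching native early-IDT stub; the paired get_trap_addr() change recovers the vector from a native stub address by plain pointer arithmetic. A sketch of that index/remap arithmetic with made-up base addresses and stub sizes (the real constants come from the kernel headers):

#include <stdio.h>
#include <stdint.h>

/* Assumed values for illustration only; the kernel defines the real ones. */
#define NUM_EXCEPTION_VECTORS		32
#define EARLY_IDT_HANDLER_SIZE		9
#define XEN_EARLY_IDT_HANDLER_SIZE	(EARLY_IDT_HANDLER_SIZE + 8)

int main(void)
{
	uintptr_t early_idt = 0x1000;	/* pretend base of the native array */
	uintptr_t xen_early = 0x2000;	/* pretend base of the Xen array */

	/* A trap entry that points at the native stub for vector 13 (#GP). */
	uintptr_t addr = early_idt + 13 * EARLY_IDT_HANDLER_SIZE;

	/* The remap done in get_trap_addr(): recover the vector ... */
	unsigned int nr = (addr - early_idt) / EARLY_IDT_HANDLER_SIZE;

	/* ... and redirect to the equally spaced Xen-flavoured stub. */
	uintptr_t xen_addr = xen_early + nr * XEN_EARLY_IDT_HANDLER_SIZE;

	printf("vector %u: native %#lx -> xen %#lx\n",
	       nr, (unsigned long)addr, (unsigned long)xen_addr);
	return 0;
}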
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 75011b80660f..3b34745d0a52 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -72,7 +72,7 @@ u64 xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 void xen_save_time_memory_area(void);
 void xen_restore_time_memory_area(void);
-void __init xen_init_time_ops(void);
+void __ref xen_init_time_ops(void);
 void __init xen_hvm_init_time_ops(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index a5bcdfb890f1..837d4dd76785 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
 generic-y += errno.h
 generic-y += fcntl.h
 generic-y += ioctl.h