author		Dave Airlie <airlied@redhat.com>	2018-01-17 18:32:15 -0500
committer	Dave Airlie <airlied@redhat.com>	2018-01-17 18:32:15 -0500
commit		4a6cc7a44e98a0460bd094b68c75f0705fdc450a
tree		b8c86a1e0342b1166ab52c4d79e404eede57abec
parent		8563188e37b000979ab66521f4337df9a3453223
parent		a8750ddca918032d6349adbf9a4b6555e7db20da
BackMerge tag 'v4.15-rc8' into drm-next
Linux 4.15-rc8

Daniel requested this so the intel CI won't fall over on drm-next so often.
-rw-r--r--.mailmap1
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu16
-rw-r--r--Documentation/admin-guide/kernel-parameters.rst1
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt62
-rw-r--r--Documentation/admin-guide/thunderbolt.rst2
-rw-r--r--Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/da7218.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/da7219.txt2
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt18
-rw-r--r--Documentation/filesystems/nilfs2.txt4
-rw-r--r--Documentation/kbuild/kconfig-language.txt23
-rw-r--r--Documentation/networking/index.rst2
-rw-r--r--Documentation/networking/msg_zerocopy.rst4
-rw-r--r--Documentation/usb/gadget-testing.txt2
-rw-r--r--Documentation/x86/pti.txt186
-rw-r--r--Documentation/x86/x86_64/mm.txt37
-rw-r--r--MAINTAINERS30
-rw-r--r--Makefile48
-rw-r--r--arch/arc/boot/dts/axc003.dtsi8
-rw-r--r--arch/arc/boot/dts/axc003_idu.dtsi8
-rw-r--r--arch/arc/boot/dts/hsdk.dts8
-rw-r--r--arch/arc/configs/hsdk_defconfig5
-rw-r--r--arch/arc/include/asm/uaccess.h5
-rw-r--r--arch/arc/kernel/setup.c2
-rw-r--r--arch/arc/kernel/stacktrace.c2
-rw-r--r--arch/arc/kernel/traps.c14
-rw-r--r--arch/arc/kernel/troubleshoot.c3
-rw-r--r--arch/arc/plat-axs10x/axs10x.c18
-rw-r--r--arch/arc/plat-hsdk/platform.c42
-rw-r--r--arch/arm/boot/dts/aspeed-g4.dtsi2
-rw-r--r--arch/arm/boot/dts/at91-tse850-3.dts1
-rw-r--r--arch/arm/boot/dts/da850-lego-ev3.dts4
-rw-r--r--arch/arm/boot/dts/exynos5800-peach-pi.dts4
-rw-r--r--arch/arm/boot/dts/ls1021a-qds.dts2
-rw-r--r--arch/arm/boot/dts/ls1021a-twr.dts2
-rw-r--r--arch/arm/boot/dts/rk3066a-marsboard.dts4
-rw-r--r--arch/arm/boot/dts/rk3288.dtsi2
-rw-r--r--arch/arm/boot/dts/sun4i-a10.dtsi4
-rw-r--r--arch/arm/boot/dts/sun5i-a10s.dtsi4
-rw-r--r--arch/arm/boot/dts/sun6i-a31.dtsi4
-rw-r--r--arch/arm/boot/dts/sun7i-a20.dtsi4
-rw-r--r--arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts1
-rw-r--r--arch/arm/boot/dts/tango4-common.dtsi1
-rw-r--r--arch/arm/kernel/traps.c1
-rw-r--r--arch/arm/lib/csumpartialcopyuser.S4
-rw-r--r--arch/arm/mach-davinci/dm365.c29
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts1
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts1
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts3
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi11
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts2
-rw-r--r--arch/arm64/boot/dts/renesas/salvator-common.dtsi1
-rw-r--r--arch/arm64/boot/dts/renesas/ulcb.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock64.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi11
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi4
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c3
-rw-r--r--arch/ia64/kernel/time.c2
-rw-r--r--arch/m32r/kernel/traps.c1
-rw-r--r--arch/mips/kernel/cps-vec.S2
-rw-r--r--arch/mips/kernel/process.c12
-rw-r--r--arch/mips/kernel/ptrace.c147
-rw-r--r--arch/parisc/boot/compressed/misc.c4
-rw-r--r--arch/parisc/include/asm/ldcw.h2
-rw-r--r--arch/parisc/include/asm/thread_info.h5
-rw-r--r--arch/parisc/kernel/drivers.c2
-rw-r--r--arch/parisc/kernel/entry.S25
-rw-r--r--arch/parisc/kernel/hpmc.S1
-rw-r--r--arch/parisc/kernel/pacache.S9
-rw-r--r--arch/parisc/kernel/process.c39
-rw-r--r--arch/parisc/kernel/unwind.c1
-rw-r--r--arch/parisc/lib/delay.c2
-rw-r--r--arch/parisc/mm/init.c10
-rw-r--r--arch/powerpc/include/asm/exception-64e.h6
-rw-r--r--arch/powerpc/include/asm/exception-64s.h57
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h13
-rw-r--r--arch/powerpc/include/asm/hvcall.h17
-rw-r--r--arch/powerpc/include/asm/mmu_context.h5
-rw-r--r--arch/powerpc/include/asm/paca.h10
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h14
-rw-r--r--arch/powerpc/include/asm/setup.h13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c5
-rw-r--r--arch/powerpc/kernel/entry_64.S44
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S137
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c101
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S9
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c90
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S9
-rw-r--r--arch/powerpc/kvm/book3s_pr.c2
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S7
-rw-r--r--arch/powerpc/kvm/book3s_segment.S4
-rw-r--r--arch/powerpc/kvm/book3s_xive.c7
-rw-r--r--arch/powerpc/lib/feature-fixups.c41
-rw-r--r--arch/powerpc/mm/fault.c7
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c6
-rw-r--r--arch/powerpc/perf/core-book3s.c8
-rw-r--r--arch/powerpc/perf/imc-pmu.c17
-rw-r--r--arch/powerpc/platforms/powernv/setup.c49
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c21
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h2
-rw-r--r--arch/powerpc/platforms/pseries/ras.c3
-rw-r--r--arch/powerpc/platforms/pseries/setup.c35
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c4
-rw-r--r--arch/riscv/configs/defconfig75
-rw-r--r--arch/riscv/include/asm/csr.h8
-rw-r--r--arch/riscv/include/asm/io.h4
-rw-r--r--arch/riscv/include/asm/irqflags.h10
-rw-r--r--arch/riscv/include/asm/pgtable.h4
-rw-r--r--arch/riscv/include/asm/ptrace.h2
-rw-r--r--arch/riscv/include/asm/tlbflush.h4
-rw-r--r--arch/riscv/include/asm/uaccess.h12
-rw-r--r--arch/riscv/include/asm/unistd.h1
-rw-r--r--arch/riscv/include/asm/vdso-syscalls.h28
-rw-r--r--arch/riscv/include/uapi/asm/syscalls.h26
-rw-r--r--arch/riscv/kernel/entry.S8
-rw-r--r--arch/riscv/kernel/process.c4
-rw-r--r--arch/riscv/kernel/syscall_table.c1
-rw-r--r--arch/riscv/kernel/vdso/flush_icache.S1
-rw-r--r--arch/riscv/mm/fault.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c9
-rw-r--r--arch/s390/kvm/priv.c2
-rw-r--r--arch/s390/lib/uaccess.c2
-rw-r--r--arch/s390/net/bpf_jit_comp.c11
-rw-r--r--arch/s390/pci/pci_dma.c21
-rw-r--r--arch/s390/pci/pci_insn.c3
-rw-r--r--arch/sh/boards/mach-se/770x/setup.c24
-rw-r--r--arch/sh/include/mach-se/mach/se.h1
-rw-r--r--arch/sparc/lib/hweight.S4
-rw-r--r--arch/sparc/mm/fault_32.c2
-rw-r--r--arch/sparc/mm/fault_64.c2
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c6
-rw-r--r--arch/um/include/asm/mmu_context.h3
-rw-r--r--arch/um/kernel/trap.c2
-rw-r--r--arch/unicore32/include/asm/mmu_context.h5
-rw-r--r--arch/unicore32/kernel/traps.c1
-rw-r--r--arch/x86/Kconfig18
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/boot/compressed/pagetable.c3
-rw-r--r--arch/x86/boot/genimage.sh28
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S5
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S3
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S3
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S3
-rw-r--r--arch/x86/entry/calling.h147
-rw-r--r--arch/x86/entry/entry_32.S19
-rw-r--r--arch/x86/entry/entry_64.S247
-rw-r--r--arch/x86/entry/entry_64_compat.S32
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c38
-rw-r--r--arch/x86/events/intel/bts.c18
-rw-r--r--arch/x86/events/intel/core.c5
-rw-r--r--arch/x86/events/intel/ds.c146
-rw-r--r--arch/x86/events/perf_event.h23
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/asm-prototypes.h25
-rw-r--r--arch/x86/include/asm/asm.h2
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h81
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h8
-rw-r--r--arch/x86/include/asm/desc.h14
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/espfix.h7
-rw-r--r--arch/x86/include/asm/fixmap.h7
-rw-r--r--arch/x86/include/asm/hypervisor.h25
-rw-r--r--arch/x86/include/asm/intel_ds.h36
-rw-r--r--arch/x86/include/asm/invpcid.h53
-rw-r--r--arch/x86/include/asm/irqdomain.h2
-rw-r--r--arch/x86/include/asm/irqflags.h3
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h113
-rw-r--r--arch/x86/include/asm/mshyperv.h18
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h214
-rw-r--r--arch/x86/include/asm/paravirt.h9
-rw-r--r--arch/x86/include/asm/pci_x86.h1
-rw-r--r--arch/x86/include/asm/pgalloc.h11
-rw-r--r--arch/x86/include/asm/pgtable.h30
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h15
-rw-r--r--arch/x86/include/asm/pgtable_64.h92
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h59
-rw-r--r--arch/x86/include/asm/processor-flags.h5
-rw-r--r--arch/x86/include/asm/processor.h82
-rw-r--r--arch/x86/include/asm/pti.h14
-rw-r--r--arch/x86/include/asm/stacktrace.h3
-rw-r--r--arch/x86/include/asm/switch_to.h13
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h312
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h16
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/include/asm/unwind.h20
-rw-r--r--arch/x86/include/asm/vsyscall.h1
-rw-r--r--arch/x86/include/asm/xen/hypercall.h5
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h7
-rw-r--r--arch/x86/kernel/alternative.c7
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/msi.c8
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/vector.c20
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c10
-rw-r--r--arch/x86/kernel/asm-offsets_32.c9
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c28
-rw-r--r--arch/x86/kernel/cpu/bugs.c185
-rw-r--r--arch/x86/kernel/cpu/common.c106
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c26
-rw-r--r--arch/x86/kernel/doublefault.c36
-rw-r--r--arch/x86/kernel/dumpstack.c98
-rw-r--r--arch/x86/kernel/dumpstack_32.c6
-rw-r--r--arch/x86/kernel/dumpstack_64.c12
-rw-r--r--arch/x86/kernel/ftrace_32.S6
-rw-r--r--arch/x86/kernel/ftrace_64.S8
-rw-r--r--arch/x86/kernel/head_64.S30
-rw-r--r--arch/x86/kernel/ioport.c2
-rw-r--r--arch/x86/kernel/irq.c12
-rw-r--r--arch/x86/kernel/irq_32.c9
-rw-r--r--arch/x86/kernel/irq_64.c4
-rw-r--r--arch/x86/kernel/ldt.c198
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c2
-rw-r--r--arch/x86/kernel/process.c19
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c14
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/kernel/smpboot.c15
-rw-r--r--arch/x86/kernel/stacktrace.c8
-rw-r--r--arch/x86/kernel/tboot.c11
-rw-r--r--arch/x86/kernel/tls.c11
-rw-r--r--arch/x86/kernel/traps.c77
-rw-r--r--arch/x86/kernel/unwind_orc.c88
-rw-r--r--arch/x86/kernel/vmlinux.lds.S17
-rw-r--r--arch/x86/kvm/emulate.c32
-rw-r--r--arch/x86/kvm/mmu.c27
-rw-r--r--arch/x86/kvm/svm.c32
-rw-r--r--arch/x86/kvm/vmx.c37
-rw-r--r--arch/x86/kvm/x86.c48
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/checksum_32.S7
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/lib/retpoline.S48
-rw-r--r--arch/x86/mm/Makefile9
-rw-r--r--arch/x86/mm/cpu_entry_area.c166
-rw-r--r--arch/x86/mm/debug_pagetables.c80
-rw-r--r--arch/x86/mm/dump_pagetables.c141
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init.c82
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/kasan_init_64.c23
-rw-r--r--arch/x86/mm/kaslr.c32
-rw-r--r--arch/x86/mm/mem_encrypt.c4
-rw-r--r--arch/x86/mm/pgtable.c5
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/pti.c368
-rw-r--r--arch/x86/mm/tlb.c64
-rw-r--r--arch/x86/pci/common.c5
-rw-r--r--arch/x86/pci/fixup.c29
-rw-r--r--arch/x86/platform/efi/efi_64.c7
-rw-r--r--arch/x86/platform/efi/quirks.c13
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bt.c2
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/platform/uv/uv_irq.c2
-rw-r--r--arch/x86/power/cpu.c16
-rw-r--r--arch/x86/xen/enlighten.c81
-rw-r--r--arch/x86/xen/enlighten_pv.c5
-rw-r--r--arch/x86/xen/mmu_pv.c22
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--block/bio.c2
-rw-r--r--block/blk-core.c9
-rw-r--r--block/blk-map.c38
-rw-r--r--block/blk-mq.c2
-rw-r--r--block/blk-throttle.c8
-rw-r--r--block/blk.h2
-rw-r--r--block/bounce.c6
-rw-r--r--block/kyber-iosched.c37
-rw-r--r--crypto/af_alg.c10
-rw-r--r--crypto/algapi.c12
-rw-r--r--crypto/algif_aead.c18
-rw-r--r--crypto/algif_skcipher.c18
-rw-r--r--crypto/chacha20poly1305.c6
-rw-r--r--crypto/mcryptd.c23
-rw-r--r--crypto/pcrypt.c19
-rw-r--r--crypto/skcipher.c10
-rw-r--r--drivers/acpi/apei/erst.c2
-rw-r--r--drivers/acpi/cppc_acpi.c2
-rw-r--r--drivers/acpi/nfit/core.c9
-rw-r--r--drivers/android/binder.c44
-rw-r--r--drivers/base/Kconfig3
-rw-r--r--drivers/base/cacheinfo.c13
-rw-r--r--drivers/base/cpu.c48
-rw-r--r--drivers/block/loop.c10
-rw-r--r--drivers/block/null_blk.c4
-rw-r--r--drivers/block/rbd.c18
-rw-r--r--drivers/bus/sunxi-rsb.c1
-rw-r--r--drivers/clk/clk.c8
-rw-r--r--drivers/clk/sunxi/clk-sun9i-mmc.c12
-rw-r--r--drivers/cpufreq/cpufreq_governor.c19
-rw-r--r--drivers/cpufreq/imx6q-cpufreq.c11
-rw-r--r--drivers/crypto/chelsio/Kconfig1
-rw-r--r--drivers/crypto/inside-secure/safexcel.c1
-rw-r--r--drivers/crypto/inside-secure/safexcel_cipher.c85
-rw-r--r--drivers/crypto/inside-secure/safexcel_hash.c89
-rw-r--r--drivers/crypto/n2_core.c3
-rw-r--r--drivers/firmware/efi/capsule-loader.c45
-rw-r--r--drivers/gpio/gpio-bcm-kona.c3
-rw-r--r--drivers/gpio/gpio-brcmstb.c4
-rw-r--r--drivers/gpio/gpio-reg.c4
-rw-r--r--drivers/gpio/gpio-tegra.c4
-rw-r--r--drivers/gpio/gpio-xgene-sb.c2
-rw-r--r--drivers/gpio/gpiolib-acpi.c2
-rw-r--r--drivers/gpio/gpiolib-devprop.c17
-rw-r--r--drivers/gpio/gpiolib-of.c3
-rw-r--r--drivers/gpio/gpiolib.c62
-rw-r--r--drivers/gpio/gpiolib.h3
-rw-r--r--drivers/gpu/drm/drm_lease.c22
-rw-r--r--drivers/gpu/drm/drm_plane.c42
-rw-r--r--drivers/gpu/drm/drm_syncobj.c77
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c5
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h4
-rw-r--r--drivers/gpu/drm/i915/intel_cdclk.c35
-rw-r--r--drivers/gpu/drm/i915/intel_engine_cs.c5
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c3
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c10
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h11
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ttm.c39
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vmm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c7
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c46
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c20
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c4
-rw-r--r--drivers/gpu/drm/tegra/sor.c1
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc.c2
-rw-r--r--drivers/gpu/drm/vc4/vc4_irq.c3
-rw-r--r--drivers/gpu/drm/vc4/vc4_v3d.c3
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c6
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.h2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c41
-rw-r--r--drivers/hid/hid-core.c2
-rw-r--r--drivers/hid/hid-cp2112.c15
-rw-r--r--drivers/hid/hid-holtekff.c8
-rw-r--r--drivers/hv/vmbus_drv.c2
-rw-r--r--drivers/hwmon/hwmon.c21
-rw-r--r--drivers/infiniband/core/core_priv.h2
-rw-r--r--drivers/infiniband/core/device.c18
-rw-r--r--drivers/infiniband/core/nldev.c54
-rw-r--r--drivers/infiniband/core/security.c3
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c4
-rw-r--r--drivers/infiniband/core/verbs.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c13
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h2
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c72
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h6
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h1
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c30
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c2
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c11
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c43
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c1
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h6
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c17
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c14
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c25
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c5
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c5
-rw-r--r--drivers/input/joystick/analog.c2
-rw-r--r--drivers/input/misc/ims-pcu.c2
-rw-r--r--drivers/input/misc/xen-kbdfront.c2
-rw-r--r--drivers/input/mouse/elantech.c2
-rw-r--r--drivers/input/touchscreen/elants_i2c.c10
-rw-r--r--drivers/input/touchscreen/hideep.c3
-rw-r--r--drivers/iommu/amd_iommu.c2
-rw-r--r--drivers/iommu/arm-smmu-v3.c17
-rw-r--r--drivers/iommu/intel_irq_remapping.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c4
-rw-r--r--drivers/irqchip/irq-renesas-intc-irqpin.c6
-rw-r--r--drivers/leds/led-core.c1
-rw-r--r--drivers/mfd/arizona-irq.c4
-rw-r--r--drivers/mfd/cros_ec_spi.c53
-rw-r--r--drivers/mfd/rtsx_pcr.c3
-rw-r--r--drivers/mfd/twl4030-audio.c9
-rw-r--r--drivers/mfd/twl6040.c12
-rw-r--r--drivers/mmc/host/renesas_sdhi_core.c3
-rw-r--r--drivers/mmc/host/s3cmci.c6
-rw-r--r--drivers/mtd/mtdcore.c2
-rw-r--r--drivers/mtd/nand/brcmnand/brcmnand.c2
-rw-r--r--drivers/mtd/nand/gpio.c6
-rw-r--r--drivers/mtd/nand/gpmi-nand/gpmi-nand.c6
-rw-r--r--drivers/mtd/nand/pxa3xx_nand.c1
-rw-r--r--drivers/mux/core.c4
-rw-r--r--drivers/net/can/flexcan.c2
-rw-r--r--drivers/net/can/usb/ems_usb.c1
-rw-r--r--drivers/net/can/usb/gs_usb.c2
-rw-r--r--drivers/net/can/vxcan.c2
-rw-r--r--drivers/net/dsa/b53/b53_common.c9
-rw-r--r--drivers/net/ethernet/3com/3c59x.c90
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c45
-rw-r--r--drivers/net/ethernet/arc/emac.h2
-rw-r--r--drivers/net/ethernet/arc/emac_main.c164
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c14
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c2
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c21
-rw-r--r--drivers/net/ethernet/broadcom/tg3.h7
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c17
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c13
-rw-r--r--drivers/net/ethernet/freescale/gianfar_ptp.c3
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000.h3
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_main.c27
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c11
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c37
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c26
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c8
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rl.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vxlan.c64
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vxlan.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci_hw.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c55
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h8
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c2
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c6
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c33
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/enh_desc.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/norm_desc.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c8
-rw-r--r--drivers/net/geneve.c14
-rw-r--r--drivers/net/macvlan.c7
-rw-r--r--drivers/net/phy/marvell.c14
-rw-r--r--drivers/net/phy/mdio-sun4i.c6
-rw-r--r--drivers/net/phy/mdio-xgene.c21
-rw-r--r--drivers/net/phy/micrel.c1
-rw-r--r--drivers/net/phy/phylink.c7
-rw-r--r--drivers/net/phy/sfp-bus.c6
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/vxlan.c19
-rw-r--r--drivers/net/wireless/ath/wcn36xx/main.c23
-rw-r--r--drivers/net/wireless/ath/wcn36xx/pmc.c6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/internal.h10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx.c8
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c3
-rw-r--r--drivers/net/xen-netfront.c1
-rw-r--r--drivers/nvdimm/btt.c201
-rw-r--r--drivers/nvdimm/btt.h47
-rw-r--r--drivers/nvdimm/pfn_devs.c20
-rw-r--r--drivers/nvme/host/core.c18
-rw-r--r--drivers/nvme/host/fabrics.c1
-rw-r--r--drivers/nvme/host/fc.c1
-rw-r--r--drivers/nvme/host/nvme.h12
-rw-r--r--drivers/nvme/host/pci.c42
-rw-r--r--drivers/nvme/host/rdma.c14
-rw-r--r--drivers/nvme/target/fcloop.c2
-rw-r--r--drivers/nvmem/meson-mx-efuse.c4
-rw-r--r--drivers/of/of_mdio.c9
-rw-r--r--drivers/parisc/dino.c10
-rw-r--r--drivers/parisc/eisa_eeprom.c2
-rw-r--r--drivers/parisc/lba_pci.c33
-rw-r--r--drivers/pci/host/pci-hyperv.c8
-rw-r--r--drivers/pci/pci-driver.c7
-rw-r--r--drivers/phy/motorola/phy-cpcap-usb.c2
-rw-r--r--drivers/phy/renesas/Kconfig2
-rw-r--r--drivers/phy/rockchip/phy-rockchip-typec.c2
-rw-r--r--drivers/phy/tegra/xusb.c58
-rw-r--r--drivers/pinctrl/intel/pinctrl-cherryview.c16
-rw-r--r--drivers/pinctrl/pinctrl-single.c5
-rw-r--r--drivers/pinctrl/stm32/pinctrl-stm32.c2
-rw-r--r--drivers/platform/x86/wmi.c2
-rw-r--r--drivers/s390/block/dasd_3990_erp.c10
-rw-r--r--drivers/s390/char/Makefile2
-rw-r--r--drivers/s390/net/qeth_core_main.c9
-rw-r--r--drivers/scsi/aacraid/aacraid.h1
-rw-r--r--drivers/scsi/aacraid/linit.c2
-rw-r--r--drivers/scsi/osd/osd_initiator.c4
-rw-r--r--drivers/scsi/scsi_devinfo.c6
-rw-r--r--drivers/scsi/scsi_scan.c13
-rw-r--r--drivers/scsi/scsi_sysfs.c10
-rw-r--r--drivers/scsi/scsi_transport_spi.c12
-rw-r--r--drivers/scsi/storvsc_drv.c3
-rw-r--r--drivers/spi/spi-armada-3700.c8
-rw-r--r--drivers/spi/spi-atmel.c2
-rw-r--r--drivers/spi/spi-rspi.c4
-rw-r--r--drivers/spi/spi-sun4i.c2
-rw-r--r--drivers/spi/spi-xilinx.c11
-rw-r--r--drivers/staging/android/ashmem.c2
-rw-r--r--drivers/staging/android/ion/Kconfig2
-rw-r--r--drivers/staging/android/ion/ion.c4
-rw-r--r--drivers/staging/android/ion/ion_cma_heap.c15
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c23
-rw-r--r--drivers/target/target_core_pscsi.c4
-rw-r--r--drivers/thunderbolt/nhi.c2
-rw-r--r--drivers/tty/n_tty.c4
-rw-r--r--drivers/usb/chipidea/ci_hdrc_msm.c2
-rw-r--r--drivers/usb/core/config.c2
-rw-r--r--drivers/usb/core/quirks.c6
-rw-r--r--drivers/usb/gadget/udc/core.c28
-rw-r--r--drivers/usb/host/xhci-debugfs.c16
-rw-r--r--drivers/usb/host/xhci-pci.c3
-rw-r--r--drivers/usb/host/xhci.c6
-rw-r--r--drivers/usb/misc/usb3503.c2
-rw-r--r--drivers/usb/mon/mon_bin.c8
-rw-r--r--drivers/usb/serial/cp210x.c2
-rw-r--r--drivers/usb/serial/ftdi_sio.c1
-rw-r--r--drivers/usb/serial/ftdi_sio_ids.h6
-rw-r--r--drivers/usb/serial/option.c17
-rw-r--r--drivers/usb/serial/qcserial.c3
-rw-r--r--drivers/usb/storage/unusual_uas.h7
-rw-r--r--drivers/usb/usbip/stub_dev.c3
-rw-r--r--drivers/usb/usbip/stub_main.c5
-rw-r--r--drivers/usb/usbip/stub_rx.c18
-rw-r--r--drivers/usb/usbip/stub_tx.c6
-rw-r--r--drivers/usb/usbip/usbip_common.c33
-rw-r--r--drivers/usb/usbip/vhci_hcd.c12
-rw-r--r--drivers/usb/usbip/vhci_rx.c23
-rw-r--r--drivers/usb/usbip/vhci_tx.c3
-rw-r--r--drivers/usb/usbip/vudc_rx.c19
-rw-r--r--drivers/usb/usbip/vudc_tx.c11
-rw-r--r--drivers/xen/balloon.c65
-rw-r--r--drivers/xen/gntdev.c8
-rw-r--r--drivers/xen/pvcalls-front.c2
-rw-r--r--fs/afs/dir.c37
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/afs/write.c8
-rw-r--r--fs/btrfs/delayed-inode.c45
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/exec.c9
-rw-r--r--fs/super.c6
-rw-r--r--fs/userfaultfd.c20
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_attr.c20
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c9
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h3
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_defer.c39
-rw-r--r--fs/xfs/libxfs/xfs_defer.h5
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c4
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c52
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c99
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h16
-rw-r--r--fs/xfs/xfs_aops.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c2
-rw-r--r--fs/xfs/xfs_fsops.c5
-rw-r--r--fs/xfs/xfs_icache.c35
-rw-r--r--fs/xfs/xfs_icache.h1
-rw-r--r--fs/xfs/xfs_inode.c28
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_qm.c46
-rw-r--r--fs/xfs/xfs_reflink.c21
-rw-r--r--fs/xfs/xfs_super.c9
-rw-r--r--include/asm-generic/mm_hooks.h5
-rw-r--r--include/asm-generic/pgtable.h5
-rw-r--r--include/crypto/if_alg.h5
-rw-r--r--include/crypto/mcryptd.h1
-rw-r--r--include/kvm/arm_arch_timer.h2
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blk_types.h9
-rw-r--r--include/linux/blkdev.h25
-rw-r--r--include/linux/bpf.h36
-rw-r--r--include/linux/bpf_verifier.h4
-rw-r--r--include/linux/completion.h1
-rw-r--r--include/linux/cpu.h7
-rw-r--r--include/linux/cpuhotplug.h2
-rw-r--r--include/linux/crash_core.h2
-rw-r--r--include/linux/efi.h4
-rw-r--r--include/linux/fscache.h2
-rw-r--r--include/linux/gpio/driver.h33
-rw-r--r--include/linux/ipv6.h3
-rw-r--r--include/linux/irq.h17
-rw-r--r--include/linux/irqdesc.h9
-rw-r--r--include/linux/irqdomain.h2
-rw-r--r--include/linux/irqflags.h4
-rw-r--r--include/linux/lockdep.h2
-rw-r--r--include/linux/mfd/rtsx_pci.h2
-rw-r--r--include/linux/mlx5/driver.h7
-rw-r--r--include/linux/mlx5/mlx5_ifc.h8
-rw-r--r--include/linux/pti.h11
-rw-r--r--include/linux/sh_eth.h1
-rw-r--r--include/linux/spi/spi.h2
-rw-r--r--include/linux/tick.h1
-rw-r--r--include/linux/timer.h4
-rw-r--r--include/net/cfg80211.h1
-rw-r--r--include/net/pkt_cls.h5
-rw-r--r--include/net/sctp/structs.h2
-rw-r--r--include/net/sock.h5
-rw-r--r--include/net/vxlan.h2
-rw-r--r--include/net/xfrm.h3
-rw-r--r--include/trace/events/clk.h4
-rw-r--r--include/trace/events/kvm.h7
-rw-r--r--include/trace/events/tcp.h97
-rw-r--r--include/uapi/linux/if_ether.h3
-rw-r--r--include/uapi/linux/libc-compat.h61
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h2
-rw-r--r--include/xen/balloon.h5
-rw-r--r--init/Kconfig14
-rw-r--r--init/main.c9
-rw-r--r--kernel/acct.c2
-rw-r--r--kernel/bpf/arraymap.c47
-rw-r--r--kernel/bpf/core.c19
-rw-r--r--kernel/bpf/inode.c40
-rw-r--r--kernel/bpf/sockmap.c11
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/bpf/verifier.c319
-rw-r--r--kernel/cgroup/cgroup-v1.c6
-rw-r--r--kernel/cgroup/cgroup.c20
-rw-r--r--kernel/cpu.c12
-rw-r--r--kernel/crash_core.c2
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/irq/debug.h5
-rw-r--r--kernel/irq/debugfs.c1
-rw-r--r--kernel/irq/generic-chip.c11
-rw-r--r--kernel/irq/internals.h2
-rw-r--r--kernel/irq/irqdomain.c13
-rw-r--r--kernel/irq/msi.c64
-rw-r--r--kernel/pid.c8
-rw-r--r--kernel/sched/completion.c5
-rw-r--r--kernel/sched/cpufreq_schedutil.c2
-rw-r--r--kernel/sched/membarrier.c2
-rw-r--r--kernel/time/Kconfig1
-rw-r--r--kernel/time/tick-sched.c32
-rw-r--r--kernel/time/timer.c35
-rw-r--r--kernel/trace/ring_buffer.c12
-rw-r--r--kernel/trace/trace.c13
-rw-r--r--lib/kobject_uevent.c16
-rw-r--r--lib/mpi/longlong.h18
-rw-r--r--lib/test_bpf.c54
-rw-r--r--lib/timerqueue.c8
-rw-r--r--mm/backing-dev.c5
-rw-r--r--mm/debug.c28
-rw-r--r--mm/kmemleak.c2
-rw-r--r--mm/mprotect.c6
-rw-r--r--mm/page_alloc.c2
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/vmscan.c3
-rw-r--r--mm/zsmalloc.c1
-rw-r--r--net/8021q/vlan.c7
-rw-r--r--net/bluetooth/l2cap_core.c20
-rw-r--r--net/bridge/br_netlink.c11
-rw-r--r--net/caif/caif_dev.c5
-rw-r--r--net/caif/caif_usb.c4
-rw-r--r--net/caif/cfcnfg.c10
-rw-r--r--net/caif/cfctrl.c4
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/ethtool.c15
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/rtnetlink.c10
-rw-r--r--net/core/skbuff.c11
-rw-r--r--net/core/sock_diag.c2
-rw-r--r--net/core/sysctl_net_core.c6
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c8
-rw-r--r--net/ipv4/ip_gre.c1
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/xfrm4_input.c12
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/exthdrs.c9
-rw-r--r--net/ipv6/ip6_fib.c73
-rw-r--r--net/ipv6/ip6_gre.c58
-rw-r--r--net/ipv6/ip6_output.c17
-rw-r--r--net/ipv6/ip6_tunnel.c24
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/route.c20
-rw-r--r--net/ipv6/xfrm6_input.c10
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/netfilter/nf_tables_api.c8
-rw-r--r--net/netfilter/xt_bpf.c14
-rw-r--r--net/openvswitch/flow.c15
-rw-r--r--net/rds/rdma.c4
-rw-r--r--net/rds/send.c3
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sched/cls_bpf.c93
-rw-r--r--net/sched/sch_generic.c4
-rw-r--r--net/sctp/debug.c3
-rw-r--r--net/sctp/input.c28
-rw-r--r--net/sctp/socket.c32
-rw-r--r--net/sctp/stream.c22
-rw-r--r--net/sctp/transport.c29
-rw-r--r--net/sctp/ulpqueue.c24
-rw-r--r--net/socket.c13
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/tipc/bearer.c5
-rw-r--r--net/tipc/group.c69
-rw-r--r--net/tipc/monitor.c6
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/wireless/Makefile31
-rw-r--r--net/wireless/certs/sforshee.hex86
-rw-r--r--net/wireless/certs/sforshee.x509bin680 -> 0 bytes
-rw-r--r--net/wireless/nl80211.c9
-rw-r--r--net/xfrm/xfrm_input.c69
-rw-r--r--net/xfrm/xfrm_policy.c9
-rw-r--r--net/xfrm/xfrm_state.c1
-rw-r--r--net/xfrm/xfrm_user.c26
-rw-r--r--scripts/genksyms/.gitignore1
-rw-r--r--scripts/kconfig/expr.c5
-rw-r--r--security/Kconfig11
-rw-r--r--security/apparmor/domain.c9
-rw-r--r--security/apparmor/include/perms.h3
-rw-r--r--security/apparmor/ipc.c53
-rw-r--r--security/apparmor/mount.c12
-rw-r--r--security/commoncap.c21
-rw-r--r--sound/core/oss/pcm_oss.c41
-rw-r--r--sound/core/oss/pcm_plugin.c14
-rw-r--r--sound/core/pcm_lib.c4
-rw-r--r--sound/core/pcm_native.c9
-rw-r--r--sound/core/rawmidi.c15
-rw-r--r--sound/drivers/aloop.c98
-rw-r--r--sound/hda/hdac_i915.c2
-rw-r--r--sound/pci/hda/patch_conexant.c29
-rw-r--r--sound/pci/hda/patch_hdmi.c6
-rw-r--r--sound/pci/hda/patch_realtek.c49
-rw-r--r--sound/soc/amd/acp-pcm-dma.c7
-rw-r--r--sound/soc/atmel/Kconfig2
-rw-r--r--sound/soc/codecs/da7218.c2
-rw-r--r--sound/soc/codecs/msm8916-wcd-analog.c2
-rw-r--r--sound/soc/codecs/msm8916-wcd-digital.c4
-rw-r--r--sound/soc/codecs/nau8825.c1
-rw-r--r--sound/soc/codecs/rt5514-spi.c15
-rw-r--r--sound/soc/codecs/rt5514.c2
-rw-r--r--sound/soc/codecs/rt5645.c2
-rw-r--r--sound/soc/codecs/rt5663.c4
-rw-r--r--sound/soc/codecs/rt5663.h4
-rw-r--r--sound/soc/codecs/tlv320aic31xx.h2
-rw-r--r--sound/soc/codecs/twl4030.c4
-rw-r--r--sound/soc/codecs/wm_adsp.c12
-rw-r--r--sound/soc/fsl/fsl_asrc.h4
-rw-r--r--sound/soc/fsl/fsl_ssi.c44
-rw-r--r--sound/soc/intel/boards/kbl_rt5663_max98927.c2
-rw-r--r--sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c2
-rw-r--r--sound/soc/intel/skylake/skl-nhlt.c15
-rw-r--r--sound/soc/intel/skylake/skl-topology.c2
-rw-r--r--sound/soc/rockchip/rockchip_spdif.c18
-rw-r--r--sound/soc/sh/rcar/adg.c6
-rw-r--r--sound/soc/sh/rcar/core.c4
-rw-r--r--sound/soc/sh/rcar/dma.c86
-rw-r--r--sound/soc/sh/rcar/ssi.c16
-rw-r--r--sound/soc/sh/rcar/ssiu.c5
-rw-r--r--sound/usb/mixer.c27
-rw-r--r--sound/usb/quirks.c7
-rw-r--r--tools/arch/s390/include/uapi/asm/bpf_perf_event.h2
-rw-r--r--tools/arch/s390/include/uapi/asm/perf_regs.h44
-rw-r--r--tools/bpf/bpftool/map.c8
-rw-r--r--tools/bpf/bpftool/prog.c2
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat74
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt4
-rw-r--r--tools/objtool/Makefile2
-rw-r--r--tools/objtool/arch/x86/decode.c2
-rw-r--r--tools/objtool/builtin-orc.c4
-rw-r--r--tools/objtool/check.c69
-rw-r--r--tools/objtool/check.h2
-rw-r--r--tools/objtool/orc_gen.c2
-rw-r--r--tools/perf/Makefile.config9
-rw-r--r--tools/perf/arch/s390/include/perf_regs.h2
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/jvmti/jvmti_agent.c16
-rw-r--r--tools/perf/jvmti/jvmti_agent.h7
-rw-r--r--tools/perf/jvmti/libjvmti.c147
-rw-r--r--tools/testing/selftests/bpf/Makefile4
-rw-r--r--tools/testing/selftests/bpf/test_align.c22
-rw-r--r--tools/testing/selftests/bpf/test_progs.c8
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c629
-rw-r--r--tools/testing/selftests/net/config1
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c12
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c500
-rw-r--r--tools/usb/usbip/src/utils.c9
-rw-r--r--virt/kvm/arm/arch_timer.c40
-rw-r--r--virt/kvm/arm/arm.c2
-rw-r--r--virt/kvm/arm/mmio.c6
-rw-r--r--virt/kvm/arm/mmu.c10
820 files changed, 11799 insertions, 3893 deletions
diff --git a/.mailmap b/.mailmap
index 1469ff0d3f4d..e18cab73e209 100644
--- a/.mailmap
+++ b/.mailmap
@@ -107,6 +107,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
 Mark Brown <broonie@sirena.org.uk>
+Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Matthieu CASTET <castet.matthieu@free.fr>
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index d6d862db3b5d..bfd29bc8d37a 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -375,3 +375,19 @@ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	information about CPUs heterogeneity.
 
 		cpu_capacity: capacity of cpu#.
+
+What:		/sys/devices/system/cpu/vulnerabilities
+		/sys/devices/system/cpu/vulnerabilities/meltdown
+		/sys/devices/system/cpu/vulnerabilities/spectre_v1
+		/sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date:		January 2018
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Information about CPU vulnerabilities
+
+		The files are named after the code names of CPU
+		vulnerabilities. The output of those files reflects the
+		state of the CPUs in the system. Possible output values:
+
+		"Not affected"	  CPU is not affected by the vulnerability
+		"Vulnerable"	  CPU is affected and no mitigation in effect
+		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index b2598cc9834c..7242cbda15dd 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -109,6 +109,7 @@ parameter is applicable::
 	IPV6	IPv6 support is enabled.
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
+	ISOL	CPU Isolation is enabled.
 	JOY	Appropriate joystick support is enabled.
 	KGDB	Kernel debugger support is enabled.
 	KVM	Kernel Virtual Machine support is enabled.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6571fbfdb2a1..46b26bfee27b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -328,11 +328,15 @@
 			not play well with APC CPU idle - disable it if you have
 			APC and your system crashes randomly.
 
-	apic=		[APIC,X86-32] Advanced Programmable Interrupt Controller
+	apic=		[APIC,X86] Advanced Programmable Interrupt Controller
 			Change the output verbosity whilst booting
 			Format: { quiet (default) | verbose | debug }
 			Change the amount of debugging information output
 			when initialising the APIC and IO-APIC components.
+			For X86-32, this can also be used to specify an APIC
+			driver name.
+			Format: apic=driver_name
+			Examples: apic=bigsmp
 
 	apic_extnmi=	[APIC,X86] External NMI delivery setting
 			Format: { bsp (default) | all | none }
@@ -709,9 +713,6 @@
 			It will be ignored when crashkernel=X,high is not used
 			or memory reserved is below 4G.
 
-	crossrelease_fullstack
-			[KNL] Allow to record full stack trace in cross-release
-
 	cryptomgr.notests
 			[KNL] Disable crypto self-tests
 
@@ -1737,7 +1738,7 @@
 	isapnp=		[ISAPNP]
 			Format: <RDP>,<reset>,<pci_scan>,<verbosity>
 
-	isolcpus=	[KNL,SMP] Isolate a given set of CPUs from disturbance.
+	isolcpus=	[KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance.
 			[Deprecated - use cpusets instead]
 			Format: [flag-list,]<cpu-list>
 
@@ -2622,6 +2623,11 @@
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
+	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
+			(indirect branch prediction) vulnerability. System may
+			allow data leaks with this option, which is equivalent
+			to spectre_v2=off.
+
 	noxsave		[BUGS=X86] Disables x86 extended register state save
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
@@ -2662,7 +2668,7 @@
 			Valid arguments: on, off
 			Default: on
 
-	nohz_full=	[KNL,BOOT]
+	nohz_full=	[KNL,BOOT,SMP,ISOL]
 			The argument is a cpu list, as described above.
 			In kernels built with CONFIG_NO_HZ_FULL=y, set
 			the specified list of CPUs whose tick will be stopped
@@ -3094,6 +3100,12 @@
 		pcie_scan_all	Scan all possible PCIe devices. Otherwise we
 				only look for one device below a PCIe downstream
 				port.
+		big_root_window	Try to add a big 64bit memory window to the PCIe
+				root complex on AMD CPUs. Some GFX hardware
+				can resize a BAR to allow access to all VRAM.
+				Adding the window is slightly risky (it may
+				conflict with unreported devices), so this
+				taints the kernel.
 
 	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
 			Management.
@@ -3282,6 +3294,21 @@
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
 
+	pti=		[X86_64] Control Page Table Isolation of user and
+			kernel address spaces. Disabling this feature
+			removes hardening, but improves performance of
+			system calls and interrupts.
+
+			on   - unconditionally enable
+			off  - unconditionally disable
+			auto - kernel detects whether your CPU model is
+			       vulnerable to issues that PTI mitigates
+
+			Not specifying this option is equivalent to pti=auto.
+
+	nopti		[X86_64]
+			Equivalent to pti=off
+
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
 			default number.
@@ -3931,6 +3958,29 @@
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/laptops/sonypi.txt
 
+	spectre_v2=	[X86] Control mitigation of Spectre variant 2
+			(indirect branch speculation) vulnerability.
+
+			on   - unconditionally enable
+			off  - unconditionally disable
+			auto - kernel detects whether your CPU model is
+			       vulnerable
+
+			Selecting 'on' will, and 'auto' may, choose a
+			mitigation method at run time according to the
+			CPU, the available microcode, the setting of the
+			CONFIG_RETPOLINE configuration option, and the
+			compiler with which the kernel was built.
+
+			Specific mitigations can also be selected manually:
+
+			retpoline	  - replace indirect branches
+			retpoline,generic - google's original retpoline
+			retpoline,amd     - AMD-specific minimal thunk
+
+			Not specifying this option is equivalent to
+			spectre_v2=auto.
+
 	spia_io_base=	[HW,MTD]
 	spia_fio_base=
 	spia_pedr=
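
Both pti= and spectre_v2= are ordinary boot parameters and combine with any
other command-line options. A purely illustrative boot entry (the kernel image
path and root device here are hypothetical):

    linux /boot/vmlinuz-4.15-rc8 root=/dev/sda1 ro pti=on spectre_v2=retpoline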
diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst
index de50a8561774..9b55952039a6 100644
--- a/Documentation/admin-guide/thunderbolt.rst
+++ b/Documentation/admin-guide/thunderbolt.rst
@@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with
 a sysfs attribute called "force_power".
 
 For example the intel-wmi-thunderbolt driver exposes this attribute in:
-	/sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
+	/sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
 
 To force the power to on, write 1 to this attribute file.
 To disable force power, write 0 to this attribute file.
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 376fa2f50e6b..956bb046e599 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -13,7 +13,6 @@ Required properties:
 	at25df321a
 	at25df641
 	at26df081a
-	en25s64
 	mr25h128
 	mr25h256
 	mr25h10
@@ -33,7 +32,6 @@ Required properties:
 	s25fl008k
 	s25fl064k
 	sst25vf040b
-	sst25wf040b
 	m25p40
 	m25p80
 	m25p16
diff --git a/Documentation/devicetree/bindings/sound/da7218.txt b/Documentation/devicetree/bindings/sound/da7218.txt
index 5ca5a709b6aa..3ab9dfef38d1 100644
--- a/Documentation/devicetree/bindings/sound/da7218.txt
+++ b/Documentation/devicetree/bindings/sound/da7218.txt
@@ -73,7 +73,7 @@ Example:
 		compatible = "dlg,da7218";
 		reg = <0x1a>;
 		interrupt-parent = <&gpio6>;
-		interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 		wakeup-source;
 
 		VDD-supply = <&reg_audio>;
diff --git a/Documentation/devicetree/bindings/sound/da7219.txt b/Documentation/devicetree/bindings/sound/da7219.txt
index cf61681826b6..5b54d2d045c3 100644
--- a/Documentation/devicetree/bindings/sound/da7219.txt
+++ b/Documentation/devicetree/bindings/sound/da7219.txt
@@ -77,7 +77,7 @@ Example:
 		reg = <0x1a>;
 
 		interrupt-parent = <&gpio6>;
-		interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 
 		VDD-supply = <&reg_audio>;
 		VDDMIC-supply = <&reg_audio>;
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
index 5bf13960f7f4..e3c48b20b1a6 100644
--- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -12,24 +12,30 @@ Required properties:
12 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc 12 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
13- reg : Offset and length of the register set for the device 13- reg : Offset and length of the register set for the device
14- interrupts : Should contain CSPI/eCSPI interrupt 14- interrupts : Should contain CSPI/eCSPI interrupt
15- cs-gpios : Specifies the gpio pins to be used for chipselects.
16- clocks : Clock specifiers for both ipg and per clocks. 15- clocks : Clock specifiers for both ipg and per clocks.
17- clock-names : Clock names should include both "ipg" and "per" 16- clock-names : Clock names should include both "ipg" and "per"
18See the clock consumer binding, 17See the clock consumer binding,
19 Documentation/devicetree/bindings/clock/clock-bindings.txt 18 Documentation/devicetree/bindings/clock/clock-bindings.txt
20- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
21 Documentation/devicetree/bindings/dma/dma.txt
22- dma-names: DMA request names should include "tx" and "rx" if present.
23 19
24Obsolete properties: 20Recommended properties:
25- fsl,spi-num-chipselects : Contains the number of the chipselect 21- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip
22select lines can be used, they appear to always generate a pulse between each
23word of a transfer. Most use cases will require GPIO based chip selects to
24generate a valid transaction.
26 25
27Optional properties: 26Optional properties:
27- num-cs : Number of total chip selects, see spi-bus.txt.
28- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
29Documentation/devicetree/bindings/dma/dma.txt.
30- dma-names: DMA request names, if present, should include "tx" and "rx".
28- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register 31- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
29controlling the SPI_READY handling. Note that to enable the DRCTL consideration, 32controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
30the SPI_READY mode-flag needs to be set too. 33the SPI_READY mode-flag needs to be set too.
31Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst). 34Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
32 35
36Obsolete properties:
37- fsl,spi-num-chipselects : Contains the number of the chipselect
38
33Example: 39Example:
34 40
35ecspi@70010000 { 41ecspi@70010000 {
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index c0727dc36271..f2f3f8592a6f 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -25,8 +25,8 @@ available from the following download page. At least "mkfs.nilfs2",
 cleaner or garbage collector) are required. Details on the tools are
 described in the man pages included in the package.
 
-Project web page:	http://nilfs.sourceforge.net/
-Download page:		http://nilfs.sourceforge.net/en/download.html
+Project web page:	https://nilfs.sourceforge.io/
+Download page:		https://nilfs.sourceforge.io/en/download.html
 List info:		http://vger.kernel.org/vger-lists.html#linux-nilfs
 
 Caveats
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 262722d8867b..c4a293a03c33 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax:
 <expr> ::= <symbol>                           (1)
            <symbol> '=' <symbol>              (2)
            <symbol> '!=' <symbol>             (3)
-           '(' <expr> ')'                     (4)
-           '!' <expr>                         (5)
-           <expr> '&&' <expr>                 (6)
-           <expr> '||' <expr>                 (7)
+           <symbol1> '<' <symbol2>            (4)
+           <symbol1> '>' <symbol2>            (4)
+           <symbol1> '<=' <symbol2>           (4)
+           <symbol1> '>=' <symbol2>           (4)
+           '(' <expr> ')'                     (5)
+           '!' <expr>                         (6)
+           <expr> '&&' <expr>                 (7)
+           <expr> '||' <expr>                 (8)
 
 Expressions are listed in decreasing order of precedence.
 
@@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence.
     otherwise 'n'.
 (3) If the values of both symbols are equal, it returns 'n',
     otherwise 'y'.
-(4) Returns the value of the expression. Used to override precedence.
-(5) Returns the result of (2-/expr/).
-(6) Returns the result of min(/expr/, /expr/).
-(7) Returns the result of max(/expr/, /expr/).
+(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
+    or greater-or-equal than value of <symbol2>, it returns 'y',
+    otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
 
 An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
 respectively for calculations). A menu entry becomes visible when its
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 66e620866245..7d4b15977d61 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -9,6 +9,7 @@ Contents:
    batman-adv
    kapi
    z8530book
+   msg_zerocopy
 
 .. only::  subproject
 
14 15
@@ -16,4 +17,3 @@ Contents:
 =======
 
 * :ref:`genindex`
-
diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
index 77f6d7e25cfd..291a01264967 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option:
   if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
     error(1, errno, "setsockopt zerocopy");
 
+Setting the socket option only works when the socket is in its initial
+(TCP_CLOSED) state. Trying to set the option for a socket returned by accept(),
+for example, will lead to an EBUSY error. In this case, the option should be set
+to the listening socket and it will be inherited by the accepted sockets.
 
 Transmission
 ------------
diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt
index 441a4b9b666f..5908a21fddb6 100644
--- a/Documentation/usb/gadget-testing.txt
+++ b/Documentation/usb/gadget-testing.txt
@@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value
 in each line. The rules stated above are best illustrated with an example:
 
 # mkdir functions/uvc.usb0/control/header/h
-# cd functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/
 # ln -s header/h class/fs
 # ln -s header/h class/ss
 # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644
index 000000000000..d11eff61fc9a
--- /dev/null
+++ b/Documentation/x86/pti.txt
@@ -0,0 +1,186 @@
1Overview
2========
3
4Page Table Isolation (pti, previously known as KAISER[1]) is a
5countermeasure against attacks on the shared user/kernel address
6space such as the "Meltdown" approach[2].
7
8To mitigate this class of attacks, we create an independent set of
9page tables for use only when running userspace applications. When
10the kernel is entered via syscalls, interrupts or exceptions, the
11page tables are switched to the full "kernel" copy. When the system
12switches back to user mode, the user copy is used again.
13
14The userspace page tables contain only a minimal amount of kernel
15data: only what is needed to enter/exit the kernel such as the
16entry/exit functions themselves and the interrupt descriptor table
17(IDT). There are a few strictly unnecessary things that get mapped
18such as the first C function when entering an interrupt (see
19comments in pti.c).
20
21This approach helps to ensure that side-channel attacks leveraging
22the paging structures do not function when PTI is enabled. It can be
23enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
24Once enabled at compile-time, it can be disabled at boot with the
25'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
26
27Page Table Management
28=====================
29
30When PTI is enabled, the kernel manages two sets of page tables.
31The first set is very similar to the single set which is present in
32kernels without PTI. This includes a complete mapping of userspace
33that the kernel can use for things like copy_to_user().
34
35Although _complete_, the user portion of the kernel page tables is
36crippled by setting the NX bit in the top level. This ensures
37that any missed kernel->user CR3 switch will immediately crash
38userspace upon executing its first instruction.
39
40The userspace page tables map only the kernel data needed to enter
41and exit the kernel. This data is entirely contained in the 'struct
 42cpu_entry_area' structure, placed in the fixmap so that each CPU's
 43copy of the area has a compile-time-fixed virtual address.
44
45For new userspace mappings, the kernel makes the entries in its
46page tables like normal. The only difference is when the kernel
47makes entries in the top (PGD) level. In addition to setting the
48entry in the main kernel PGD, a copy of the entry is made in the
49userspace page tables' PGD.
50
51This sharing at the PGD level also inherently shares all the lower
52layers of the page tables. This leaves a single, shared set of
53userspace page tables to manage. One PTE to lock, one set of
54accessed bits, dirty bits, etc...
55
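The PGD mirroring just described is compact enough to sketch in C. The
types and helper names below are simplified assumptions for illustration,
not the kernel's real interfaces; the only structural facts used are ones
stated in this file (a two-page, order-1 PGD allocation -- see the
Overhead section below -- with the userspace shadow in the second page).

    /*
     * Sketch only: simplified types, hypothetical helpers. A PTI PGD is
     * an order-1 (two-page) allocation; the userspace shadow PGD fills
     * the second page, so a shadow entry sits exactly PTRS_PER_PGD
     * entries after its kernel twin.
     */
    #define PTRS_PER_PGD 512

    typedef struct { unsigned long pgd; } pgd_t;

    static pgd_t *user_pgdp(pgd_t *kernel_pgdp)
    {
        return kernel_pgdp + PTRS_PER_PGD;  /* one page later */
    }

    /* mirror a top-level entry that maps userspace into both PGDs */
    static void set_pgd_mirrored(pgd_t *pgdp, pgd_t val)
    {
        pgdp->pgd = val.pgd;               /* main kernel page tables */
        user_pgdp(pgdp)->pgd = val.pgd;    /* userspace copy */
    }

Because only the top level is duplicated, both copies point at the same
lower-level tables, which is exactly the one-PTE-to-lock property noted
above.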
56Overhead
57========
58
59Protection against side-channel attacks is important. But,
60this protection comes at a cost:
61
621. Increased Memory Use
63 a. Each process now needs an order-1 PGD instead of order-0.
64 (Consumes an additional 4k per process).
65 b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
66 aligned so that it can be mapped by setting a single PMD
67 entry. This consumes nearly 2MB of RAM once the kernel
68 is decompressed, but no space in the kernel image itself.
69
702. Runtime Cost
71 a. CR3 manipulation to switch between the page table copies
72 must be done at interrupt, syscall, and exception entry
73 and exit (it can be skipped when the kernel is interrupted,
74 though.) Moves to CR3 are on the order of a hundred
75 cycles, and are required at every entry and exit.
76 b. A "trampoline" must be used for SYSCALL entry. This
77 trampoline depends on a smaller set of resources than the
78 non-PTI SYSCALL entry code, so requires mapping fewer
79 things into the userspace page tables. The downside is
80 that stacks must be switched at entry time.
 81 c. Global pages are disabled for all kernel structures not
82 mapped into both kernel and userspace page tables. This
83 feature of the MMU allows different processes to share TLB
84 entries mapping the kernel. Losing the feature means more
85 TLB misses after a context switch. The actual loss of
86 performance is very small, however, never exceeding 1%.
87 d. Process Context IDentifiers (PCID) is a CPU feature that
88 allows us to skip flushing the entire TLB when switching page
89 tables by setting a special bit in CR3 when the page tables
90 are changed. This makes switching the page tables (at context
91 switch, or kernel entry/exit) cheaper. But, on systems with
92 PCID support, the context switch code must flush both the user
93 and kernel entries out of the TLB. The user PCID TLB flush is
94 deferred until the exit to userspace, minimizing the cost.
95 See intel.com/sdm for the gory PCID/INVPCID details.
96 e. The userspace page tables must be populated for each new
97 process. Even without PTI, the shared kernel mappings
98 are created by copying top-level (PGD) entries into each
99 new process. But, with PTI, there are now *two* kernel
100 mappings: one in the kernel page tables that maps everything
101 and one for the entry/exit structures. At fork(), we need to
102 copy both.
103 f. In addition to the fork()-time copying, there must also
104 be an update to the userspace PGD any time a set_pgd() is done
105 on a PGD used to map userspace. This ensures that the kernel
106 and userspace copies always map the same userspace
107 memory.
108 g. On systems without PCID support, each CR3 write flushes
109 the entire TLB. That means that each syscall, interrupt
110 or exception flushes the TLB.
111 h. INVPCID is a TLB-flushing instruction which allows flushing
112 of TLB entries for non-current PCIDs. Some systems support
113 PCIDs, but do not support INVPCID. On these systems, addresses
114 can only be flushed from the TLB for the current PCID. When
115 flushing a kernel address, we need to flush all PCIDs, so a
116 single kernel address flush will require a TLB-flushing CR3
117 write upon the next use of every PCID.
118
119Possible Future Work
120====================
1211. We can be more careful about not writing to CR3 unless
 122 its value has actually changed.
1232. Allow PTI to be enabled/disabled at runtime in addition to the
124 boot-time switching.
125
126Testing
127=======
128
129To test stability of PTI, the following test procedure is recommended,
130ideally doing all of these in parallel:
131
1321. Set CONFIG_DEBUG_ENTRY=y
1332. Run several copies of all of the tools/testing/selftests/x86/ tests
134 (excluding MPX and protection_keys) in a loop on multiple CPUs for
135 several minutes. These tests frequently uncover corner cases in the
136 kernel entry code. In general, old kernels might cause these tests
137 themselves to crash, but they should never crash the kernel.
1383. Run the 'perf' tool in a mode (top or record) that generates many
139 frequent performance monitoring non-maskable interrupts (see "NMI"
140 in /proc/interrupts). This exercises the NMI entry/exit code which
141 is known to trigger bugs in code paths that did not expect to be
142 interrupted, including nested NMIs. Using "-c" boosts the rate of
143 NMIs, and using two -c with separate counters encourages nested NMIs
144 and less deterministic behavior.
145
146 while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
147
1484. Launch a KVM virtual machine.
1495. Run 32-bit binaries on systems supporting the SYSCALL instruction.
150 This has been a lightly-tested code path and needs extra scrutiny.
151
152Debugging
153=========
154
155Bugs in PTI cause a few different signatures of crashes
156that are worth noting here.
157
158 * Failures of the selftests/x86 code. Usually a bug in one of the
159 more obscure corners of entry_64.S
160 * Crashes in early boot, especially around CPU bringup. Bugs
161 in the trampoline code or mappings cause these.
162 * Crashes at the first interrupt. Caused by bugs in entry_64.S,
163 like screwing up a page table switch. Also caused by
164 incorrectly mapping the IRQ handler entry code.
165 * Crashes at the first NMI. The NMI code is separate from main
166 interrupt handlers and can have bugs that do not affect
167 normal interrupts. Also caused by incorrectly mapping NMI
168 code. NMIs that interrupt the entry code must be very
169 careful and can be the cause of crashes that show up when
170 running perf.
171 * Kernel crashes at the first exit to userspace. entry_64.S
172 bugs, or failing to map some of the exit code.
173 * Crashes at first interrupt that interrupts userspace. The paths
174 in entry_64.S that return to userspace are sometimes separate
175 from the ones that return to the kernel.
176 * Double faults: overflowing the kernel stack because of page
177 faults upon page faults. Caused by touching non-pti-mapped
178 data in the entry code, or forgetting to switch to kernel
179 CR3 before calling into C functions which are not pti-mapped.
180 * Userspace segfaults early in boot, sometimes manifesting
181 as mount(8) failing to mount the rootfs. These have
182 tended to be TLB invalidation issues. Usually invalidating
183 the wrong PCID, or otherwise missing an invalidation.
184
1851. https://gruss.cc/files/kaiser.pdf
1862. https://meltdownattack.com/meltdown.pdf
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 3448e675b462..ea91cb61a602 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -1,6 +1,4 @@
1 1
2<previous description obsolete, deleted>
3
4Virtual memory map with 4 level page tables: 2Virtual memory map with 4 level page tables:
5 3
60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm 40000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,17 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
14... unused hole ... 12... unused hole ...
15ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) 13ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
16... unused hole ... 14... unused hole ...
15 vaddr_end for KASLR
16fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
17fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
17ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks 18ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
18... unused hole ... 19... unused hole ...
19ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space 20ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
20... unused hole ... 21... unused hole ...
21ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 22ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
22ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable) 23ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable)
23ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls 24[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
25ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
24ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole 26ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
25 27
26Virtual memory map with 5 level page tables: 28Virtual memory map with 5 level page tables:
@@ -29,26 +31,31 @@ Virtual memory map with 5 level page tables:
29hole caused by [56:63] sign extension 31hole caused by [56:63] sign extension
30ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor 32ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
31ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory 33ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
32ff90000000000000 - ff91ffffffffffff (=49 bits) hole 34ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
33ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space 35ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
34ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole 36ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
35ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) 37ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
36... unused hole ... 38... unused hole ...
37ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) 39ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
38... unused hole ... 40... unused hole ...
41 vaddr_end for KASLR
42fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
43... unused hole ...
39ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks 44ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
40... unused hole ... 45... unused hole ...
41ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space 46ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
42... unused hole ... 47... unused hole ...
43ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 48ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
44ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space 49ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
45ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls 50[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
51ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
46ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole 52ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
47 53
48Architecture defines a 64-bit virtual address. Implementations can support 54Architecture defines a 64-bit virtual address. Implementations can support
49less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 55less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
50through to the most-significant implemented bit are set to either all ones 56through to the most-significant implemented bit are sign extended.
51or all zero. This causes hole between user space and kernel addresses. 57This causes hole between user space and kernel addresses if you interpret them
58as unsigned.
52 59
53The direct mapping covers all memory in the system up to the highest 60The direct mapping covers all memory in the system up to the highest
54memory address (this means in some cases it can also include PCI memory 61memory address (this means in some cases it can also include PCI memory
@@ -58,19 +65,15 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
58the processes using the page fault handler, with init_top_pgt as 65the processes using the page fault handler, with init_top_pgt as
59reference. 66reference.
60 67
61Current X86-64 implementations support up to 46 bits of address space (64 TB),
62which is our current limit. This expands into MBZ space in the page tables.
63
64We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual 68We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
65memory window (this size is arbitrary, it can be raised later if needed). 69memory window (this size is arbitrary, it can be raised later if needed).
66The mappings are not part of any other kernel PGD and are only available 70The mappings are not part of any other kernel PGD and are only available
67during EFI runtime calls. 71during EFI runtime calls.
68 72
69The module mapping space size changes based on the CONFIG requirements for the
70following fixmap section.
71
72Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all 73Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
73physical memory, vmalloc/ioremap space and virtual memory map are randomized. 74physical memory, vmalloc/ioremap space and virtual memory map are randomized.
74Their order is preserved but their base will be offset early at boot time. 75Their order is preserved but their base will be offset early at boot time.
75 76
76-Andi Kleen, Jul 2004 77Be very careful vs. KASLR when changing anything here. The KASLR address
78range must not overlap with anything except the KASAN shadow area, which is
79correct as KASAN disables KASLR.
diff --git a/MAINTAINERS b/MAINTAINERS
index 81c4b68a5d8e..5bc088f27c83 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2621,24 +2621,22 @@ F: fs/bfs/
2621F: include/uapi/linux/bfs_fs.h 2621F: include/uapi/linux/bfs_fs.h
2622 2622
2623BLACKFIN ARCHITECTURE 2623BLACKFIN ARCHITECTURE
2624M: Steven Miao <realmz6@gmail.com>
2625L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2624L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2626T: git git://git.code.sf.net/p/adi-linux/code 2625T: git git://git.code.sf.net/p/adi-linux/code
2627W: http://blackfin.uclinux.org 2626W: http://blackfin.uclinux.org
2628S: Supported 2627S: Orphan
2629F: arch/blackfin/ 2628F: arch/blackfin/
2630 2629
2631BLACKFIN EMAC DRIVER 2630BLACKFIN EMAC DRIVER
2632L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2631L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2633W: http://blackfin.uclinux.org 2632W: http://blackfin.uclinux.org
2634S: Supported 2633S: Orphan
2635F: drivers/net/ethernet/adi/ 2634F: drivers/net/ethernet/adi/
2636 2635
2637BLACKFIN MEDIA DRIVER 2636BLACKFIN MEDIA DRIVER
2638M: Scott Jiang <scott.jiang.linux@gmail.com>
2639L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2637L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2640W: http://blackfin.uclinux.org/ 2638W: http://blackfin.uclinux.org/
2641S: Supported 2639S: Orphan
2642F: drivers/media/platform/blackfin/ 2640F: drivers/media/platform/blackfin/
2643F: drivers/media/i2c/adv7183* 2641F: drivers/media/i2c/adv7183*
2644F: drivers/media/i2c/vs6624* 2642F: drivers/media/i2c/vs6624*
@@ -2646,25 +2644,25 @@ F: drivers/media/i2c/vs6624*
2646BLACKFIN RTC DRIVER 2644BLACKFIN RTC DRIVER
2647L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2645L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2648W: http://blackfin.uclinux.org 2646W: http://blackfin.uclinux.org
2649S: Supported 2647S: Orphan
2650F: drivers/rtc/rtc-bfin.c 2648F: drivers/rtc/rtc-bfin.c
2651 2649
2652BLACKFIN SDH DRIVER 2650BLACKFIN SDH DRIVER
2653L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2651L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2654W: http://blackfin.uclinux.org 2652W: http://blackfin.uclinux.org
2655S: Supported 2653S: Orphan
2656F: drivers/mmc/host/bfin_sdh.c 2654F: drivers/mmc/host/bfin_sdh.c
2657 2655
2658BLACKFIN SERIAL DRIVER 2656BLACKFIN SERIAL DRIVER
2659L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2657L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2660W: http://blackfin.uclinux.org 2658W: http://blackfin.uclinux.org
2661S: Supported 2659S: Orphan
2662F: drivers/tty/serial/bfin_uart.c 2660F: drivers/tty/serial/bfin_uart.c
2663 2661
2664BLACKFIN WATCHDOG DRIVER 2662BLACKFIN WATCHDOG DRIVER
2665L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) 2663L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
2666W: http://blackfin.uclinux.org 2664W: http://blackfin.uclinux.org
2667S: Supported 2665S: Orphan
2668F: drivers/watchdog/bfin_wdt.c 2666F: drivers/watchdog/bfin_wdt.c
2669 2667
2670BLINKM RGB LED DRIVER 2668BLINKM RGB LED DRIVER
@@ -5173,15 +5171,15 @@ F: sound/usb/misc/ua101.c
5173EFI TEST DRIVER 5171EFI TEST DRIVER
5174L: linux-efi@vger.kernel.org 5172L: linux-efi@vger.kernel.org
5175M: Ivan Hu <ivan.hu@canonical.com> 5173M: Ivan Hu <ivan.hu@canonical.com>
5176M: Matt Fleming <matt@codeblueprint.co.uk> 5174M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
5177S: Maintained 5175S: Maintained
5178F: drivers/firmware/efi/test/ 5176F: drivers/firmware/efi/test/
5179 5177
5180EFI VARIABLE FILESYSTEM 5178EFI VARIABLE FILESYSTEM
5181M: Matthew Garrett <matthew.garrett@nebula.com> 5179M: Matthew Garrett <matthew.garrett@nebula.com>
5182M: Jeremy Kerr <jk@ozlabs.org> 5180M: Jeremy Kerr <jk@ozlabs.org>
5183M: Matt Fleming <matt@codeblueprint.co.uk> 5181M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
5184T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git 5182T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
5185L: linux-efi@vger.kernel.org 5183L: linux-efi@vger.kernel.org
5186S: Maintained 5184S: Maintained
5187F: fs/efivarfs/ 5185F: fs/efivarfs/
@@ -5342,7 +5340,6 @@ S: Supported
5342F: security/integrity/evm/ 5340F: security/integrity/evm/
5343 5341
5344EXTENSIBLE FIRMWARE INTERFACE (EFI) 5342EXTENSIBLE FIRMWARE INTERFACE (EFI)
5345M: Matt Fleming <matt@codeblueprint.co.uk>
5346M: Ard Biesheuvel <ard.biesheuvel@linaro.org> 5343M: Ard Biesheuvel <ard.biesheuvel@linaro.org>
5347L: linux-efi@vger.kernel.org 5344L: linux-efi@vger.kernel.org
5348T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git 5345T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
@@ -9663,8 +9660,8 @@ F: include/uapi/linux/sunrpc/
9663NILFS2 FILESYSTEM 9660NILFS2 FILESYSTEM
9664M: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> 9661M: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
9665L: linux-nilfs@vger.kernel.org 9662L: linux-nilfs@vger.kernel.org
9666W: http://nilfs.sourceforge.net/ 9663W: https://nilfs.sourceforge.io/
9667W: http://nilfs.osdn.jp/ 9664W: https://nilfs.osdn.jp/
9668T: git git://github.com/konis/nilfs2.git 9665T: git git://github.com/konis/nilfs2.git
9669S: Supported 9666S: Supported
9670F: Documentation/filesystems/nilfs2.txt 9667F: Documentation/filesystems/nilfs2.txt
@@ -10159,7 +10156,7 @@ F: drivers/irqchip/irq-ompic.c
10159F: drivers/irqchip/irq-or1k-* 10156F: drivers/irqchip/irq-or1k-*
10160 10157
10161OPENVSWITCH 10158OPENVSWITCH
10162M: Pravin Shelar <pshelar@nicira.com> 10159M: Pravin B Shelar <pshelar@ovn.org>
10163L: netdev@vger.kernel.org 10160L: netdev@vger.kernel.org
10164L: dev@openvswitch.org 10161L: dev@openvswitch.org
10165W: http://openvswitch.org 10162W: http://openvswitch.org
@@ -13516,6 +13513,7 @@ M: Mika Westerberg <mika.westerberg@linux.intel.com>
13516M: Yehezkel Bernat <yehezkel.bernat@intel.com> 13513M: Yehezkel Bernat <yehezkel.bernat@intel.com>
13517T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git 13514T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
13518S: Maintained 13515S: Maintained
13516F: Documentation/admin-guide/thunderbolt.rst
13519F: drivers/thunderbolt/ 13517F: drivers/thunderbolt/
13520F: include/linux/thunderbolt.h 13518F: include/linux/thunderbolt.h
13521 13519
diff --git a/Makefile b/Makefile
index 7e02f951b284..bf5b8cbb9469 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
2VERSION = 4 2VERSION = 4
3PATCHLEVEL = 15 3PATCHLEVEL = 15
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc4 5EXTRAVERSION = -rc8
6NAME = Fearless Coyote 6NAME = Fearless Coyote
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
@@ -484,26 +484,6 @@ CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN)
484endif 484endif
485KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) 485KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
486KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) 486KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
487KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
488KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
489KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
490KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
491KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
492# Quiet clang warning: comparison of unsigned expression < 0 is always false
493KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
494# CLANG uses a _MergedGlobals as an optimization, but this breaks modpost, as the
495# source of a reference will be _MergedGlobals and not one of the whitelisted names.
496# See modpost pattern 2
497KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
498KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
499KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
500KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
501else
502
503# These warnings generated too much noise in a regular build.
504# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
505KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
506KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
507endif 487endif
508 488
509ifeq ($(config-targets),1) 489ifeq ($(config-targets),1)
@@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR
716endif 696endif
717KBUILD_CFLAGS += $(stackp-flag) 697KBUILD_CFLAGS += $(stackp-flag)
718 698
699ifeq ($(cc-name),clang)
700KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
701KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
702KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
703KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
704KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
705# Quiet clang warning: comparison of unsigned expression < 0 is always false
706KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
707# CLANG uses a _MergedGlobals as an optimization, but this breaks modpost, as the
708# source of a reference will be _MergedGlobals and not one of the whitelisted names.
709# See modpost pattern 2
710KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
711KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
712KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
713KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
714else
715
716# These warnings generated too much noise in a regular build.
717# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
718KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
719KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
720endif
721
719ifdef CONFIG_FRAME_POINTER 722ifdef CONFIG_FRAME_POINTER
720KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls 723KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
721else 724else
@@ -789,6 +792,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
789# disable invalid "can't wrap" optimizations for signed / pointers 792# disable invalid "can't wrap" optimizations for signed / pointers
790KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) 793KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
791 794
795# Make sure -fstack-check isn't enabled (like gentoo apparently did)
796KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
797
792# conserve stack if available 798# conserve stack if available
793KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) 799KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
794 800
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index 4e6e9f57e790..dc91c663bcc0 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -35,6 +35,14 @@
35 reg = <0x80 0x10>, <0x100 0x10>; 35 reg = <0x80 0x10>, <0x100 0x10>;
36 #clock-cells = <0>; 36 #clock-cells = <0>;
37 clocks = <&input_clk>; 37 clocks = <&input_clk>;
38
39 /*
40 * Set initial core pll output frequency to 90MHz.
 41 * It will be applied when the core pll driver probes
 42 * during early boot.
43 */
44 assigned-clocks = <&core_clk>;
45 assigned-clock-rates = <90000000>;
38 }; 46 };
39 47
40 core_intc: archs-intc@cpu { 48 core_intc: archs-intc@cpu {
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 63954a8b0100..69ff4895f2ba 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -35,6 +35,14 @@
35 reg = <0x80 0x10>, <0x100 0x10>; 35 reg = <0x80 0x10>, <0x100 0x10>;
36 #clock-cells = <0>; 36 #clock-cells = <0>;
37 clocks = <&input_clk>; 37 clocks = <&input_clk>;
38
39 /*
40 * Set initial core pll output frequency to 100MHz.
 41 * It will be applied when the core pll driver probes
 42 * during early boot.
43 */
44 assigned-clocks = <&core_clk>;
45 assigned-clock-rates = <100000000>;
38 }; 46 };
39 47
40 core_intc: archs-intc@cpu { 48 core_intc: archs-intc@cpu {
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 8f627c200d60..006aa3de5348 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -114,6 +114,14 @@
114 reg = <0x00 0x10>, <0x14B8 0x4>; 114 reg = <0x00 0x10>, <0x14B8 0x4>;
115 #clock-cells = <0>; 115 #clock-cells = <0>;
116 clocks = <&input_clk>; 116 clocks = <&input_clk>;
117
118 /*
119 * Set initial core pll output frequency to 1GHz.
 120 * It will be applied when the core pll driver probes
 121 * during early boot.
122 */
123 assigned-clocks = <&core_clk>;
124 assigned-clock-rates = <1000000000>;
117 }; 125 };
118 126
119 serial: serial@5000 { 127 serial: serial@5000 {
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 7b8f8faf8a24..ac6b0ed8341e 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y
49CONFIG_SERIAL_OF_PLATFORM=y 49CONFIG_SERIAL_OF_PLATFORM=y
50# CONFIG_HW_RANDOM is not set 50# CONFIG_HW_RANDOM is not set
51# CONFIG_HWMON is not set 51# CONFIG_HWMON is not set
52CONFIG_DRM=y
53# CONFIG_DRM_FBDEV_EMULATION is not set
54CONFIG_DRM_UDL=y
52CONFIG_FB=y 55CONFIG_FB=y
53CONFIG_FB_UDL=y
54CONFIG_FRAMEBUFFER_CONSOLE=y 56CONFIG_FRAMEBUFFER_CONSOLE=y
55CONFIG_USB=y
56CONFIG_USB_EHCI_HCD=y 57CONFIG_USB_EHCI_HCD=y
57CONFIG_USB_EHCI_HCD_PLATFORM=y 58CONFIG_USB_EHCI_HCD_PLATFORM=y
58CONFIG_USB_OHCI_HCD=y 59CONFIG_USB_OHCI_HCD=y
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index f35974ee7264..c9173c02081c 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
668 return 0; 668 return 0;
669 669
670 __asm__ __volatile__( 670 __asm__ __volatile__(
671 " mov lp_count, %5 \n"
671 " lp 3f \n" 672 " lp 3f \n"
672 "1: ldb.ab %3, [%2, 1] \n" 673 "1: ldb.ab %3, [%2, 1] \n"
673 " breq.d %3, 0, 3f \n" 674 " breq.d %3, 0, 3f \n"
@@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
684 " .word 1b, 4b \n" 685 " .word 1b, 4b \n"
685 " .previous \n" 686 " .previous \n"
686 : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) 687 : "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
687 : "g"(-EFAULT), "l"(count) 688 : "g"(-EFAULT), "r"(count)
688 : "memory"); 689 : "lp_count", "lp_start", "lp_end", "memory");
689 690
690 return res; 691 return res;
691} 692}
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 7ef7d9a8ff89..9d27331fe69a 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void)
199 unsigned int exec_ctrl; 199 unsigned int exec_ctrl;
200 200
201 READ_BCR(AUX_EXEC_CTRL, exec_ctrl); 201 READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
202 cpu->extn.dual_enb = exec_ctrl & 1; 202 cpu->extn.dual_enb = !(exec_ctrl & 1);
203 203
204 /* dual issue always present for this core */ 204 /* dual issue always present for this core */
205 cpu->extn.dual = 1; 205 cpu->extn.dual = 1;
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index 74315f302971..bf40e06f3fb8 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
163 */ 163 */
164static int __print_sym(unsigned int address, void *unused) 164static int __print_sym(unsigned int address, void *unused)
165{ 165{
166 __print_symbol(" %s\n", address); 166 printk(" %pS\n", (void *)address);
167 return 0; 167 return 0;
168} 168}
169 169
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index bcd7c9fc5d0f..133a4dae41fe 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
83DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR) 83DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
84DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT) 84DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
85DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) 85DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
86DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0)
86 87
87/* 88/*
88 * Entry Point for Misaligned Data access Exception, for emulating in software 89 * Entry Point for Misaligned Data access Exception, for emulating in software
@@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
115 * Thus TRAP_S <n> can be used for specific purpose 116 * Thus TRAP_S <n> can be used for specific purpose
116 * -1 used for software breakpointing (gdb) 117 * -1 used for software breakpointing (gdb)
117 * -2 used by kprobes 118 * -2 used by kprobes
 119 * -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggles such as
120 * -fno-isolate-erroneous-paths-dereference
118 */ 121 */
119void do_non_swi_trap(unsigned long address, struct pt_regs *regs) 122void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
120{ 123{
@@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
134 kgdb_trap(regs); 137 kgdb_trap(regs);
135 break; 138 break;
136 139
140 case 5:
141 do_trap5_error(address, regs);
142 break;
137 default: 143 default:
138 break; 144 break;
139 } 145 }
@@ -155,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs)
155 161
156 insterror_is_error(address, regs); 162 insterror_is_error(address, regs);
157} 163}
164
165/*
166 * abort() call generated by older gcc for __builtin_trap()
167 */
168void abort(void)
169{
170 __asm__ __volatile__("trap_s 5\n");
171}
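As a userspace illustration of why the kernel now needs a trap 5 handler
and an abort() definition, consider the hedged sketch below (an assumed
example, not kernel code). With erroneous-path isolation active, which
-O2 enables via -fisolate-erroneous-paths-dereference, gcc replaces the
provably-NULL dereference with __builtin_trap(); ARC gcc 2018.03 onwards
lowers that builtin to "trap_s 5", while older releases emitted a call to
abort() instead, hence the abort() added above.

    #include <stddef.h>

    static int load(int *p)
    {
        if (p == NULL)
            return *p;  /* provably NULL: gcc isolates this path
                         * into __builtin_trap() */
        return *p;
    }

    int main(void)
    {
        return load(NULL);  /* dies via trap_s 5 (or abort() on old gcc) */
    }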
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 7d8c1d6c2f60..6e9a0a9a6a04 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs)
163 else 163 else
164 pr_cont("Bus Error, check PRM\n"); 164 pr_cont("Bus Error, check PRM\n");
165#endif 165#endif
166 } else if (vec == ECR_V_TRAP) {
167 if (regs->ecr_param == 5)
168 pr_cont("gcc generated __builtin_trap\n");
166 } else { 169 } else {
167 pr_cont("Check Programmer's Manual\n"); 170 pr_cont("Check Programmer's Manual\n");
168 } 171 }
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index f1ac6790da5f..46544e88492d 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -317,25 +317,23 @@ static void __init axs103_early_init(void)
317 * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack 317 * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
318 * of fudging the freq in DT 318 * of fudging the freq in DT
319 */ 319 */
320#define AXS103_QUAD_CORE_CPU_FREQ_HZ 50000000
321
320 unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; 322 unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
321 if (num_cores > 2) { 323 if (num_cores > 2) {
322 u32 freq = 50, orig; 324 u32 freq;
323 /*
324 * TODO: use cpu node "cpu-freq" param instead of platform-specific
325 * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu.
326 */
327 int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); 325 int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
328 const struct fdt_property *prop; 326 const struct fdt_property *prop;
329 327
330 prop = fdt_get_property(initial_boot_params, off, 328 prop = fdt_get_property(initial_boot_params, off,
331 "clock-frequency", NULL); 329 "assigned-clock-rates", NULL);
332 orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000; 330 freq = be32_to_cpu(*(u32 *)(prop->data));
333 331
334 /* Patching .dtb in-place with new core clock value */ 332 /* Patching .dtb in-place with new core clock value */
335 if (freq != orig ) { 333 if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) {
336 freq = cpu_to_be32(freq * 1000000); 334 freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ);
337 fdt_setprop_inplace(initial_boot_params, off, 335 fdt_setprop_inplace(initial_boot_params, off,
338 "clock-frequency", &freq, sizeof(freq)); 336 "assigned-clock-rates", &freq, sizeof(freq));
339 } 337 }
340 } 338 }
341#endif 339#endif
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
index fd0ae5e38639..2958aedb649a 100644
--- a/arch/arc/plat-hsdk/platform.c
+++ b/arch/arc/plat-hsdk/platform.c
@@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu)
38#define CREG_PAE (CREG_BASE + 0x180) 38#define CREG_PAE (CREG_BASE + 0x180)
39#define CREG_PAE_UPDATE (CREG_BASE + 0x194) 39#define CREG_PAE_UPDATE (CREG_BASE + 0x194)
40 40
41#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8)
42#define CREG_CORE_IF_CLK_DIV_2 0x1
43#define CGU_BASE ARC_PERIPHERAL_BASE
44#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4)
45#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0)
46#define CGU_PLL_STATUS_LOCK BIT(0)
47#define CGU_PLL_STATUS_ERR BIT(1)
48#define CGU_PLL_CTRL_1GHZ 0x3A10
49#define HSDK_PLL_LOCK_TIMEOUT 500
50
51#define HSDK_PLL_LOCKED() \
52 !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK)
53
54#define HSDK_PLL_ERR() \
55 !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR)
56
57static void __init hsdk_set_cpu_freq_1ghz(void)
58{
59 u32 timeout = HSDK_PLL_LOCK_TIMEOUT;
60
61 /*
62 * As we set cpu clock which exceeds 500MHz, the divider for the interface
63 * clock must be programmed to div-by-2.
64 */
65 iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV);
66
67 /* Set cpu clock to 1GHz */
68 iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL);
69
70 while (!HSDK_PLL_LOCKED() && timeout--)
71 cpu_relax();
72
73 if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR())
74 pr_err("Failed to setup CPU frequency to 1GHz!");
75}
76
77#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) 41#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000)
78#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) 42#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108)
79#define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) 43#define SDIO_UHS_REG_EXT_DIV_2 (2 << 30)
@@ -98,12 +62,6 @@ static void __init hsdk_init_early(void)
98 * minimum possible div-by-2. 62 * minimum possible div-by-2.
99 */ 63 */
100 iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); 64 iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT);
101
102 /*
103 * Setup CPU frequency to 1GHz.
104 * TODO: remove it after smart hsdk pll driver will be introduced.
105 */
106 hsdk_set_cpu_freq_1ghz();
107} 65}
108 66
109static const char *hsdk_compat[] __initconst = { 67static const char *hsdk_compat[] __initconst = {
diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi
index 45d815a86d42..de08d9045cb8 100644
--- a/arch/arm/boot/dts/aspeed-g4.dtsi
+++ b/arch/arm/boot/dts/aspeed-g4.dtsi
@@ -219,7 +219,7 @@
219 compatible = "aspeed,ast2400-vuart"; 219 compatible = "aspeed,ast2400-vuart";
220 reg = <0x1e787000 0x40>; 220 reg = <0x1e787000 0x40>;
221 reg-shift = <2>; 221 reg-shift = <2>;
222 interrupts = <10>; 222 interrupts = <8>;
223 clocks = <&clk_uart>; 223 clocks = <&clk_uart>;
224 no-loopback-test; 224 no-loopback-test;
225 status = "disabled"; 225 status = "disabled";
diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts
index 5f29010cdbd8..9b82cc8843e1 100644
--- a/arch/arm/boot/dts/at91-tse850-3.dts
+++ b/arch/arm/boot/dts/at91-tse850-3.dts
@@ -221,6 +221,7 @@
221 jc42@18 { 221 jc42@18 {
222 compatible = "nxp,se97b", "jedec,jc-42.4-temp"; 222 compatible = "nxp,se97b", "jedec,jc-42.4-temp";
223 reg = <0x18>; 223 reg = <0x18>;
224 smbus-timeout-disable;
224 }; 225 };
225 226
226 dpot: mcp4651-104@28 { 227 dpot: mcp4651-104@28 {
diff --git a/arch/arm/boot/dts/da850-lego-ev3.dts b/arch/arm/boot/dts/da850-lego-ev3.dts
index 413dbd5d9f64..81942ae83e1f 100644
--- a/arch/arm/boot/dts/da850-lego-ev3.dts
+++ b/arch/arm/boot/dts/da850-lego-ev3.dts
@@ -178,7 +178,7 @@
178 */ 178 */
179 battery { 179 battery {
180 pinctrl-names = "default"; 180 pinctrl-names = "default";
181 pintctrl-0 = <&battery_pins>; 181 pinctrl-0 = <&battery_pins>;
182 compatible = "lego,ev3-battery"; 182 compatible = "lego,ev3-battery";
183 io-channels = <&adc 4>, <&adc 3>; 183 io-channels = <&adc 4>, <&adc 3>;
184 io-channel-names = "voltage", "current"; 184 io-channel-names = "voltage", "current";
@@ -392,7 +392,7 @@
392 batt_volt_en { 392 batt_volt_en {
393 gpio-hog; 393 gpio-hog;
394 gpios = <6 GPIO_ACTIVE_HIGH>; 394 gpios = <6 GPIO_ACTIVE_HIGH>;
395 output-low; 395 output-high;
396 }; 396 };
397}; 397};
398 398
diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts
index b2b95ff205e8..0029ec27819c 100644
--- a/arch/arm/boot/dts/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts
@@ -664,6 +664,10 @@
664 status = "okay"; 664 status = "okay";
665}; 665};
666 666
667&mixer {
668 status = "okay";
669};
670
667/* eMMC flash */ 671/* eMMC flash */
668&mmc_0 { 672&mmc_0 {
669 status = "okay"; 673 status = "okay";
diff --git a/arch/arm/boot/dts/ls1021a-qds.dts b/arch/arm/boot/dts/ls1021a-qds.dts
index 940875316d0f..67b4de0e3439 100644
--- a/arch/arm/boot/dts/ls1021a-qds.dts
+++ b/arch/arm/boot/dts/ls1021a-qds.dts
@@ -215,7 +215,7 @@
215 reg = <0x2a>; 215 reg = <0x2a>;
216 VDDA-supply = <&reg_3p3v>; 216 VDDA-supply = <&reg_3p3v>;
217 VDDIO-supply = <&reg_3p3v>; 217 VDDIO-supply = <&reg_3p3v>;
218 clocks = <&sys_mclk 1>; 218 clocks = <&sys_mclk>;
219 }; 219 };
220 }; 220 };
221 }; 221 };
diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts
index a8b148ad1dd2..44715c8ef756 100644
--- a/arch/arm/boot/dts/ls1021a-twr.dts
+++ b/arch/arm/boot/dts/ls1021a-twr.dts
@@ -187,7 +187,7 @@
187 reg = <0x0a>; 187 reg = <0x0a>;
188 VDDA-supply = <&reg_3p3v>; 188 VDDA-supply = <&reg_3p3v>;
189 VDDIO-supply = <&reg_3p3v>; 189 VDDIO-supply = <&reg_3p3v>;
190 clocks = <&sys_mclk 1>; 190 clocks = <&sys_mclk>;
191 }; 191 };
192}; 192};
193 193
diff --git a/arch/arm/boot/dts/rk3066a-marsboard.dts b/arch/arm/boot/dts/rk3066a-marsboard.dts
index c6d92c25df42..d23ee6d911ac 100644
--- a/arch/arm/boot/dts/rk3066a-marsboard.dts
+++ b/arch/arm/boot/dts/rk3066a-marsboard.dts
@@ -83,6 +83,10 @@
83 }; 83 };
84}; 84};
85 85
86&cpu0 {
87 cpu0-supply = <&vdd_arm>;
88};
89
86&i2c1 { 90&i2c1 {
87 status = "okay"; 91 status = "okay";
88 clock-frequency = <400000>; 92 clock-frequency = <400000>;
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index cd24894ee5c6..6102e4e7f35c 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -956,7 +956,7 @@
956 iep_mmu: iommu@ff900800 { 956 iep_mmu: iommu@ff900800 {
957 compatible = "rockchip,iommu"; 957 compatible = "rockchip,iommu";
958 reg = <0x0 0xff900800 0x0 0x40>; 958 reg = <0x0 0xff900800 0x0 0x40>;
959 interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH 0>; 959 interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
960 interrupt-names = "iep_mmu"; 960 interrupt-names = "iep_mmu";
961 #iommu-cells = <0>; 961 #iommu-cells = <0>;
962 status = "disabled"; 962 status = "disabled";
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index b91300d49a31..5840f5c75c3b 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -502,8 +502,8 @@
502 reg = <0x01c16000 0x1000>; 502 reg = <0x01c16000 0x1000>;
503 interrupts = <58>; 503 interrupts = <58>;
504 clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, 504 clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
505 <&ccu 9>, 505 <&ccu CLK_PLL_VIDEO0_2X>,
506 <&ccu 18>; 506 <&ccu CLK_PLL_VIDEO1_2X>;
507 clock-names = "ahb", "mod", "pll-0", "pll-1"; 507 clock-names = "ahb", "mod", "pll-0", "pll-1";
508 dmas = <&dma SUN4I_DMA_NORMAL 16>, 508 dmas = <&dma SUN4I_DMA_NORMAL 16>,
509 <&dma SUN4I_DMA_NORMAL 16>, 509 <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 6ae4d95e230e..316cb8b2945b 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -82,8 +82,8 @@
82 reg = <0x01c16000 0x1000>; 82 reg = <0x01c16000 0x1000>;
83 interrupts = <58>; 83 interrupts = <58>;
84 clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>, 84 clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
85 <&ccu 9>, 85 <&ccu CLK_PLL_VIDEO0_2X>,
86 <&ccu 16>; 86 <&ccu CLK_PLL_VIDEO1_2X>;
87 clock-names = "ahb", "mod", "pll-0", "pll-1"; 87 clock-names = "ahb", "mod", "pll-0", "pll-1";
88 dmas = <&dma SUN4I_DMA_NORMAL 16>, 88 dmas = <&dma SUN4I_DMA_NORMAL 16>,
89 <&dma SUN4I_DMA_NORMAL 16>, 89 <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index 8bfa12b548e0..72d3fe44ecaf 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -429,8 +429,8 @@
429 interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>; 429 interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
430 clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>, 430 clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>,
431 <&ccu CLK_HDMI_DDC>, 431 <&ccu CLK_HDMI_DDC>,
432 <&ccu 7>, 432 <&ccu CLK_PLL_VIDEO0_2X>,
433 <&ccu 13>; 433 <&ccu CLK_PLL_VIDEO1_2X>;
434 clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1"; 434 clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1";
435 resets = <&ccu RST_AHB1_HDMI>; 435 resets = <&ccu RST_AHB1_HDMI>;
436 reset-names = "ahb"; 436 reset-names = "ahb";
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 68dfa82544fc..59655e42e4b0 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -581,8 +581,8 @@
581 reg = <0x01c16000 0x1000>; 581 reg = <0x01c16000 0x1000>;
582 interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>; 582 interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
583 clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, 583 clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
584 <&ccu 9>, 584 <&ccu CLK_PLL_VIDEO0_2X>,
585 <&ccu 18>; 585 <&ccu CLK_PLL_VIDEO1_2X>;
586 clock-names = "ahb", "mod", "pll-0", "pll-1"; 586 clock-names = "ahb", "mod", "pll-0", "pll-1";
587 dmas = <&dma SUN4I_DMA_NORMAL 16>, 587 dmas = <&dma SUN4I_DMA_NORMAL 16>,
588 <&dma SUN4I_DMA_NORMAL 16>, 588 <&dma SUN4I_DMA_NORMAL 16>,
diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
index 98715538932f..a021ee6da396 100644
--- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
@@ -146,6 +146,7 @@
146 status = "okay"; 146 status = "okay";
147 147
148 axp81x: pmic@3a3 { 148 axp81x: pmic@3a3 {
149 compatible = "x-powers,axp813";
149 reg = <0x3a3>; 150 reg = <0x3a3>;
150 interrupt-parent = <&r_intc>; 151 interrupt-parent = <&r_intc>;
151 interrupts = <0 IRQ_TYPE_LEVEL_LOW>; 152 interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
diff --git a/arch/arm/boot/dts/tango4-common.dtsi b/arch/arm/boot/dts/tango4-common.dtsi
index 0ec1b0a317b4..ff72a8efb73d 100644
--- a/arch/arm/boot/dts/tango4-common.dtsi
+++ b/arch/arm/boot/dts/tango4-common.dtsi
@@ -156,7 +156,6 @@
156 reg = <0x6e000 0x400>; 156 reg = <0x6e000 0x400>;
157 ranges = <0 0x6e000 0x400>; 157 ranges = <0 0x6e000 0x400>;
158 interrupt-parent = <&gic>; 158 interrupt-parent = <&gic>;
159 interrupt-controller;
160 #address-cells = <1>; 159 #address-cells = <1>;
161 #size-cells = <1>; 160 #size-cells = <1>;
162 161
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 5cf04888c581..3e26c6f7a191 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -793,7 +793,6 @@ void abort(void)
793 /* if that doesn't kill us, halt */ 793 /* if that doesn't kill us, halt */
794 panic("Oops failed to kill thread"); 794 panic("Oops failed to kill thread");
795} 795}
796EXPORT_SYMBOL(abort);
797 796
798void __init trap_init(void) 797void __init trap_init(void)
799{ 798{
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 1712f132b80d..b83fdc06286a 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -85,7 +85,11 @@
85 .pushsection .text.fixup,"ax" 85 .pushsection .text.fixup,"ax"
86 .align 4 86 .align 4
879001: mov r4, #-EFAULT 879001: mov r4, #-EFAULT
88#ifdef CONFIG_CPU_SW_DOMAIN_PAN
89 ldr r5, [sp, #9*4] @ *err_ptr
90#else
88 ldr r5, [sp, #8*4] @ *err_ptr 91 ldr r5, [sp, #8*4] @ *err_ptr
92#endif
89 str r4, [r5] 93 str r4, [r5]
90 ldmia sp, {r1, r2} @ retrieve dst, len 94 ldmia sp, {r1, r2} @ retrieve dst, len
91 add r2, r2, r1 95 add r2, r2, r1
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index 8be04ec95adf..5ace9380626a 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = {
868 { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) }, 868 { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) },
869 { "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) }, 869 { "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) },
870 { "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) }, 870 { "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) },
871 { "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, 871 { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
872 { "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, 872 { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
873 { "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, 873 { "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
874 { "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) }, 874 { "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
875}; 875};
876 876
877static struct edma_soc_info dm365_edma_pdata = { 877static struct edma_soc_info dm365_edma_pdata = {
@@ -925,12 +925,14 @@ static struct resource edma_resources[] = {
925 /* not using TC*_ERR */ 925 /* not using TC*_ERR */
926}; 926};
927 927
928static struct platform_device dm365_edma_device = { 928static const struct platform_device_info dm365_edma_device __initconst = {
929 .name = "edma", 929 .name = "edma",
930 .id = 0, 930 .id = 0,
931 .dev.platform_data = &dm365_edma_pdata, 931 .dma_mask = DMA_BIT_MASK(32),
932 .num_resources = ARRAY_SIZE(edma_resources), 932 .res = edma_resources,
933 .resource = edma_resources, 933 .num_res = ARRAY_SIZE(edma_resources),
934 .data = &dm365_edma_pdata,
935 .size_data = sizeof(dm365_edma_pdata),
934}; 936};
935 937
936static struct resource dm365_asp_resources[] = { 938static struct resource dm365_asp_resources[] = {
@@ -1428,13 +1430,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg,
1428 1430
1429static int __init dm365_init_devices(void) 1431static int __init dm365_init_devices(void)
1430{ 1432{
1433 struct platform_device *edma_pdev;
1431 int ret = 0; 1434 int ret = 0;
1432 1435
1433 if (!cpu_is_davinci_dm365()) 1436 if (!cpu_is_davinci_dm365())
1434 return 0; 1437 return 0;
1435 1438
1436 davinci_cfg_reg(DM365_INT_EDMA_CC); 1439 davinci_cfg_reg(DM365_INT_EDMA_CC);
1437 platform_device_register(&dm365_edma_device); 1440 edma_pdev = platform_device_register_full(&dm365_edma_device);
1441 if (IS_ERR(edma_pdev)) {
1442 pr_warn("%s: Failed to register eDMA\n", __func__);
1443 return PTR_ERR(edma_pdev);
1444 }
1438 1445
1439 platform_device_register(&dm365_mdio_device); 1446 platform_device_register(&dm365_mdio_device);
1440 platform_device_register(&dm365_emac_device); 1447 platform_device_register(&dm365_emac_device);
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
index 45bdbfb96126..4a8d3f83a36e 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
@@ -75,6 +75,7 @@
75 pinctrl-0 = <&rgmii_pins>; 75 pinctrl-0 = <&rgmii_pins>;
76 phy-mode = "rgmii"; 76 phy-mode = "rgmii";
77 phy-handle = <&ext_rgmii_phy>; 77 phy-handle = <&ext_rgmii_phy>;
78 phy-supply = <&reg_dc1sw>;
78 status = "okay"; 79 status = "okay";
79}; 80};
80 81
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
index 806442d3e846..604cdaedac38 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
@@ -77,6 +77,7 @@
77 pinctrl-0 = <&rmii_pins>; 77 pinctrl-0 = <&rmii_pins>;
78 phy-mode = "rmii"; 78 phy-mode = "rmii";
79 phy-handle = <&ext_rmii_phy1>; 79 phy-handle = <&ext_rmii_phy1>;
80 phy-supply = <&reg_dc1sw>;
80 status = "okay"; 81 status = "okay";
81 82
82}; 83};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
index 0eb2acedf8c3..abe179de35d7 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
@@ -82,6 +82,7 @@
82 pinctrl-0 = <&rgmii_pins>; 82 pinctrl-0 = <&rgmii_pins>;
83 phy-mode = "rgmii"; 83 phy-mode = "rgmii";
84 phy-handle = <&ext_rgmii_phy>; 84 phy-handle = <&ext_rgmii_phy>;
85 phy-supply = <&reg_dc1sw>;
85 status = "okay"; 86 status = "okay";
86}; 87};
87 88
@@ -95,7 +96,7 @@
95&mmc2 { 96&mmc2 {
96 pinctrl-names = "default"; 97 pinctrl-names = "default";
97 pinctrl-0 = <&mmc2_pins>; 98 pinctrl-0 = <&mmc2_pins>;
98 vmmc-supply = <&reg_vcc3v3>; 99 vmmc-supply = <&reg_dcdc1>;
99 vqmmc-supply = <&reg_vcc1v8>; 100 vqmmc-supply = <&reg_vcc1v8>;
100 bus-width = <8>; 101 bus-width = <8>;
101 non-removable; 102 non-removable;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
index a5da18a6f286..43418bd881d8 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
@@ -45,19 +45,10 @@
45 45
46#include "sun50i-a64.dtsi" 46#include "sun50i-a64.dtsi"
47 47
48/ {
49 reg_vcc3v3: vcc3v3 {
50 compatible = "regulator-fixed";
51 regulator-name = "vcc3v3";
52 regulator-min-microvolt = <3300000>;
53 regulator-max-microvolt = <3300000>;
54 };
55};
56
57&mmc0 { 48&mmc0 {
58 pinctrl-names = "default"; 49 pinctrl-names = "default";
59 pinctrl-0 = <&mmc0_pins>; 50 pinctrl-0 = <&mmc0_pins>;
60 vmmc-supply = <&reg_vcc3v3>; 51 vmmc-supply = <&reg_dcdc1>;
61 non-removable; 52 non-removable;
62 disable-wp; 53 disable-wp;
63 bus-width = <4>; 54 bus-width = <4>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
index b6b7a561df8c..a42fd79a62a3 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
@@ -71,7 +71,7 @@
71 pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; 71 pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
72 vmmc-supply = <&reg_vcc3v3>; 72 vmmc-supply = <&reg_vcc3v3>;
73 bus-width = <4>; 73 bus-width = <4>;
74 cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; 74 cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
75 status = "okay"; 75 status = "okay";
76}; 76};
77 77
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index a298df74ca6c..dbe2648649db 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -255,7 +255,6 @@
255&avb { 255&avb {
256 pinctrl-0 = <&avb_pins>; 256 pinctrl-0 = <&avb_pins>;
257 pinctrl-names = "default"; 257 pinctrl-names = "default";
258 renesas,no-ether-link;
259 phy-handle = <&phy0>; 258 phy-handle = <&phy0>;
260 status = "okay"; 259 status = "okay";
261 260
diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi
index 0d85b315ce71..73439cf48659 100644
--- a/arch/arm64/boot/dts/renesas/ulcb.dtsi
+++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi
@@ -145,7 +145,6 @@
145&avb { 145&avb {
146 pinctrl-0 = <&avb_pins>; 146 pinctrl-0 = <&avb_pins>;
147 pinctrl-names = "default"; 147 pinctrl-names = "default";
148 renesas,no-ether-link;
149 phy-handle = <&phy0>; 148 phy-handle = <&phy0>;
150 status = "okay"; 149 status = "okay";
151 150
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index d4f80786e7c2..3890468678ce 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -132,6 +132,8 @@
132 assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; 132 assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
133 assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>; 133 assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
134 clock_in_out = "input"; 134 clock_in_out = "input";
135 /* shows instability at 1GBit right now */
136 max-speed = <100>;
135 phy-supply = <&vcc_io>; 137 phy-supply = <&vcc_io>;
136 phy-mode = "rgmii"; 138 phy-mode = "rgmii";
137 pinctrl-names = "default"; 139 pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 41d61840fb99..2426da631938 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -514,7 +514,7 @@
514 tsadc: tsadc@ff250000 { 514 tsadc: tsadc@ff250000 {
515 compatible = "rockchip,rk3328-tsadc"; 515 compatible = "rockchip,rk3328-tsadc";
516 reg = <0x0 0xff250000 0x0 0x100>; 516 reg = <0x0 0xff250000 0x0 0x100>;
517 interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>; 517 interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
518 assigned-clocks = <&cru SCLK_TSADC>; 518 assigned-clocks = <&cru SCLK_TSADC>;
519 assigned-clock-rates = <50000>; 519 assigned-clock-rates = <50000>;
520 clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>; 520 clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
index 910628d18add..1fc5060d7027 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
@@ -155,17 +155,6 @@
155 regulator-min-microvolt = <5000000>; 155 regulator-min-microvolt = <5000000>;
156 regulator-max-microvolt = <5000000>; 156 regulator-max-microvolt = <5000000>;
157 }; 157 };
158
159 vdd_log: vdd-log {
160 compatible = "pwm-regulator";
161 pwms = <&pwm2 0 25000 0>;
162 regulator-name = "vdd_log";
163 regulator-min-microvolt = <800000>;
164 regulator-max-microvolt = <1400000>;
165 regulator-always-on;
166 regulator-boot-on;
167 status = "okay";
168 };
169}; 158};
170 159
171&cpu_b0 { 160&cpu_b0 {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
index 48e733136db4..0ac2ace82435 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -198,8 +198,8 @@
198 gpio-controller; 198 gpio-controller;
199 #gpio-cells = <2>; 199 #gpio-cells = <2>;
200 gpio-ranges = <&pinctrl 0 0 0>, 200 gpio-ranges = <&pinctrl 0 0 0>,
201 <&pinctrl 96 0 0>, 201 <&pinctrl 104 0 0>,
202 <&pinctrl 160 0 0>; 202 <&pinctrl 168 0 0>;
203 gpio-ranges-group-names = "gpio_range0", 203 gpio-ranges-group-names = "gpio_range0",
204 "gpio_range1", 204 "gpio_range1",
205 "gpio_range2"; 205 "gpio_range2";
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e..f4363d40e2cd 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
74{ 74{
75 u64 reg; 75 u64 reg;
76 76
77 /* Clear pmscr in case of early return */
78 *pmscr_el1 = 0;
79
77 /* SPE present on this CPU? */ 80 /* SPE present on this CPU? */
78 if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), 81 if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
79 ID_AA64DFR0_PMSVER_SHIFT)) 82 ID_AA64DFR0_PMSVER_SHIFT))
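The hunk above clears the pmscr_el1 out-parameter before the feature check can bail out, so the restore path never consumes stale stack data. A minimal self-contained sketch of that defensive pattern, with hypothetical feature_present()/read_hw_state() stubs standing in for the SPE probing:

#include <stdint.h>

/* hypothetical stand-ins for the SPE probe and hardware read */
static int feature_present(void) { return 0; }
static uint64_t read_hw_state(void) { return 0x1234; }

void save_state(uint64_t *out)
{
	*out = 0;	/* safe default: every early-return path leaves *out defined */

	if (!feature_present())
		return;	/* caller sees 0, not stale stack contents */

	*out = read_hw_state();
}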
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index c6ecb97151a2..9025699049ca 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
88 } 88 }
89 89
90 if (ti->softirq_time) { 90 if (ti->softirq_time) {
91 delta = cycle_to_nsec(ti->softirq_time)); 91 delta = cycle_to_nsec(ti->softirq_time);
92 account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ); 92 account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
93 } 93 }
94 94
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index cb79fba79d43..b88a8dd14933 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -122,7 +122,6 @@ void abort(void)
122 /* if that doesn't kill us, halt */ 122 /* if that doesn't kill us, halt */
123 panic("Oops failed to kill thread"); 123 panic("Oops failed to kill thread");
124} 124}
125EXPORT_SYMBOL(abort);
126 125
127void __init trap_init(void) 126void __init trap_init(void)
128{ 127{
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index c7ed26029cbb..e68e6e04063a 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -235,6 +235,7 @@ LEAF(mips_cps_core_init)
235 has_mt t0, 3f 235 has_mt t0, 3f
236 236
237 .set push 237 .set push
238 .set MIPS_ISA_LEVEL_RAW
238 .set mt 239 .set mt
239 240
240 /* Only allow 1 TC per VPE to execute... */ 241 /* Only allow 1 TC per VPE to execute... */
@@ -388,6 +389,7 @@ LEAF(mips_cps_boot_vpes)
388#elif defined(CONFIG_MIPS_MT) 389#elif defined(CONFIG_MIPS_MT)
389 390
390 .set push 391 .set push
392 .set MIPS_ISA_LEVEL_RAW
391 .set mt 393 .set mt
392 394
393 /* If the core doesn't support MT then return */ 395 /* If the core doesn't support MT then return */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 45d0b6b037ee..57028d49c202 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
705 struct task_struct *t; 705 struct task_struct *t;
706 int max_users; 706 int max_users;
707 707
708 /* If nothing to change, return right away, successfully. */
709 if (value == mips_get_process_fp_mode(task))
710 return 0;
711
712 /* Only accept a mode change if 64-bit FP enabled for o32. */
713 if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
714 return -EOPNOTSUPP;
715
716 /* And only for o32 tasks. */
717 if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
718 return -EOPNOTSUPP;
719
708 /* Check the value is valid */ 720 /* Check the value is valid */
709 if (value & ~known_bits) 721 if (value & ~known_bits)
710 return -EOPNOTSUPP; 722 return -EOPNOTSUPP;
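These guards change when PR_SET_FP_MODE can succeed: a no-op request returns 0 before any capability check, while non-o32 tasks or kernels without CONFIG_MIPS_O32_FP64_SUPPORT get -EOPNOTSUPP. A hedged user-space sketch of probing that from a MIPS process (PR_SET_FP_MODE/PR_GET_FP_MODE are the real prctl commands; the error handling is illustrative):

#include <errno.h>
#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_FP_MODE
#define PR_SET_FP_MODE 45	/* values from linux/prctl.h */
#define PR_GET_FP_MODE 46
#endif

int main(void)
{
	int mode = prctl(PR_GET_FP_MODE, 0, 0, 0, 0);

	if (mode < 0)
		return perror("PR_GET_FP_MODE"), 1;

	/* re-requesting the current mode hits the new "nothing to change" path */
	if (prctl(PR_SET_FP_MODE, mode, 0, 0, 0) == 0)
		printf("mode 0x%x confirmed\n", mode);
	else if (errno == EOPNOTSUPP)
		printf("FP mode switching unsupported here\n");
	return 0;
}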
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index efbd8df8b665..0b23b1ad99e6 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -419,63 +419,160 @@ static int gpr64_set(struct task_struct *target,
419 419
420#endif /* CONFIG_64BIT */ 420#endif /* CONFIG_64BIT */
421 421
422/*
423 * Copy the floating-point context to the supplied NT_PRFPREG buffer,
424 * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
425 * correspond 1:1 to buffer slots. Only general registers are copied.
426 */
427static int fpr_get_fpa(struct task_struct *target,
428 unsigned int *pos, unsigned int *count,
429 void **kbuf, void __user **ubuf)
430{
431 return user_regset_copyout(pos, count, kbuf, ubuf,
432 &target->thread.fpu,
433 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
434}
435
436/*
437 * Copy the floating-point context to the supplied NT_PRFPREG buffer,
438 * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
439 * general register slots are copied to buffer slots. Only general
440 * registers are copied.
441 */
442static int fpr_get_msa(struct task_struct *target,
443 unsigned int *pos, unsigned int *count,
444 void **kbuf, void __user **ubuf)
445{
446 unsigned int i;
447 u64 fpr_val;
448 int err;
449
450 BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
451 for (i = 0; i < NUM_FPU_REGS; i++) {
452 fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
453 err = user_regset_copyout(pos, count, kbuf, ubuf,
454 &fpr_val, i * sizeof(elf_fpreg_t),
455 (i + 1) * sizeof(elf_fpreg_t));
456 if (err)
457 return err;
458 }
459
460 return 0;
461}
462
463/*
464 * Copy the floating-point context to the supplied NT_PRFPREG buffer.
465 * Choose the appropriate helper for general registers, and then copy
466 * the FCSR register separately.
467 */
422static int fpr_get(struct task_struct *target, 468static int fpr_get(struct task_struct *target,
423 const struct user_regset *regset, 469 const struct user_regset *regset,
424 unsigned int pos, unsigned int count, 470 unsigned int pos, unsigned int count,
425 void *kbuf, void __user *ubuf) 471 void *kbuf, void __user *ubuf)
426{ 472{
427 unsigned i; 473 const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
428 int err; 474 int err;
429 u64 fpr_val;
430 475
431 /* XXX fcr31 */ 476 if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
477 err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
478 else
479 err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
480 if (err)
481 return err;
432 482
433 if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) 483 err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
434 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 484 &target->thread.fpu.fcr31,
435 &target->thread.fpu, 485 fcr31_pos, fcr31_pos + sizeof(u32));
436 0, sizeof(elf_fpregset_t));
437 486
438 for (i = 0; i < NUM_FPU_REGS; i++) { 487 return err;
439 fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); 488}
440 err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, 489
441 &fpr_val, i * sizeof(elf_fpreg_t), 490/*
442 (i + 1) * sizeof(elf_fpreg_t)); 491 * Copy the supplied NT_PRFPREG buffer to the floating-point context,
492 * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
493 * context's general register slots. Only general registers are copied.
494 */
495static int fpr_set_fpa(struct task_struct *target,
496 unsigned int *pos, unsigned int *count,
497 const void **kbuf, const void __user **ubuf)
498{
499 return user_regset_copyin(pos, count, kbuf, ubuf,
500 &target->thread.fpu,
501 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
502}
503
504/*
505 * Copy the supplied NT_PRFPREG buffer to the floating-point context,
506 * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
507 * bits only of FP context's general register slots. Only general
508 * registers are copied.
509 */
510static int fpr_set_msa(struct task_struct *target,
511 unsigned int *pos, unsigned int *count,
512 const void **kbuf, const void __user **ubuf)
513{
514 unsigned int i;
515 u64 fpr_val;
516 int err;
517
518 BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
519 for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
520 err = user_regset_copyin(pos, count, kbuf, ubuf,
521 &fpr_val, i * sizeof(elf_fpreg_t),
522 (i + 1) * sizeof(elf_fpreg_t));
443 if (err) 523 if (err)
444 return err; 524 return err;
525 set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
445 } 526 }
446 527
447 return 0; 528 return 0;
448} 529}
449 530
531/*
532 * Copy the supplied NT_PRFPREG buffer to the floating-point context.
533 * Choose the appropriate helper for general registers, and then copy
534 * the FCSR register separately.
535 *
536 * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
537 * which is supposed to have been guaranteed by the kernel before
538 * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
539 * so that we can safely avoid preinitializing temporaries for
540 * partial register writes.
541 */
450static int fpr_set(struct task_struct *target, 542static int fpr_set(struct task_struct *target,
451 const struct user_regset *regset, 543 const struct user_regset *regset,
452 unsigned int pos, unsigned int count, 544 unsigned int pos, unsigned int count,
453 const void *kbuf, const void __user *ubuf) 545 const void *kbuf, const void __user *ubuf)
454{ 546{
455 unsigned i; 547 const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
548 u32 fcr31;
456 int err; 549 int err;
457 u64 fpr_val;
458 550
459 /* XXX fcr31 */ 551 BUG_ON(count % sizeof(elf_fpreg_t));
552
553 if (pos + count > sizeof(elf_fpregset_t))
554 return -EIO;
460 555
461 init_fp_ctx(target); 556 init_fp_ctx(target);
462 557
463 if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) 558 if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
464 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 559 err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
465 &target->thread.fpu, 560 else
466 0, sizeof(elf_fpregset_t)); 561 err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
562 if (err)
563 return err;
467 564
468 BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); 565 if (count > 0) {
469 for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
470 err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 566 err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
471 &fpr_val, i * sizeof(elf_fpreg_t), 567 &fcr31,
472 (i + 1) * sizeof(elf_fpreg_t)); 568 fcr31_pos, fcr31_pos + sizeof(u32));
473 if (err) 569 if (err)
474 return err; 570 return err;
475 set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val); 571
572 ptrace_setfcr31(target, fcr31);
476 } 573 }
477 574
478 return 0; 575 return err;
479} 576}
480 577
481enum mips_regset { 578enum mips_regset {
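The helpers above pin down the NT_PRFPREG layout: 32 general FP register slots of sizeof(elf_fpreg_t) each, followed by the 32-bit FCSR at offset NUM_FPU_REGS * sizeof(elf_fpreg_t). A sketch of reading that regset from a tracer, assuming the 33-slot layout described in the comments (the struct is illustrative, not a kernel UAPI type):

#include <elf.h>		/* NT_PRFPREG */
#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

/* illustrative view of the buffer: 32 register slots, then FCSR */
struct mips_prfpreg {
	uint64_t fpr[32];	/* lower 64 bits of each FP/MSA register */
	uint32_t fcr31;		/* FP control/status word */
	uint32_t pad;		/* rounds the regset to 33 * 8 bytes */
};

static long read_fp_regset(pid_t pid, struct mips_prfpreg *buf)
{
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(*buf) };

	return ptrace(PTRACE_GETREGSET, pid, (void *)(long)NT_PRFPREG, &iov);
}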
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 9345b44b86f0..f57118e1f6b4 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
123 while ((nuline = strchr(s, '\n')) != NULL) { 123 while ((nuline = strchr(s, '\n')) != NULL) {
124 if (nuline != s) 124 if (nuline != s)
125 pdc_iodc_print(s, nuline - s); 125 pdc_iodc_print(s, nuline - s);
126 pdc_iodc_print("\r\n", 2); 126 pdc_iodc_print("\r\n", 2);
127 s = nuline + 1; 127 s = nuline + 1;
128 } 128 }
129 if (*s != '\0') 129 if (*s != '\0')
130 pdc_iodc_print(s, strlen(s)); 130 pdc_iodc_print(s, strlen(s));
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index dd5a08aaa4da..3eb4bfc1fb36 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -12,6 +12,7 @@
12 for the semaphore. */ 12 for the semaphore. */
13 13
14#define __PA_LDCW_ALIGNMENT 16 14#define __PA_LDCW_ALIGNMENT 16
15#define __PA_LDCW_ALIGN_ORDER 4
15#define __ldcw_align(a) ({ \ 16#define __ldcw_align(a) ({ \
16 unsigned long __ret = (unsigned long) &(a)->lock[0]; \ 17 unsigned long __ret = (unsigned long) &(a)->lock[0]; \
17 __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ 18 __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \
@@ -29,6 +30,7 @@
29 ldcd). */ 30 ldcd). */
30 31
31#define __PA_LDCW_ALIGNMENT 4 32#define __PA_LDCW_ALIGNMENT 4
33#define __PA_LDCW_ALIGN_ORDER 2
32#define __ldcw_align(a) (&(a)->slock) 34#define __ldcw_align(a) (&(a)->slock)
33#define __LDCW "ldcw,co" 35#define __LDCW "ldcw,co"
34 36
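Both variants now pair the byte alignment with its log2 (__PA_LDCW_ALIGN_ORDER), because the assembly rounds with depi 0,31,order, which clears the low `order' bits; adding ALIGNMENT-1 first turns that round-down into a round-up. The two operations in C terms (a sketch; the names are illustrative):

#include <stdint.h>

#define PA_LDCW_ALIGNMENT	16	/* pre-PA2.0 ldcw needs a 16-byte aligned word */
#define PA_LDCW_ALIGN_ORDER	4	/* log2(16), consumed by the depi */

/* round up, as __ldcw_align() does on an over-sized lock word array */
static inline uintptr_t ldcw_align_up(uintptr_t a)
{
	return (a + PA_LDCW_ALIGNMENT - 1) & ~(uintptr_t)(PA_LDCW_ALIGNMENT - 1);
}

/* round down by clearing the low bits, as depi 0,31,ALIGN_ORDER does */
static inline uintptr_t ldcw_align_down(uintptr_t a)
{
	return a & ~(((uintptr_t)1 << PA_LDCW_ALIGN_ORDER) - 1);
}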
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index c980a02a52bc..598c8d60fa5e 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
35 35
36/* thread information allocation */ 36/* thread information allocation */
37 37
38#ifdef CONFIG_IRQSTACKS
39#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
40#else
38#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ 41#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */
42#endif
43
39/* Be sure to hunt all references to this down when you change the size of 44/* Be sure to hunt all references to this down when you change the size of
40 * the kernel stack */ 45 * the kernel stack */
41#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) 46#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index d8f77358e2ba..29b99b8964aa 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev)
870 static int count; 870 static int count;
871 871
872 print_pa_hwpath(dev, hw_path); 872 print_pa_hwpath(dev, hw_path);
873 printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", 873 printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
874 ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, 874 ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type,
875 dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); 875 dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);
876 876
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index a4fd296c958e..e95207c0565e 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -35,6 +35,7 @@
35#include <asm/pgtable.h> 35#include <asm/pgtable.h>
36#include <asm/signal.h> 36#include <asm/signal.h>
37#include <asm/unistd.h> 37#include <asm/unistd.h>
38#include <asm/ldcw.h>
38#include <asm/thread_info.h> 39#include <asm/thread_info.h>
39 40
40#include <linux/linkage.h> 41#include <linux/linkage.h>
@@ -46,6 +47,14 @@
46#endif 47#endif
47 48
48 .import pa_tlb_lock,data 49 .import pa_tlb_lock,data
50 .macro load_pa_tlb_lock reg
51#if __PA_LDCW_ALIGNMENT > 4
52 load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
53 depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg
54#else
55 load32 PA(pa_tlb_lock), \reg
56#endif
57 .endm
49 58
50 /* space_to_prot macro creates a prot id from a space id */ 59 /* space_to_prot macro creates a prot id from a space id */
51 60
@@ -457,7 +466,7 @@
457 .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault 466 .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
458#ifdef CONFIG_SMP 467#ifdef CONFIG_SMP
459 cmpib,COND(=),n 0,\spc,2f 468 cmpib,COND(=),n 0,\spc,2f
460 load32 PA(pa_tlb_lock),\tmp 469 load_pa_tlb_lock \tmp
4611: LDCW 0(\tmp),\tmp1 4701: LDCW 0(\tmp),\tmp1
462 cmpib,COND(=) 0,\tmp1,1b 471 cmpib,COND(=) 0,\tmp1,1b
463 nop 472 nop
@@ -480,7 +489,7 @@
480 /* Release pa_tlb_lock lock. */ 489 /* Release pa_tlb_lock lock. */
481 .macro tlb_unlock1 spc,tmp 490 .macro tlb_unlock1 spc,tmp
482#ifdef CONFIG_SMP 491#ifdef CONFIG_SMP
483 load32 PA(pa_tlb_lock),\tmp 492 load_pa_tlb_lock \tmp
484 tlb_unlock0 \spc,\tmp 493 tlb_unlock0 \spc,\tmp
485#endif 494#endif
486 .endm 495 .endm
@@ -878,9 +887,6 @@ ENTRY_CFI(syscall_exit_rfi)
878 STREG %r19,PT_SR7(%r16) 887 STREG %r19,PT_SR7(%r16)
879 888
880intr_return: 889intr_return:
881	/* NOTE: Need to enable interrupts in case we schedule. */
882 ssm PSW_SM_I, %r0
883
884 /* check for reschedule */ 890 /* check for reschedule */
885 mfctl %cr30,%r1 891 mfctl %cr30,%r1
886 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ 892 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +913,11 @@ intr_check_sig:
907 LDREG PT_IASQ1(%r16), %r20 913 LDREG PT_IASQ1(%r16), %r20
908 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ 914 cmpib,COND(=),n 0,%r20,intr_restore /* backward */
909 915
916 /* NOTE: We need to enable interrupts if we have to deliver
917 * signals. We used to do this earlier but it caused kernel
918 * stack overflows. */
919 ssm PSW_SM_I, %r0
920
910 copy %r0, %r25 /* long in_syscall = 0 */ 921 copy %r0, %r25 /* long in_syscall = 0 */
911#ifdef CONFIG_64BIT 922#ifdef CONFIG_64BIT
912 ldo -16(%r30),%r29 /* Reference param save area */ 923 ldo -16(%r30),%r29 /* Reference param save area */
@@ -958,6 +969,10 @@ intr_do_resched:
958 cmpib,COND(=) 0, %r20, intr_do_preempt 969 cmpib,COND(=) 0, %r20, intr_do_preempt
959 nop 970 nop
960 971
972 /* NOTE: We need to enable interrupts if we schedule. We used
973 * to do this earlier but it caused kernel stack overflows. */
974 ssm PSW_SM_I, %r0
975
961#ifdef CONFIG_64BIT 976#ifdef CONFIG_64BIT
962 ldo -16(%r30),%r29 /* Reference param save area */ 977 ldo -16(%r30),%r29 /* Reference param save area */
963#endif 978#endif
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index e3a8e5e4d5de..8d072c44f300 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
305 305
306 306
307 __INITRODATA 307 __INITRODATA
308 .align 4
308 .export os_hpmc_size 309 .export os_hpmc_size
309os_hpmc_size: 310os_hpmc_size:
310 .word .os_hpmc_end-.os_hpmc 311 .word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index adf7187f8951..2d40c4ff3f69 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -36,6 +36,7 @@
36#include <asm/assembly.h> 36#include <asm/assembly.h>
37#include <asm/pgtable.h> 37#include <asm/pgtable.h>
38#include <asm/cache.h> 38#include <asm/cache.h>
39#include <asm/ldcw.h>
39#include <linux/linkage.h> 40#include <linux/linkage.h>
40 41
41 .text 42 .text
@@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local)
333 334
334 .macro tlb_lock la,flags,tmp 335 .macro tlb_lock la,flags,tmp
335#ifdef CONFIG_SMP 336#ifdef CONFIG_SMP
336 ldil L%pa_tlb_lock,%r1 337#if __PA_LDCW_ALIGNMENT > 4
337 ldo R%pa_tlb_lock(%r1),\la 338 load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
339 depi 0,31,__PA_LDCW_ALIGN_ORDER, \la
340#else
341 load32 pa_tlb_lock, \la
342#endif
338 rsm PSW_SM_I,\flags 343 rsm PSW_SM_I,\flags
3391: LDCW 0(\la),\tmp 3441: LDCW 0(\la),\tmp
340 cmpib,<>,n 0,\tmp,3f 345 cmpib,<>,n 0,\tmp,3f
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 30f92391a93e..cad3e8661cd6 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -39,6 +39,7 @@
39#include <linux/kernel.h> 39#include <linux/kernel.h>
40#include <linux/mm.h> 40#include <linux/mm.h>
41#include <linux/fs.h> 41#include <linux/fs.h>
42#include <linux/cpu.h>
42#include <linux/module.h> 43#include <linux/module.h>
43#include <linux/personality.h> 44#include <linux/personality.h>
44#include <linux/ptrace.h> 45#include <linux/ptrace.h>
@@ -184,6 +185,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
184} 185}
185 186
186/* 187/*
188 * Idle thread support
189 *
190 * Detect when running on QEMU with SeaBIOS PDC Firmware and let
191 * QEMU idle the host too.
192 */
193
194int running_on_qemu __read_mostly;
195
196void __cpuidle arch_cpu_idle_dead(void)
197{
198 /* nop on real hardware, qemu will offline CPU. */
199 asm volatile("or %%r31,%%r31,%%r31\n":::);
200}
201
202void __cpuidle arch_cpu_idle(void)
203{
204 local_irq_enable();
205
206 /* nop on real hardware, qemu will idle sleep. */
207 asm volatile("or %%r10,%%r10,%%r10\n":::);
208}
209
210static int __init parisc_idle_init(void)
211{
212 const char *marker;
213
214 /* check QEMU/SeaBIOS marker in PAGE0 */
215 marker = (char *) &PAGE0->pad0;
216 running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
217
218 if (!running_on_qemu)
219 cpu_idle_poll_ctrl(1);
220
221 return 0;
222}
223arch_initcall(parisc_idle_init);
224
225/*
187 * Copy architecture-specific thread state 226 * Copy architecture-specific thread state
188 */ 227 */
189int 228int
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5a657986ebbf..143f90e2f9f3 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/kallsyms.h> 16#include <linux/kallsyms.h>
17#include <linux/sort.h> 17#include <linux/sort.h>
18#include <linux/sched.h>
19 18
20#include <linux/uaccess.h> 19#include <linux/uaccess.h>
21#include <asm/assembly.h> 20#include <asm/assembly.h>
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
index 7eab4bb8abe6..66e506520505 100644
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
16#include <linux/preempt.h> 16#include <linux/preempt.h>
17#include <linux/init.h> 17#include <linux/init.h>
18 18
19#include <asm/processor.h>
20#include <asm/delay.h> 19#include <asm/delay.h>
21
22#include <asm/special_insns.h> /* for mfctl() */ 20#include <asm/special_insns.h> /* for mfctl() */
23#include <asm/processor.h> /* for boot_cpu_data */ 21#include <asm/processor.h> /* for boot_cpu_data */
24 22
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 13f7854e0d49..48f41399fc0b 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -631,11 +631,11 @@ void __init mem_init(void)
631 mem_init_print_info(NULL); 631 mem_init_print_info(NULL);
632#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ 632#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
633 printk("virtual kernel memory layout:\n" 633 printk("virtual kernel memory layout:\n"
634 " vmalloc : 0x%p - 0x%p (%4ld MB)\n" 634 " vmalloc : 0x%px - 0x%px (%4ld MB)\n"
635 " memory : 0x%p - 0x%p (%4ld MB)\n" 635 " memory : 0x%px - 0x%px (%4ld MB)\n"
636 " .init : 0x%p - 0x%p (%4ld kB)\n" 636 " .init : 0x%px - 0x%px (%4ld kB)\n"
637 " .data : 0x%p - 0x%p (%4ld kB)\n" 637 " .data : 0x%px - 0x%px (%4ld kB)\n"
638 " .text : 0x%p - 0x%p (%4ld kB)\n", 638 " .text : 0x%px - 0x%px (%4ld kB)\n",
639 639
640 (void*)VMALLOC_START, (void*)VMALLOC_END, 640 (void*)VMALLOC_START, (void*)VMALLOC_END,
641 (VMALLOC_END - VMALLOC_START) >> 20, 641 (VMALLOC_END - VMALLOC_START) >> 20,
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index a703452d67b6..555e22d5e07f 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -209,5 +209,11 @@ exc_##label##_book3e:
209 ori r3,r3,vector_offset@l; \ 209 ori r3,r3,vector_offset@l; \
210 mtspr SPRN_IVOR##vector_number,r3; 210 mtspr SPRN_IVOR##vector_number,r3;
211 211
212#define RFI_TO_KERNEL \
213 rfi
214
215#define RFI_TO_USER \
216 rfi
217
212#endif /* _ASM_POWERPC_EXCEPTION_64E_H */ 218#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
213 219
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index b27205297e1d..7197b179c1b1 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,59 @@
74 */ 74 */
75#define EX_R3 EX_DAR 75#define EX_R3 EX_DAR
76 76
77/*
78 * Macros for annotating the expected destination of (h)rfid
79 *
80 * The nop instructions allow us to insert one or more instructions to flush the
81 * L1-D cache when returning to userspace or a guest.
82 */
83#define RFI_FLUSH_SLOT \
84 RFI_FLUSH_FIXUP_SECTION; \
85 nop; \
86 nop; \
87 nop
88
89#define RFI_TO_KERNEL \
90 rfid
91
92#define RFI_TO_USER \
93 RFI_FLUSH_SLOT; \
94 rfid; \
95 b rfi_flush_fallback
96
97#define RFI_TO_USER_OR_KERNEL \
98 RFI_FLUSH_SLOT; \
99 rfid; \
100 b rfi_flush_fallback
101
102#define RFI_TO_GUEST \
103 RFI_FLUSH_SLOT; \
104 rfid; \
105 b rfi_flush_fallback
106
107#define HRFI_TO_KERNEL \
108 hrfid
109
110#define HRFI_TO_USER \
111 RFI_FLUSH_SLOT; \
112 hrfid; \
113 b hrfi_flush_fallback
114
115#define HRFI_TO_USER_OR_KERNEL \
116 RFI_FLUSH_SLOT; \
117 hrfid; \
118 b hrfi_flush_fallback
119
120#define HRFI_TO_GUEST \
121 RFI_FLUSH_SLOT; \
122 hrfid; \
123 b hrfi_flush_fallback
124
125#define HRFI_TO_UNKNOWN \
126 RFI_FLUSH_SLOT; \
127 hrfid; \
128 b hrfi_flush_fallback
129
77#ifdef CONFIG_RELOCATABLE 130#ifdef CONFIG_RELOCATABLE
78#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ 131#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
79 mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ 132 mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
218 mtspr SPRN_##h##SRR0,r12; \ 271 mtspr SPRN_##h##SRR0,r12; \
219 mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ 272 mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
220 mtspr SPRN_##h##SRR1,r10; \ 273 mtspr SPRN_##h##SRR1,r10; \
221 h##rfid; \ 274 h##RFI_TO_KERNEL; \
222 b . /* prevent speculative execution */ 275 b . /* prevent speculative execution */
223#define EXCEPTION_PROLOG_PSERIES_1(label, h) \ 276#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
224 __EXCEPTION_PROLOG_PSERIES_1(label, h) 277 __EXCEPTION_PROLOG_PSERIES_1(label, h)
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
232 mtspr SPRN_##h##SRR0,r12; \ 285 mtspr SPRN_##h##SRR0,r12; \
233 mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ 286 mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
234 mtspr SPRN_##h##SRR1,r10; \ 287 mtspr SPRN_##h##SRR1,r10; \
235 h##rfid; \ 288 h##RFI_TO_KERNEL; \
236 b . /* prevent speculative execution */ 289 b . /* prevent speculative execution */
237 290
238#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ 291#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 8f88f771cc55..1e82eb3caabd 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,7 +187,20 @@ label##3: \
187 FTR_ENTRY_OFFSET label##1b-label##3b; \ 187 FTR_ENTRY_OFFSET label##1b-label##3b; \
188 .popsection; 188 .popsection;
189 189
190#define RFI_FLUSH_FIXUP_SECTION \
191951: \
192 .pushsection __rfi_flush_fixup,"a"; \
193 .align 2; \
194952: \
195 FTR_ENTRY_OFFSET 951b-952b; \
196 .popsection;
197
198
190#ifndef __ASSEMBLY__ 199#ifndef __ASSEMBLY__
200#include <linux/types.h>
201
202extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
203
191void apply_feature_fixups(void); 204void apply_feature_fixups(void);
192void setup_feature_keys(void); 205void setup_feature_keys(void);
193#endif 206#endif
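RFI_FLUSH_FIXUP_SECTION records, for each patch slot, a self-relative offset in the __rfi_flush_fixup section, and the start/stop symbols exported here bound that table. A kernel-style sketch of the consumer side, modelled on what a do_rfi_flush_fixups() implementation would do (patch_instruction() is the kernel's existing code-patching helper; the three-instruction slot size comes from RFI_FLUSH_SLOT):

/* sketch: walk the fixup table and patch each 3-nop slot */
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
extern int patch_instruction(unsigned int *addr, unsigned int instr);

static void patch_rfi_flush_slots(const unsigned int insns[3])
{
	long *start = &__start___rfi_flush_fixup;
	long *end = &__stop___rfi_flush_fixup;

	for (long *fcur = start; fcur < end; fcur++) {
		/* each entry is an offset from itself to the slot */
		unsigned int *dest = (unsigned int *)((char *)fcur + *fcur);

		for (int i = 0; i < 3; i++)
			patch_instruction(dest + i, insns[i]);
	}
}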
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a409177be8bd..f0461618bf7b 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -241,6 +241,7 @@
241#define H_GET_HCA_INFO 0x1B8 241#define H_GET_HCA_INFO 0x1B8
242#define H_GET_PERF_COUNT 0x1BC 242#define H_GET_PERF_COUNT 0x1BC
243#define H_MANAGE_TRACE 0x1C0 243#define H_MANAGE_TRACE 0x1C0
244#define H_GET_CPU_CHARACTERISTICS 0x1C8
244#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 245#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
245#define H_QUERY_INT_STATE 0x1E4 246#define H_QUERY_INT_STATE 0x1E4
246#define H_POLL_PENDING 0x1D8 247#define H_POLL_PENDING 0x1D8
@@ -330,6 +331,17 @@
330#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 331#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
331/* >= 0 values are CPU number */ 332/* >= 0 values are CPU number */
332 333
334/* H_GET_CPU_CHARACTERISTICS return values */
335#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
336#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
337#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
338#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
339#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
340
341#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
342#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
343#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
344
333/* Flag values used in H_REGISTER_PROC_TBL hcall */ 345/* Flag values used in H_REGISTER_PROC_TBL hcall */
334#define PROC_TABLE_OP_MASK 0x18 346#define PROC_TABLE_OP_MASK 0x18
335#define PROC_TABLE_DEREG 0x10 347#define PROC_TABLE_DEREG 0x10
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
436 } 448 }
437} 449}
438 450
451struct h_cpu_char_result {
452 u64 character;
453 u64 behaviour;
454};
455
439#endif /* __ASSEMBLY__ */ 456#endif /* __ASSEMBLY__ */
440#endif /* __KERNEL__ */ 457#endif /* __KERNEL__ */
441#endif /* _ASM_POWERPC_HVCALL_H */ 458#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6177d43f0ce8..e2a2b8400490 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
160#endif 160#endif
161} 161}
162 162
163static inline void arch_dup_mmap(struct mm_struct *oldmm, 163static inline int arch_dup_mmap(struct mm_struct *oldmm,
164 struct mm_struct *mm) 164 struct mm_struct *mm)
165{ 165{
166 return 0;
166} 167}
167 168
168#ifndef CONFIG_PPC_BOOK3S_64 169#ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 3892db93b837..23ac7fc0af23 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -232,6 +232,16 @@ struct paca_struct {
232 struct sibling_subcore_state *sibling_subcore_state; 232 struct sibling_subcore_state *sibling_subcore_state;
233#endif 233#endif
234#endif 234#endif
235#ifdef CONFIG_PPC_BOOK3S_64
236 /*
237 * rfi fallback flush must be in its own cacheline to prevent
238 * other paca data leaking into the L1d
239 */
240 u64 exrfi[EX_SIZE] __aligned(0x80);
241 void *rfi_flush_fallback_area;
242 u64 l1d_flush_congruence;
243 u64 l1d_flush_sets;
244#endif
235}; 245};
236 246
237extern void copy_mm_to_paca(struct mm_struct *mm); 247extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 7f01b22fa6cb..55eddf50d149 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
326 return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); 326 return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
327} 327}
328 328
329static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
330{
331 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
332 long rc;
333
334 rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
335 if (rc == H_SUCCESS) {
336 p->character = retbuf[0];
337 p->behaviour = retbuf[1];
338 }
339
340 return rc;
341}
342
329#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ 343#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
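Together with the H_CPU_CHAR_*/H_CPU_BEHAV_* flags added to hvcall.h above, this wrapper lets pseries code ask firmware which mitigation primitives exist and which behaviours it recommends. A hedged kernel-style sketch of a caller (only the hcall, the flags, and h_cpu_char_result come from the patch; the decode logic is illustrative):

/* illustrative caller: pick an L1D flush type from firmware's answer */
static void probe_cpu_characteristics(void)
{
	struct h_cpu_char_result res;

	if (plpar_get_cpu_characteristics(&res) != H_SUCCESS)
		return;	/* old firmware: no characteristics hcall */

	if (res.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
		pr_info("rfi-flush: firmware offers mttrig flush\n");
	else if (res.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
		pr_info("rfi-flush: firmware offers ori flush\n");

	if (!(res.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
		pr_info("rfi-flush: flush on kernel exit not required\n");
}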
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index cf00ec26303a..469b7fdc9be4 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
39static inline void pseries_little_endian_exceptions(void) {} 39static inline void pseries_little_endian_exceptions(void) {}
40#endif /* CONFIG_PPC_PSERIES */ 40#endif /* CONFIG_PPC_PSERIES */
41 41
42void rfi_flush_enable(bool enable);
43
44/* These are bit flags */
45enum l1d_flush_type {
46 L1D_FLUSH_NONE = 0x1,
47 L1D_FLUSH_FALLBACK = 0x2,
48 L1D_FLUSH_ORI = 0x4,
49 L1D_FLUSH_MTTRIG = 0x8,
50};
51
52void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
53void do_rfi_flush_fixups(enum l1d_flush_type types);
54
42#endif /* !__ASSEMBLY__ */ 55#endif /* !__ASSEMBLY__ */
43 56
44#endif /* _ASM_POWERPC_SETUP_H */ 57#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6b958414b4e0..f390d57cf2e1 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -237,6 +237,11 @@ int main(void)
237 OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); 237 OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
238 OFFSET(PACA_IN_MCE, paca_struct, in_mce); 238 OFFSET(PACA_IN_MCE, paca_struct, in_mce);
239 OFFSET(PACA_IN_NMI, paca_struct, in_nmi); 239 OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
240 OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
241 OFFSET(PACA_EXRFI, paca_struct, exrfi);
242 OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
243 OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
244
240#endif 245#endif
241 OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); 246 OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
242 OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); 247 OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 3320bcac7192..2748584b767d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -37,6 +37,11 @@
37#include <asm/tm.h> 37#include <asm/tm.h>
38#include <asm/ppc-opcode.h> 38#include <asm/ppc-opcode.h>
39#include <asm/export.h> 39#include <asm/export.h>
40#ifdef CONFIG_PPC_BOOK3S
41#include <asm/exception-64s.h>
42#else
43#include <asm/exception-64e.h>
44#endif
40 45
41/* 46/*
42 * System calls. 47 * System calls.
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
262END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 267END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
263 268
264 ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ 269 ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
270 ld r2,GPR2(r1)
271 ld r1,GPR1(r1)
272 mtlr r4
273 mtcr r5
274 mtspr SPRN_SRR0,r7
275 mtspr SPRN_SRR1,r8
276 RFI_TO_USER
277 b . /* prevent speculative execution */
278
279 /* exit to kernel */
2651: ld r2,GPR2(r1) 2801: ld r2,GPR2(r1)
266 ld r1,GPR1(r1) 281 ld r1,GPR1(r1)
267 mtlr r4 282 mtlr r4
268 mtcr r5 283 mtcr r5
269 mtspr SPRN_SRR0,r7 284 mtspr SPRN_SRR0,r7
270 mtspr SPRN_SRR1,r8 285 mtspr SPRN_SRR1,r8
271 RFI 286 RFI_TO_KERNEL
272 b . /* prevent speculative execution */ 287 b . /* prevent speculative execution */
273 288
274.Lsyscall_error: 289.Lsyscall_error:
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
397 mtmsrd r10, 1 412 mtmsrd r10, 1
398 mtspr SPRN_SRR0, r11 413 mtspr SPRN_SRR0, r11
399 mtspr SPRN_SRR1, r12 414 mtspr SPRN_SRR1, r12
400 415 RFI_TO_USER
401 rfid
402 b . /* prevent speculative execution */ 416 b . /* prevent speculative execution */
403#endif 417#endif
404_ASM_NOKPROBE_SYMBOL(system_call_common); 418_ASM_NOKPROBE_SYMBOL(system_call_common);
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
878END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 892END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
879 ACCOUNT_CPU_USER_EXIT(r13, r2, r4) 893 ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
880 REST_GPR(13, r1) 894 REST_GPR(13, r1)
8811: 895
882 mtspr SPRN_SRR1,r3 896 mtspr SPRN_SRR1,r3
883 897
884 ld r2,_CCR(r1) 898 ld r2,_CCR(r1)
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
891 ld r3,GPR3(r1) 905 ld r3,GPR3(r1)
892 ld r4,GPR4(r1) 906 ld r4,GPR4(r1)
893 ld r1,GPR1(r1) 907 ld r1,GPR1(r1)
908 RFI_TO_USER
909 b . /* prevent speculative execution */
894 910
895 rfid 9111: mtspr SPRN_SRR1,r3
912
913 ld r2,_CCR(r1)
914 mtcrf 0xFF,r2
915 ld r2,_NIP(r1)
916 mtspr SPRN_SRR0,r2
917
918 ld r0,GPR0(r1)
919 ld r2,GPR2(r1)
920 ld r3,GPR3(r1)
921 ld r4,GPR4(r1)
922 ld r1,GPR1(r1)
923 RFI_TO_KERNEL
896 b . /* prevent speculative execution */ 924 b . /* prevent speculative execution */
897 925
898#endif /* CONFIG_PPC_BOOK3E */ 926#endif /* CONFIG_PPC_BOOK3E */
@@ -1073,7 +1101,7 @@ __enter_rtas:
1073 1101
1074 mtspr SPRN_SRR0,r5 1102 mtspr SPRN_SRR0,r5
1075 mtspr SPRN_SRR1,r6 1103 mtspr SPRN_SRR1,r6
1076 rfid 1104 RFI_TO_KERNEL
1077 b . /* prevent speculative execution */ 1105 b . /* prevent speculative execution */
1078 1106
1079rtas_return_loc: 1107rtas_return_loc:
@@ -1098,7 +1126,7 @@ rtas_return_loc:
1098 1126
1099 mtspr SPRN_SRR0,r3 1127 mtspr SPRN_SRR0,r3
1100 mtspr SPRN_SRR1,r4 1128 mtspr SPRN_SRR1,r4
1101 rfid 1129 RFI_TO_KERNEL
1102 b . /* prevent speculative execution */ 1130 b . /* prevent speculative execution */
1103_ASM_NOKPROBE_SYMBOL(__enter_rtas) 1131_ASM_NOKPROBE_SYMBOL(__enter_rtas)
1104_ASM_NOKPROBE_SYMBOL(rtas_return_loc) 1132_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
1171 LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) 1199 LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
1172 andc r11,r11,r12 1200 andc r11,r11,r12
1173 mtsrr1 r11 1201 mtsrr1 r11
1174 rfid 1202 RFI_TO_KERNEL
1175#endif /* CONFIG_PPC_BOOK3E */ 1203#endif /* CONFIG_PPC_BOOK3E */
1176 1204
11771: /* Return from OF */ 12051: /* Return from OF */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e441b469dc8f..2dc10bf646b8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
256 LOAD_HANDLER(r12, machine_check_handle_early) 256 LOAD_HANDLER(r12, machine_check_handle_early)
2571: mtspr SPRN_SRR0,r12 2571: mtspr SPRN_SRR0,r12
258 mtspr SPRN_SRR1,r11 258 mtspr SPRN_SRR1,r11
259 rfid 259 RFI_TO_KERNEL
260 b . /* prevent speculative execution */ 260 b . /* prevent speculative execution */
2612: 2612:
262 /* Stack overflow. Stay on emergency stack and panic. 262 /* Stack overflow. Stay on emergency stack and panic.
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
445 li r3,MSR_ME 445 li r3,MSR_ME
446 andc r10,r10,r3 /* Turn off MSR_ME */ 446 andc r10,r10,r3 /* Turn off MSR_ME */
447 mtspr SPRN_SRR1,r10 447 mtspr SPRN_SRR1,r10
448 rfid 448 RFI_TO_KERNEL
449 b . 449 b .
4502: 4502:
451 /* 451 /*
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
463 */ 463 */
464 bl machine_check_queue_event 464 bl machine_check_queue_event
465 MACHINE_CHECK_HANDLER_WINDUP 465 MACHINE_CHECK_HANDLER_WINDUP
466 rfid 466 RFI_TO_USER_OR_KERNEL
4679: 4679:
468 /* Deliver the machine check to host kernel in V mode. */ 468 /* Deliver the machine check to host kernel in V mode. */
469 MACHINE_CHECK_HANDLER_WINDUP 469 MACHINE_CHECK_HANDLER_WINDUP
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
598 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ 598 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
599 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ 599 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
600 600
601 andi. r9,r11,MSR_PR // Check for exception from userspace
602 cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
603
601 /* 604 /*
602 * Test MSR_RI before calling slb_allocate_realmode, because the 605 * Test MSR_RI before calling slb_allocate_realmode, because the
603 * MSR in r11 gets clobbered. However we still want to allocate 606 * MSR in r11 gets clobbered. However we still want to allocate
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
624 627
625 /* All done -- return from exception. */ 628 /* All done -- return from exception. */
626 629
630 bne cr4,1f /* returning to kernel */
631
627.machine push 632.machine push
628.machine "power4" 633.machine "power4"
629 mtcrf 0x80,r9 634 mtcrf 0x80,r9
635 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
630 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ 636 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
631 mtcrf 0x02,r9 /* I/D indication is in cr6 */ 637 mtcrf 0x02,r9 /* I/D indication is in cr6 */
632 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ 638 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
640 ld r11,PACA_EXSLB+EX_R11(r13) 646 ld r11,PACA_EXSLB+EX_R11(r13)
641 ld r12,PACA_EXSLB+EX_R12(r13) 647 ld r12,PACA_EXSLB+EX_R12(r13)
642 ld r13,PACA_EXSLB+EX_R13(r13) 648 ld r13,PACA_EXSLB+EX_R13(r13)
643 rfid 649 RFI_TO_USER
650 b . /* prevent speculative execution */
6511:
652.machine push
653.machine "power4"
654 mtcrf 0x80,r9
655 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
656 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
657 mtcrf 0x02,r9 /* I/D indication is in cr6 */
658 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
659.machine pop
660
661 RESTORE_CTR(r9, PACA_EXSLB)
662 RESTORE_PPR_PACA(PACA_EXSLB, r9)
663 mr r3,r12
664 ld r9,PACA_EXSLB+EX_R9(r13)
665 ld r10,PACA_EXSLB+EX_R10(r13)
666 ld r11,PACA_EXSLB+EX_R11(r13)
667 ld r12,PACA_EXSLB+EX_R12(r13)
668 ld r13,PACA_EXSLB+EX_R13(r13)
669 RFI_TO_KERNEL
644 b . /* prevent speculative execution */ 670 b . /* prevent speculative execution */
645 671
672
6462: std r3,PACA_EXSLB+EX_DAR(r13) 6732: std r3,PACA_EXSLB+EX_DAR(r13)
647 mr r3,r12 674 mr r3,r12
648 mfspr r11,SPRN_SRR0 675 mfspr r11,SPRN_SRR0
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
651 mtspr SPRN_SRR0,r10 678 mtspr SPRN_SRR0,r10
652 ld r10,PACAKMSR(r13) 679 ld r10,PACAKMSR(r13)
653 mtspr SPRN_SRR1,r10 680 mtspr SPRN_SRR1,r10
654 rfid 681 RFI_TO_KERNEL
655 b . 682 b .
656 683
6578: std r3,PACA_EXSLB+EX_DAR(r13) 6848: std r3,PACA_EXSLB+EX_DAR(r13)
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
662 mtspr SPRN_SRR0,r10 689 mtspr SPRN_SRR0,r10
663 ld r10,PACAKMSR(r13) 690 ld r10,PACAKMSR(r13)
664 mtspr SPRN_SRR1,r10 691 mtspr SPRN_SRR1,r10
665 rfid 692 RFI_TO_KERNEL
666 b . 693 b .
667 694
668EXC_COMMON_BEGIN(unrecov_slb) 695EXC_COMMON_BEGIN(unrecov_slb)
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
901 mtspr SPRN_SRR0,r10 ; \ 928 mtspr SPRN_SRR0,r10 ; \
902 ld r10,PACAKMSR(r13) ; \ 929 ld r10,PACAKMSR(r13) ; \
903 mtspr SPRN_SRR1,r10 ; \ 930 mtspr SPRN_SRR1,r10 ; \
904 rfid ; \ 931 RFI_TO_KERNEL ; \
905 b . ; /* prevent speculative execution */ 932 b . ; /* prevent speculative execution */
906 933
907#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH 934#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
917 xori r12,r12,MSR_LE ; \ 944 xori r12,r12,MSR_LE ; \
918 mtspr SPRN_SRR1,r12 ; \ 945 mtspr SPRN_SRR1,r12 ; \
919 mr r13,r9 ; \ 946 mr r13,r9 ; \
920 rfid ; /* return to userspace */ \ 947 RFI_TO_USER ; /* return to userspace */ \
921 b . ; /* prevent speculative execution */ 948 b . ; /* prevent speculative execution */
922#else 949#else
923#define SYSCALL_FASTENDIAN_TEST 950#define SYSCALL_FASTENDIAN_TEST
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
1063 mtcr r11 1090 mtcr r11
1064 REST_GPR(11, r1) 1091 REST_GPR(11, r1)
1065 ld r1,GPR1(r1) 1092 ld r1,GPR1(r1)
1066 hrfid 1093 HRFI_TO_USER_OR_KERNEL
1067 1094
10681: mtcr r11 10951: mtcr r11
1069 REST_GPR(11, r1) 1096 REST_GPR(11, r1)
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1314 ld r11,PACA_EXGEN+EX_R11(r13) 1341 ld r11,PACA_EXGEN+EX_R11(r13)
1315 ld r12,PACA_EXGEN+EX_R12(r13) 1342 ld r12,PACA_EXGEN+EX_R12(r13)
1316 ld r13,PACA_EXGEN+EX_R13(r13) 1343 ld r13,PACA_EXGEN+EX_R13(r13)
1317 HRFID 1344 HRFI_TO_UNKNOWN
1318 b . 1345 b .
1319#endif 1346#endif
1320 1347
@@ -1418,10 +1445,94 @@ masked_##_H##interrupt: \
1418 ld r10,PACA_EXGEN+EX_R10(r13); \ 1445 ld r10,PACA_EXGEN+EX_R10(r13); \
1419 ld r11,PACA_EXGEN+EX_R11(r13); \ 1446 ld r11,PACA_EXGEN+EX_R11(r13); \
1420 /* returns to kernel where r13 must be set up, so don't restore it */ \ 1447 /* returns to kernel where r13 must be set up, so don't restore it */ \
1421 ##_H##rfid; \ 1448 ##_H##RFI_TO_KERNEL; \
1422 b .; \ 1449 b .; \
1423 MASKED_DEC_HANDLER(_H) 1450 MASKED_DEC_HANDLER(_H)
1424 1451
1452TRAMP_REAL_BEGIN(rfi_flush_fallback)
1453 SET_SCRATCH0(r13);
1454 GET_PACA(r13);
1455 std r9,PACA_EXRFI+EX_R9(r13)
1456 std r10,PACA_EXRFI+EX_R10(r13)
1457 std r11,PACA_EXRFI+EX_R11(r13)
1458 std r12,PACA_EXRFI+EX_R12(r13)
1459 std r8,PACA_EXRFI+EX_R13(r13)
1460 mfctr r9
1461 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
1462 ld r11,PACA_L1D_FLUSH_SETS(r13)
1463 ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
1464 /*
1465	 * The load addresses are at staggered offsets within cachelines,
1466 * which suits some pipelines better (on others it should not
1467 * hurt).
1468 */
1469 addi r12,r12,8
1470 mtctr r11
1471 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
1472
1473 /* order ld/st prior to dcbt stop all streams with flushing */
1474 sync
14751: li r8,0
1476 .rept 8 /* 8-way set associative */
1477 ldx r11,r10,r8
1478 add r8,r8,r12
1479 xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
1480 add r8,r8,r11 // Add 0, this creates a dependency on the ldx
1481 .endr
1482 addi r10,r10,128 /* 128 byte cache line */
1483 bdnz 1b
1484
1485 mtctr r9
1486 ld r9,PACA_EXRFI+EX_R9(r13)
1487 ld r10,PACA_EXRFI+EX_R10(r13)
1488 ld r11,PACA_EXRFI+EX_R11(r13)
1489 ld r12,PACA_EXRFI+EX_R12(r13)
1490 ld r8,PACA_EXRFI+EX_R13(r13)
1491 GET_SCRATCH0(r13);
1492 rfid
1493
1494TRAMP_REAL_BEGIN(hrfi_flush_fallback)
1495 SET_SCRATCH0(r13);
1496 GET_PACA(r13);
1497 std r9,PACA_EXRFI+EX_R9(r13)
1498 std r10,PACA_EXRFI+EX_R10(r13)
1499 std r11,PACA_EXRFI+EX_R11(r13)
1500 std r12,PACA_EXRFI+EX_R12(r13)
1501 std r8,PACA_EXRFI+EX_R13(r13)
1502 mfctr r9
1503 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
1504 ld r11,PACA_L1D_FLUSH_SETS(r13)
1505 ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
1506 /*
1507	 * The load addresses are at staggered offsets within cachelines,
1508 * which suits some pipelines better (on others it should not
1509 * hurt).
1510 */
1511 addi r12,r12,8
1512 mtctr r11
1513 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
1514
1515 /* order ld/st prior to dcbt stop all streams with flushing */
1516 sync
15171: li r8,0
1518 .rept 8 /* 8-way set associative */
1519 ldx r11,r10,r8
1520 add r8,r8,r12
1521 xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
1522 add r8,r8,r11 // Add 0, this creates a dependency on the ldx
1523 .endr
1524 addi r10,r10,128 /* 128 byte cache line */
1525 bdnz 1b
1526
1527 mtctr r9
1528 ld r9,PACA_EXRFI+EX_R9(r13)
1529 ld r10,PACA_EXRFI+EX_R10(r13)
1530 ld r11,PACA_EXRFI+EX_R11(r13)
1531 ld r12,PACA_EXRFI+EX_R12(r13)
1532 ld r8,PACA_EXRFI+EX_R13(r13)
1533 GET_SCRATCH0(r13);
1534 hrfid
1535
1425/* 1536/*
1426 * Real mode exceptions actually use this too, but alternate 1537 * Real mode exceptions actually use this too, but alternate
1427 * instruction code patches (which end up in the common .text area) 1538 * instruction code patches (which end up in the common .text area)
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
1441 addi r13, r13, 4 1552 addi r13, r13, 4
1442 mtspr SPRN_SRR0, r13 1553 mtspr SPRN_SRR0, r13
1443 GET_SCRATCH0(r13) 1554 GET_SCRATCH0(r13)
1444 rfid 1555 RFI_TO_KERNEL
1445 b . 1556 b .
1446 1557
1447TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) 1558TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
1453 addi r13, r13, 4 1564 addi r13, r13, 4
1454 mtspr SPRN_HSRR0, r13 1565 mtspr SPRN_HSRR0, r13
1455 GET_SCRATCH0(r13) 1566 GET_SCRATCH0(r13)
1456 hrfid 1567 HRFI_TO_KERNEL
1457 b . 1568 b .
1458#endif 1569#endif
1459 1570
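The two fallback trampolines above implement a displacement flush: they walk a zeroed per-CPU buffer so that every set and way of the L1D is refilled with harmless data before the (h)rfid. A C rendering of the loop structure, assuming the 8-way, 128-byte-line geometry the code is tuned for (the +8 on the way stride mirrors the addi r12,r12,8 stagger):

/* C sketch of the displacement flush over a zeroed fallback area */
void displacement_flush(const char *area, unsigned long nsets,
			unsigned long way_size /* the "congruence" value */)
{
	for (unsigned long set = 0; set < nsets; set++) {
		const volatile char *p = area + set * 128; /* one line per set */

		for (int way = 0; way < 8; way++) {
			(void)*p;		/* load evicts one way of this set */
			p += way_size + 8;	/* staggered step into the next way */
		}
	}
}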
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5acb5a176dbe..72be0c32e902 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
1403 1403
1404 printk("NIP: "REG" LR: "REG" CTR: "REG"\n", 1404 printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
1405 regs->nip, regs->link, regs->ctr); 1405 regs->nip, regs->link, regs->ctr);
1406 printk("REGS: %p TRAP: %04lx %s (%s)\n", 1406 printk("REGS: %px TRAP: %04lx %s (%s)\n",
1407 regs, regs->trap, print_tainted(), init_utsname()->release); 1407 regs, regs->trap, print_tainted(), init_utsname()->release);
1408 printk("MSR: "REG" ", regs->msr); 1408 printk("MSR: "REG" ", regs->msr);
1409 print_msr_bits(regs->msr); 1409 print_msr_bits(regs->msr);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8956a9856604..491be4179ddd 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void)
801 return 0; 801 return 0;
802} 802}
803early_initcall(disable_hardlockup_detector); 803early_initcall(disable_hardlockup_detector);
804
805#ifdef CONFIG_PPC_BOOK3S_64
806static enum l1d_flush_type enabled_flush_types;
807static void *l1d_flush_fallback_area;
808static bool no_rfi_flush;
809bool rfi_flush;
810
811static int __init handle_no_rfi_flush(char *p)
812{
813 pr_info("rfi-flush: disabled on command line.");
814 no_rfi_flush = true;
815 return 0;
816}
817early_param("no_rfi_flush", handle_no_rfi_flush);
818
819/*
820 * The RFI flush is not KPTI, but because users will see documentation that
821 * says to use nopti, we hijack that option here to also disable the RFI flush.
822 */
823static int __init handle_no_pti(char *p)
824{
825 pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
826 handle_no_rfi_flush(NULL);
827 return 0;
828}
829early_param("nopti", handle_no_pti);
830
831static void do_nothing(void *unused)
832{
833 /*
833	 * We don't need to do the flush explicitly; entering and exiting the
834	 * kernel is sufficient, as the RFI exit handlers do the right thing.
836 */
837}
838
839void rfi_flush_enable(bool enable)
840{
841 if (rfi_flush == enable)
842 return;
843
844 if (enable) {
845 do_rfi_flush_fixups(enabled_flush_types);
846 on_each_cpu(do_nothing, NULL, 1);
847 } else
848 do_rfi_flush_fixups(L1D_FLUSH_NONE);
849
850 rfi_flush = enable;
851}
852
853static void init_fallback_flush(void)
854{
855 u64 l1d_size, limit;
856 int cpu;
857
858 l1d_size = ppc64_caches.l1d.size;
859 limit = min(safe_stack_limit(), ppc64_rma_size);
860
861 /*
862 * Align to L1d size, and size it at 2x L1d size, to catch possible
863 * hardware prefetch runoff. We don't have a recipe for load patterns to
864 * reliably avoid the prefetcher.
865 */
866 l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
867 memset(l1d_flush_fallback_area, 0, l1d_size * 2);
868
869 for_each_possible_cpu(cpu) {
870 /*
871 * The fallback flush is currently coded for 8-way
872 * associativity. Different associativity is possible, but it
873 * will be treated as 8-way and may not evict the lines as
874 * effectively.
875 *
876 * 128 byte lines are mandatory.
877 */
878 u64 c = l1d_size / 8;
879
880 paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
881 paca[cpu].l1d_flush_congruence = c;
882 paca[cpu].l1d_flush_sets = c / 128;
883 }
884}
885
886void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
887{
888 if (types & L1D_FLUSH_FALLBACK) {
889 pr_info("rfi-flush: Using fallback displacement flush\n");
890 init_fallback_flush();
891 }
892
893 if (types & L1D_FLUSH_ORI)
894 pr_info("rfi-flush: Using ori type flush\n");
895
896 if (types & L1D_FLUSH_MTTRIG)
897 pr_info("rfi-flush: Using mttrig type flush\n");
898
899 enabled_flush_types = types;
900
901 if (!no_rfi_flush)
902 rfi_flush_enable(enable);
903}
904#endif /* CONFIG_PPC_BOOK3S_64 */
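For concreteness, the init_fallback_flush() sizing for a typical 32 KiB, 8-way, 128-byte-line L1D works out as below (a worked example; these numbers are not from the patch):

#include <stdio.h>

int main(void)
{
	unsigned long l1d_size = 32 * 1024;
	unsigned long area = 2 * l1d_size;		/* 65536: 2x to catch prefetch runoff */
	unsigned long congruence = l1d_size / 8;	/* 4096 bytes per way */
	unsigned long sets = congruence / 128;		/* 32 lines per way */

	printf("area=%lu congruence=%lu sets=%lu\n", area, congruence, sets);
	return 0;
}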
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 0494e1566ee2..307843d23682 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -132,6 +132,15 @@ SECTIONS
132 /* Read-only data */ 132 /* Read-only data */
133 RO_DATA(PAGE_SIZE) 133 RO_DATA(PAGE_SIZE)
134 134
135#ifdef CONFIG_PPC64
136 . = ALIGN(8);
137 __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
138 __start___rfi_flush_fixup = .;
139 *(__rfi_flush_fixup)
140 __stop___rfi_flush_fixup = .;
141 }
142#endif
143
135 EXCEPTION_TABLE(0) 144 EXCEPTION_TABLE(0)
136 145
137 NOTES :kernel :notes 146 NOTES :kernel :notes
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 29ebe2fd5867..a93d719edc90 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
235 gpte->may_read = true; 235 gpte->may_read = true;
236 gpte->may_write = true; 236 gpte->may_write = true;
237 gpte->page_size = MMU_PAGE_4K; 237 gpte->page_size = MMU_PAGE_4K;
238 gpte->wimg = HPTE_R_M;
238 239
239 return 0; 240 return 0;
240 } 241 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 966097232d21..b73dbc9e797d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
65 u32 order; 65 u32 order;
66 66
67 /* These fields protected by kvm->lock */ 67 /* These fields protected by kvm->lock */
68
69 /* Possible values and their usage:
70 * <0 an error occurred during allocation,
71 * -EBUSY allocation is in the progress,
72 * 0 allocation made successfuly.
73 */
68 int error; 74 int error;
69 bool prepare_done;
70 75
71 /* Private to the work thread, until prepare_done is true, 76 /* Private to the work thread, until error != -EBUSY,
72 * then protected by kvm->resize_hpt_sem */ 77 * then protected by kvm->lock.
78 */
73 struct kvm_hpt_info hpt; 79 struct kvm_hpt_info hpt;
74}; 80};
75 81
@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
159 * Reset all the reverse-mapping chains for all memslots 165 * Reset all the reverse-mapping chains for all memslots
160 */ 166 */
161 kvmppc_rmap_reset(kvm); 167 kvmppc_rmap_reset(kvm);
162 /* Ensure that each vcpu will flush its TLB on next entry. */
163 cpumask_setall(&kvm->arch.need_tlb_flush);
164 err = 0; 168 err = 0;
165 goto out; 169 goto out;
166 } 170 }
@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
176 kvmppc_set_hpt(kvm, &info); 180 kvmppc_set_hpt(kvm, &info);
177 181
178out: 182out:
183 if (err == 0)
184 /* Ensure that each vcpu will flush its TLB on next entry. */
185 cpumask_setall(&kvm->arch.need_tlb_flush);
186
179 mutex_unlock(&kvm->lock); 187 mutex_unlock(&kvm->lock);
180 return err; 188 return err;
181} 189}
@@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
1413 1421
1414static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) 1422static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
1415{ 1423{
1416 BUG_ON(kvm->arch.resize_hpt != resize); 1424 if (WARN_ON(!mutex_is_locked(&kvm->lock)))
1425 return;
1417 1426
1418 if (!resize) 1427 if (!resize)
1419 return; 1428 return;
1420 1429
1421 if (resize->hpt.virt) 1430 if (resize->error != -EBUSY) {
1422 kvmppc_free_hpt(&resize->hpt); 1431 if (resize->hpt.virt)
1432 kvmppc_free_hpt(&resize->hpt);
1433 kfree(resize);
1434 }
1423 1435
1424 kvm->arch.resize_hpt = NULL; 1436 if (kvm->arch.resize_hpt == resize)
1425 kfree(resize); 1437 kvm->arch.resize_hpt = NULL;
1426} 1438}
1427 1439
1428static void resize_hpt_prepare_work(struct work_struct *work) 1440static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
1431 struct kvm_resize_hpt, 1443 struct kvm_resize_hpt,
1432 work); 1444 work);
1433 struct kvm *kvm = resize->kvm; 1445 struct kvm *kvm = resize->kvm;
1434 int err; 1446 int err = 0;
1435 1447
1436 resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", 1448 if (WARN_ON(resize->error != -EBUSY))
1437 resize->order); 1449 return;
1438
1439 err = resize_hpt_allocate(resize);
1440 1450
1441 mutex_lock(&kvm->lock); 1451 mutex_lock(&kvm->lock);
1442 1452
1453 /* Request is still current? */
1454 if (kvm->arch.resize_hpt == resize) {
1455	 /* We may request large allocations here:
1456	 * don't sleep for a long time while holding kvm->lock.
1457	 */
1458 mutex_unlock(&kvm->lock);
1459
1460 resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
1461 resize->order);
1462
1463 err = resize_hpt_allocate(resize);
1464
1465	 /* -EBUSY is reserved as the in-progress marker, so
1466	 * the allocation must never return it here.
1467	 */
1468 if (WARN_ON(err == -EBUSY))
1469 err = -EINPROGRESS;
1470
1471 mutex_lock(&kvm->lock);
1472 /* It is possible that kvm->arch.resize_hpt != resize
1473 * after we grab kvm->lock again.
1474 */
1475 }
1476
1443 resize->error = err; 1477 resize->error = err;
1444 resize->prepare_done = true; 1478
1479 if (kvm->arch.resize_hpt != resize)
1480 resize_hpt_release(kvm, resize);
1445 1481
1446 mutex_unlock(&kvm->lock); 1482 mutex_unlock(&kvm->lock);
1447} 1483}
@@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
1466 1502
1467 if (resize) { 1503 if (resize) {
1468 if (resize->order == shift) { 1504 if (resize->order == shift) {
1469 /* Suitable resize in progress */ 1505 /* Suitable resize in progress? */
1470 if (resize->prepare_done) { 1506 ret = resize->error;
1471 ret = resize->error; 1507 if (ret == -EBUSY)
1472 if (ret != 0)
1473 resize_hpt_release(kvm, resize);
1474 } else {
1475 ret = 100; /* estimated time in ms */ 1508 ret = 100; /* estimated time in ms */
1476 } 1509 else if (ret)
1510 resize_hpt_release(kvm, resize);
1477 1511
1478 goto out; 1512 goto out;
1479 } 1513 }
@@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
1493 ret = -ENOMEM; 1527 ret = -ENOMEM;
1494 goto out; 1528 goto out;
1495 } 1529 }
1530
1531 resize->error = -EBUSY;
1496 resize->order = shift; 1532 resize->order = shift;
1497 resize->kvm = kvm; 1533 resize->kvm = kvm;
1498 INIT_WORK(&resize->work, resize_hpt_prepare_work); 1534 INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
1547 if (!resize || (resize->order != shift)) 1583 if (!resize || (resize->order != shift))
1548 goto out; 1584 goto out;
1549 1585
1550 ret = -EBUSY;
1551 if (!resize->prepare_done)
1552 goto out;
1553
1554 ret = resize->error; 1586 ret = resize->error;
1555 if (ret != 0) 1587 if (ret)
1556 goto out; 1588 goto out;
1557 1589
1558 ret = resize_hpt_rehash(resize); 1590 ret = resize_hpt_rehash(resize);
1559 if (ret != 0) 1591 if (ret)
1560 goto out; 1592 goto out;
1561 1593
1562 resize_hpt_pivot(resize); 1594 resize_hpt_pivot(resize);
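
Taken together, the hunks above make resize->error double as the request state: -EBUSY (set at creation, see the kvm_vm_ioctl_resize_hpt_prepare hunk) means the prepare work is still in flight, any other negative errno is a terminal allocation failure, and 0 means the new HPT is ready to commit. A minimal sketch of that tri-state, assuming nothing beyond standard errno values:

#include <errno.h>

/* Sketch only: models the lifecycle of resize->error above,
 * not the real KVM structures. */
enum hpt_state { HPT_PREPARING, HPT_FAILED, HPT_READY };

static enum hpt_state hpt_state_of(int error)
{
	if (error == -EBUSY)	/* set when the request is created */
		return HPT_PREPARING;
	if (error)		/* any other negative errno is terminal */
		return HPT_FAILED;
	return HPT_READY;	/* safe to rehash and pivot */
}
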
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2659844784b8..9c61f736c75b 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
79 mtmsrd r0,1 /* clear RI in MSR */ 79 mtmsrd r0,1 /* clear RI in MSR */
80 mtsrr0 r5 80 mtsrr0 r5
81 mtsrr1 r6 81 mtsrr1 r6
82 RFI 82 RFI_TO_KERNEL
83 83
84kvmppc_call_hv_entry: 84kvmppc_call_hv_entry:
85BEGIN_FTR_SECTION 85BEGIN_FTR_SECTION
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
199 mtmsrd r6, 1 /* Clear RI in MSR */ 199 mtmsrd r6, 1 /* Clear RI in MSR */
200 mtsrr0 r8 200 mtsrr0 r8
201 mtsrr1 r7 201 mtsrr1 r7
202 RFI 202 RFI_TO_KERNEL
203 203
204 /* Virtual-mode return */ 204 /* Virtual-mode return */
205.Lvirt_return: 205.Lvirt_return:
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1167 1167
1168 ld r0, VCPU_GPR(R0)(r4) 1168 ld r0, VCPU_GPR(R0)(r4)
1169 ld r4, VCPU_GPR(R4)(r4) 1169 ld r4, VCPU_GPR(R4)(r4)
1170 1170 HRFI_TO_GUEST
1171 hrfid
1172 b . 1171 b .
1173 1172
1174secondary_too_late: 1173secondary_too_late:
@@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
3320 ld r4, PACAKMSR(r13) 3319 ld r4, PACAKMSR(r13)
3321 mtspr SPRN_SRR0, r3 3320 mtspr SPRN_SRR0, r3
3322 mtspr SPRN_SRR1, r4 3321 mtspr SPRN_SRR1, r4
3323 rfid 3322 RFI_TO_KERNEL
33249: addi r3, r1, STACK_FRAME_OVERHEAD 33239: addi r3, r1, STACK_FRAME_OVERHEAD
3325 bl kvmppc_bad_interrupt 3324 bl kvmppc_bad_interrupt
3326 b 9b 3325 b 9b
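
The mechanical RFI -> RFI_TO_KERNEL and hrfid -> HRFI_TO_GUEST substitutions give every return-from-interrupt a patchable slot so the L1D flush can be enabled at boot; the 32-bit book3s_rmhandlers.S hunk further down simply defines the macros back to plain RFI. A hedged sketch of the 64-bit shape, assuming a patchable-nop RFI_FLUSH_SLOT recorded in a fixup section (not the verbatim kernel header):

/* Hedged sketch of the 64-bit side: a nop slot, recorded in a fixup
 * section, precedes the real return instruction so boot code can
 * patch in the flush sequence when the mitigation is enabled. */
#define RFI_TO_KERNEL			\
	RFI_FLUSH_SLOT;			\
	rfid;				\
	b	rfi_flush_fallback
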
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d0dc8624198f..7deaeeb14b93 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
60#define MSR_USER32 MSR_USER 60#define MSR_USER32 MSR_USER
61#define MSR_USER64 MSR_USER 61#define MSR_USER64 MSR_USER
62#define HW_PAGE_SIZE PAGE_SIZE 62#define HW_PAGE_SIZE PAGE_SIZE
63#define HPTE_R_M _PAGE_COHERENT
63#endif 64#endif
64 65
65static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) 66static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
557 pte.eaddr = eaddr; 558 pte.eaddr = eaddr;
558 pte.vpage = eaddr >> 12; 559 pte.vpage = eaddr >> 12;
559 pte.page_size = MMU_PAGE_64K; 560 pte.page_size = MMU_PAGE_64K;
561 pte.wimg = HPTE_R_M;
560 } 562 }
561 563
562 switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) { 564 switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 42a4b237df5f..34a5adeff084 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -46,6 +46,9 @@
46 46
47#define FUNC(name) name 47#define FUNC(name) name
48 48
49#define RFI_TO_KERNEL RFI
50#define RFI_TO_GUEST RFI
51
49.macro INTERRUPT_TRAMPOLINE intno 52.macro INTERRUPT_TRAMPOLINE intno
50 53
51.global kvmppc_trampoline_\intno 54.global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
141 GET_SCRATCH0(r13) 144 GET_SCRATCH0(r13)
142 145
143 /* And get back into the code */ 146 /* And get back into the code */
144 RFI 147 RFI_TO_KERNEL
145#endif 148#endif
146 149
147/* 150/*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
164 ori r5, r5, MSR_EE 167 ori r5, r5, MSR_EE
165 mtsrr0 r7 168 mtsrr0 r7
166 mtsrr1 r6 169 mtsrr1 r6
167 RFI 170 RFI_TO_KERNEL
168 171
169#include "book3s_segment.S" 172#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 2a2b96d53999..93a180ceefad 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -156,7 +156,7 @@ no_dcbz32_on:
156 PPC_LL r9, SVCPU_R9(r3) 156 PPC_LL r9, SVCPU_R9(r3)
157 PPC_LL r3, (SVCPU_R3)(r3) 157 PPC_LL r3, (SVCPU_R3)(r3)
158 158
159 RFI 159 RFI_TO_GUEST
160kvmppc_handler_trampoline_enter_end: 160kvmppc_handler_trampoline_enter_end:
161 161
162 162
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
407 cmpwi r12, BOOK3S_INTERRUPT_DOORBELL 407 cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
408 beqa BOOK3S_INTERRUPT_DOORBELL 408 beqa BOOK3S_INTERRUPT_DOORBELL
409 409
410 RFI 410 RFI_TO_KERNEL
411kvmppc_handler_trampoline_exit_end: 411kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e032..0d750d274c4e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
725 725
726 /* Return the per-cpu state for state saving/migration */ 726 /* Return the per-cpu state for state saving/migration */
727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | 727 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; 728 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
729 (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
729} 730}
730 731
731int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) 732int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1558 1559
1559 /* 1560 /*
1560 * Restore P and Q. If the interrupt was pending, we 1561 * Restore P and Q. If the interrupt was pending, we
1561 * force both P and Q, which will trigger a resend. 1562 * force Q and !P, which will trigger a resend.
1562 * 1563 *
1563 * That means that a guest that had both an interrupt 1564 * That means that a guest that had both an interrupt
1564 * pending (queued) and Q set will restore with only 1565 * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1566 * is perfectly fine as coalescing interrupts that haven't 1567 * is perfectly fine as coalescing interrupts that haven't
1567 * been presented yet is always allowed. 1568 * been presented yet is always allowed.
1568 */ 1569 */
1569 if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) 1570 if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
1570 state->old_p = true; 1571 state->old_p = true;
1571 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) 1572 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
1572 state->old_q = true; 1573 state->old_q = true;
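
The corrected condition restores P only for interrupts that were presented and are not pending again; anything pending is restored as Q-only, so the resend machinery re-presents it, which is safe because coalescing unpresented interrupts is always allowed (per the comment above). The decision, sketched with plain booleans:

#include <stdbool.h>

/* Sketch of the corrected P/Q restore decision (hypothetical helper). */
struct pq_state { bool old_p, old_q; };

static struct pq_state restore_pq(bool presented, bool queued, bool pending)
{
	struct pq_state s = { false, false };

	if (presented && !pending)	/* presented, not re-queued: keep P */
		s.old_p = true;
	if (queued || pending)		/* force Q so the irq is re-sent */
		s.old_q = true;
	return s;
}
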
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 41cf5ae273cf..a95ea007d654 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
116 } 116 }
117} 117}
118 118
119#ifdef CONFIG_PPC_BOOK3S_64
120void do_rfi_flush_fixups(enum l1d_flush_type types)
121{
122 unsigned int instrs[3], *dest;
123 long *start, *end;
124 int i;
125
126 start = PTRRELOC(&__start___rfi_flush_fixup),
127 end = PTRRELOC(&__stop___rfi_flush_fixup);
128
129 instrs[0] = 0x60000000; /* nop */
130 instrs[1] = 0x60000000; /* nop */
131 instrs[2] = 0x60000000; /* nop */
132
133 if (types & L1D_FLUSH_FALLBACK)
134 /* b .+16 to fallback flush */
135 instrs[0] = 0x48000010;
136
137 i = 0;
138 if (types & L1D_FLUSH_ORI) {
139 instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
140 instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
141 }
142
143 if (types & L1D_FLUSH_MTTRIG)
144 instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
145
146 for (i = 0; start < end; start++, i++) {
147 dest = (void *)start + *start;
148
149 pr_devel("patching dest %lx\n", (unsigned long)dest);
150
151 patch_instruction(dest, instrs[0]);
152 patch_instruction(dest + 1, instrs[1]);
153 patch_instruction(dest + 2, instrs[2]);
154 }
155
156 printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
157}
158#endif /* CONFIG_PPC_BOOK3S_64 */
159
119void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) 160void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
120{ 161{
121 long *start, *end; 162 long *start, *end;
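
do_rfi_flush_fixups() is the standard powerpc runtime-patching pattern: the linker collects self-relative pointers to three-instruction nop slots into the __rfi_flush_fixup section, and boot code rewrites every slot with whichever flush sequence firmware advertised. A reduced sketch of the walk, with illustrative section names and a hypothetical patch_site() helper:

/* Sketch of the section-walking patch loop; patch_site() and the
 * section symbols are illustrative, not the kernel's exact names. */
extern long __start_fixup[], __stop_fixup[];
extern void patch_site(unsigned int *site, const unsigned int *insns, int n);

static void apply_fixups(const unsigned int insns[3])
{
	long *entry;

	for (entry = __start_fixup; entry < __stop_fixup; entry++) {
		/* each entry is a self-relative offset to a nop slot */
		unsigned int *site = (unsigned int *)((char *)entry + *entry);

		patch_site(site, insns, 3);
	}
}
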
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 4797d08581ce..6e1e39035380 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
145 return __bad_area(regs, address, SEGV_MAPERR); 145 return __bad_area(regs, address, SEGV_MAPERR);
146} 146}
147 147
148static noinline int bad_access(struct pt_regs *regs, unsigned long address)
149{
150 return __bad_area(regs, address, SEGV_ACCERR);
151}
152
148static int do_sigbus(struct pt_regs *regs, unsigned long address, 153static int do_sigbus(struct pt_regs *regs, unsigned long address,
149 unsigned int fault) 154 unsigned int fault)
150{ 155{
@@ -490,7 +495,7 @@ retry:
490 495
491good_area: 496good_area:
492 if (unlikely(access_error(is_write, is_exec, vma))) 497 if (unlikely(access_error(is_write, is_exec, vma)))
493 return bad_area(regs, address); 498 return bad_access(regs, address);
494 499
495 /* 500 /*
496 * If for any reason at all we couldn't handle the fault, 501 * If for any reason at all we couldn't handle the fault,
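
With bad_access() in place, a fault inside a mapped VMA whose protections forbid the access raises SIGSEGV with si_code SEGV_ACCERR rather than SEGV_MAPERR, and userspace can tell the two apart. A minimal userspace sketch:

/* Userspace sketch: distinguish the two SIGSEGV si_code values. */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void segv_handler(int sig, siginfo_t *info, void *ucontext)
{
	if (info->si_code == SEGV_ACCERR)
		fprintf(stderr, "mapped but not permitted: %p\n", info->si_addr);
	else
		fprintf(stderr, "address not mapped: %p\n", info->si_addr);
	_exit(1);
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = segv_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);
	/* e.g. a write to a PROT_READ mapping now reports SEGV_ACCERR */
	return 0;
}
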
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e81aff1..d183b4801bdb 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ emit_clear:
763 func = (u8 *) __bpf_call_base + imm; 763 func = (u8 *) __bpf_call_base + imm;
764 764
765 /* Save skb pointer if we need to re-cache skb data */ 765 /* Save skb pointer if we need to re-cache skb data */
766 if (bpf_helper_changes_pkt_data(func)) 766 if ((ctx->seen & SEEN_SKB) &&
767 bpf_helper_changes_pkt_data(func))
767 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); 768 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
768 769
769 bpf_jit_emit_func_call(image, ctx, (u64)func); 770 bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
772 PPC_MR(b2p[BPF_REG_0], 3); 773 PPC_MR(b2p[BPF_REG_0], 3);
773 774
774 /* refresh skb cache */ 775 /* refresh skb cache */
775 if (bpf_helper_changes_pkt_data(func)) { 776 if ((ctx->seen & SEEN_SKB) &&
777 bpf_helper_changes_pkt_data(func)) {
776 /* reload skb pointer to r3 */ 778 /* reload skb pointer to r3 */
777 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); 779 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
778 bpf_jit_emit_skb_loads(image, ctx); 780 bpf_jit_emit_skb_loads(image, ctx);
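
The added (ctx->seen & SEEN_SKB) test matters because the skb save/reload slot only exists in the JIT stack frame when the scan pass saw the program touch skb data; storing to bpf_jit_stack_local() unconditionally would write to stack that was never reserved. The convention, as a sketch:

#include <stdbool.h>

/* Sketch of the two-pass convention (hypothetical jit context). */
struct jit_ctx { unsigned int seen; };
#define SEEN_SKB 0x1	/* set during the scan pass on LD_ABS/LD_IND */

static bool need_skb_spill(const struct jit_ctx *ctx, bool helper_moves_pkt)
{
	/* only spill/reload the skb pointer when the frame actually
	 * reserved a slot for it AND the helper may move packet data */
	return (ctx->seen & SEEN_SKB) && helper_moves_pkt;
}
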
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 153812966365..fce545774d50 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
410 int ret; 410 int ret;
411 __u64 target; 411 __u64 target;
412 412
413 if (is_kernel_addr(addr)) 413 if (is_kernel_addr(addr)) {
414 return branch_target((unsigned int *)addr); 414 if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
415 return 0;
416
417 return branch_target(&instr);
418 }
415 419
416 /* Userspace: need copy instruction here then translate it */ 420 /* Userspace: need copy instruction here then translate it */
417 pagefault_disable(); 421 pagefault_disable();
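
Branch targets from BHRB samples can point at any kernel address, so the fix reads the instruction word via probe_kernel_read(), which copies with faults handled and lets the caller return 0 instead of oopsing on a bad address. The defensive shape, sketched (decode_branch_target() is a hypothetical stand-in for branch_target()):

#include <linux/uaccess.h>

extern unsigned long decode_branch_target(unsigned int instr);	/* hypothetical */

static unsigned long safe_branch_target(unsigned long addr)
{
	unsigned int instr;

	/* probe_kernel_read() returns non-zero if addr faults */
	if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
		return 0;	/* unreadable address: give up quietly */

	return decode_branch_target(instr);
}
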
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd73caa..be4e7f84f70a 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
310 return 0; 310 return 0;
311 311
312 /* 312 /*
313 * Check whether nest_imc is registered. We could end up here if the
314 * cpuhotplug callback registration fails, i.e., the callback invokes the
315 * offline path for all successfully registered nodes. At this stage,
316 * nest_imc pmu will not be registered and we should return here.
317 *
318 * We return zero since this is not an offline failure;
319 * cpuhp_setup_state() returns the actual failure reason to the caller,
320 * which in turn will call the cleanup routine.
321 */
322 if (!nest_pmus)
323 return 0;
324
325 /*
313 * Now that this cpu is one of the designated, 326 * Now that this cpu is one of the designated,
314 * find a next cpu a) which is online and b) in same chip. 327 * find a next cpu a) which is online and b) in same chip.
315 */ 328 */
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1171 if (nest_pmus == 1) { 1184 if (nest_pmus == 1) {
1172 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); 1185 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
1173 kfree(nest_imc_refc); 1186 kfree(nest_imc_refc);
1187 kfree(per_nest_pmu_arr);
1174 } 1188 }
1175 1189
1176 if (nest_pmus > 0) 1190 if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1195 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); 1209 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
1196 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); 1210 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
1197 kfree(pmu_ptr); 1211 kfree(pmu_ptr);
1198 kfree(per_nest_pmu_arr);
1199 return; 1212 return;
1200} 1213}
1201 1214
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
1309 ret = nest_pmu_cpumask_init(); 1322 ret = nest_pmu_cpumask_init();
1310 if (ret) { 1323 if (ret) {
1311 mutex_unlock(&nest_init_lock); 1324 mutex_unlock(&nest_init_lock);
1325 kfree(nest_imc_refc);
1326 kfree(per_nest_pmu_arr);
1312 goto err_free; 1327 goto err_free;
1313 } 1328 }
1314 } 1329 }
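
The !nest_pmus early return in the first hunk covers the window where cpuhp_setup_state() fails partway through registration: the hotplug core then runs the offline callback for every CPU that had already gone through the online callback, before the PMU exists. Any such callback therefore has to tolerate "not ready", roughly:

/* Sketch: the offline callback must tolerate running before the PMU
 * ever registered; subsystem_ready is a hypothetical flag. */
static int subsystem_ready;

static int my_pmu_offline(unsigned int cpu)
{
	if (!subsystem_ready)	/* registration failed mid-way */
		return 0;	/* not an offline failure */

	/* ... normal "migrate PMU context away from cpu" work ... */
	return 0;
}
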
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 1edfbc1e40f4..4fb21e17504a 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -37,13 +37,62 @@
37#include <asm/kexec.h> 37#include <asm/kexec.h>
38#include <asm/smp.h> 38#include <asm/smp.h>
39#include <asm/tm.h> 39#include <asm/tm.h>
40#include <asm/setup.h>
40 41
41#include "powernv.h" 42#include "powernv.h"
42 43
44static void pnv_setup_rfi_flush(void)
45{
46 struct device_node *np, *fw_features;
47 enum l1d_flush_type type;
48 int enable;
49
50 /* Default to fallback in case fw-features are not available */
51 type = L1D_FLUSH_FALLBACK;
52 enable = 1;
53
54 np = of_find_node_by_name(NULL, "ibm,opal");
55 fw_features = of_get_child_by_name(np, "fw-features");
56 of_node_put(np);
57
58 if (fw_features) {
59 np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
60 if (np && of_property_read_bool(np, "enabled"))
61 type = L1D_FLUSH_MTTRIG;
62
63 of_node_put(np);
64
65 np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
66 if (np && of_property_read_bool(np, "enabled"))
67 type = L1D_FLUSH_ORI;
68
69 of_node_put(np);
70
71 /* Enable unless firmware says NOT to */
72 enable = 2;
73 np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
74 if (np && of_property_read_bool(np, "disabled"))
75 enable--;
76
77 of_node_put(np);
78
79 np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
80 if (np && of_property_read_bool(np, "disabled"))
81 enable--;
82
83 of_node_put(np);
84 of_node_put(fw_features);
85 }
86
87 setup_rfi_flush(type, enable > 0);
88}
89
43static void __init pnv_setup_arch(void) 90static void __init pnv_setup_arch(void)
44{ 91{
45 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); 92 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
46 93
94 pnv_setup_rfi_flush();
95
47 /* Initialize SMP */ 96 /* Initialize SMP */
48 pnv_smp_init(); 97 pnv_smp_init();
49 98
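
pnv_setup_rfi_flush() treats firmware policy as a countdown: enable starts at 2 once fw-features exists and drops by one for each needs-l1d-flush-* node firmware marks disabled, so the flush stays on unless both directions are waived. The repeated child-node probe it uses can be factored as below (the of_* calls are exactly the ones in the hunk):

#include <linux/of.h>

/* Sketch: read an "enabled" boolean from a named fw-features child. */
static bool fw_feature_enabled(struct device_node *fw_features,
			       const char *name)
{
	struct device_node *np = of_get_child_by_name(fw_features, name);
	bool on = np && of_property_read_bool(np, "enabled");

	of_node_put(np);	/* of_get_child_by_name takes a reference */
	return on;
}
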
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 6e35780c5962..a0b20c03f078 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
574 574
575static CLASS_ATTR_RW(dlpar); 575static CLASS_ATTR_RW(dlpar);
576 576
577static int __init pseries_dlpar_init(void) 577int __init dlpar_workqueue_init(void)
578{ 578{
579 if (pseries_hp_wq)
580 return 0;
581
579 pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", 582 pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
580 WQ_UNBOUND, 1); 583 WQ_UNBOUND, 1);
584
585 return pseries_hp_wq ? 0 : -ENOMEM;
586}
587
588static int __init dlpar_sysfs_init(void)
589{
590 int rc;
591
592 rc = dlpar_workqueue_init();
593 if (rc)
594 return rc;
595
581 return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); 596 return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
582} 597}
583machine_device_initcall(pseries, pseries_dlpar_init); 598machine_device_initcall(pseries, dlpar_sysfs_init);
584 599
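
dlpar_workqueue_init() now has two callers, the sysfs initcall here and the RAS hotplug IRQ setup in the next file, so the early return on a non-NULL pseries_hp_wq makes it idempotent: whoever runs first allocates the workqueue and the other call is a no-op. The pattern in isolation, with illustrative names:

#include <linux/workqueue.h>
#include <linux/errno.h>

/* Sketch of the shared lazy-init pattern (names are illustrative). */
static struct workqueue_struct *shared_wq;

int shared_wq_init(void)
{
	if (shared_wq)		/* some other init path got here first */
		return 0;

	/* callers are serialized during boot, so no locking is needed */
	shared_wq = alloc_workqueue("shared wq", WQ_UNBOUND, 1);
	return shared_wq ? 0 : -ENOMEM;
}
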
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 4470a3194311..1ae1d9f4dbe9 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
98 return CMO_PageSize; 98 return CMO_PageSize;
99} 99}
100 100
101int dlpar_workqueue_init(void);
102
101#endif /* _PSERIES_PSERIES_H */ 103#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 4923ffe230cf..81d8614e7379 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
69 /* Hotplug Events */ 69 /* Hotplug Events */
70 np = of_find_node_by_path("/event-sources/hot-plug-events"); 70 np = of_find_node_by_path("/event-sources/hot-plug-events");
71 if (np != NULL) { 71 if (np != NULL) {
72 request_event_sources_irqs(np, ras_hotplug_interrupt, 72 if (dlpar_workqueue_init() == 0)
73 request_event_sources_irqs(np, ras_hotplug_interrupt,
73 "RAS_HOTPLUG"); 74 "RAS_HOTPLUG");
74 of_node_put(np); 75 of_node_put(np);
75 } 76 }
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index a8531e012658..ae4f596273b5 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
459 of_pci_check_probe_only(); 459 of_pci_check_probe_only();
460} 460}
461 461
462static void pseries_setup_rfi_flush(void)
463{
464 struct h_cpu_char_result result;
465 enum l1d_flush_type types;
466 bool enable;
467 long rc;
468
469 /* Enable by default */
470 enable = true;
471
472 rc = plpar_get_cpu_characteristics(&result);
473 if (rc == H_SUCCESS) {
474 types = L1D_FLUSH_NONE;
475
476 if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
477 types |= L1D_FLUSH_MTTRIG;
478 if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
479 types |= L1D_FLUSH_ORI;
480
481 /* Use fallback if nothing set in hcall */
482 if (types == L1D_FLUSH_NONE)
483 types = L1D_FLUSH_FALLBACK;
484
485 if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
486 enable = false;
487 } else {
488 /* Default to fallback in case the hcall is not available */
489 types = L1D_FLUSH_FALLBACK;
490 }
491
492 setup_rfi_flush(types, enable);
493}
494
462static void __init pSeries_setup_arch(void) 495static void __init pSeries_setup_arch(void)
463{ 496{
464 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); 497 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
476 509
477 fwnmi_init(); 510 fwnmi_init();
478 511
512 pseries_setup_rfi_flush();
513
479 /* By default, only probe PCI (can be overridden by rtas_pci) */ 514 /* By default, only probe PCI (can be overridden by rtas_pci) */
480 pci_add_flags(PCI_PROBE_ONLY); 515 pci_add_flags(PCI_PROBE_ONLY);
481 516
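
The pseries variant asks the hypervisor instead of the device tree: H_GET_CPU_CHARACTERISTICS character bits select which flush sequence the hardware offers, and a behaviour bit can switch the flush off. The mapping logic, sketched with illustrative constants standing in for the H_CPU_CHAR_* values:

/* Sketch: map hcall capability bits to a flush-type mask; CHAR_*
 * values below are illustrative, not the real ABI constants. */
#define CHAR_L1D_FLUSH_TRIG2	0x1UL
#define CHAR_L1D_FLUSH_ORI30	0x2UL
enum { FLUSH_NONE = 0, FLUSH_MTTRIG = 1, FLUSH_ORI = 2, FLUSH_FALLBACK = 4 };

static int pick_flush_types(unsigned long character)
{
	int types = FLUSH_NONE;

	if (character & CHAR_L1D_FLUSH_TRIG2)
		types |= FLUSH_MTTRIG;
	if (character & CHAR_L1D_FLUSH_ORI30)
		types |= FLUSH_ORI;
	if (types == FLUSH_NONE)	/* hcall offered nothing usable */
		types = FLUSH_FALLBACK;
	return types;
}
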
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 44cbf4c12ea1..df95102e732c 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
354} 354}
355 355
356static struct lock_class_key fsl_msi_irq_class; 356static struct lock_class_key fsl_msi_irq_class;
357static struct lock_class_key fsl_msi_irq_request_class;
357 358
358static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, 359static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
359 int offset, int irq_index) 360 int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
373 dev_err(&dev->dev, "No memory for MSI cascade data\n"); 374 dev_err(&dev->dev, "No memory for MSI cascade data\n");
374 return -ENOMEM; 375 return -ENOMEM;
375 } 376 }
376 irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); 377 irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
378 &fsl_msi_irq_request_class);
377 cascade_data->index = offset; 379 cascade_data->index = offset;
378 cascade_data->msi_data = msi; 380 cascade_data->msi_data = msi;
379 cascade_data->virq = virt_msir; 381 cascade_data->virq = virt_msir;
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index e69de29bb2d1..47dacf06c679 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -0,0 +1,75 @@
1CONFIG_SMP=y
2CONFIG_PCI=y
3CONFIG_PCIE_XILINX=y
4CONFIG_SYSVIPC=y
5CONFIG_POSIX_MQUEUE=y
6CONFIG_IKCONFIG=y
7CONFIG_IKCONFIG_PROC=y
8CONFIG_CGROUPS=y
9CONFIG_CGROUP_SCHED=y
10CONFIG_CFS_BANDWIDTH=y
11CONFIG_CGROUP_BPF=y
12CONFIG_NAMESPACES=y
13CONFIG_USER_NS=y
14CONFIG_BLK_DEV_INITRD=y
15CONFIG_EXPERT=y
16CONFIG_CHECKPOINT_RESTORE=y
17CONFIG_BPF_SYSCALL=y
18CONFIG_NET=y
19CONFIG_PACKET=y
20CONFIG_UNIX=y
21CONFIG_INET=y
22CONFIG_IP_MULTICAST=y
23CONFIG_IP_ADVANCED_ROUTER=y
24CONFIG_IP_PNP=y
25CONFIG_IP_PNP_DHCP=y
26CONFIG_IP_PNP_BOOTP=y
27CONFIG_IP_PNP_RARP=y
28CONFIG_NETLINK_DIAG=y
29CONFIG_DEVTMPFS=y
30CONFIG_BLK_DEV_LOOP=y
31CONFIG_VIRTIO_BLK=y
32CONFIG_BLK_DEV_SD=y
33CONFIG_BLK_DEV_SR=y
34CONFIG_ATA=y
35CONFIG_SATA_AHCI=y
36CONFIG_SATA_AHCI_PLATFORM=y
37CONFIG_NETDEVICES=y
38CONFIG_VIRTIO_NET=y
39CONFIG_MACB=y
40CONFIG_E1000E=y
41CONFIG_R8169=y
42CONFIG_MICROSEMI_PHY=y
43CONFIG_INPUT_MOUSEDEV=y
44CONFIG_SERIAL_8250=y
45CONFIG_SERIAL_8250_CONSOLE=y
46CONFIG_SERIAL_OF_PLATFORM=y
47# CONFIG_PTP_1588_CLOCK is not set
48CONFIG_DRM=y
49CONFIG_DRM_RADEON=y
50CONFIG_FRAMEBUFFER_CONSOLE=y
51CONFIG_USB=y
52CONFIG_USB_XHCI_HCD=y
53CONFIG_USB_XHCI_PLATFORM=y
54CONFIG_USB_EHCI_HCD=y
55CONFIG_USB_EHCI_HCD_PLATFORM=y
56CONFIG_USB_OHCI_HCD=y
57CONFIG_USB_OHCI_HCD_PLATFORM=y
58CONFIG_USB_STORAGE=y
59CONFIG_USB_UAS=y
60CONFIG_VIRTIO_MMIO=y
61CONFIG_RAS=y
62CONFIG_EXT4_FS=y
63CONFIG_EXT4_FS_POSIX_ACL=y
64CONFIG_AUTOFS4_FS=y
65CONFIG_MSDOS_FS=y
66CONFIG_VFAT_FS=y
67CONFIG_TMPFS=y
68CONFIG_TMPFS_POSIX_ACL=y
69CONFIG_NFS_FS=y
70CONFIG_NFS_V4=y
71CONFIG_NFS_V4_1=y
72CONFIG_NFS_V4_2=y
73CONFIG_ROOT_NFS=y
74# CONFIG_RCU_TRACE is not set
75CONFIG_CRYPTO_USER_API_HASH=y
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 0d64bc9f4f91..3c7a2c97e377 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -17,10 +17,10 @@
17#include <linux/const.h> 17#include <linux/const.h>
18 18
19/* Status register flags */ 19/* Status register flags */
20#define SR_IE _AC(0x00000002, UL) /* Interrupt Enable */ 20#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
21#define SR_PIE _AC(0x00000020, UL) /* Previous IE */ 21#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */
22#define SR_PS _AC(0x00000100, UL) /* Previously Supervisor */ 22#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
23#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */ 23#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */
24 24
25#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */ 25#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */
26#define SR_FS_OFF _AC(0x00000000, UL) 26#define SR_FS_OFF _AC(0x00000000, UL)
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index a82ce599b639..b269451e7e85 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -21,8 +21,6 @@
21 21
22#include <linux/types.h> 22#include <linux/types.h>
23 23
24#ifdef CONFIG_MMU
25
26extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); 24extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
27 25
28/* 26/*
@@ -36,8 +34,6 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
36 34
37extern void iounmap(volatile void __iomem *addr); 35extern void iounmap(volatile void __iomem *addr);
38 36
39#endif /* CONFIG_MMU */
40
41/* Generic IO read/write. These perform native-endian accesses. */ 37/* Generic IO read/write. These perform native-endian accesses. */
42#define __raw_writeb __raw_writeb 38#define __raw_writeb __raw_writeb
43static inline void __raw_writeb(u8 val, volatile void __iomem *addr) 39static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h
index 6fdc860d7f84..07a3c6d5706f 100644
--- a/arch/riscv/include/asm/irqflags.h
+++ b/arch/riscv/include/asm/irqflags.h
@@ -27,25 +27,25 @@ static inline unsigned long arch_local_save_flags(void)
27/* unconditionally enable interrupts */ 27/* unconditionally enable interrupts */
28static inline void arch_local_irq_enable(void) 28static inline void arch_local_irq_enable(void)
29{ 29{
30 csr_set(sstatus, SR_IE); 30 csr_set(sstatus, SR_SIE);
31} 31}
32 32
33/* unconditionally disable interrupts */ 33/* unconditionally disable interrupts */
34static inline void arch_local_irq_disable(void) 34static inline void arch_local_irq_disable(void)
35{ 35{
36 csr_clear(sstatus, SR_IE); 36 csr_clear(sstatus, SR_SIE);
37} 37}
38 38
39/* get status and disable interrupts */ 39/* get status and disable interrupts */
40static inline unsigned long arch_local_irq_save(void) 40static inline unsigned long arch_local_irq_save(void)
41{ 41{
42 return csr_read_clear(sstatus, SR_IE); 42 return csr_read_clear(sstatus, SR_SIE);
43} 43}
44 44
45/* test flags */ 45/* test flags */
46static inline int arch_irqs_disabled_flags(unsigned long flags) 46static inline int arch_irqs_disabled_flags(unsigned long flags)
47{ 47{
48 return !(flags & SR_IE); 48 return !(flags & SR_SIE);
49} 49}
50 50
51/* test hardware interrupt enable bit */ 51/* test hardware interrupt enable bit */
@@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void)
57/* set interrupt enabled status */ 57/* set interrupt enabled status */
58static inline void arch_local_irq_restore(unsigned long flags) 58static inline void arch_local_irq_restore(unsigned long flags)
59{ 59{
60 csr_set(sstatus, flags & SR_IE); 60 csr_set(sstatus, flags & SR_SIE);
61} 61}
62 62
63#endif /* _ASM_RISCV_IRQFLAGS_H */ 63#endif /* _ASM_RISCV_IRQFLAGS_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 2cbd92ed1629..16301966d65b 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -20,8 +20,6 @@
20 20
21#ifndef __ASSEMBLY__ 21#ifndef __ASSEMBLY__
22 22
23#ifdef CONFIG_MMU
24
25/* Page Upper Directory not used in RISC-V */ 23/* Page Upper Directory not used in RISC-V */
26#include <asm-generic/pgtable-nopud.h> 24#include <asm-generic/pgtable-nopud.h>
27#include <asm/page.h> 25#include <asm/page.h>
@@ -413,8 +411,6 @@ static inline void pgtable_cache_init(void)
413 /* No page table caches to initialize */ 411 /* No page table caches to initialize */
414} 412}
415 413
416#endif /* CONFIG_MMU */
417
418#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) 414#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
419#define VMALLOC_END (PAGE_OFFSET - 1) 415#define VMALLOC_END (PAGE_OFFSET - 1)
420#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) 416#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 93b8956e25e4..2c5df945d43c 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -66,7 +66,7 @@ struct pt_regs {
66#define REG_FMT "%08lx" 66#define REG_FMT "%08lx"
67#endif 67#endif
68 68
69#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0) 69#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
70 70
71 71
72/* Helpers for working with the instruction pointer */ 72/* Helpers for working with the instruction pointer */
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 715b0f10af58..7b9c24ebdf52 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -15,8 +15,6 @@
15#ifndef _ASM_RISCV_TLBFLUSH_H 15#ifndef _ASM_RISCV_TLBFLUSH_H
16#define _ASM_RISCV_TLBFLUSH_H 16#define _ASM_RISCV_TLBFLUSH_H
17 17
18#ifdef CONFIG_MMU
19
20#include <linux/mm_types.h> 18#include <linux/mm_types.h>
21 19
22/* 20/*
@@ -64,6 +62,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
64 flush_tlb_all(); 62 flush_tlb_all();
65} 63}
66 64
67#endif /* CONFIG_MMU */
68
69#endif /* _ASM_RISCV_TLBFLUSH_H */ 65#endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 27b90d64814b..14b0b22fb578 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -127,7 +127,6 @@ extern int fixup_exception(struct pt_regs *state);
127 * call. 127 * call.
128 */ 128 */
129 129
130#ifdef CONFIG_MMU
131#define __get_user_asm(insn, x, ptr, err) \ 130#define __get_user_asm(insn, x, ptr, err) \
132do { \ 131do { \
133 uintptr_t __tmp; \ 132 uintptr_t __tmp; \
@@ -153,13 +152,11 @@ do { \
153 __disable_user_access(); \ 152 __disable_user_access(); \
154 (x) = __x; \ 153 (x) = __x; \
155} while (0) 154} while (0)
156#endif /* CONFIG_MMU */
157 155
158#ifdef CONFIG_64BIT 156#ifdef CONFIG_64BIT
159#define __get_user_8(x, ptr, err) \ 157#define __get_user_8(x, ptr, err) \
160 __get_user_asm("ld", x, ptr, err) 158 __get_user_asm("ld", x, ptr, err)
161#else /* !CONFIG_64BIT */ 159#else /* !CONFIG_64BIT */
162#ifdef CONFIG_MMU
163#define __get_user_8(x, ptr, err) \ 160#define __get_user_8(x, ptr, err) \
164do { \ 161do { \
165 u32 __user *__ptr = (u32 __user *)(ptr); \ 162 u32 __user *__ptr = (u32 __user *)(ptr); \
@@ -193,7 +190,6 @@ do { \
193 (x) = (__typeof__(x))((__typeof__((x)-(x)))( \ 190 (x) = (__typeof__(x))((__typeof__((x)-(x)))( \
194 (((u64)__hi << 32) | __lo))); \ 191 (((u64)__hi << 32) | __lo))); \
195} while (0) 192} while (0)
196#endif /* CONFIG_MMU */
197#endif /* CONFIG_64BIT */ 193#endif /* CONFIG_64BIT */
198 194
199 195
@@ -267,8 +263,6 @@ do { \
267 ((x) = 0, -EFAULT); \ 263 ((x) = 0, -EFAULT); \
268}) 264})
269 265
270
271#ifdef CONFIG_MMU
272#define __put_user_asm(insn, x, ptr, err) \ 266#define __put_user_asm(insn, x, ptr, err) \
273do { \ 267do { \
274 uintptr_t __tmp; \ 268 uintptr_t __tmp; \
@@ -292,14 +286,11 @@ do { \
292 : "rJ" (__x), "i" (-EFAULT)); \ 286 : "rJ" (__x), "i" (-EFAULT)); \
293 __disable_user_access(); \ 287 __disable_user_access(); \
294} while (0) 288} while (0)
295#endif /* CONFIG_MMU */
296
297 289
298#ifdef CONFIG_64BIT 290#ifdef CONFIG_64BIT
299#define __put_user_8(x, ptr, err) \ 291#define __put_user_8(x, ptr, err) \
300 __put_user_asm("sd", x, ptr, err) 292 __put_user_asm("sd", x, ptr, err)
301#else /* !CONFIG_64BIT */ 293#else /* !CONFIG_64BIT */
302#ifdef CONFIG_MMU
303#define __put_user_8(x, ptr, err) \ 294#define __put_user_8(x, ptr, err) \
304do { \ 295do { \
305 u32 __user *__ptr = (u32 __user *)(ptr); \ 296 u32 __user *__ptr = (u32 __user *)(ptr); \
@@ -329,7 +320,6 @@ do { \
329 : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \ 320 : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \
330 __disable_user_access(); \ 321 __disable_user_access(); \
331} while (0) 322} while (0)
332#endif /* CONFIG_MMU */
333#endif /* CONFIG_64BIT */ 323#endif /* CONFIG_64BIT */
334 324
335 325
@@ -438,7 +428,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
438 * will set "err" to -EFAULT, while successful accesses return the previous 428 * will set "err" to -EFAULT, while successful accesses return the previous
439 * value. 429 * value.
440 */ 430 */
441#ifdef CONFIG_MMU
442#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \ 431#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \
443({ \ 432({ \
444 __typeof__(ptr) __ptr = (ptr); \ 433 __typeof__(ptr) __ptr = (ptr); \
@@ -508,6 +497,5 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
508 (err) = __err; \ 497 (err) = __err; \
509 __ret; \ 498 __ret; \
510}) 499})
511#endif /* CONFIG_MMU */
512 500
513#endif /* _ASM_RISCV_UACCESS_H */ 501#endif /* _ASM_RISCV_UACCESS_H */
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index 9f250ed007cd..2f704a5c4196 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -14,3 +14,4 @@
14#define __ARCH_HAVE_MMU 14#define __ARCH_HAVE_MMU
15#define __ARCH_WANT_SYS_CLONE 15#define __ARCH_WANT_SYS_CLONE
16#include <uapi/asm/unistd.h> 16#include <uapi/asm/unistd.h>
17#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h
deleted file mode 100644
index a2ccf1894929..000000000000
--- a/arch/riscv/include/asm/vdso-syscalls.h
+++ /dev/null
@@ -1,28 +0,0 @@
1/*
2 * Copyright (C) 2017 SiFive
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
18#define _ASM_RISCV_VDSO_SYSCALLS_H
19
20#ifdef CONFIG_SMP
21
22/* These syscalls are only used by the vDSO and are not in the uapi. */
23#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
24__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
25
26#endif
27
28#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/syscalls.h
new file mode 100644
index 000000000000..818655b0d535
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/syscalls.h
@@ -0,0 +1,26 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2017 SiFive
4 */
5
6#ifndef _ASM__UAPI__SYSCALLS_H
7#define _ASM__UAPI__SYSCALLS_H
8
9/*
10 * Allows the instruction cache to be flushed from userspace. Despite RISC-V
11 * having a direct 'fence.i' instruction available to userspace (which we
12 * can't trap!), that's not actually viable when running on Linux because the
13 * kernel might schedule a process on another hart. There is no way for
14 * userspace to handle this without invoking the kernel (as it doesn't know the
15 * thread->hart mappings), so we've defined a RISC-V specific system call to
16 * flush the instruction cache.
17 *
18 * __NR_riscv_flush_icache is defined to flush the instruction cache over an
19 * address range, with the flush applying to either all threads or just the
20 * caller. We don't currently do anything with the address range; it's just
21 * there for forwards compatibility.
22 */
23#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
24__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
25
26#endif
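
Moving the definition into uapi makes __NR_riscv_flush_icache part of the stable ABI, so JITs and similar userspace can invoke the flush directly rather than only through the vDSO. A hedged userspace sketch (flags = 0 is assumed here to mean "all threads"):

/* Userspace sketch: flush the icache over [start, end). */
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/unistd.h>	/* __NR_riscv_flush_icache, riscv only */

static long flush_icache(void *start, void *end)
{
	/* the kernel currently ignores the range (kept for forward
	 * compatibility); flags = 0 is assumed to apply to all threads */
	return syscall(__NR_riscv_flush_icache, start, end, 0UL);
}
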
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 20ee86f782a9..7404ec222406 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -196,7 +196,7 @@ handle_syscall:
196 addi s2, s2, 0x4 196 addi s2, s2, 0x4
197 REG_S s2, PT_SEPC(sp) 197 REG_S s2, PT_SEPC(sp)
198 /* System calls run with interrupts enabled */ 198 /* System calls run with interrupts enabled */
199 csrs sstatus, SR_IE 199 csrs sstatus, SR_SIE
200 /* Trace syscalls, but only if requested by the user. */ 200 /* Trace syscalls, but only if requested by the user. */
201 REG_L t0, TASK_TI_FLAGS(tp) 201 REG_L t0, TASK_TI_FLAGS(tp)
202 andi t0, t0, _TIF_SYSCALL_TRACE 202 andi t0, t0, _TIF_SYSCALL_TRACE
@@ -224,8 +224,8 @@ ret_from_syscall:
224 224
225ret_from_exception: 225ret_from_exception:
226 REG_L s0, PT_SSTATUS(sp) 226 REG_L s0, PT_SSTATUS(sp)
227 csrc sstatus, SR_IE 227 csrc sstatus, SR_SIE
228 andi s0, s0, SR_PS 228 andi s0, s0, SR_SPP
229 bnez s0, restore_all 229 bnez s0, restore_all
230 230
231resume_userspace: 231resume_userspace:
@@ -255,7 +255,7 @@ work_pending:
255 bnez s1, work_resched 255 bnez s1, work_resched
256work_notifysig: 256work_notifysig:
257 /* Handle pending signals and notify-resume requests */ 257 /* Handle pending signals and notify-resume requests */
258 csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */ 258 csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */
259 move a0, sp /* pt_regs */ 259 move a0, sp /* pt_regs */
260 move a1, s0 /* current_thread_info->flags */ 260 move a1, s0 /* current_thread_info->flags */
261 tail do_notify_resume 261 tail do_notify_resume
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 0d90dcc1fbd3..d74d4adf2d54 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -76,7 +76,7 @@ void show_regs(struct pt_regs *regs)
76void start_thread(struct pt_regs *regs, unsigned long pc, 76void start_thread(struct pt_regs *regs, unsigned long pc,
77 unsigned long sp) 77 unsigned long sp)
78{ 78{
79 regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL; 79 regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
80 regs->sepc = pc; 80 regs->sepc = pc;
81 regs->sp = sp; 81 regs->sp = sp;
82 set_fs(USER_DS); 82 set_fs(USER_DS);
@@ -110,7 +110,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
110 const register unsigned long gp __asm__ ("gp"); 110 const register unsigned long gp __asm__ ("gp");
111 memset(childregs, 0, sizeof(struct pt_regs)); 111 memset(childregs, 0, sizeof(struct pt_regs));
112 childregs->gp = gp; 112 childregs->gp = gp;
113 childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */ 113 childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
114 114
115 p->thread.ra = (unsigned long)ret_from_kernel_thread; 115 p->thread.ra = (unsigned long)ret_from_kernel_thread;
116 p->thread.s[0] = usp; /* fn */ 116 p->thread.s[0] = usp; /* fn */
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index a5bd6401f95e..ade52b903a43 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -23,5 +23,4 @@
23void *sys_call_table[__NR_syscalls] = { 23void *sys_call_table[__NR_syscalls] = {
24 [0 ... __NR_syscalls - 1] = sys_ni_syscall, 24 [0 ... __NR_syscalls - 1] = sys_ni_syscall,
25#include <asm/unistd.h> 25#include <asm/unistd.h>
26#include <asm/vdso-syscalls.h>
27}; 26};
diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S
index b0fbad74e873..023e4d4aef58 100644
--- a/arch/riscv/kernel/vdso/flush_icache.S
+++ b/arch/riscv/kernel/vdso/flush_icache.S
@@ -13,7 +13,6 @@
13 13
14#include <linux/linkage.h> 14#include <linux/linkage.h>
15#include <asm/unistd.h> 15#include <asm/unistd.h>
16#include <asm/vdso-syscalls.h>
17 16
18 .text 17 .text
19/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ 18/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index df2ca3c65048..0713f3c67ab4 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -63,7 +63,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
63 goto vmalloc_fault; 63 goto vmalloc_fault;
64 64
65 /* Enable interrupts if they were enabled in the parent context. */ 65 /* Enable interrupts if they were enabled in the parent context. */
66 if (likely(regs->sstatus & SR_PIE)) 66 if (likely(regs->sstatus & SR_SPIE))
67 local_irq_enable(); 67 local_irq_enable();
68 68
69 /* 69 /*
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ec8b68e97d3c..2c93cbbcd15e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -792,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
792 792
793 if (kvm->arch.use_cmma) { 793 if (kvm->arch.use_cmma) {
794 /* 794 /*
795 * Get the last slot. They should be sorted by base_gfn, so the 795 * Get the first slot. They are reverse sorted by base_gfn, so
796 * last slot is also the one at the end of the address space. 796 * the first slot is also the one at the end of the address
797 * We have verified above that at least one slot is present. 797 * space. We have verified above that at least one slot is
798 * present.
798 */ 799 */
799 ms = slots->memslots + slots->used_slots - 1; 800 ms = slots->memslots;
800 /* round up so we only use full longs */ 801 /* round up so we only use full longs */
801 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); 802 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
802 /* allocate enough bytes to store all the bits */ 803 /* allocate enough bytes to store all the bits */
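
The start-migration fix relies on the changed invariant that s390 memslots are sorted by base_gfn in descending order, so slots->memslots[0] is the slot highest in guest address space and bounds the bitmap size. In outline:

/* Sketch: with slots reverse-sorted by base_gfn, entry 0 bounds the
 * guest address space (types are illustrative). */
struct memslot { unsigned long base_gfn, npages; };

static unsigned long guest_ram_pages(const struct memslot *slots)
{
	/* slots[0] now has the highest base_gfn */
	return slots[0].base_gfn + slots[0].npages;
}
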
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 572496c688cc..0714bfa56da0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1006,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
1006 cbrlo[entries] = gfn << PAGE_SHIFT; 1006 cbrlo[entries] = gfn << PAGE_SHIFT;
1007 } 1007 }
1008 1008
1009 if (orc) { 1009 if (orc && gfn < ms->bitmap_size) {
1010 /* increment only if we are really flipping the bit to 1 */ 1010 /* increment only if we are really flipping the bit to 1 */
1011 if (!test_and_set_bit(gfn, ms->pgste_bitmap)) 1011 if (!test_and_set_bit(gfn, ms->pgste_bitmap))
1012 atomic64_inc(&ms->dirty_pages); 1012 atomic64_inc(&ms->dirty_pages);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index cae5a1e16cbd..c4f8039a35e8 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess);
89 89
90void disable_sacf_uaccess(mm_segment_t old_fs) 90void disable_sacf_uaccess(mm_segment_t old_fs)
91{ 91{
92 current->thread.mm_segment = old_fs;
92 if (old_fs == USER_DS && test_facility(27)) { 93 if (old_fs == USER_DS && test_facility(27)) {
93 __ctl_load(S390_lowcore.user_asce, 1, 1); 94 __ctl_load(S390_lowcore.user_asce, 1, 1);
94 clear_cpu_flag(CIF_ASCE_PRIMARY); 95 clear_cpu_flag(CIF_ASCE_PRIMARY);
95 } 96 }
96 current->thread.mm_segment = old_fs;
97} 97}
98EXPORT_SYMBOL(disable_sacf_uaccess); 98EXPORT_SYMBOL(disable_sacf_uaccess);
99 99
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90..9557d8b516df 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
55#define SEEN_LITERAL 8 /* code uses literals */ 55#define SEEN_LITERAL 8 /* code uses literals */
56#define SEEN_FUNC 16 /* calls C functions */ 56#define SEEN_FUNC 16 /* calls C functions */
57#define SEEN_TAIL_CALL 32 /* code uses tail calls */ 57#define SEEN_TAIL_CALL 32 /* code uses tail calls */
58#define SEEN_SKB_CHANGE 64 /* code changes skb data */ 58#define SEEN_REG_AX 64 /* code uses constant blinding */
59#define SEEN_REG_AX 128 /* code uses constant blinding */
60#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) 59#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
61 60
62/* 61/*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
448 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, 447 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
449 REG_15, 152); 448 REG_15, 152);
450 } 449 }
451 if (jit->seen & SEEN_SKB) 450 if (jit->seen & SEEN_SKB) {
452 emit_load_skb_data_hlen(jit); 451 emit_load_skb_data_hlen(jit);
453 if (jit->seen & SEEN_SKB_CHANGE)
454 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ 452 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
455 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, 453 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
456 STK_OFF_SKBP); 454 STK_OFF_SKBP);
455 }
457} 456}
458 457
459/* 458/*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
983 EMIT2(0x0d00, REG_14, REG_W1); 982 EMIT2(0x0d00, REG_14, REG_W1);
984 /* lgr %b0,%r2: load return value into %b0 */ 983 /* lgr %b0,%r2: load return value into %b0 */
985 EMIT4(0xb9040000, BPF_REG_0, REG_2); 984 EMIT4(0xb9040000, BPF_REG_0, REG_2);
986 if (bpf_helper_changes_pkt_data((void *)func)) { 985 if ((jit->seen & SEEN_SKB) &&
987 jit->seen |= SEEN_SKB_CHANGE; 986 bpf_helper_changes_pkt_data((void *)func)) {
988 /* lg %b1,ST_OFF_SKBP(%r15) */ 987 /* lg %b1,ST_OFF_SKBP(%r15) */
989 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, 988 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
990 REG_15, STK_OFF_SKBP); 989 REG_15, STK_OFF_SKBP);
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f7aa5a77827e..2d15d84c20ed 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -181,6 +181,9 @@ out_unlock:
181static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, 181static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
182 size_t size, int flags) 182 size_t size, int flags)
183{ 183{
184 unsigned long irqflags;
185 int ret;
186
184 /* 187 /*
185 * With zdev->tlb_refresh == 0, rpcit is not required to establish new 188 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
186 * translations when previously invalid translation-table entries are 189 * translations when previously invalid translation-table entries are
@@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
196 return 0; 199 return 0;
197 } 200 }
198 201
199 return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, 202 ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
200 PAGE_ALIGN(size)); 203 PAGE_ALIGN(size));
204 if (ret == -ENOMEM && !s390_iommu_strict) {
205 /* enable the hypervisor to free some resources */
206 if (zpci_refresh_global(zdev))
207 goto out;
208
209 spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
210 bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
211 zdev->lazy_bitmap, zdev->iommu_pages);
212 bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
213 spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
214 ret = 0;
215 }
216out:
217 return ret;
201} 218}
202 219
203static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, 220static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 19bcb3b45a70..f069929e8211 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
89 if (cc) 89 if (cc)
90 zpci_err_insn(cc, status, addr, range); 90 zpci_err_insn(cc, status, addr, range);
91 91
92 if (cc == 1 && (status == 4 || status == 16))
93 return -ENOMEM;
94
92 return (cc) ? -EIO : 0; 95 return (cc) ? -EIO : 0;
93} 96}
94 97
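
These two s390 PCI hunks cooperate: zpci_refresh_trans() now reports the "resources exhausted" condition codes as -ENOMEM, and the lazy-unmap path reacts by requesting a global refresh from the hypervisor and recycling every lazily freed IOVA in one pass. The recovery shape, with illustrative helper names standing in for the zpci_* calls:

#include <linux/errno.h>

extern int refresh_range(unsigned long addr, unsigned long size);
extern int refresh_global(void);
extern void recycle_lazy_iova(void);
static int strict_mode;	/* mirrors s390_iommu_strict */

static int refresh_with_fallback(unsigned long addr, unsigned long size)
{
	int ret = refresh_range(addr, size);

	if (ret == -ENOMEM && !strict_mode) {
		if (refresh_global())	/* ask for a full aperture refresh */
			return ret;
		recycle_lazy_iova();	/* reclaim lazily-freed entries */
		ret = 0;
	}
	return ret;
}
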
diff --git a/arch/sh/boards/mach-se/770x/setup.c b/arch/sh/boards/mach-se/770x/setup.c
index 77c35350ee77..412326d59e6f 100644
--- a/arch/sh/boards/mach-se/770x/setup.c
+++ b/arch/sh/boards/mach-se/770x/setup.c
@@ -9,6 +9,7 @@
9 */ 9 */
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/platform_device.h> 11#include <linux/platform_device.h>
12#include <linux/sh_eth.h>
12#include <mach-se/mach/se.h> 13#include <mach-se/mach/se.h>
13#include <mach-se/mach/mrshpc.h> 14#include <mach-se/mach/mrshpc.h>
14#include <asm/machvec.h> 15#include <asm/machvec.h>
@@ -115,13 +116,23 @@ static struct platform_device heartbeat_device = {
115#if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\ 116#if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\
116 defined(CONFIG_CPU_SUBTYPE_SH7712) 117 defined(CONFIG_CPU_SUBTYPE_SH7712)
117/* SH771X Ethernet driver */ 118/* SH771X Ethernet driver */
119static struct sh_eth_plat_data sh_eth_plat = {
120 .phy = PHY_ID,
121 .phy_interface = PHY_INTERFACE_MODE_MII,
122};
123
118static struct resource sh_eth0_resources[] = { 124static struct resource sh_eth0_resources[] = {
119 [0] = { 125 [0] = {
120 .start = SH_ETH0_BASE, 126 .start = SH_ETH0_BASE,
121 .end = SH_ETH0_BASE + 0x1B8, 127 .end = SH_ETH0_BASE + 0x1B8 - 1,
122 .flags = IORESOURCE_MEM, 128 .flags = IORESOURCE_MEM,
123 }, 129 },
124 [1] = { 130 [1] = {
131 .start = SH_TSU_BASE,
132 .end = SH_TSU_BASE + 0x200 - 1,
133 .flags = IORESOURCE_MEM,
134 },
135 [2] = {
125 .start = SH_ETH0_IRQ, 136 .start = SH_ETH0_IRQ,
126 .end = SH_ETH0_IRQ, 137 .end = SH_ETH0_IRQ,
127 .flags = IORESOURCE_IRQ, 138 .flags = IORESOURCE_IRQ,
@@ -132,7 +143,7 @@ static struct platform_device sh_eth0_device = {
132 .name = "sh771x-ether", 143 .name = "sh771x-ether",
133 .id = 0, 144 .id = 0,
134 .dev = { 145 .dev = {
135 .platform_data = PHY_ID, 146 .platform_data = &sh_eth_plat,
136 }, 147 },
137 .num_resources = ARRAY_SIZE(sh_eth0_resources), 148 .num_resources = ARRAY_SIZE(sh_eth0_resources),
138 .resource = sh_eth0_resources, 149 .resource = sh_eth0_resources,
@@ -141,10 +152,15 @@ static struct platform_device sh_eth0_device = {
141static struct resource sh_eth1_resources[] = { 152static struct resource sh_eth1_resources[] = {
142 [0] = { 153 [0] = {
143 .start = SH_ETH1_BASE, 154 .start = SH_ETH1_BASE,
144 .end = SH_ETH1_BASE + 0x1B8, 155 .end = SH_ETH1_BASE + 0x1B8 - 1,
145 .flags = IORESOURCE_MEM, 156 .flags = IORESOURCE_MEM,
146 }, 157 },
147 [1] = { 158 [1] = {
159 .start = SH_TSU_BASE,
160 .end = SH_TSU_BASE + 0x200 - 1,
161 .flags = IORESOURCE_MEM,
162 },
163 [2] = {
148 .start = SH_ETH1_IRQ, 164 .start = SH_ETH1_IRQ,
149 .end = SH_ETH1_IRQ, 165 .end = SH_ETH1_IRQ,
150 .flags = IORESOURCE_IRQ, 166 .flags = IORESOURCE_IRQ,
@@ -155,7 +171,7 @@ static struct platform_device sh_eth1_device = {
155 .name = "sh771x-ether", 171 .name = "sh771x-ether",
156 .id = 1, 172 .id = 1,
157 .dev = { 173 .dev = {
158 .platform_data = PHY_ID, 174 .platform_data = &sh_eth_plat,
159 }, 175 },
160 .num_resources = ARRAY_SIZE(sh_eth1_resources), 176 .num_resources = ARRAY_SIZE(sh_eth1_resources),
161 .resource = sh_eth1_resources, 177 .resource = sh_eth1_resources,
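
Two fixes ride together in this file: each MEM resource now ends at base + size - 1, since struct resource ranges are inclusive, and a second MEM resource exposes the TSU block the sh_eth driver expects. The inclusive-end convention is exactly what the kernel's DEFINE_RES_MEM() helper encodes:

#include <linux/ioport.h>

/* Sketch: DEFINE_RES_MEM(start, size) yields an inclusive range with
 * .end = start + size - 1, the same arithmetic the hunk fixes by hand.
 * The IRQ number is illustrative. */
static struct resource eth0_res[] = {
	DEFINE_RES_MEM(0xA7000000, 0x1B8),	/* SH_ETH0 registers */
	DEFINE_RES_MEM(0xA7000800, 0x200),	/* shared TSU block */
	DEFINE_RES_IRQ(57),
};
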
diff --git a/arch/sh/include/mach-se/mach/se.h b/arch/sh/include/mach-se/mach/se.h
index 4246ef9b07a3..aa83fe1ff0b1 100644
--- a/arch/sh/include/mach-se/mach/se.h
+++ b/arch/sh/include/mach-se/mach/se.h
@@ -100,6 +100,7 @@
100/* Base address */ 100/* Base address */
101#define SH_ETH0_BASE 0xA7000000 101#define SH_ETH0_BASE 0xA7000000
102#define SH_ETH1_BASE 0xA7000400 102#define SH_ETH1_BASE 0xA7000400
103#define SH_TSU_BASE 0xA7000800
103/* PHY ID */ 104/* PHY ID */
104#if defined(CONFIG_CPU_SUBTYPE_SH7710) 105#if defined(CONFIG_CPU_SUBTYPE_SH7710)
105# define PHY_ID 0x00 106# define PHY_ID 0x00
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
index e5547b22cd18..0ddbbb031822 100644
--- a/arch/sparc/lib/hweight.S
+++ b/arch/sparc/lib/hweight.S
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
44 .previous 44 .previous
45 45
46ENTRY(__arch_hweight64) 46ENTRY(__arch_hweight64)
47 sethi %hi(__sw_hweight16), %g1 47 sethi %hi(__sw_hweight64), %g1
48 jmpl %g1 + %lo(__sw_hweight16), %g0 48 jmpl %g1 + %lo(__sw_hweight64), %g0
49 nop 49 nop
50ENDPROC(__arch_hweight64) 50ENDPROC(__arch_hweight64)
51EXPORT_SYMBOL(__arch_hweight64) 51EXPORT_SYMBOL(__arch_hweight64)
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index be3136f142a9..a8103a84b4ac 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
113 if (!printk_ratelimit()) 113 if (!printk_ratelimit())
114 return; 114 return;
115 115
116 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 116 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 117 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
118 tsk->comm, task_pid_nr(tsk), address, 118 tsk->comm, task_pid_nr(tsk), address,
119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7], 119 (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 815c03d7a765..41363f46797b 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
154 if (!printk_ratelimit()) 154 if (!printk_ratelimit())
155 return; 155 return;
156 156
157 printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", 157 printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 158 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
159 tsk->comm, task_pid_nr(tsk), address, 159 tsk->comm, task_pid_nr(tsk), address,
160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], 160 (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
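
The %p to %px switch here (and in the um hunk below) is deliberate: as of 4.15, plain %p hashes pointer values to avoid leaking kernel addresses, which made these segfault diagnostics useless, while %px prints the raw value and is reserved for places where the address genuinely matters. For example:

/* Kernel-side sketch: %p hashes the value, %px prints it raw. */
#include <linux/printk.h>

static void show_fault_ip(void *ip)
{
	pr_info("ip %p (hashed) vs ip %px (raw, for fault triage)\n", ip, ip);
}
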
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e711f7..ff5f9cb3039a 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1245 u8 *func = ((u8 *)__bpf_call_base) + imm; 1245 u8 *func = ((u8 *)__bpf_call_base) + imm;
1246 1246
1247 ctx->saw_call = true; 1247 ctx->saw_call = true;
1248 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1249 emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
1248 1250
1249 emit_call((u32 *)func, ctx); 1251 emit_call((u32 *)func, ctx);
1250 emit_nop(ctx); 1252 emit_nop(ctx);
1251 1253
1252 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); 1254 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
1253 1255
1254 if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) 1256 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1255 load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); 1257 load_skb_regs(ctx, L7);
1256 break; 1258 break;
1257 } 1259 }
1258 1260
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index b668e351fd6c..fca34b2177e2 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 /*
  * Needed since we do not use the asm-generic/mm_hooks.h:
  */
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	uml_setup_stubs(mm);
+	return 0;
 }
 extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 4e6fcb32620f..428644175956 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
 	if (!printk_ratelimit())
 		return;
 
-	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 	       tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
 	       (void *)UPT_IP(regs), (void *)UPT_SP(regs),
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 59b06b48f27d..5c205a9cb5a6 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -81,9 +81,10 @@ do { \
 	} \
 } while (0)
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
 				 struct mm_struct *mm)
 {
+	return 0;
 }
 
 static inline void arch_unmap(struct mm_struct *mm,
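
Both arch_dup_mmap() hunks change the hook from void to int so an architecture can fail mm duplication instead of being forced to succeed. A sketch of the caller-side pattern this enables (the real call site is in kernel/fork.c's dup_mmap(), outside this excerpt; dup_mmap_arch_step() is a made-up wrapper):

	static int dup_mmap_arch_step(struct mm_struct *oldmm, struct mm_struct *mm)
	{
		int retval;

		retval = arch_dup_mmap(oldmm, mm);
		if (retval)
			return retval;	/* e.g. -ENOMEM; caller unwinds the new mm */

		return 0;
	}
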
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 5f25b39f04d4..c4ac6043ebb0 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -298,7 +298,6 @@ void abort(void)
 	/* if that doesn't kill us, halt */
 	panic("Oops failed to kill thread");
 }
-EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..20da391b5f32 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,7 +55,6 @@ config X86
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_PMEM_API		if X86_64
-	# Causing hangs/crashes, see the commit that added this change for details.
 	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_SET_MEMORY
@@ -89,6 +88,7 @@ config X86
 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IOMAP
@@ -429,6 +429,19 @@ config GOLDFISH
 	def_bool y
 	depends on X86_GOLDFISH
 
+config RETPOLINE
+	bool "Avoid speculative indirect branches in kernel"
+	default y
+	help
+	  Compile kernel with the retpoline compiler options to guard against
+	  kernel-to-user data leaks by avoiding speculative indirect
+	  branches. Requires a compiler with -mindirect-branch=thunk-extern
+	  support for full protection. The kernel may run slower.
+
+	  Without compiler support, at least indirect branches in assembler
+	  code are eliminated. Since this includes the syscall entry path,
+	  it is not entirely pointless.
+
 config INTEL_RDT
 	bool "Intel Resource Director Technology support"
 	default n
@@ -926,7 +939,8 @@ config MAXSMP
 config NR_CPUS
 	int "Maximum number of CPUs" if SMP && !MAXSMP
 	range 2 8 if SMP && X86_32 && !X86_BIGSMP
-	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+	range 2 64 if SMP && X86_32 && X86_BIGSMP
+	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
 	range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
 	default "1" if !SMP
 	default "8192" if MAXSMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3e73bc255e4e..fad55160dcb9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
 #
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+    ifneq ($(RETPOLINE_CFLAGS),)
+        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+    endif
+endif
+
 archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
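
With CONFIG_RETPOLINE and the flags above, the compiler emits every indirect call as a jump to a per-register thunk rather than a raw "call *%reg"; the thunk traps speculative execution in a pause/lfence loop while the architectural path goes through a return. A hedged, user-space x86-64 sketch of the same shape (GCC/Clang, System V ABI; retpoline_thunk_rax is a made-up name mirroring the kernel's __x86_indirect_thunk_rax, and this is a demonstration, not a hardened implementation):

	#include <stdio.h>

	static void hello(void) { puts("called through a retpoline"); }

	asm(".text\n"
	    ".globl retpoline_thunk_rax\n"
	    "retpoline_thunk_rax:\n"
	    "	call 2f\n"		/* push address of 1: as speculation bait */
	    "1:	pause\n"		/* speculative path spins harmlessly here */
	    "	lfence\n"
	    "	jmp 1b\n"
	    "2:	mov %rax, (%rsp)\n"	/* replace return address with real target */
	    "	ret\n");

	extern void retpoline_thunk_rax(void);

	int main(void)
	{
		void (*fp)(void) = hello;

		/* CALL_NOSPEC-style dispatch: target in %rax, call the thunk. */
		asm volatile("call retpoline_thunk_rax"
			     : "+a"(fp)
			     :
			     : "rcx", "rdx", "rsi", "rdi",
			       "r8", "r9", "r10", "r11", "memory", "cc");
		return 0;
	}
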
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index d5364ca2e3f9..b5e5e02f8cde 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -23,6 +23,9 @@
  */
 #undef CONFIG_AMD_MEM_ENCRYPT
 
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index c9e8499fbfe7..6a10d52a4145 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -80,39 +80,43 @@ genfdimage288() {
 	mcopy $FBZIMAGE w:linux
 }
 
-genisoimage() {
+geniso() {
 	tmp_dir=`dirname $FIMAGE`/isoimage
 	rm -rf $tmp_dir
 	mkdir $tmp_dir
-	for i in lib lib64 share end ; do
+	for i in lib lib64 share ; do
 		for j in syslinux ISOLINUX ; do
 			if [ -f /usr/$i/$j/isolinux.bin ] ; then
 				isolinux=/usr/$i/$j/isolinux.bin
-				cp $isolinux $tmp_dir
 			fi
 		done
 		for j in syslinux syslinux/modules/bios ; do
 			if [ -f /usr/$i/$j/ldlinux.c32 ]; then
 				ldlinux=/usr/$i/$j/ldlinux.c32
-				cp $ldlinux $tmp_dir
 			fi
 		done
 		if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
 			break
 		fi
-		if [ $i = end -a -z "$isolinux" ] ; then
-			echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
-			exit 1
-		fi
 	done
+	if [ -z "$isolinux" ] ; then
+		echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
+		exit 1
+	fi
+	if [ -z "$ldlinux" ] ; then
+		echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
+		exit 1
+	fi
+	cp $isolinux $tmp_dir
+	cp $ldlinux $tmp_dir
 	cp $FBZIMAGE $tmp_dir/linux
 	echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
 	if [ -f "$FDINITRD" ] ; then
 		cp "$FDINITRD" $tmp_dir/initrd.img
 	fi
-	mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \
-		-c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \
-		$tmp_dir
+	genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
+		-b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
+		-boot-info-table $tmp_dir
 	isohybrid $FIMAGE 2>/dev/null || true
 	rm -rf $tmp_dir
 }
@@ -121,6 +125,6 @@ case $1 in
 	bzdisk) genbzdisk;;
 	fdimage144) genfdimage144;;
 	fdimage288) genfdimage288;;
-	isoimage) genisoimage;;
+	isoimage) geniso;;
 	*) echo 'Unknown image format'; exit 1;
 esac
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 16627fec80b2..3d09e3aca18d 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,7 @@
 #include <linux/linkage.h>
 #include <asm/inst.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
 	pxor INC, STATE4
 	movdqu IV, 0x30(OUTP)
 
-	call *%r11
+	CALL_NOSPEC %r11
 
 	movdqu 0x00(OUTP), INC
 	pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
 	_aesni_gf128mul_x_ble()
 	movups IV, (IVP)
 
-	call *%r11
+	CALL_NOSPEC %r11
 
 	movdqu 0x40(OUTP), INC
 	pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index f7c495e2863c..a14af6eb09cb 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,6 +17,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
 	vpxor 14 * 16(%rax), %xmm15, %xmm14;
 	vpxor 15 * 16(%rax), %xmm15, %xmm15;
 
-	call *%r9;
+	CALL_NOSPEC %r9;
 
 	addq $(16 * 16), %rsp;
 
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index eee5b3982cfd..b66bbfa62f50 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
 	vpxor 14 * 32(%rax), %ymm15, %ymm14;
 	vpxor 15 * 32(%rax), %ymm15, %ymm15;
 
-	call *%r9;
+	CALL_NOSPEC %r9;
 
 	addq $(16 * 32), %rsp;
 
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 7a7de27c6f41..d9b734d0c8cc 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
 
 #include <asm/inst.h>
 #include <linux/linkage.h>
+#include <asm/nospec-branch.h>
 
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
@@ -172,7 +173,7 @@ continue_block:
 	movzxw  (bufp, %rax, 2), len
 	lea	crc_array(%rip), bufp
 	lea	(bufp, len, 1), bufp
-	jmp	*bufp
+	JMP_NOSPEC bufp
 
 	################################################################
 	## 2a) PROCESS FULL BLOCKS:
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3fd8bc560fae..3f48f695d5e6 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
 #include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
 /*
 
@@ -187,6 +192,148 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_USER_PGTABLE_BIT		PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK		(1 << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT		X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK		(1 << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK	(PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
+
+.macro SET_NOFLUSH_BIT	reg:req
+	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+	/* Clear PCID and the PAGE_TABLE_ISOLATION bit, point CR3 at kernel pagetables: */
+	andq	$(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+	ADJUST_KERNEL_CR3 \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask	\
+	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * Test if the ASID needs a flush.
+	 */
+	movq	\scratch_reg, \scratch_reg2
+	andq	$(0x7FF), \scratch_reg		/* mask ASID */
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	/* Flush needed, clear the bit */
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	movq	\scratch_reg2, \scratch_reg
+	jmp	.Lwrcr3_pcid_\@
+
+.Lnoflush_\@:
+	movq	\scratch_reg2, \scratch_reg
+	SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_pcid_\@:
+	/* Flip the ASID to the user version */
+	orq	$(PTI_USER_PCID_MASK), \scratch_reg
+
+.Lwrcr3_\@:
+	/* Flip the PGD to the user version */
+	orq	$(PTI_USER_PGTABLE_MASK), \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK	scratch_reg:req
+	pushq	%rax
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+	popq	%rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+	movq	%cr3, \scratch_reg
+	movq	\scratch_reg, \save_reg
+	/*
+	 * Test the user pagetable bit. If set, then the user page tables
+	 * are active. If clear, CR3 already has the kernel page tables
+	 * active.
+	 */
+	bt	$PTI_USER_PGTABLE_BIT, \scratch_reg
+	jnc	.Ldone_\@
+
+	ADJUST_KERNEL_CR3 \scratch_reg
+	movq	\scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * KERNEL pages can always resume with NOFLUSH as we do
+	 * explicit flushes.
+	 */
+	bt	$PTI_USER_PGTABLE_BIT, \save_reg
+	jnc	.Lnoflush_\@
+
+	/*
+	 * Check if there's a pending flush for the user ASID we're
+	 * about to set.
+	 */
+	movq	\save_reg, \scratch_reg
+	andq	$(0x7FF), \scratch_reg
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jmp	.Lwrcr3_\@
+
+.Lnoflush_\@:
+	SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+	/*
+	 * The CR3 write could be avoided when not changing its value,
+	 * but would require a CR3 read *and* a scratch register.
+	 */
+	movq	\save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
 #endif /* CONFIG_X86_64 */
 
 /*
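
The macros above encode the whole PTI world switch in two bit flips: bit 12 (PAGE_SHIFT) selects which half of the 8k PGD CR3 points at, and bit 11 selects the user ASID when PCID is available. A user-space model of just that arithmetic (the real macros additionally manage the NOFLUSH bit and the per-cpu deferred-flush mask, which this sketch omits):

	#include <stdint.h>
	#include <stdio.h>

	#define PTI_USER_PGTABLE_BIT	12	/* PAGE_SHIFT */
	#define PTI_USER_PCID_BIT	11	/* X86_CR3_PTI_PCID_USER_BIT */
	#define PTI_SWITCH_MASK		((1ULL << PTI_USER_PGTABLE_BIT) | \
					 (1ULL << PTI_USER_PCID_BIT))

	static uint64_t to_kernel_cr3(uint64_t cr3)	/* like ADJUST_KERNEL_CR3 */
	{
		return cr3 & ~PTI_SWITCH_MASK;
	}

	static uint64_t to_user_cr3(uint64_t cr3)	/* like SWITCH_TO_USER_CR3 */
	{
		return cr3 | PTI_SWITCH_MASK;
	}

	int main(void)
	{
		uint64_t kernel_cr3 = 0x100000ULL;	/* made-up 8k-aligned PGD */
		uint64_t user_cr3 = to_user_cr3(kernel_cr3);

		printf("kernel %#llx -> user %#llx -> kernel %#llx\n",
		       (unsigned long long)kernel_cr3,
		       (unsigned long long)user_cr3,
		       (unsigned long long)to_kernel_cr3(user_cr3));
		return 0;
	}
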
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6..a1f28a54f23a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,6 +44,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 	.section .entry.text, "ax"
 
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
 
 	/* kernel thread */
 1:	movl	%edi, %eax
-	call	*%ebx
+	CALL_NOSPEC %ebx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
@@ -919,7 +920,7 @@ common_exception:
 	movl	%ecx, %es
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
-	call	*%edi
+	CALL_NOSPEC %edi
 	jmp	ret_from_exception
 END(common_exception)
 
@@ -941,9 +942,10 @@ ENTRY(debug)
 	movl	%esp, %eax			# pt_regs pointer
 
 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
+	cmpl	$SIZEOF_entry_stack, %ecx
 	jb	.Ldebug_from_sysenter_stack
 
 	TRACE_IRQS_OFF
@@ -984,9 +986,10 @@ ENTRY(nmi)
 	movl	%esp, %eax			# pt_regs pointer
 
 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
+	cmpl	$SIZEOF_entry_stack, %ecx
 	jb	.Lnmi_from_sysenter_stack
 
 	/* Not on SYSENTER stack. */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..4f8e1d35a97c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -23,7 +23,6 @@
 #include <asm/segment.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
-#include "calling.h"
 #include <asm/asm-offsets.h>
 #include <asm/msr.h>
 #include <asm/unistd.h>
@@ -38,8 +37,11 @@
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 #include <linux/err.h>
 
+#include "calling.h"
+
 .code64
 .section .entry.text, "ax"
 
@@ -140,6 +142,67 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+	.pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH	CPU_ENTRY_AREA_entry_stack + \
+			SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+	UNWIND_HINT_EMPTY
+	swapgs
+
+	/* Stash the user RSP. */
+	movq	%rsp, RSP_SCRATCH
+
+	/* Note: using %rsp as a scratch reg. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+	/* Load the top of the task stack into RSP */
+	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+	/* Start building the simulated IRET frame. */
+	pushq	$__USER_DS			/* pt_regs->ss */
+	pushq	RSP_SCRATCH			/* pt_regs->sp */
+	pushq	%r11				/* pt_regs->flags */
+	pushq	$__USER_CS			/* pt_regs->cs */
+	pushq	%rcx				/* pt_regs->ip */
+
+	/*
+	 * x86 lacks a near absolute jump, and we can't jump to the real
+	 * entry text with a relative jump.  We could push the target
+	 * address and then use retq, but this destroys the pipeline on
+	 * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+	 * spill RDI and restore it in a second-stage trampoline.
+	 */
+	pushq	%rdi
+	movq	$entry_SYSCALL_64_stage2, %rdi
+	JMP_NOSPEC %rdi
+END(entry_SYSCALL_64_trampoline)
+
+	.popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+	UNWIND_HINT_EMPTY
+	popq	%rdi
+	jmp	entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
 	UNWIND_HINT_EMPTY
 	/*
@@ -149,6 +212,10 @@ ENTRY(entry_SYSCALL_64)
 	 */
 
 	swapgs
+	/*
+	 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+	 * is not required to switch CR3.
+	 */
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
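
The CPU_ENTRY_AREA macro above is the assembly analogue of container_of(): from the run-time (remapped) address of one member, subtract that member's offset to recover the base of the whole structure. A user-space C model of the trick (struct layout and names are made up):

	#include <stddef.h>
	#include <stdio.h>

	struct entry_area_model {
		char gdt[4096];
		char entry_trampoline[4096];	/* stands in for _entry_trampoline */
		char entry_stack[4096];
	};

	int main(void)
	{
		struct entry_area_model area;
		char *trampoline = area.entry_trampoline;	/* like _entry_trampoline(%rip) */

		/* base = member address - offsetof(member), as in the macro. */
		struct entry_area_model *base = (struct entry_area_model *)
			(trampoline - offsetof(struct entry_area_model, entry_trampoline));

		printf("recovered %p, expected %p\n", (void *)base, (void *)&area);
		return 0;
	}
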
@@ -204,7 +271,12 @@ entry_SYSCALL_64_fastpath:
 	 * It might end up jumping to the slow path.  If it jumps, RAX
 	 * and all argument registers are clobbered.
 	 */
+#ifdef CONFIG_RETPOLINE
+	movq	sys_call_table(, %rax, 8), %rax
+	call	__x86_indirect_thunk_rax
+#else
 	call	*sys_call_table(, %rax, 8)
+#endif
 .Lentry_SYSCALL_64_after_fastpath_call:
 
 	movq	%rax, RAX(%rsp)
@@ -330,8 +402,25 @@ syscall_return_via_sysret:
 	popq	%rsi	/* skip rcx */
 	popq	%rdx
 	popq	%rsi
+
+	/*
+	 * Now all regs are restored except RSP and RDI.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	pushq	RSP-RDI(%rdi)	/* RSP */
+	pushq	(%rdi)		/* RDI */
+
+	/*
+	 * We are on the trampoline stack.  All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
 	popq	%rdi
-	movq	RSP-ORIG_RAX(%rsp), %rsp
+	popq	%rsp
 	USERGS_SYSRET64
 END(entry_SYSCALL_64)
 
@@ -359,7 +448,7 @@ ENTRY(stub_ptregs_64)
 	jmp	entry_SYSCALL64_slow_path
 
 1:
-	jmp	*%rax				/* Called from C */
+	JMP_NOSPEC %rax				/* Called from C */
 END(stub_ptregs_64)
 
 .macro ptregs_stub func
@@ -438,7 +527,7 @@ ENTRY(ret_from_fork)
 1:
 	/* kernel thread */
 	movq	%r12, %rdi
-	call	*%rbx
+	CALL_NOSPEC %rbx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
@@ -466,12 +555,13 @@ END(irq_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-	pushfq
-	testl	$X86_EFLAGS_IF, (%rsp)
+	pushq	%rax
+	SAVE_FLAGS(CLBR_RAX)
+	testl	$X86_EFLAGS_IF, %eax
 	jz	.Lokay_\@
 	ud2
 .Lokay_\@:
-	addq	$8, %rsp
+	popq	%rax
 #endif
 .endm
 
@@ -563,6 +653,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
 	cld
+
+	testb	$3, CS-ORIG_RAX(%rsp)
+	jz	1f
+	SWAPGS
+	call	switch_to_thread_stack
+1:
+
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
@@ -572,12 +669,8 @@ END(irq_entries_start)
 	jz	1f
 
 	/*
-	 * IRQ from user mode.  Switch to kernel gsbase and inform context
-	 * tracking that we're in kernel mode.
-	 */
-	SWAPGS
-
-	/*
+	 * IRQ from user mode.
+	 *
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
 	 * (which can take locks).  Since TRACE_IRQS_OFF is idempotent,
@@ -630,10 +723,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	ud2
 1:
 #endif
-	SWAPGS
 	POP_EXTRA_REGS
-	POP_C_REGS
-	addq	$8, %rsp	/* skip regs->orig_ax */
+	popq	%r11
+	popq	%r10
+	popq	%r9
+	popq	%r8
+	popq	%rax
+	popq	%rcx
+	popq	%rdx
+	popq	%rsi
+
+	/*
+	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	/* Copy the IRET frame to the trampoline stack. */
+	pushq	6*8(%rdi)	/* SS */
+	pushq	5*8(%rdi)	/* RSP */
+	pushq	4*8(%rdi)	/* EFLAGS */
+	pushq	3*8(%rdi)	/* CS */
+	pushq	2*8(%rdi)	/* RIP */
+
+	/* Push user RDI on the trampoline stack. */
+	pushq	(%rdi)
+
+	/*
+	 * We are on the trampoline stack.  All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+	/* Restore RDI. */
+	popq	%rdi
+	SWAPGS
 	INTERRUPT_RETURN
 
 
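
The exit sequence above is easier to follow as data movement: the five-word IRET frame is copied from the process stack to the per-cpu trampoline stack, so that once SWITCH_TO_USER_CR3_STACK runs (after which the process stack is no longer mapped), everything iretq touches is still reachable. A C model with made-up values:

	#include <stdint.h>
	#include <stdio.h>

	struct iret_frame {
		uint64_t ip, cs, flags, sp, ss;	/* lowest address first */
	};

	int main(void)
	{
		struct iret_frame process = {
			.ip = 0x401000, .cs = 0x33, .flags = 0x202,
			.sp = 0x7ffc0000, .ss = 0x2b,
		};
		struct iret_frame trampoline;

		/* The pushq 6*8(%rdi) .. pushq 2*8(%rdi) sequence, in effect: */
		trampoline = process;

		printf("frame relocated; iret will resume at ip=%#llx\n",
		       (unsigned long long)trampoline.ip);
		return 0;
	}
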
@@ -713,7 +839,9 @@ native_irq_return_ldt:
 	 */
 
 	pushq	%rdi				/* Stash user RDI */
-	SWAPGS
+	SWAPGS					/* to kernel GS */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
+
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
@@ -729,7 +857,6 @@ native_irq_return_ldt:
 	/* Now RAX == RSP. */
 
 	andl	$0xffff0000, %eax	/* RAX = (RSP & 0xffff0000) */
-	popq	%rdi			/* Restore user RDI */
 
 	/*
 	 * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -740,7 +867,11 @@ native_irq_return_ldt:
 	 * still points to an RO alias of the ESPFIX stack.
 	 */
 	orq	PER_CPU_VAR(espfix_stack), %rax
-	SWAPGS
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+	SWAPGS					/* to user GS */
+	popq	%rdi				/* Restore user RDI */
+
 	movq	%rax, %rsp
 	UNWIND_HINT_IRET_REGS offset=8
 
@@ -829,7 +960,35 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+	UNWIND_HINT_FUNC
+
+	pushq	%rdi
+	/* Need to switch before accessing the thread stack. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+	pushq	7*8(%rdi)		/* regs->ss */
+	pushq	6*8(%rdi)		/* regs->rsp */
+	pushq	5*8(%rdi)		/* regs->eflags */
+	pushq	4*8(%rdi)		/* regs->cs */
+	pushq	3*8(%rdi)		/* regs->ip */
+	pushq	2*8(%rdi)		/* regs->orig_ax */
+	pushq	8(%rdi)			/* return address */
+	UNWIND_HINT_FUNC
+
+	movq	(%rdi), %rdi
+	ret
+END(switch_to_thread_stack)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -848,11 +1007,12 @@ ENTRY(\sym)
 
 	ALLOC_PT_GPREGS_ON_STACK
 
-	.if \paranoid
-	.if \paranoid == 1
+	.if \paranoid < 2
 	testb	$3, CS(%rsp)		/* If coming from userspace, switch stacks */
-	jnz	1f
+	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
+
+	.if \paranoid
 	call	paranoid_entry
 	.else
 	call	error_entry
@@ -894,20 +1054,15 @@ ENTRY(\sym)
 	jmp	error_exit
 	.endif
 
-	.if \paranoid == 1
+	.if \paranoid < 2
 	/*
-	 * Paranoid entry from userspace.  Switch stacks and treat it
+	 * Entry from userspace.  Switch stacks and treat it
 	 * as a normal entry.  This means that paranoid handlers
 	 * run in real process context if user_mode(regs).
 	 */
-1:
+.Lfrom_usermode_switch_stack_\@:
 	call	error_entry
 
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-	call	sync_regs
-	movq	%rax, %rsp			/* switch stack */
-
 	movq	%rsp, %rdi			/* pt_regs pointer */
 
 	.if \has_error_code
@@ -1119,7 +1274,11 @@ ENTRY(paranoid_entry)
 	js	1f				/* negative -> in kernel */
 	SWAPGS
 	xorl	%ebx, %ebx
-1:	ret
+
+1:
+	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+	ret
 END(paranoid_entry)
 
 /*
@@ -1141,6 +1300,7 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
@@ -1168,8 +1328,18 @@ ENTRY(error_entry)
 	 * from user mode due to an IRET fault.
 	 */
 	SWAPGS
+	/* We have user CR3.  Change to kernel CR3. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 .Lerror_entry_from_usermode_after_swapgs:
+	/* Put us onto the real thread stack. */
+	popq	%r12				/* save return addr in %r12 */
+	movq	%rsp, %rdi			/* arg0 = pt_regs pointer */
+	call	sync_regs
+	movq	%rax, %rsp			/* switch stack */
+	ENCODE_FRAME_POINTER
+	pushq	%r12
+
 	/*
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1376,7 @@ ENTRY(error_entry)
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1215,10 +1386,11 @@ ENTRY(error_entry)
 
 .Lerror_bad_iret:
 	/*
-	 * We came from an IRET to user mode, so we have user gsbase.
-	 * Switch to kernel gsbase:
+	 * We came from an IRET to user mode, so we have user
+	 * gsbase and CR3.  Switch to kernel gsbase and CR3:
 	 */
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1422,10 @@ END(error_exit)
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * so we can use real assembly here.
+ *
+ * Registers:
+ *	%r14: Used to save/restore the CR3 of the interrupted context
+ *	      when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
 ENTRY(nmi)
 	UNWIND_HINT_IRET_REGS
@@ -1313,6 +1489,7 @@ ENTRY(nmi)
 
 	swapgs
 	cld
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1742,8 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi
 
+	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	nmi_restore
 nmi_swapgs:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 568e130d932c..98d5358e4041 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,11 @@
  */
 ENTRY(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	SWAPGS
+
+	/* We are about to clobber %rsp anyway, clobbering here is OK */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/*
@@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
 	swapgs
 
-	/* Stash user ESP and switch to the kernel stack. */
+	/* Stash user ESP */
 	movl	%esp, %r8d
+
+	/* Use %rsp as scratch reg. User ESP is stashed in r8 */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+	/* Switch to the kernel stack */
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/* Construct struct pt_regs on stack */
@@ -256,10 +265,22 @@ sysret32_from_system_call:
 	 * when the system call started, which is already known to user
 	 * code.  We zero R8-R10 to avoid info leaks.
 	 */
+	movq	RSP-ORIG_RAX(%rsp), %rsp
+
+	/*
+	 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+	 * on the process stack which is not mapped to userspace and
+	 * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
+	 * switch until after the last reference to the process stack.
+	 *
+	 * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+	 */
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
 	xorq	%r8, %r8
 	xorq	%r9, %r9
 	xorq	%r10, %r10
-	movq	RSP-ORIG_RAX(%rsp), %rsp
 	swapgs
 	sysretl
 END(entry_SYSCALL_compat)
@@ -306,8 +327,11 @@ ENTRY(entry_INT80_compat)
 	 */
 	movl	%eax, %eax
 
-	/* Construct struct pt_regs on stack (iret frame is already on stack) */
 	pushq	%rax			/* pt_regs->orig_ax */
+
+	/* switch to thread stack expects orig_ax to be pushed */
+	call	switch_to_thread_stack
+
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index f279ba2643dc..577fa8adb785 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 	WARN_ON_ONCE(address != regs->ip);
 
+	/* This should be unreachable in NATIVE mode. */
+	if (WARN_ON(vsyscall_mode == NATIVE))
+		return false;
+
 	if (vsyscall_mode == NONE) {
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
 	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+	set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+	p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+	p4d->p4d |= _PAGE_USER;
+#endif
+	pud = pud_offset(p4d, VSYSCALL_ADDR);
+	set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+	pmd = pmd_offset(pud, VSYSCALL_ADDR);
+	set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-	if (vsyscall_mode != NONE)
+	if (vsyscall_mode != NONE) {
 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
 			     vsyscall_mode == NATIVE
 			     ? PAGE_KERNEL_VSYSCALL
 			     : PAGE_KERNEL_VVAR);
+		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+	}
 
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 141e07b06216..24ffa1e88cf9 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -582,6 +582,24 @@ static __init int bts_init(void)
 	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
 		return -ENODEV;
 
+	if (boot_cpu_has(X86_FEATURE_PTI)) {
+		/*
+		 * BTS hardware writes through a virtual memory map; we must
+		 * either use the kernel physical map, or the user mapping of
+		 * the AUX buffer.
+		 *
+		 * However, since this driver supports per-CPU and per-task inherit
+		 * we cannot use the user mapping since it will not be available
+		 * if we're not running the owning process.
+		 *
+		 * With PTI we can't use the kernel map either, because it's not
+		 * there when we run userspace.
+		 *
+		 * For now, disable this driver when using PTI.
+		 */
+		return -ENODEV;
+	}
+
 	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
 				  PERF_PMU_CAP_EXCLUSIVE;
 	bts_pmu.task_ctx_nr	= perf_sw_context;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 09c26a4f139c..731153a4681e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
 
 __init int intel_pmu_init(void)
 {
+	struct attribute **extra_attr = NULL;
+	struct attribute **to_free = NULL;
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
 	union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
 	unsigned int unused;
 	struct extra_reg *er;
 	int version, i;
-	struct attribute **extra_attr = NULL;
 	char *name;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 			hsw_format_attr : nhm_format_attr;
 		extra_attr = merge_attr(extra_attr, skl_format_attr);
+		to_free = extra_attr;
 		x86_pmu.cpu_events = get_hsw_events_attrs();
 		intel_pmu_pebs_data_source_skl(
 			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
 		pr_cont("full-width counters, ");
 	}
 
+	kfree(to_free);
 	return 0;
 }
 
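
The to_free/kfree() pair fixes a leak: merge_attr() returns freshly allocated memory, and the Skylake branch calls it with a result that was never freed. A stand-alone model of the pattern (merge() is a made-up analogue of merge_attr()):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	static char *merge(const char *a, const char *b)
	{
		char *r = malloc(strlen(a) + strlen(b) + 1);

		if (r) {
			strcpy(r, a);
			strcat(r, b);
		}
		return r;
	}

	int main(void)
	{
		char *extra = merge("hsw_format;", "skl_format;");
		char *to_free = extra;	/* remember the allocation */

		if (!extra)
			return 1;
		printf("merged: %s\n", extra);
		free(to_free);		/* the fix: release it on the way out */
		return 0;
	}
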
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3674a4b6f8bd..8156e47da7ba 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -3,16 +3,19 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
+#include <asm/tlbflush.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
 
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
-#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*
@@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	unsigned long start = (unsigned long)cea;
+	phys_addr_t pa;
+	size_t msz = 0;
+
+	pa = virt_to_phys(addr);
+
+	preempt_disable();
+	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, pa, prot);
+
+	/*
+	 * This is a cross-CPU update of the cpu_entry_area, we must shoot down
+	 * all TLB entries for it.
+	 */
+	flush_tlb_kernel_range(start, start + size);
+	preempt_enable();
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+	unsigned long start = (unsigned long)cea;
+	size_t msz = 0;
+
+	preempt_disable();
+	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+
+	flush_tlb_kernel_range(start, start + size);
+	preempt_enable();
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
 	int node = cpu_to_node(cpu);
-	int max;
-	void *buffer, *ibuffer;
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int max, node = cpu_to_node(cpu);
+	void *buffer, *ibuffer, *cea;
 
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
-
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds->pebs_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
-
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 	return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.pebs)
 		return;
@@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
-	void *buffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *buffer, *cea;
+	int max;
 
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 	}
-
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds->bts_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
-
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 	return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds_clear_cea(cea, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
-
 	return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f7aaadf9331f..8e4ea143ed96 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -14,6 +14,8 @@
 
 #include <linux/perf_event.h>
 
+#include <asm/intel_ds.h>
+
 /* To enable MSR tracing please use the generic trace points. */
 
 /*
@@ -77,8 +79,6 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		8
 #define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
 
 /*
@@ -95,23 +95,6 @@ struct amd_nb {
 	  PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
 	  PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 #define PEBS_REGS \
 	(PERF_REG_X86_AX | \
 	 PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
216 * Intel DebugStore bits 199 * Intel DebugStore bits
217 */ 200 */
218 struct debug_store *ds; 201 struct debug_store *ds;
202 void *ds_pebs_vaddr;
203 void *ds_bts_vaddr;
219 u64 pebs_enabled; 204 u64 pebs_enabled;
220 int n_pebs; 205 int n_pebs;
221 int n_large_pebs; 206 int n_large_pebs;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index dbfd0854651f..cf5961ca8677 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
140 ".popsection\n" \ 140 ".popsection\n" \
141 ".pushsection .altinstr_replacement, \"ax\"\n" \ 141 ".pushsection .altinstr_replacement, \"ax\"\n" \
142 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ 142 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
143 ".popsection" 143 ".popsection\n"
144 144
145#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ 145#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
146 OLDINSTR_2(oldinstr, 1, 2) \ 146 OLDINSTR_2(oldinstr, 1, 2) \
@@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
151 ".pushsection .altinstr_replacement, \"ax\"\n" \ 151 ".pushsection .altinstr_replacement, \"ax\"\n" \
152 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ 152 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
153 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ 153 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
154 ".popsection" 154 ".popsection\n"
155 155
156/* 156/*
157 * Alternative instructions for different CPU types or capabilities. 157 * Alternative instructions for different CPU types or capabilities.
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index ff700d81e91e..0927cdc4f946 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -11,7 +11,32 @@
11#include <asm/pgtable.h> 11#include <asm/pgtable.h>
12#include <asm/special_insns.h> 12#include <asm/special_insns.h>
13#include <asm/preempt.h> 13#include <asm/preempt.h>
14#include <asm/asm.h>
14 15
15#ifndef CONFIG_X86_CMPXCHG64 16#ifndef CONFIG_X86_CMPXCHG64
16extern void cmpxchg8b_emu(void); 17extern void cmpxchg8b_emu(void);
17#endif 18#endif
19
20#ifdef CONFIG_RETPOLINE
21#ifdef CONFIG_X86_32
22#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
23#else
24#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
25INDIRECT_THUNK(8)
26INDIRECT_THUNK(9)
27INDIRECT_THUNK(10)
28INDIRECT_THUNK(11)
29INDIRECT_THUNK(12)
30INDIRECT_THUNK(13)
31INDIRECT_THUNK(14)
32INDIRECT_THUNK(15)
33#endif
34INDIRECT_THUNK(ax)
35INDIRECT_THUNK(bx)
36INDIRECT_THUNK(cx)
37INDIRECT_THUNK(dx)
38INDIRECT_THUNK(si)
39INDIRECT_THUNK(di)
40INDIRECT_THUNK(bp)
41INDIRECT_THUNK(sp)
42#endif /* CONFIG_RETPOLINE */
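For readers following the token pasting: on 64-bit, each INDIRECT_THUNK(reg) line expands to a plain declaration of the corresponding assembly thunk, e.g.:

/* Expansion of INDIRECT_THUNK(ax) with the 64-bit 'r' prefix: */
extern asmlinkage void __x86_indirect_thunk_rax(void);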
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51df..386a6900e206 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -136,6 +136,7 @@
136#endif 136#endif
137 137
138#ifndef __ASSEMBLY__ 138#ifndef __ASSEMBLY__
139#ifndef __BPF__
139/* 140/*
140 * This output constraint should be used for any inline asm which has a "call" 141 * This output constraint should be used for any inline asm which has a "call"
141 * instruction. Otherwise the asm may be inserted before the frame pointer 142 * instruction. Otherwise the asm may be inserted before the frame pointer
@@ -145,5 +146,6 @@
145register unsigned long current_stack_pointer asm(_ASM_SP); 146register unsigned long current_stack_pointer asm(_ASM_SP);
146#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) 147#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
147#endif 148#endif
149#endif
148 150
149#endif /* _ASM_X86_ASM_H */ 151#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..4a7884b8dca5
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,81 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#ifndef _ASM_X86_CPU_ENTRY_AREA_H
4#define _ASM_X86_CPU_ENTRY_AREA_H
5
6#include <linux/percpu-defs.h>
7#include <asm/processor.h>
8#include <asm/intel_ds.h>
9
10/*
11 * cpu_entry_area is a percpu region that contains things needed by the CPU
12 * and early entry/exit code. Real types aren't used for all fields here
13 * to avoid circular header dependencies.
14 *
15 * Every field is a virtual alias of some other allocated backing store.
16 * There is no direct allocation of a struct cpu_entry_area.
17 */
18struct cpu_entry_area {
19 char gdt[PAGE_SIZE];
20
21 /*
22 * The GDT is just below entry_stack and thus serves (on x86_64) as
23 * a read-only guard page.
24 */
25 struct entry_stack_page entry_stack_page;
26
27 /*
28 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
29 * we need task switches to work, and task switches write to the TSS.
30 */
31 struct tss_struct tss;
32
33 char entry_trampoline[PAGE_SIZE];
34
35#ifdef CONFIG_X86_64
36 /*
37 * Exception stacks used for IST entries.
38 *
39 * In the future, this should have a separate slot for each stack
40 * with guard pages between them.
41 */
42 char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
43#endif
44#ifdef CONFIG_CPU_SUP_INTEL
45 /*
46 * Per CPU debug store for Intel performance monitoring. Wastes a
47 * full page at the moment.
48 */
49 struct debug_store cpu_debug_store;
50 /*
51 * The actual PEBS/BTS buffers must be mapped to user space.
52 * Reserve enough fixmap PTEs.
53 */
54 struct debug_store_buffers cpu_debug_buffers;
55#endif
56};
57
58#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
59#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
60
61DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
62
63extern void setup_cpu_entry_areas(void);
64extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
65
66#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
67#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
68
69#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
70
71#define CPU_ENTRY_AREA_MAP_SIZE \
72 (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
73
74extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
75
76static inline struct entry_stack *cpu_entry_stack(int cpu)
77{
78 return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
79}
80
81#endif
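get_cpu_entry_area() is only declared here. A minimal implementation consistent with the constants above (the real definition lives in the mm code added by this series) would be:

/* Each CPU's area sits at a fixed, compile-time-computable offset. */
struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;

	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
	return (struct cpu_entry_area *)va;
}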
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bf6a76202a77..ea9a7dde62e5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
135 set_bit(bit, (unsigned long *)cpu_caps_set); \ 135 set_bit(bit, (unsigned long *)cpu_caps_set); \
136} while (0) 136} while (0)
137 137
138#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
139
138#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) 140#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
139/* 141/*
140 * Static testing of CPU features. Used the same as boot_cpu_has(). 142 * Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 800104c8a3ed..f275447862f4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,11 +197,14 @@
197#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 197#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ 198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ 199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
200#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
200 201
201#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 202#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
202#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 203#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
203#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ 204#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
204 205#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
206#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
207#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
205#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 208#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
206#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ 209#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
207#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ 210#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -340,5 +343,8 @@
340#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ 343#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
341#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ 344#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
342#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ 345#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
346#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
347#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
348#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
343 349
344#endif /* _ASM_X86_CPUFEATURES_H */ 350#endif /* _ASM_X86_CPUFEATURES_H */
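Combined with setup_force_cpu_bug() from cpufeature.h above, these bug bits are forced from early boot policy code rather than detected per-CPU; a simplified sketch of that pattern, following the arch/x86/kernel/cpu/common.c change elsewhere in this merge:

/* AMD parts are exempt from Meltdown; everyone gets the Spectre bits. */
static void example_set_bug_bits(struct cpuinfo_x86 *c)
{
	if (c->x86_vendor != X86_VENDOR_AMD)
		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);

	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
}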
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..13c5ee878a47 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -7,6 +7,7 @@
7#include <asm/mmu.h> 7#include <asm/mmu.h>
8#include <asm/fixmap.h> 8#include <asm/fixmap.h>
9#include <asm/irq_vectors.h> 9#include <asm/irq_vectors.h>
10#include <asm/cpu_entry_area.h>
10 11
11#include <linux/smp.h> 12#include <linux/smp.h>
12#include <linux/percpu.h> 13#include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
20 21
21 desc->type = (info->read_exec_only ^ 1) << 1; 22 desc->type = (info->read_exec_only ^ 1) << 1;
22 desc->type |= info->contents << 2; 23 desc->type |= info->contents << 2;
24 /* Set the ACCESS bit so it can be mapped RO */
25 desc->type |= 1;
23 26
24 desc->s = 1; 27 desc->s = 1;
25 desc->dpl = 0x3; 28 desc->dpl = 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
60 return this_cpu_ptr(&gdt_page)->gdt; 63 return this_cpu_ptr(&gdt_page)->gdt;
61} 64}
62 65
63/* Get the fixmap index for a specific processor */
64static inline unsigned int get_cpu_gdt_ro_index(int cpu)
65{
66 return FIX_GDT_REMAP_BEGIN + cpu;
67}
68
69/* Provide the fixmap address of the remapped GDT */ 66/* Provide the fixmap address of the remapped GDT */
70static inline struct desc_struct *get_cpu_gdt_ro(int cpu) 67static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
71{ 68{
72 unsigned int idx = get_cpu_gdt_ro_index(cpu); 69 return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
73 return (struct desc_struct *)__fix_to_virt(idx);
74} 70}
75 71
76/* Provide the current read-only GDT */ 72/* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
185#endif 181#endif
186} 182}
187 183
188static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) 184static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
189{ 185{
190 struct desc_struct *d = get_cpu_gdt_rw(cpu); 186 struct desc_struct *d = get_cpu_gdt_rw(cpu);
191 tss_desc tss; 187 tss_desc tss;
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 14d6d5007314..b027633e7300 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,12 @@
50# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) 50# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
51#endif 51#endif
52 52
53#ifdef CONFIG_PAGE_TABLE_ISOLATION
54# define DISABLE_PTI 0
55#else
56# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
57#endif
58
53/* 59/*
54 * Make sure to add features to the correct mask 60 * Make sure to add features to the correct mask
55 */ 61 */
@@ -60,7 +66,7 @@
60#define DISABLED_MASK4 (DISABLE_PCID) 66#define DISABLED_MASK4 (DISABLE_PCID)
61#define DISABLED_MASK5 0 67#define DISABLED_MASK5 0
62#define DISABLED_MASK6 0 68#define DISABLED_MASK6 0
63#define DISABLED_MASK7 0 69#define DISABLED_MASK7 (DISABLE_PTI)
64#define DISABLED_MASK8 0 70#define DISABLED_MASK8 0
65#define DISABLED_MASK9 (DISABLE_MPX) 71#define DISABLED_MASK9 (DISABLE_MPX)
66#define DISABLED_MASK10 0 72#define DISABLED_MASK10 0
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 0211029076ea..6777480d8a42 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -2,7 +2,7 @@
2#ifndef _ASM_X86_ESPFIX_H 2#ifndef _ASM_X86_ESPFIX_H
3#define _ASM_X86_ESPFIX_H 3#define _ASM_X86_ESPFIX_H
4 4
5#ifdef CONFIG_X86_64 5#ifdef CONFIG_X86_ESPFIX64
6 6
7#include <asm/percpu.h> 7#include <asm/percpu.h>
8 8
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
11 11
12extern void init_espfix_bsp(void); 12extern void init_espfix_bsp(void);
13extern void init_espfix_ap(int cpu); 13extern void init_espfix_ap(int cpu);
14 14#else
15#endif /* CONFIG_X86_64 */ 15static inline void init_espfix_ap(int cpu) { }
16#endif
16 17
17#endif /* _ASM_X86_ESPFIX_H */ 18#endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0c505fe9a95..64c4a30e0d39 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
44 PAGE_SIZE) 44 PAGE_SIZE)
45#endif 45#endif
46 46
47
48/* 47/*
49 * Here we define all the compile-time 'special' virtual 48 * Here we define all the compile-time 'special' virtual
50 * addresses. The point is to have a constant address at 49 * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
84 FIX_IO_APIC_BASE_0, 83 FIX_IO_APIC_BASE_0,
85 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, 84 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
86#endif 85#endif
87 FIX_RO_IDT, /* Virtual mapping for read-only IDT */
88#ifdef CONFIG_X86_32 86#ifdef CONFIG_X86_32
89 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ 87 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
90 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, 88 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
100#ifdef CONFIG_X86_INTEL_MID 98#ifdef CONFIG_X86_INTEL_MID
101 FIX_LNW_VRTC, 99 FIX_LNW_VRTC,
102#endif 100#endif
103 /* Fixmap entries to remap the GDTs, one per processor. */
104 FIX_GDT_REMAP_BEGIN,
105 FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
106 101
107#ifdef CONFIG_ACPI_APEI_GHES 102#ifdef CONFIG_ACPI_APEI_GHES
108 /* Used for GHES mapping from assorted contexts */ 103 /* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
143extern void reserve_top_address(unsigned long reserve); 138extern void reserve_top_address(unsigned long reserve);
144 139
145#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) 140#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
146#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) 141#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
147 142
148extern int fixmaps_set; 143extern int fixmaps_set;
149 144
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 1b0a5abcd8ae..96aa6b9884dc 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
20#ifndef _ASM_X86_HYPERVISOR_H 20#ifndef _ASM_X86_HYPERVISOR_H
21#define _ASM_X86_HYPERVISOR_H 21#define _ASM_X86_HYPERVISOR_H
22 22
23#ifdef CONFIG_HYPERVISOR_GUEST 23/* x86 hypervisor types */
24
25#include <asm/kvm_para.h>
26#include <asm/x86_init.h>
27#include <asm/xen/hypervisor.h>
28
29/*
30 * x86 hypervisor information
31 */
32
33enum x86_hypervisor_type { 24enum x86_hypervisor_type {
34 X86_HYPER_NATIVE = 0, 25 X86_HYPER_NATIVE = 0,
35 X86_HYPER_VMWARE, 26 X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
39 X86_HYPER_KVM, 30 X86_HYPER_KVM,
40}; 31};
41 32
33#ifdef CONFIG_HYPERVISOR_GUEST
34
35#include <asm/kvm_para.h>
36#include <asm/x86_init.h>
37#include <asm/xen/hypervisor.h>
38
42struct hypervisor_x86 { 39struct hypervisor_x86 {
43 /* Hypervisor name */ 40 /* Hypervisor name */
44 const char *name; 41 const char *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
58 55
59extern enum x86_hypervisor_type x86_hyper_type; 56extern enum x86_hypervisor_type x86_hyper_type;
60extern void init_hypervisor_platform(void); 57extern void init_hypervisor_platform(void);
58static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
59{
60 return x86_hyper_type == type;
61}
61#else 62#else
62static inline void init_hypervisor_platform(void) { } 63static inline void init_hypervisor_platform(void) { }
64static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
65{
66 return type == X86_HYPER_NATIVE;
67}
63#endif /* CONFIG_HYPERVISOR_GUEST */ 68#endif /* CONFIG_HYPERVISOR_GUEST */
64#endif /* _ASM_X86_HYPERVISOR_H */ 69#endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
new file mode 100644
index 000000000000..62a9f4966b42
--- /dev/null
+++ b/arch/x86/include/asm/intel_ds.h
@@ -0,0 +1,36 @@
1#ifndef _ASM_INTEL_DS_H
2#define _ASM_INTEL_DS_H
3
4#include <linux/percpu-defs.h>
5
6#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
7#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
8
9/* The maximal number of PEBS events: */
10#define MAX_PEBS_EVENTS 8
11
12/*
13 * A debug store configuration.
14 *
15 * We only support architectures that use 64bit fields.
16 */
17struct debug_store {
18 u64 bts_buffer_base;
19 u64 bts_index;
20 u64 bts_absolute_maximum;
21 u64 bts_interrupt_threshold;
22 u64 pebs_buffer_base;
23 u64 pebs_index;
24 u64 pebs_absolute_maximum;
25 u64 pebs_interrupt_threshold;
26 u64 pebs_event_reset[MAX_PEBS_EVENTS];
27} __aligned(PAGE_SIZE);
28
29DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
30
31struct debug_store_buffers {
32 char bts_buffer[BTS_BUFFER_SIZE];
33 char pebs_buffer[PEBS_BUFFER_SIZE];
34};
35
36#endif
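A quick check of the sizes this header pins down, assuming 4 KiB pages:

/* Both buffers are 64 KiB, so debug_store_buffers reserves 32 pages of
 * cpu_entry_area per CPU, plus one page for the aligned debug_store.
 */
static inline void intel_ds_size_checks(void)
{
	BUILD_BUG_ON(BTS_BUFFER_SIZE != 16 * PAGE_SIZE);
	BUILD_BUG_ON(sizeof(struct debug_store_buffers) != 32 * PAGE_SIZE);
}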
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644
index 000000000000..989cfa86de85
--- /dev/null
+++ b/arch/x86/include/asm/invpcid.h
@@ -0,0 +1,53 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_X86_INVPCID
3#define _ASM_X86_INVPCID
4
5static inline void __invpcid(unsigned long pcid, unsigned long addr,
6 unsigned long type)
7{
8 struct { u64 d[2]; } desc = { { pcid, addr } };
9
10 /*
11 * The memory clobber is because the whole point is to invalidate
12 * stale TLB entries and, especially if we're flushing global
13 * mappings, we don't want the compiler to reorder any subsequent
14 * memory accesses before the TLB flush.
15 *
16 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
17 * invpcid (%rcx), %rax in long mode.
18 */
19 asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
20 : : "m" (desc), "a" (type), "c" (&desc) : "memory");
21}
22
23#define INVPCID_TYPE_INDIV_ADDR 0
24#define INVPCID_TYPE_SINGLE_CTXT 1
25#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
26#define INVPCID_TYPE_ALL_NON_GLOBAL 3
27
28/* Flush all mappings for a given pcid and addr, not including globals. */
29static inline void invpcid_flush_one(unsigned long pcid,
30 unsigned long addr)
31{
32 __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
33}
34
35/* Flush all mappings for a given PCID, not including globals. */
36static inline void invpcid_flush_single_context(unsigned long pcid)
37{
38 __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
39}
40
41/* Flush all mappings, including globals, for all PCIDs. */
42static inline void invpcid_flush_all(void)
43{
44 __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
45}
46
47/* Flush all mappings for all PCIDs except globals. */
48static inline void invpcid_flush_all_nonglobals(void)
49{
50 __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
51}
52
53#endif /* _ASM_X86_INVPCID */
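The four wrappers map one-to-one onto the INVPCID descriptor types. A hypothetical caller choosing between a targeted flush and a full non-global flush:

/* Illustration only: pick the cheapest sufficient invalidation. */
static void example_flush(unsigned long pcid, unsigned long addr, bool all)
{
	if (all)
		invpcid_flush_all_nonglobals();
	else
		invpcid_flush_one(pcid, addr);
}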
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index 139feef467f7..c066ffae222b 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
44extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, 44extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
45 unsigned int nr_irqs); 45 unsigned int nr_irqs);
46extern int mp_irqdomain_activate(struct irq_domain *domain, 46extern int mp_irqdomain_activate(struct irq_domain *domain,
47 struct irq_data *irq_data, bool early); 47 struct irq_data *irq_data, bool reserve);
48extern void mp_irqdomain_deactivate(struct irq_domain *domain, 48extern void mp_irqdomain_deactivate(struct irq_domain *domain,
49 struct irq_data *irq_data); 49 struct irq_data *irq_data);
50extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); 50extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c8ef23f2c28f..89f08955fff7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
142 swapgs; \ 142 swapgs; \
143 sysretl 143 sysretl
144 144
145#ifdef CONFIG_DEBUG_ENTRY
146#define SAVE_FLAGS(x) pushfq; popq %rax
147#endif
145#else 148#else
146#define INTERRUPT_RETURN iret 149#define INTERRUPT_RETURN iret
147#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit 150#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f86a8caa561e..395c9631e000 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
26extern int __must_check __die(const char *, struct pt_regs *, long); 26extern int __must_check __die(const char *, struct pt_regs *, long);
27extern void show_stack_regs(struct pt_regs *regs); 27extern void show_stack_regs(struct pt_regs *regs);
28extern void __show_regs(struct pt_regs *regs, int all); 28extern void __show_regs(struct pt_regs *regs, int all);
29extern void show_iret_regs(struct pt_regs *regs);
29extern unsigned long oops_begin(void); 30extern unsigned long oops_begin(void);
30extern void oops_end(unsigned long, struct pt_regs *, int signr); 31extern void oops_end(unsigned long, struct pt_regs *, int signr);
31 32
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9ea26f167497..5ff3e8af2c20 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
3#define _ASM_X86_MMU_H 3#define _ASM_X86_MMU_H
4 4
5#include <linux/spinlock.h> 5#include <linux/spinlock.h>
6#include <linux/rwsem.h>
6#include <linux/mutex.h> 7#include <linux/mutex.h>
7#include <linux/atomic.h> 8#include <linux/atomic.h>
8 9
@@ -27,7 +28,8 @@ typedef struct {
27 atomic64_t tlb_gen; 28 atomic64_t tlb_gen;
28 29
29#ifdef CONFIG_MODIFY_LDT_SYSCALL 30#ifdef CONFIG_MODIFY_LDT_SYSCALL
30 struct ldt_struct *ldt; 31 struct rw_semaphore ldt_usr_sem;
32 struct ldt_struct *ldt;
31#endif 33#endif
32 34
33#ifdef CONFIG_X86_64 35#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6d16d15d09a0..c931b88982a0 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -50,22 +50,53 @@ struct ldt_struct {
50 * call gates. On native, we could merge the ldt_struct and LDT 50 * call gates. On native, we could merge the ldt_struct and LDT
51 * allocations, but it's not worth trying to optimize. 51 * allocations, but it's not worth trying to optimize.
52 */ 52 */
53 struct desc_struct *entries; 53 struct desc_struct *entries;
54 unsigned int nr_entries; 54 unsigned int nr_entries;
55
56 /*
57 * If PTI is in use, then the entries array is not mapped while we're
58 * in user mode. The whole array will be aliased at the address
59 * given by ldt_slot_va(slot). We use two slots so that we can allocate
60 * and map, and enable a new LDT without invalidating the mapping
61 * of an older, still-in-use LDT.
62 *
63 * slot will be -1 if this LDT doesn't have an alias mapping.
64 */
65 int slot;
55}; 66};
56 67
68/* This is a multiple of PAGE_SIZE. */
69#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
70
71static inline void *ldt_slot_va(int slot)
72{
73#ifdef CONFIG_X86_64
74 return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
75#else
76 BUG();
77#endif
78}
79
57/* 80/*
58 * Used for LDT copy/destruction. 81 * Used for LDT copy/destruction.
59 */ 82 */
60int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); 83static inline void init_new_context_ldt(struct mm_struct *mm)
84{
85 mm->context.ldt = NULL;
86 init_rwsem(&mm->context.ldt_usr_sem);
87}
88int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
61void destroy_context_ldt(struct mm_struct *mm); 89void destroy_context_ldt(struct mm_struct *mm);
90void ldt_arch_exit_mmap(struct mm_struct *mm);
62#else /* CONFIG_MODIFY_LDT_SYSCALL */ 91#else /* CONFIG_MODIFY_LDT_SYSCALL */
63static inline int init_new_context_ldt(struct task_struct *tsk, 92static inline void init_new_context_ldt(struct mm_struct *mm) { }
64 struct mm_struct *mm) 93static inline int ldt_dup_context(struct mm_struct *oldmm,
94 struct mm_struct *mm)
65{ 95{
66 return 0; 96 return 0;
67} 97}
68static inline void destroy_context_ldt(struct mm_struct *mm) {} 98static inline void destroy_context_ldt(struct mm_struct *mm) { }
99static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
69#endif 100#endif
70 101
71static inline void load_mm_ldt(struct mm_struct *mm) 102static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
90 * that we can see. 121 * that we can see.
91 */ 122 */
92 123
93 if (unlikely(ldt)) 124 if (unlikely(ldt)) {
94 set_ldt(ldt->entries, ldt->nr_entries); 125 if (static_cpu_has(X86_FEATURE_PTI)) {
95 else 126 if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
127 /*
128 * Whoops -- either the new LDT isn't mapped
129 * (if slot == -1) or is mapped into a bogus
130 * slot (if slot > 1).
131 */
132 clear_LDT();
133 return;
134 }
135
136 /*
137 * If page table isolation is enabled, ldt->entries
138 * will not be mapped in the userspace pagetables.
139 * Tell the CPU to access the LDT through the alias
140 * at ldt_slot_va(ldt->slot).
141 */
142 set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
143 } else {
144 set_ldt(ldt->entries, ldt->nr_entries);
145 }
146 } else {
96 clear_LDT(); 147 clear_LDT();
148 }
97#else 149#else
98 clear_LDT(); 150 clear_LDT();
99#endif 151#endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
132static inline int init_new_context(struct task_struct *tsk, 184static inline int init_new_context(struct task_struct *tsk,
133 struct mm_struct *mm) 185 struct mm_struct *mm)
134{ 186{
187 mutex_init(&mm->context.lock);
188
135 mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); 189 mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
136 atomic64_set(&mm->context.tlb_gen, 0); 190 atomic64_set(&mm->context.tlb_gen, 0);
137 191
138 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS 192#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
139 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { 193 if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
140 /* pkey 0 is the default and always allocated */ 194 /* pkey 0 is the default and always allocated */
141 mm->context.pkey_allocation_map = 0x1; 195 mm->context.pkey_allocation_map = 0x1;
142 /* -1 means unallocated or invalid */ 196 /* -1 means unallocated or invalid */
143 mm->context.execute_only_pkey = -1; 197 mm->context.execute_only_pkey = -1;
144 } 198 }
145 #endif 199#endif
146 return init_new_context_ldt(tsk, mm); 200 init_new_context_ldt(mm);
201 return 0;
147} 202}
148static inline void destroy_context(struct mm_struct *mm) 203static inline void destroy_context(struct mm_struct *mm)
149{ 204{
@@ -176,15 +231,16 @@ do { \
176} while (0) 231} while (0)
177#endif 232#endif
178 233
179static inline void arch_dup_mmap(struct mm_struct *oldmm, 234static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
180 struct mm_struct *mm)
181{ 235{
182 paravirt_arch_dup_mmap(oldmm, mm); 236 paravirt_arch_dup_mmap(oldmm, mm);
237 return ldt_dup_context(oldmm, mm);
183} 238}
184 239
185static inline void arch_exit_mmap(struct mm_struct *mm) 240static inline void arch_exit_mmap(struct mm_struct *mm)
186{ 241{
187 paravirt_arch_exit_mmap(mm); 242 paravirt_arch_exit_mmap(mm);
243 ldt_arch_exit_mmap(mm);
188} 244}
189 245
190#ifdef CONFIG_X86_64 246#ifdef CONFIG_X86_64
@@ -282,33 +338,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
282} 338}
283 339
284/* 340/*
285 * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
286 * bits. This serves two purposes. It prevents a nasty situation in
287 * which PCID-unaware code saves CR3, loads some other value (with PCID
288 * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
289 * the saved ASID was nonzero. It also means that any bugs involving
290 * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
291 * deterministically.
292 */
293
294static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
295{
296 if (static_cpu_has(X86_FEATURE_PCID)) {
297 VM_WARN_ON_ONCE(asid > 4094);
298 return __sme_pa(mm->pgd) | (asid + 1);
299 } else {
300 VM_WARN_ON_ONCE(asid != 0);
301 return __sme_pa(mm->pgd);
302 }
303}
304
305static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
306{
307 VM_WARN_ON_ONCE(asid > 4094);
308 return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
309}
310
311/*
312 * This can be used from process context to figure out what the value of 341 * This can be used from process context to figure out what the value of
313 * CR3 is without needing to do a (slow) __read_cr3(). 342 * CR3 is without needing to do a (slow) __read_cr3().
314 * 343 *
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
317 */ 346 */
318static inline unsigned long __get_current_cr3_fast(void) 347static inline unsigned long __get_current_cr3_fast(void)
319{ 348{
320 unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), 349 unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
321 this_cpu_read(cpu_tlbstate.loaded_mm_asid)); 350 this_cpu_read(cpu_tlbstate.loaded_mm_asid));
322 351
323 /* For now, be very restrictive about when this can be called. */ 352 /* For now, be very restrictive about when this can be called. */
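The two LDT slots described above form a classic double buffer: a new LDT is installed in whichever slot the old one is not using, so the old alias stays valid until the switch completes. A sketch of the slot choice (helper name hypothetical):

/* With LDT_ENTRIES == 8192 and LDT_ENTRY_SIZE == 8, LDT_SLOT_STRIDE is
 * 64 KiB, putting the two aliases at LDT_BASE_ADDR and
 * LDT_BASE_ADDR + 0x10000.
 */
static void *example_next_ldt_alias(struct ldt_struct *old_ldt)
{
	int slot = (old_ldt && old_ldt->slot == 0) ? 1 : 0;

	return ldt_slot_va(slot);
}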
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 5400add2885b..8bf450b13d9f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
7#include <linux/nmi.h> 7#include <linux/nmi.h>
8#include <asm/io.h> 8#include <asm/io.h>
9#include <asm/hyperv.h> 9#include <asm/hyperv.h>
10#include <asm/nospec-branch.h>
10 11
11/* 12/*
12 * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent 13 * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
186 return U64_MAX; 187 return U64_MAX;
187 188
188 __asm__ __volatile__("mov %4, %%r8\n" 189 __asm__ __volatile__("mov %4, %%r8\n"
189 "call *%5" 190 CALL_NOSPEC
190 : "=a" (hv_status), ASM_CALL_CONSTRAINT, 191 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
191 "+c" (control), "+d" (input_address) 192 "+c" (control), "+d" (input_address)
192 : "r" (output_address), "m" (hv_hypercall_pg) 193 : "r" (output_address),
194 THUNK_TARGET(hv_hypercall_pg)
193 : "cc", "memory", "r8", "r9", "r10", "r11"); 195 : "cc", "memory", "r8", "r9", "r10", "r11");
194#else 196#else
195 u32 input_address_hi = upper_32_bits(input_address); 197 u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
200 if (!hv_hypercall_pg) 202 if (!hv_hypercall_pg)
201 return U64_MAX; 203 return U64_MAX;
202 204
203 __asm__ __volatile__("call *%7" 205 __asm__ __volatile__(CALL_NOSPEC
204 : "=A" (hv_status), 206 : "=A" (hv_status),
205 "+c" (input_address_lo), ASM_CALL_CONSTRAINT 207 "+c" (input_address_lo), ASM_CALL_CONSTRAINT
206 : "A" (control), 208 : "A" (control),
207 "b" (input_address_hi), 209 "b" (input_address_hi),
208 "D"(output_address_hi), "S"(output_address_lo), 210 "D"(output_address_hi), "S"(output_address_lo),
209 "m" (hv_hypercall_pg) 211 THUNK_TARGET(hv_hypercall_pg)
210 : "cc", "memory"); 212 : "cc", "memory");
211#endif /* !x86_64 */ 213#endif /* !x86_64 */
212 return hv_status; 214 return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
227 229
228#ifdef CONFIG_X86_64 230#ifdef CONFIG_X86_64
229 { 231 {
230 __asm__ __volatile__("call *%4" 232 __asm__ __volatile__(CALL_NOSPEC
231 : "=a" (hv_status), ASM_CALL_CONSTRAINT, 233 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
232 "+c" (control), "+d" (input1) 234 "+c" (control), "+d" (input1)
233 : "m" (hv_hypercall_pg) 235 : THUNK_TARGET(hv_hypercall_pg)
234 : "cc", "r8", "r9", "r10", "r11"); 236 : "cc", "r8", "r9", "r10", "r11");
235 } 237 }
236#else 238#else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
238 u32 input1_hi = upper_32_bits(input1); 240 u32 input1_hi = upper_32_bits(input1);
239 u32 input1_lo = lower_32_bits(input1); 241 u32 input1_lo = lower_32_bits(input1);
240 242
241 __asm__ __volatile__ ("call *%5" 243 __asm__ __volatile__ (CALL_NOSPEC
242 : "=A"(hv_status), 244 : "=A"(hv_status),
243 "+c"(input1_lo), 245 "+c"(input1_lo),
244 ASM_CALL_CONSTRAINT 246 ASM_CALL_CONSTRAINT
245 : "A" (control), 247 : "A" (control),
246 "b" (input1_hi), 248 "b" (input1_hi),
247 "m" (hv_hypercall_pg) 249 THUNK_TARGET(hv_hypercall_pg)
248 : "cc", "edi", "esi"); 250 : "cc", "edi", "esi");
249 } 251 }
250#endif 252#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 34c4922bbc3f..e7b983a35506 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -355,6 +355,9 @@
355#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL 355#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
356#define FAM10H_MMIO_CONF_BASE_SHIFT 20 356#define FAM10H_MMIO_CONF_BASE_SHIFT 20
357#define MSR_FAM10H_NODE_ID 0xc001100c 357#define MSR_FAM10H_NODE_ID 0xc001100c
358#define MSR_F10H_DECFG 0xc0011029
359#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
360#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
358 361
359/* K8 MSRs */ 362/* K8 MSRs */
360#define MSR_K8_TOP_MEM1 0xc001001a 363#define MSR_K8_TOP_MEM1 0xc001001a
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 000000000000..402a11c803c3
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,214 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#ifndef __NOSPEC_BRANCH_H__
4#define __NOSPEC_BRANCH_H__
5
6#include <asm/alternative.h>
7#include <asm/alternative-asm.h>
8#include <asm/cpufeatures.h>
9
10/*
11 * Fill the CPU return stack buffer.
12 *
13 * Each entry in the RSB, if used for a speculative 'ret', contains an
14 * infinite 'pause; jmp' loop to capture speculative execution.
15 *
16 * This is required in various cases for retpoline and IBRS-based
17 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
18 * eliminate potentially bogus entries from the RSB, and sometimes
19 * purely to ensure that it doesn't get empty, which on some CPUs would
20 * allow predictions from other (unwanted!) sources to be used.
21 *
22 * We define a CPP macro such that it can be used from both .S files and
23 * inline assembly. It's possible to do a .macro and then include that
24 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
25 */
26
27#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
28#define RSB_FILL_LOOPS 16 /* To avoid underflow */
29
30/*
31 * Google experimented with loop-unrolling and this turned out to be
32 * the optimal version — two calls, each with their own speculation
33 * trap should their return address end up getting used, in a loop.
34 */
35#define __FILL_RETURN_BUFFER(reg, nr, sp) \
36 mov $(nr/2), reg; \
37771: \
38 call 772f; \
39773: /* speculation trap */ \
40 pause; \
41 jmp 773b; \
42772: \
43 call 774f; \
44775: /* speculation trap */ \
45 pause; \
46 jmp 775b; \
47774: \
48 dec reg; \
49 jnz 771b; \
50 add $(BITS_PER_LONG/8) * nr, sp;
51
52#ifdef __ASSEMBLY__
53
54/*
55 * This should be used immediately before a retpoline alternative. It tells
56 * objtool where the retpolines are so that it can make sense of the control
57 * flow by just reading the original instruction(s) and ignoring the
58 * alternatives.
59 */
60.macro ANNOTATE_NOSPEC_ALTERNATIVE
61 .Lannotate_\@:
62 .pushsection .discard.nospec
63 .long .Lannotate_\@ - .
64 .popsection
65.endm
66
67/*
68 * These are the bare retpoline primitives for indirect jmp and call.
69 * Do not use these directly; they only exist to make the ALTERNATIVE
70 * invocation below less ugly.
71 */
72.macro RETPOLINE_JMP reg:req
73 call .Ldo_rop_\@
74.Lspec_trap_\@:
75 pause
76 jmp .Lspec_trap_\@
77.Ldo_rop_\@:
78 mov \reg, (%_ASM_SP)
79 ret
80.endm
81
82/*
83 * This is a wrapper around RETPOLINE_JMP so the called function in reg
84 * returns to the instruction after the macro.
85 */
86.macro RETPOLINE_CALL reg:req
87 jmp .Ldo_call_\@
88.Ldo_retpoline_jmp_\@:
89 RETPOLINE_JMP \reg
90.Ldo_call_\@:
91 call .Ldo_retpoline_jmp_\@
92.endm
93
94/*
95 * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
96 * indirect jmp/call which may be susceptible to the Spectre variant 2
97 * attack.
98 */
99.macro JMP_NOSPEC reg:req
100#ifdef CONFIG_RETPOLINE
101 ANNOTATE_NOSPEC_ALTERNATIVE
102 ALTERNATIVE_2 __stringify(jmp *\reg), \
103 __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
104 __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
105#else
106 jmp *\reg
107#endif
108.endm
109
110.macro CALL_NOSPEC reg:req
111#ifdef CONFIG_RETPOLINE
112 ANNOTATE_NOSPEC_ALTERNATIVE
113 ALTERNATIVE_2 __stringify(call *\reg), \
114 __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
115 __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
116#else
117 call *\reg
118#endif
119.endm
120
121 /*
122 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
123 * monstrosity above, manually.
124 */
125.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
126#ifdef CONFIG_RETPOLINE
127 ANNOTATE_NOSPEC_ALTERNATIVE
128 ALTERNATIVE "jmp .Lskip_rsb_\@", \
129 __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
130 \ftr
131.Lskip_rsb_\@:
132#endif
133.endm
134
135#else /* __ASSEMBLY__ */
136
137#define ANNOTATE_NOSPEC_ALTERNATIVE \
138 "999:\n\t" \
139 ".pushsection .discard.nospec\n\t" \
140 ".long 999b - .\n\t" \
141 ".popsection\n\t"
142
143#if defined(CONFIG_X86_64) && defined(RETPOLINE)
144
145/*
146 * Since the inline asm uses the %V modifier which is only in newer GCC,
147 * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
148 */
149# define CALL_NOSPEC \
150 ANNOTATE_NOSPEC_ALTERNATIVE \
151 ALTERNATIVE( \
152 "call *%[thunk_target]\n", \
153 "call __x86_indirect_thunk_%V[thunk_target]\n", \
154 X86_FEATURE_RETPOLINE)
155# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
156
157#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
158/*
159 * For i386 we use the original ret-equivalent retpoline, because
160 * otherwise we'll run out of registers. We don't care about CET
161 * here, anyway.
162 */
163# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
164 " jmp 904f;\n" \
165 " .align 16\n" \
166 "901: call 903f;\n" \
167 "902: pause;\n" \
168 " jmp 902b;\n" \
169 " .align 16\n" \
170 "903: addl $4, %%esp;\n" \
171 " pushl %[thunk_target];\n" \
172 " ret;\n" \
173 " .align 16\n" \
174 "904: call 901b;\n", \
175 X86_FEATURE_RETPOLINE)
176
177# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
178#else /* No retpoline for C / inline asm */
179# define CALL_NOSPEC "call *%[thunk_target]\n"
180# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
181#endif
182
183/* The Spectre V2 mitigation variants */
184enum spectre_v2_mitigation {
185 SPECTRE_V2_NONE,
186 SPECTRE_V2_RETPOLINE_MINIMAL,
187 SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
188 SPECTRE_V2_RETPOLINE_GENERIC,
189 SPECTRE_V2_RETPOLINE_AMD,
190 SPECTRE_V2_IBRS,
191};
192
193/*
194 * On VMEXIT we must ensure that no RSB predictions learned in the guest
195 * can be followed in the host, by overwriting the RSB completely. Both
196 * retpoline and IBRS mitigations for Spectre v2 need this; only on future
197 * CPUs with IBRS_ATT *might* it be avoided.
198 */
199static inline void vmexit_fill_RSB(void)
200{
201#ifdef CONFIG_RETPOLINE
202 unsigned long loops = RSB_CLEAR_LOOPS / 2;
203
204 asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
205 ALTERNATIVE("jmp 910f",
206 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
207 X86_FEATURE_RETPOLINE)
208 "910:"
209 : "=&r" (loops), ASM_CALL_CONSTRAINT
210 : "r" (loops) : "memory" );
211#endif
212}
213#endif /* __ASSEMBLY__ */
214#endif /* __NOSPEC_BRANCH_H__ */
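From C, CALL_NOSPEC and THUNK_TARGET are used together inside inline asm, as the mshyperv.h hunks earlier in this diff show. A minimal 64-bit sketch; the helper and its clobber list are illustrative, not taken from the patch:

/* Indirect call through the retpoline-safe macro: on CONFIG_RETPOLINE
 * kernels this emits a call to __x86_indirect_thunk_<reg> rather than
 * a bare 'call *%reg'.  x86-64 SysV: argument in %rdi, result in %rax.
 */
static inline long call_fn_nospec(long (*fn)(long), long arg)
{
	long ret;

	asm volatile (CALL_NOSPEC
		      : "=a" (ret), "+D" (arg), ASM_CALL_CONSTRAINT
		      : THUNK_TARGET(fn)
		      : "cc", "memory", "rcx", "rdx", "rsi",
			"r8", "r9", "r10", "r11");
	return ret;
}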
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8af..892df375b615 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
928 CLBR_NONE, \ 928 CLBR_NONE, \
929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
930
931#ifdef CONFIG_DEBUG_ENTRY
932#define SAVE_FLAGS(clobbers) \
933 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
934 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
935 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
936 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
937#endif
938
930#endif /* CONFIG_X86_32 */ 939#endif /* CONFIG_X86_32 */
931 940
932#endif /* __ASSEMBLY__ */ 941#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 7a5d6695abd3..eb66fa9cd0fc 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -38,6 +38,7 @@ do { \
38#define PCI_NOASSIGN_ROMS 0x80000 38#define PCI_NOASSIGN_ROMS 0x80000
39#define PCI_ROOT_NO_CRS 0x100000 39#define PCI_ROOT_NO_CRS 0x100000
40#define PCI_NOASSIGN_BARS 0x200000 40#define PCI_NOASSIGN_BARS 0x200000
41#define PCI_BIG_ROOT_WINDOW 0x400000
41 42
42extern unsigned int pci_probe; 43extern unsigned int pci_probe;
43extern unsigned long pirq_table_addr; 44extern unsigned long pirq_table_addr;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 4b5e1eafada7..aff42e1da6ee 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
30 */ 30 */
31extern gfp_t __userpte_alloc_gfp; 31extern gfp_t __userpte_alloc_gfp;
32 32
33#ifdef CONFIG_PAGE_TABLE_ISOLATION
34/*
35 * Instead of one PGD, we acquire two PGDs. Being order-1, it is
36 * both 8k in size and 8k-aligned. That lets us just flip bit 12
37 * in a pointer to swap between the two 4k halves.
38 */
39#define PGD_ALLOCATION_ORDER 1
40#else
41#define PGD_ALLOCATION_ORDER 0
42#endif
43
33/* 44/*
34 * Allocate and free page tables. 45 * Allocate and free page tables.
35 */ 46 */
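The order-1 allocation is what makes the bit-12 trick work: an 8 KiB, 8 KiB-aligned block puts the user-half PGD exactly one page above the kernel half. Sketched below; the real pgd_alloc() changes live elsewhere in the series:

/* Order-1 keeps the pair 8k-aligned, so flipping bit 12 of the returned
 * pointer selects the user half (see pgtable_64.h later in this diff).
 */
static pgd_t *example_pgd_alloc(void)
{
	return (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 PGD_ALLOCATION_ORDER);
}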
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 95e2dfd75521..e42b8943cb1a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
28int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); 28int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
29 29
30void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); 30void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
31void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
31void ptdump_walk_pgd_level_checkwx(void); 32void ptdump_walk_pgd_level_checkwx(void);
32 33
33#ifdef CONFIG_DEBUG_WX 34#ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
841 842
842static inline int p4d_bad(p4d_t p4d) 843static inline int p4d_bad(p4d_t p4d)
843{ 844{
844 return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; 845 unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
846
847 if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
848 ignore_flags |= _PAGE_NX;
849
850 return (p4d_flags(p4d) & ~ignore_flags) != 0;
845} 851}
846#endif /* CONFIG_PGTABLE_LEVELS > 3 */ 852#endif /* CONFIG_PGTABLE_LEVELS > 3 */
847 853
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
875 881
876static inline int pgd_bad(pgd_t pgd) 882static inline int pgd_bad(pgd_t pgd)
877{ 883{
878 return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; 884 unsigned long ignore_flags = _PAGE_USER;
885
886 if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
887 ignore_flags |= _PAGE_NX;
888
889 return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
879} 890}
880 891
881static inline int pgd_none(pgd_t pgd) 892static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
904 * pgd_offset() returns a (pgd_t *) 915 * pgd_offset() returns a (pgd_t *)
905 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's; 916 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
906 */ 917 */
907#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) 918#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
919/*
920 * a shortcut to get a pgd_t in a given mm
921 */
922#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
908/* 923/*
909 * a shortcut which implies the use of the kernel's pgd, instead 924 * a shortcut which implies the use of the kernel's pgd, instead
910 * of a process's 925 * of a process's
@@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
1106 */ 1121 */
1107static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) 1122static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
1108{ 1123{
1109 memcpy(dst, src, count * sizeof(pgd_t)); 1124 memcpy(dst, src, count * sizeof(pgd_t));
1125#ifdef CONFIG_PAGE_TABLE_ISOLATION
1126 if (!static_cpu_has(X86_FEATURE_PTI))
1127 return;
1128 /* Clone the user space pgd as well */
1129 memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
1130 count * sizeof(pgd_t));
1131#endif
1110} 1132}
1111 1133
1112#define PTE_SHIFT ilog2(PTRS_PER_PTE) 1134#define PTE_SHIFT ilog2(PTRS_PER_PTE)
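pgd_offset_pgd() exists so PTI code can run the same lookup against either copy of the page tables. A hypothetical walk of the user copy, using kernel_to_user_pgdp() from pgtable_64.h later in this diff:

/* Sketch: resolve an address in the user-space half of a PTI mm. */
static pgd_t *example_user_pgd_entry(struct mm_struct *mm, unsigned long addr)
{
	return pgd_offset_pgd(kernel_to_user_pgdp(mm->pgd), addr);
}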
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index f2ca9b28fd68..ce245b0cdfca 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
38#define LAST_PKMAP 1024 38#define LAST_PKMAP 1024
39#endif 39#endif
40 40
41#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ 41/*
42 & PMD_MASK) 42 * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
43 * to avoid include recursion hell
44 */
45#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
46
47#define CPU_ENTRY_AREA_BASE \
48 ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
49
50#define PKMAP_BASE \
51 ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
43 52
44#ifdef CONFIG_HIGHMEM 53#ifdef CONFIG_HIGHMEM
45# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) 54# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
46#else 55#else
47# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) 56# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
48#endif 57#endif
49 58
50#define MODULES_VADDR VMALLOC_START 59#define MODULES_VADDR VMALLOC_START
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e9f05331e732..81462e9a34f6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
131#endif 131#endif
132} 132}
133 133
134#ifdef CONFIG_PAGE_TABLE_ISOLATION
135/*
136 * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
137 * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
138 * the user one is in the last 4k. To switch between them, you
139 * just need to flip the 12th bit in their addresses.
140 */
141#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
142
143/*
144 * This generates better code than the inline assembly in
145 * __set_bit().
146 */
147static inline void *ptr_set_bit(void *ptr, int bit)
148{
149 unsigned long __ptr = (unsigned long)ptr;
150
151 __ptr |= BIT(bit);
152 return (void *)__ptr;
153}
154static inline void *ptr_clear_bit(void *ptr, int bit)
155{
156 unsigned long __ptr = (unsigned long)ptr;
157
158 __ptr &= ~BIT(bit);
159 return (void *)__ptr;
160}
161
162static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
163{
164 return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
165}
166
167static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
168{
169 return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
170}
171
172static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
173{
174 return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
175}
176
177static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
178{
179 return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
180}
181#endif /* CONFIG_PAGE_TABLE_ISOLATION */
182
183/*
184 * Page table pages are page-aligned. The lower half of the top
185 * level is used for userspace and the top half for the kernel.
186 *
187 * Returns true for parts of the PGD that map userspace and
188 * false for the parts that map the kernel.
189 */
190static inline bool pgdp_maps_userspace(void *__ptr)
191{
192 unsigned long ptr = (unsigned long)__ptr;
193
194 return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
195}
196
197#ifdef CONFIG_PAGE_TABLE_ISOLATION
198pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
199
200/*
201 * Take a PGD location (pgdp) and a pgd value that needs to be set there.
202 * Populates the user and returns the resulting PGD that must be set in
203 * the kernel copy of the page tables.
204 */
205static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
206{
207 if (!static_cpu_has(X86_FEATURE_PTI))
208 return pgd;
209 return __pti_set_user_pgd(pgdp, pgd);
210}
211#else
212static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
213{
214 return pgd;
215}
216#endif
217
134static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) 218static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
135{ 219{
220#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
221 p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
222#else
136 *p4dp = p4d; 223 *p4dp = p4d;
224#endif
137} 225}
138 226
139static inline void native_p4d_clear(p4d_t *p4d) 227static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
147 235
148static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) 236static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
149{ 237{
238#ifdef CONFIG_PAGE_TABLE_ISOLATION
239 *pgdp = pti_set_user_pgd(pgdp, pgd);
240#else
150 *pgdp = pgd; 241 *pgdp = pgd;
242#endif
151} 243}
152 244
153static inline void native_pgd_clear(pgd_t *pgd) 245static inline void native_pgd_clear(pgd_t *pgd)
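pgdp_maps_userspace() relies purely on the entry's offset within its page: a PGD page holds 512 eight-byte entries, and indices 0-255 (the lower 2 KiB) cover the user half of the address space. A worked check:

/* Index 255 sits at offset 0x7f8 (< PAGE_SIZE/2): userspace.
 * Index 256 sits at offset 0x800 (>= PAGE_SIZE/2): kernel.
 */
static inline bool example_is_user_entry(pgd_t *pgd_page, int index)
{
	return pgdp_maps_userspace(pgd_page + index);
}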
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6d5f45dcd4a1..6b8f73dcbc2c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -75,33 +75,52 @@ typedef struct { pteval_t pte; } pte_t;
75#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) 75#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
76#define PGDIR_MASK (~(PGDIR_SIZE - 1)) 76#define PGDIR_MASK (~(PGDIR_SIZE - 1))
77 77
78/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ 78/*
79#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) 79 * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
80 *
81 * Be very careful vs. KASLR when changing anything here. The KASLR address
82 * range must not overlap with anything except the KASAN shadow area, which
83 * is correct as KASAN disables KASLR.
84 */
85#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
86
80#ifdef CONFIG_X86_5LEVEL 87#ifdef CONFIG_X86_5LEVEL
81#define VMALLOC_SIZE_TB _AC(16384, UL) 88# define VMALLOC_SIZE_TB _AC(12800, UL)
82#define __VMALLOC_BASE _AC(0xff92000000000000, UL) 89# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
83#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) 90# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
91# define LDT_PGD_ENTRY _AC(-112, UL)
92# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
84#else 93#else
85#define VMALLOC_SIZE_TB _AC(32, UL) 94# define VMALLOC_SIZE_TB _AC(32, UL)
86#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) 95# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
87#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) 96# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
97# define LDT_PGD_ENTRY _AC(-3, UL)
98# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
88#endif 99#endif
100
89#ifdef CONFIG_RANDOMIZE_MEMORY 101#ifdef CONFIG_RANDOMIZE_MEMORY
90#define VMALLOC_START vmalloc_base 102# define VMALLOC_START vmalloc_base
91#define VMEMMAP_START vmemmap_base 103# define VMEMMAP_START vmemmap_base
92#else 104#else
93#define VMALLOC_START __VMALLOC_BASE 105# define VMALLOC_START __VMALLOC_BASE
94#define VMEMMAP_START __VMEMMAP_BASE 106# define VMEMMAP_START __VMEMMAP_BASE
95#endif /* CONFIG_RANDOMIZE_MEMORY */ 107#endif /* CONFIG_RANDOMIZE_MEMORY */
96#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) 108
97#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) 109#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
110
111#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
98/* The module sections end with the start of the fixmap */ 112/* The module sections end with the start of the fixmap */
99#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) 113#define MODULES_END _AC(0xffffffffff000000, UL)
100#define MODULES_LEN (MODULES_END - MODULES_VADDR) 114#define MODULES_LEN (MODULES_END - MODULES_VADDR)
101#define ESPFIX_PGD_ENTRY _AC(-2, UL) 115
102#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) 116#define ESPFIX_PGD_ENTRY _AC(-2, UL)
103#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) 117#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
104#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) 118
119#define CPU_ENTRY_AREA_PGD _AC(-4, UL)
120#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
121
122#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
123#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
105 124
106#define EARLY_DYNAMIC_PAGE_TABLES 64 125#define EARLY_DYNAMIC_PAGE_TABLES 64
107 126
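The negative PGD-entry constants above (LDT_PGD_ENTRY, ESPFIX_PGD_ENTRY, CPU_ENTRY_AREA_PGD) all follow one pattern: a small negative index, sign-extended and shifted by the top-level page-table shift, lands at a fixed slot near the top of the kernel half of the address space. A stand-alone sketch of that arithmetic for the 4-level case, where PGDIR_SHIFT and P4D_SHIFT are both 39; the helper name is illustrative and an LP64 host is assumed:

#include <stdio.h>

#define PGDIR_SHIFT 39  /* 4-level paging; P4D_SHIFT is also 39 here */

static unsigned long pgd_entry_base(long entry)
{
        /* Sign-extend in the cast, then shift as unsigned (well-defined). */
        return (unsigned long)entry << PGDIR_SHIFT;
}

int main(void)
{
        printf("LDT_BASE_ADDR       = %#lx\n", pgd_entry_base(-3));
        printf("ESPFIX_BASE_ADDR    = %#lx\n", pgd_entry_base(-2));
        printf("CPU_ENTRY_AREA_BASE = %#lx\n", pgd_entry_base(-4));
        return 0;
}

This prints 0xfffffe8000000000, 0xffffff0000000000 and 0xfffffe0000000000, matching the layout described in Documentation/x86/x86_64/mm.txt.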
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 43212a43ee69..625a52a5594f 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -38,6 +38,11 @@
38#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) 38#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
39#define CR3_PCID_MASK 0xFFFull 39#define CR3_PCID_MASK 0xFFFull
40#define CR3_NOFLUSH BIT_ULL(63) 40#define CR3_NOFLUSH BIT_ULL(63)
41
42#ifdef CONFIG_PAGE_TABLE_ISOLATION
43# define X86_CR3_PTI_PCID_USER_BIT 11
44#endif
45
41#else 46#else
42/* 47/*
43 * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save 48 * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e..d3a67fba200a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
163extern struct cpuinfo_x86 boot_cpu_data; 163extern struct cpuinfo_x86 boot_cpu_data;
164extern struct cpuinfo_x86 new_cpu_data; 164extern struct cpuinfo_x86 new_cpu_data;
165 165
166extern struct tss_struct doublefault_tss; 166extern struct x86_hw_tss doublefault_tss;
167extern __u32 cpu_caps_cleared[NCAPINTS]; 167extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
168extern __u32 cpu_caps_set[NCAPINTS]; 168extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
169 169
170#ifdef CONFIG_SMP 170#ifdef CONFIG_SMP
171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); 171DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
253 write_cr3(__sme_pa(pgdir)); 253 write_cr3(__sme_pa(pgdir));
254} 254}
255 255
256/*
257 * Note that while the legacy 'TSS' name comes from 'Task State Segment',
258 * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
259 * unrelated to the task-switch mechanism:
260 */
256#ifdef CONFIG_X86_32 261#ifdef CONFIG_X86_32
257/* This is the TSS defined by the hardware. */ 262/* This is the TSS defined by the hardware. */
258struct x86_hw_tss { 263struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
305struct x86_hw_tss { 310struct x86_hw_tss {
306 u32 reserved1; 311 u32 reserved1;
307 u64 sp0; 312 u64 sp0;
313
314 /*
315 * We store cpu_current_top_of_stack in sp1 so it's always accessible.
316 * Linux does not use ring 1, so sp1 is not otherwise needed.
317 */
308 u64 sp1; 318 u64 sp1;
319
309 u64 sp2; 320 u64 sp2;
310 u64 reserved2; 321 u64 reserved2;
311 u64 ist[7]; 322 u64 ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
323#define IO_BITMAP_BITS 65536 334#define IO_BITMAP_BITS 65536
324#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) 335#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
325#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) 336#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
326#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) 337#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
327#define INVALID_IO_BITMAP_OFFSET 0x8000 338#define INVALID_IO_BITMAP_OFFSET 0x8000
328 339
340struct entry_stack {
341 unsigned long words[64];
342};
343
344struct entry_stack_page {
345 struct entry_stack stack;
346} __aligned(PAGE_SIZE);
347
329struct tss_struct { 348struct tss_struct {
330 /* 349 /*
331 * The hardware state: 350 * The fixed hardware portion. This must not cross a page boundary
351 * at risk of violating the SDM's advice and potentially triggering
352 * errata.
332 */ 353 */
333 struct x86_hw_tss x86_tss; 354 struct x86_hw_tss x86_tss;
334 355
@@ -339,18 +360,9 @@ struct tss_struct {
339 * be within the limit. 360 * be within the limit.
340 */ 361 */
341 unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; 362 unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
363} __aligned(PAGE_SIZE);
342 364
343#ifdef CONFIG_X86_32 365DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
344 /*
345 * Space for the temporary SYSENTER stack.
346 */
347 unsigned long SYSENTER_stack_canary;
348 unsigned long SYSENTER_stack[64];
349#endif
350
351} ____cacheline_aligned;
352
353DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
354 366
355/* 367/*
356 * sizeof(unsigned long) coming from an extra "long" at the end 368 * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
364 376
365#ifdef CONFIG_X86_32 377#ifdef CONFIG_X86_32
366DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); 378DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
379#else
380/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
381#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
367#endif 382#endif
368 383
369/* 384/*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
523static inline void 538static inline void
524native_load_sp0(unsigned long sp0) 539native_load_sp0(unsigned long sp0)
525{ 540{
526 this_cpu_write(cpu_tss.x86_tss.sp0, sp0); 541 this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
527} 542}
528 543
529static inline void native_swapgs(void) 544static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
535 550
536static inline unsigned long current_top_of_stack(void) 551static inline unsigned long current_top_of_stack(void)
537{ 552{
538#ifdef CONFIG_X86_64 553 /*
539 return this_cpu_read_stable(cpu_tss.x86_tss.sp0); 554 * We can't read directly from tss.sp0: sp0 on x86_32 is special in
540#else 555 * and around vm86 mode and sp0 on x86_64 is special because of the
541 /* sp0 on x86_32 is special in and around vm86 mode. */ 556 * entry trampoline.
557 */
542 return this_cpu_read_stable(cpu_current_top_of_stack); 558 return this_cpu_read_stable(cpu_current_top_of_stack);
543#endif
544} 559}
545 560
546static inline bool on_thread_stack(void) 561static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
837 852
838#else 853#else
839/* 854/*
840 * User space process size. 47bits minus one guard page. The guard 855 * User space process size. This is the first address outside the user range.
841 * page is necessary on Intel CPUs: if a SYSCALL instruction is at 856 * There are a few constraints that determine this:
842 * the highest possible canonical userspace address, then that 857 *
843 * syscall will enter the kernel with a non-canonical return 858 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
844 * address, and SYSRET will explode dangerously. We avoid this 859 * address, then that syscall will enter the kernel with a
845 * particular problem by preventing anything from being mapped 860 * non-canonical return address, and SYSRET will explode dangerously.
846 * at the maximum canonical address. 861 * We avoid this particular problem by preventing anything executable
862 * from being mapped at the maximum canonical address.
863 *
864 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
865 * CPUs malfunction if they execute code from the highest canonical page.
866 * They'll speculate right off the end of the canonical space, and
867 * bad things happen. This is worked around in the same way as the
868 * Intel problem.
869 *
870 * With page table isolation enabled, we map the LDT in ... [stay tuned]
847 */ 871 */
848#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) 872#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
849 873
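The guard page the rewritten comment describes is visible directly in the arithmetic: TASK_SIZE_MAX stops one page short of the canonical boundary, so nothing executable, in particular no SYSCALL instruction, can sit on the highest canonical page. A minimal check, assuming 4-level paging (__VIRTUAL_MASK_SHIFT == 47), 4 KiB pages and an LP64 host:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define __VIRTUAL_MASK_SHIFT 47  /* 4-level paging */
#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)

int main(void)
{
        unsigned long canonical_limit = 1UL << __VIRTUAL_MASK_SHIFT;

        printf("TASK_SIZE_MAX   = %#lx\n", TASK_SIZE_MAX);  /* 0x7ffffffff000 */
        printf("guard page size = %lu bytes\n",
               canonical_limit - TASK_SIZE_MAX);            /* one 4 KiB page */
        return 0;
}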
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
new file mode 100644
index 000000000000..0b5ef05b2d2d
--- /dev/null
+++ b/arch/x86/include/asm/pti.h
@@ -0,0 +1,14 @@
1// SPDX-License-Identifier: GPL-2.0
2#ifndef _ASM_X86_PTI_H
3#define _ASM_X86_PTI_H
4#ifndef __ASSEMBLY__
5
6#ifdef CONFIG_PAGE_TABLE_ISOLATION
7extern void pti_init(void);
8extern void pti_check_boottime_disable(void);
9#else
10static inline void pti_check_boottime_disable(void) { }
11#endif
12
13#endif /* __ASSEMBLY__ */
14#endif /* _ASM_X86_PTI_H */
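The new header uses the usual kernel pattern of pairing the real declaration with an empty inline stub, so callers can invoke pti_check_boottime_disable() unconditionally with no #ifdef at the call site. A stand-alone illustration of the idea; the config macro here is a compile-time stand-in, not the real Kconfig symbol:

#include <stdio.h>

#define CONFIG_PAGE_TABLE_ISOLATION 1  /* set to 0 to get the empty stub */

#if CONFIG_PAGE_TABLE_ISOLATION
static void pti_check_boottime_disable(void)
{
        puts("parsing pti=/nopti from the command line");
}
#else
static inline void pti_check_boottime_disable(void) { }
#endif

int main(void)
{
        pti_check_boottime_disable();  /* compiles either way */
        return 0;
}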
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 8da111b3c342..f73706878772 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
16 STACK_TYPE_TASK, 16 STACK_TYPE_TASK,
17 STACK_TYPE_IRQ, 17 STACK_TYPE_IRQ,
18 STACK_TYPE_SOFTIRQ, 18 STACK_TYPE_SOFTIRQ,
19 STACK_TYPE_ENTRY,
19 STACK_TYPE_EXCEPTION, 20 STACK_TYPE_EXCEPTION,
20 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, 21 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
21}; 22};
@@ -28,6 +29,8 @@ struct stack_info {
28bool in_task_stack(unsigned long *stack, struct task_struct *task, 29bool in_task_stack(unsigned long *stack, struct task_struct *task,
29 struct stack_info *info); 30 struct stack_info *info);
30 31
32bool in_entry_stack(unsigned long *stack, struct stack_info *info);
33
31int get_stack_info(unsigned long *stack, struct task_struct *task, 34int get_stack_info(unsigned long *stack, struct task_struct *task,
32 struct stack_info *info, unsigned long *visit_mask); 35 struct stack_info *info, unsigned long *visit_mask);
33 36
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9..eb5f7999a893 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
16 struct tss_struct *tss); 16 struct tss_struct *tss);
17 17
18/* This runs on the previous thread's stack. */ 18/* This runs on the previous thread's stack. */
19static inline void prepare_switch_to(struct task_struct *prev, 19static inline void prepare_switch_to(struct task_struct *next)
20 struct task_struct *next)
21{ 20{
22#ifdef CONFIG_VMAP_STACK 21#ifdef CONFIG_VMAP_STACK
23 /* 22 /*
@@ -70,7 +69,7 @@ struct fork_frame {
70 69
71#define switch_to(prev, next, last) \ 70#define switch_to(prev, next, last) \
72do { \ 71do { \
73 prepare_switch_to(prev, next); \ 72 prepare_switch_to(next); \
74 \ 73 \
75 ((last) = __switch_to_asm((prev), (next))); \ 74 ((last) = __switch_to_asm((prev), (next))); \
76} while (0) 75} while (0)
@@ -79,10 +78,10 @@ do { \
79static inline void refresh_sysenter_cs(struct thread_struct *thread) 78static inline void refresh_sysenter_cs(struct thread_struct *thread)
80{ 79{
81 /* Only happens when SEP is enabled, no need to test "SEP"arately: */ 80 /* Only happens when SEP is enabled, no need to test "SEP"arately: */
82 if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) 81 if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
83 return; 82 return;
84 83
85 this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); 84 this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
86 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); 85 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
87} 86}
88#endif 87#endif
@@ -90,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
90/* This is used when switching tasks or entering/exiting vm86 mode. */ 89/* This is used when switching tasks or entering/exiting vm86 mode. */
91static inline void update_sp0(struct task_struct *task) 90static inline void update_sp0(struct task_struct *task)
92{ 91{
92 /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
93#ifdef CONFIG_X86_32 93#ifdef CONFIG_X86_32
94 load_sp0(task->thread.sp0); 94 load_sp0(task->thread.sp0);
95#else 95#else
96 load_sp0(task_top_of_stack(task)); 96 if (static_cpu_has(X86_FEATURE_XENPV))
97 load_sp0(task_top_of_stack(task));
97#endif 98#endif
98} 99}
99 100
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc5..00223333821a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
207#else /* !__ASSEMBLY__ */ 207#else /* !__ASSEMBLY__ */
208 208
209#ifdef CONFIG_X86_64 209#ifdef CONFIG_X86_64
210# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) 210# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
211#endif 211#endif
212 212
213#endif 213#endif
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 877b5c1a1b12..d33e4a26dc7e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -9,70 +9,130 @@
9#include <asm/cpufeature.h> 9#include <asm/cpufeature.h>
10#include <asm/special_insns.h> 10#include <asm/special_insns.h>
11#include <asm/smp.h> 11#include <asm/smp.h>
12#include <asm/invpcid.h>
13#include <asm/pti.h>
14#include <asm/processor-flags.h>
12 15
13static inline void __invpcid(unsigned long pcid, unsigned long addr, 16/*
14 unsigned long type) 17 * The x86 feature is called PCID (Process Context IDentifier). It is similar
15{ 18 * to what is traditionally called ASID on the RISC processors.
16 struct { u64 d[2]; } desc = { { pcid, addr } }; 19 *
20 * We don't use the traditional ASID implementation, where each process/mm gets
21 * its own ASID and flush/restart when we run out of ASID space.
22 *
23 * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
24 * that came by on this CPU, allowing cheaper switch_mm between processes on
25 * this CPU.
26 *
27 * We end up with different spaces for different things. To avoid confusion we
28 * use different names for each of them:
29 *
30 * ASID - [0, TLB_NR_DYN_ASIDS-1]
31 * the canonical identifier for an mm
32 *
33 * kPCID - [1, TLB_NR_DYN_ASIDS]
34 * the value we write into the PCID part of CR3; corresponds to the
35 * ASID+1, because PCID 0 is special.
36 *
37 * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
38 * for KPTI each mm has two address spaces and thus needs two
39 * PCID values, but we can still do with a single ASID denomination
40 * for each mm. Corresponds to kPCID + 2048.
41 *
42 */
17 43
18 /* 44/* There are 12 bits of space for ASIDs in CR3 */
19 * The memory clobber is because the whole point is to invalidate 45#define CR3_HW_ASID_BITS 12
20 * stale TLB entries and, especially if we're flushing global
21 * mappings, we don't want the compiler to reorder any subsequent
22 * memory accesses before the TLB flush.
23 *
24 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
25 * invpcid (%rcx), %rax in long mode.
26 */
27 asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
28 : : "m" (desc), "a" (type), "c" (&desc) : "memory");
29}
30 46
31#define INVPCID_TYPE_INDIV_ADDR 0 47/*
32#define INVPCID_TYPE_SINGLE_CTXT 1 48 * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
33#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 49 * user/kernel switches
34#define INVPCID_TYPE_ALL_NON_GLOBAL 3 50 */
51#ifdef CONFIG_PAGE_TABLE_ISOLATION
52# define PTI_CONSUMED_PCID_BITS 1
53#else
54# define PTI_CONSUMED_PCID_BITS 0
55#endif
35 56
36/* Flush all mappings for a given pcid and addr, not including globals. */ 57#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
37static inline void invpcid_flush_one(unsigned long pcid, 58
38 unsigned long addr) 59/*
39{ 60 * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
40 __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); 61 * for them being zero-based. Another -1 is because PCID 0 is reserved for
41} 62 * use by non-PCID-aware users.
63 */
64#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
65
66/*
67 * 6 because 6 should be plenty and struct tlb_state will fit in two cache
68 * lines.
69 */
70#define TLB_NR_DYN_ASIDS 6
42 71
43/* Flush all mappings for a given PCID, not including globals. */ 72/*
44static inline void invpcid_flush_single_context(unsigned long pcid) 73 * Given @asid, compute kPCID
74 */
75static inline u16 kern_pcid(u16 asid)
45{ 76{
46 __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); 77 VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
78
79#ifdef CONFIG_PAGE_TABLE_ISOLATION
80 /*
81 * Make sure that the dynamic ASID space does not conflict with the
82 * bit we are using to switch between user and kernel ASIDs.
83 */
84 BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
85
86 /*
87 * The ASID being passed in here should have respected the
88 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
89 */
90 VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
91#endif
92 /*
93 * The dynamically-assigned ASIDs that get passed in are small
94 * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set,
95 * so do not bother to clear it.
96 *
97 * If PCID is on, ASID-aware code paths put the ASID+1 into the
98 * PCID bits. This serves two purposes. It prevents a nasty
99 * situation in which PCID-unaware code saves CR3, loads some other
100 * value (with PCID == 0), and then restores CR3, thus corrupting
101 * the TLB for ASID 0 if the saved ASID was nonzero. It also means
102 * that any bugs involving loading a PCID-enabled CR3 with
103 * CR4.PCIDE off will trigger deterministically.
104 */
105 return asid + 1;
47} 106}
48 107
49/* Flush all mappings, including globals, for all PCIDs. */ 108/*
50static inline void invpcid_flush_all(void) 109 * Given @asid, compute uPCID
110 */
111static inline u16 user_pcid(u16 asid)
51{ 112{
52 __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); 113 u16 ret = kern_pcid(asid);
114#ifdef CONFIG_PAGE_TABLE_ISOLATION
115 ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
116#endif
117 return ret;
53} 118}
54 119
55/* Flush all mappings for all PCIDs except globals. */ 120struct pgd_t;
56static inline void invpcid_flush_all_nonglobals(void) 121static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
57{ 122{
58 __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); 123 if (static_cpu_has(X86_FEATURE_PCID)) {
124 return __sme_pa(pgd) | kern_pcid(asid);
125 } else {
126 VM_WARN_ON_ONCE(asid != 0);
127 return __sme_pa(pgd);
128 }
59} 129}
60 130
61static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) 131static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
62{ 132{
63 u64 new_tlb_gen; 133 VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
64 134 VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
65 /* 135 return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
66 * Bump the generation count. This also serves as a full barrier
67 * that synchronizes with switch_mm(): callers are required to order
68 * their read of mm_cpumask after their writes to the paging
69 * structures.
70 */
71 smp_mb__before_atomic();
72 new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
73 smp_mb__after_atomic();
74
75 return new_tlb_gen;
76} 136}
77 137
78#ifdef CONFIG_PARAVIRT 138#ifdef CONFIG_PARAVIRT
@@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
99 return !static_cpu_has(X86_FEATURE_PCID); 159 return !static_cpu_has(X86_FEATURE_PCID);
100} 160}
101 161
102/*
103 * 6 because 6 should be plenty and struct tlb_state will fit in
104 * two cache lines.
105 */
106#define TLB_NR_DYN_ASIDS 6
107
108struct tlb_context { 162struct tlb_context {
109 u64 ctx_id; 163 u64 ctx_id;
110 u64 tlb_gen; 164 u64 tlb_gen;
@@ -139,6 +193,24 @@ struct tlb_state {
139 bool is_lazy; 193 bool is_lazy;
140 194
141 /* 195 /*
196 * If set we changed the page tables in such a way that we
197 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
198 * This tells us to go invalidate all the non-loaded ctxs[]
199 * on the next context switch.
200 *
201 * The current ctx was kept up-to-date as it ran and does not
202 * need to be invalidated.
203 */
204 bool invalidate_other;
205
206 /*
207 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
208 * the corresponding user PCID needs a flush next time we
209 * switch to it; see SWITCH_TO_USER_CR3.
210 */
211 unsigned short user_pcid_flush_mask;
212
213 /*
142 * Access to this CR4 shadow and to H/W CR4 is protected by 214 * Access to this CR4 shadow and to H/W CR4 is protected by
143 * disabling interrupts when modifying either one. 215 * disabling interrupts when modifying either one.
144 */ 216 */
@@ -219,6 +291,14 @@ static inline unsigned long cr4_read_shadow(void)
219} 291}
220 292
221/* 293/*
294 * Mark all other ASIDs as invalid; the current one is preserved.
295 */
296static inline void invalidate_other_asid(void)
297{
298 this_cpu_write(cpu_tlbstate.invalidate_other, true);
299}
300
301/*
222 * Save some of cr4 feature set we're using (e.g. Pentium 4MB 302 * enable and PPro Global page enable), so that any CPUs that boot
223 * enable and PPro Global page enable), so that any CPUs that boot 303 * up after us can get the correct flags. This should only be used
224 * up after us can get the correct flags. This should only be used 304 * up after us can get the correct flags. This should only be used
@@ -237,37 +317,63 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
237 317
238extern void initialize_tlbstate_and_flush(void); 318extern void initialize_tlbstate_and_flush(void);
239 319
240static inline void __native_flush_tlb(void) 320/*
321 * Given an ASID, flush the corresponding user ASID. We can delay this
322 * until the next time we switch to it.
323 *
324 * See SWITCH_TO_USER_CR3.
325 */
326static inline void invalidate_user_asid(u16 asid)
241{ 327{
328 /* There is no user ASID if address space separation is off */
329 if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
330 return;
331
242 /* 332 /*
243 * If current->mm == NULL then we borrow a mm which may change during a 333 * We only have a single ASID if PCID is off and the CR3
244 * task switch and therefore we must not be preempted while we write CR3 334 * write will have flushed it.
245 * back:
246 */ 335 */
247 preempt_disable(); 336 if (!cpu_feature_enabled(X86_FEATURE_PCID))
248 native_write_cr3(__native_read_cr3()); 337 return;
249 preempt_enable(); 338
339 if (!static_cpu_has(X86_FEATURE_PTI))
340 return;
341
342 __set_bit(kern_pcid(asid),
343 (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
250} 344}
251 345
252static inline void __native_flush_tlb_global_irq_disabled(void) 346/*
347 * flush the entire current user mapping
348 */
349static inline void __native_flush_tlb(void)
253{ 350{
254 unsigned long cr4; 351 /*
352 * Preemption or interrupts must be disabled to protect the access
353 * to the per CPU variable and to prevent being preempted between
354 * read_cr3() and write_cr3().
355 */
356 WARN_ON_ONCE(preemptible());
255 357
256 cr4 = this_cpu_read(cpu_tlbstate.cr4); 358 invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
257 /* clear PGE */ 359
258 native_write_cr4(cr4 & ~X86_CR4_PGE); 360 /* If current->mm == NULL then the read_cr3() "borrows" an mm */
259 /* write old PGE again and flush TLBs */ 361 native_write_cr3(__native_read_cr3());
260 native_write_cr4(cr4);
261} 362}
262 363
364/*
365 * flush everything
366 */
263static inline void __native_flush_tlb_global(void) 367static inline void __native_flush_tlb_global(void)
264{ 368{
265 unsigned long flags; 369 unsigned long cr4, flags;
266 370
267 if (static_cpu_has(X86_FEATURE_INVPCID)) { 371 if (static_cpu_has(X86_FEATURE_INVPCID)) {
268 /* 372 /*
269 * Using INVPCID is considerably faster than a pair of writes 373 * Using INVPCID is considerably faster than a pair of writes
270 * to CR4 sandwiched inside an IRQ flag save/restore. 374 * to CR4 sandwiched inside an IRQ flag save/restore.
375 *
376 * Note, this works with CR4.PCIDE=0 or 1.
271 */ 377 */
272 invpcid_flush_all(); 378 invpcid_flush_all();
273 return; 379 return;
@@ -280,36 +386,69 @@ static inline void __native_flush_tlb_global(void)
280 */ 386 */
281 raw_local_irq_save(flags); 387 raw_local_irq_save(flags);
282 388
283 __native_flush_tlb_global_irq_disabled(); 389 cr4 = this_cpu_read(cpu_tlbstate.cr4);
390 /* toggle PGE */
391 native_write_cr4(cr4 ^ X86_CR4_PGE);
392 /* write old PGE again and flush TLBs */
393 native_write_cr4(cr4);
284 394
285 raw_local_irq_restore(flags); 395 raw_local_irq_restore(flags);
286} 396}
287 397
398/*
399 * flush one page in the user mapping
400 */
288static inline void __native_flush_tlb_single(unsigned long addr) 401static inline void __native_flush_tlb_single(unsigned long addr)
289{ 402{
403 u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
404
290 asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); 405 asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
406
407 if (!static_cpu_has(X86_FEATURE_PTI))
408 return;
409
410 /*
411 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
412 * Just use invalidate_user_asid() in case we are called early.
413 */
414 if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
415 invalidate_user_asid(loaded_mm_asid);
416 else
417 invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
291} 418}
292 419
420/*
421 * flush everything
422 */
293static inline void __flush_tlb_all(void) 423static inline void __flush_tlb_all(void)
294{ 424{
295 if (boot_cpu_has(X86_FEATURE_PGE)) 425 if (boot_cpu_has(X86_FEATURE_PGE)) {
296 __flush_tlb_global(); 426 __flush_tlb_global();
297 else 427 } else {
428 /*
429 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
430 */
298 __flush_tlb(); 431 __flush_tlb();
299 432 }
300 /*
301 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
302 * we'd end up flushing kernel translations for the current ASID but
303 * we might fail to flush kernel translations for other cached ASIDs.
304 *
305 * To avoid this issue, we force PCID off if PGE is off.
306 */
307} 433}
308 434
435/*
436 * flush one page in the kernel mapping
437 */
309static inline void __flush_tlb_one(unsigned long addr) 438static inline void __flush_tlb_one(unsigned long addr)
310{ 439{
311 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); 440 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
312 __flush_tlb_single(addr); 441 __flush_tlb_single(addr);
442
443 if (!static_cpu_has(X86_FEATURE_PTI))
444 return;
445
446 /*
447 * __flush_tlb_single() will have cleared the TLB entry for this ASID,
448 * but since kernel space is replicated across all ASIDs, we must also
449 * invalidate all others.
450 */
451 invalidate_other_asid();
313} 452}
314 453
315#define TLB_FLUSH_ALL -1UL 454#define TLB_FLUSH_ALL -1UL
@@ -370,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
370void native_flush_tlb_others(const struct cpumask *cpumask, 509void native_flush_tlb_others(const struct cpumask *cpumask,
371 const struct flush_tlb_info *info); 510 const struct flush_tlb_info *info);
372 511
512static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
513{
514 /*
515 * Bump the generation count. This also serves as a full barrier
516 * that synchronizes with switch_mm(): callers are required to order
517 * their read of mm_cpumask after their writes to the paging
518 * structures.
519 */
520 return atomic64_inc_return(&mm->context.tlb_gen);
521}
522
373static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, 523static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
374 struct mm_struct *mm) 524 struct mm_struct *mm)
375{ 525{
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 84b9ec0c1bc0..22647a642e98 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
283DECLARE_EVENT_CLASS(vector_activate, 283DECLARE_EVENT_CLASS(vector_activate,
284 284
285 TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, 285 TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
286 bool early), 286 bool reserve),
287 287
288 TP_ARGS(irq, is_managed, can_reserve, early), 288 TP_ARGS(irq, is_managed, can_reserve, reserve),
289 289
290 TP_STRUCT__entry( 290 TP_STRUCT__entry(
291 __field( unsigned int, irq ) 291 __field( unsigned int, irq )
292 __field( bool, is_managed ) 292 __field( bool, is_managed )
293 __field( bool, can_reserve ) 293 __field( bool, can_reserve )
294 __field( bool, early ) 294 __field( bool, reserve )
295 ), 295 ),
296 296
297 TP_fast_assign( 297 TP_fast_assign(
298 __entry->irq = irq; 298 __entry->irq = irq;
299 __entry->is_managed = is_managed; 299 __entry->is_managed = is_managed;
300 __entry->can_reserve = can_reserve; 300 __entry->can_reserve = can_reserve;
301 __entry->early = early; 301 __entry->reserve = reserve;
302 ), 302 ),
303 303
304 TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", 304 TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
305 __entry->irq, __entry->is_managed, __entry->can_reserve, 305 __entry->irq, __entry->is_managed, __entry->can_reserve,
306 __entry->early) 306 __entry->reserve)
307); 307);
308 308
309#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ 309#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \
310DEFINE_EVENT_FN(vector_activate, name, \ 310DEFINE_EVENT_FN(vector_activate, name, \
311 TP_PROTO(unsigned int irq, bool is_managed, \ 311 TP_PROTO(unsigned int irq, bool is_managed, \
312 bool can_reserve, bool early), \ 312 bool can_reserve, bool reserve), \
313 TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ 313 TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \
314 314
315DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); 315DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
316DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); 316DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff68..31051f35cbb7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
75dotraplinkage void do_stack_segment(struct pt_regs *, long); 75dotraplinkage void do_stack_segment(struct pt_regs *, long);
76#ifdef CONFIG_X86_64 76#ifdef CONFIG_X86_64
77dotraplinkage void do_double_fault(struct pt_regs *, long); 77dotraplinkage void do_double_fault(struct pt_regs *, long);
78asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
79#endif 78#endif
80dotraplinkage void do_general_protection(struct pt_regs *, long); 79dotraplinkage void do_general_protection(struct pt_regs *, long);
81dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); 80dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9cc6fe1fc6f..1f86e1b0a5cd 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8#include <asm/stacktrace.h> 8#include <asm/stacktrace.h>
9 9
10#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
11#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
12
10struct unwind_state { 13struct unwind_state {
11 struct stack_info stack_info; 14 struct stack_info stack_info;
12 unsigned long stack_mask; 15 unsigned long stack_mask;
@@ -52,15 +55,28 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
52} 55}
53 56
54#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) 57#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
55static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 58/*
59 * If 'partial' returns true, only the iret frame registers are valid.
60 */
61static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
62 bool *partial)
56{ 63{
57 if (unwind_done(state)) 64 if (unwind_done(state))
58 return NULL; 65 return NULL;
59 66
67 if (partial) {
68#ifdef CONFIG_UNWINDER_ORC
69 *partial = !state->full_regs;
70#else
71 *partial = false;
72#endif
73 }
74
60 return state->regs; 75 return state->regs;
61} 76}
62#else 77#else
63static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 78static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
79 bool *partial)
64{ 80{
65 return NULL; 81 return NULL;
66} 82}
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d9a7c659009c..b986b2ca688a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
7 7
8#ifdef CONFIG_X86_VSYSCALL_EMULATION 8#ifdef CONFIG_X86_VSYSCALL_EMULATION
9extern void map_vsyscall(void); 9extern void map_vsyscall(void);
10extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
10 11
11/* 12/*
12 * Called on instruction fetch fault in vsyscall page. 13 * Called on instruction fetch fault in vsyscall page.
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 7cb282e9e587..bfd882617613 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
44#include <asm/page.h> 44#include <asm/page.h>
45#include <asm/pgtable.h> 45#include <asm/pgtable.h>
46#include <asm/smap.h> 46#include <asm/smap.h>
47#include <asm/nospec-branch.h>
47 48
48#include <xen/interface/xen.h> 49#include <xen/interface/xen.h>
49#include <xen/interface/sched.h> 50#include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
217 __HYPERCALL_5ARG(a1, a2, a3, a4, a5); 218 __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
218 219
219 stac(); 220 stac();
220 asm volatile("call *%[call]" 221 asm volatile(CALL_NOSPEC
221 : __HYPERCALL_5PARAM 222 : __HYPERCALL_5PARAM
222 : [call] "a" (&hypercall_page[call]) 223 : [thunk_target] "a" (&hypercall_page[call])
223 : __HYPERCALL_CLOBBER5); 224 : __HYPERCALL_CLOBBER5);
224 clac(); 225 clac();
225 226
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 7e1e730396ae..bcba3c643e63 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -78,7 +78,12 @@
78#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) 78#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
79#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ 79#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
80#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) 80#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
81#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ 81
82#define X86_CR3_PCID_BITS 12
83#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
84
85#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
86#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
82 87
83/* 88/*
84 * Intel CPU features in CR4 89 * Intel CPU features in CR4
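Deriving X86_CR3_PCID_MASK from X86_CR3_PCID_BITS instead of hard-coding 0xfff keeps the mask and the bit count in sync, and the NOFLUSH bit occupies the very top of CR3. A quick check of the resulting constants:

#include <stdio.h>

#define X86_CR3_PCID_BITS 12
#define X86_CR3_PCID_MASK ((1UL << X86_CR3_PCID_BITS) - 1)
#define X86_CR3_PCID_NOFLUSH (1ULL << 63)

int main(void)
{
        printf("PCID mask = %#lx\n", X86_CR3_PCID_MASK);      /* 0xfff */
        printf("NOFLUSH   = %#llx\n", X86_CR3_PCID_NOFLUSH);  /* bit 63 */
        return 0;
}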
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index dbaf14d69ebd..4817d743c263 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -344,9 +344,12 @@ done:
344static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) 344static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
345{ 345{
346 unsigned long flags; 346 unsigned long flags;
347 int i;
347 348
348 if (instr[0] != 0x90) 349 for (i = 0; i < a->padlen; i++) {
349 return; 350 if (instr[i] != 0x90)
351 return;
352 }
350 353
351 local_irq_save(flags); 354 local_irq_save(flags);
352 add_nops(instr + (a->instrlen - a->padlen), a->padlen); 355 add_nops(instr + (a->instrlen - a->padlen), a->padlen);
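The fix makes optimize_nops() check every byte of the padding instead of only the first: padding may consist of NOPs in front of a real payload, and rewriting it in that case would clobber the payload. A stand-alone sketch of that check; the function name is illustrative:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool pad_is_all_nops(const unsigned char *instr, size_t padlen)
{
        for (size_t i = 0; i < padlen; i++)
                if (instr[i] != 0x90)  /* 0x90 is the single-byte NOP */
                        return false;
        return true;
}

int main(void)
{
        unsigned char ok[]  = { 0x90, 0x90, 0x90 };
        unsigned char bad[] = { 0x90, 0xcc, 0x90 };  /* 2nd byte not a NOP */

        printf("%d %d\n", pad_is_all_nops(ok, 3), pad_is_all_nops(bad, 3));
        return 0;
}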
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e272f3ea984..880441f24146 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg)
2626 apic_verbosity = APIC_DEBUG; 2626 apic_verbosity = APIC_DEBUG;
2627 else if (strcmp("verbose", arg) == 0) 2627 else if (strcmp("verbose", arg) == 0)
2628 apic_verbosity = APIC_VERBOSE; 2628 apic_verbosity = APIC_VERBOSE;
2629#ifdef CONFIG_X86_64
2629 else { 2630 else {
2630 pr_warning("APIC Verbosity level %s not recognised" 2631 pr_warning("APIC Verbosity level %s not recognised"
2631 " use apic=verbose or apic=debug\n", arg); 2632 " use apic=verbose or apic=debug\n", arg);
2632 return -EINVAL; 2633 return -EINVAL;
2633 } 2634 }
2635#endif
2634 2636
2635 return 0; 2637 return 0;
2636} 2638}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index aa85690e9b64..25a87028cb3f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = {
151 .apic_id_valid = default_apic_id_valid, 151 .apic_id_valid = default_apic_id_valid,
152 .apic_id_registered = flat_apic_id_registered, 152 .apic_id_registered = flat_apic_id_registered,
153 153
154 .irq_delivery_mode = dest_LowestPrio, 154 .irq_delivery_mode = dest_Fixed,
155 .irq_dest_mode = 1, /* logical */ 155 .irq_dest_mode = 1, /* logical */
156 156
157 .disable_esr = 0, 157 .disable_esr = 0,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 7b659c4480c9..5078b5ce63a7 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = {
110 .apic_id_valid = default_apic_id_valid, 110 .apic_id_valid = default_apic_id_valid,
111 .apic_id_registered = noop_apic_id_registered, 111 .apic_id_registered = noop_apic_id_registered,
112 112
113 .irq_delivery_mode = dest_LowestPrio, 113 .irq_delivery_mode = dest_Fixed,
114 /* logical delivery broadcast to all CPUs: */ 114 /* logical delivery broadcast to all CPUs: */
115 .irq_dest_mode = 1, 115 .irq_dest_mode = 1,
116 116
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 201579dc5242..8a7963421460 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2988} 2988}
2989 2989
2990int mp_irqdomain_activate(struct irq_domain *domain, 2990int mp_irqdomain_activate(struct irq_domain *domain,
2991 struct irq_data *irq_data, bool early) 2991 struct irq_data *irq_data, bool reserve)
2992{ 2992{
2993 unsigned long flags; 2993 unsigned long flags;
2994 2994
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 9b18be764422..ce503c99f5c4 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
39 ((apic->irq_dest_mode == 0) ? 39 ((apic->irq_dest_mode == 0) ?
40 MSI_ADDR_DEST_MODE_PHYSICAL : 40 MSI_ADDR_DEST_MODE_PHYSICAL :
41 MSI_ADDR_DEST_MODE_LOGICAL) | 41 MSI_ADDR_DEST_MODE_LOGICAL) |
42 ((apic->irq_delivery_mode != dest_LowestPrio) ? 42 MSI_ADDR_REDIRECTION_CPU |
43 MSI_ADDR_REDIRECTION_CPU :
44 MSI_ADDR_REDIRECTION_LOWPRI) |
45 MSI_ADDR_DEST_ID(cfg->dest_apicid); 43 MSI_ADDR_DEST_ID(cfg->dest_apicid);
46 44
47 msg->data = 45 msg->data =
48 MSI_DATA_TRIGGER_EDGE | 46 MSI_DATA_TRIGGER_EDGE |
49 MSI_DATA_LEVEL_ASSERT | 47 MSI_DATA_LEVEL_ASSERT |
50 ((apic->irq_delivery_mode != dest_LowestPrio) ? 48 MSI_DATA_DELIVERY_FIXED |
51 MSI_DATA_DELIVERY_FIXED :
52 MSI_DATA_DELIVERY_LOWPRI) |
53 MSI_DATA_VECTOR(cfg->vector); 49 MSI_DATA_VECTOR(cfg->vector);
54} 50}
55 51
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fa22017de806..02e8acb134f8 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = {
105 .apic_id_valid = default_apic_id_valid, 105 .apic_id_valid = default_apic_id_valid,
106 .apic_id_registered = default_apic_id_registered, 106 .apic_id_registered = default_apic_id_registered,
107 107
108 .irq_delivery_mode = dest_LowestPrio, 108 .irq_delivery_mode = dest_Fixed,
109 /* logical delivery broadcast to all CPUs: */ 109 /* logical delivery broadcast to all CPUs: */
110 .irq_dest_mode = 1, 110 .irq_dest_mode = 1,
111 111
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 750449152b04..f8b03bb8e725 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
184 irq_matrix_reserve(vector_matrix); 184 irq_matrix_reserve(vector_matrix);
185 apicd->can_reserve = true; 185 apicd->can_reserve = true;
186 apicd->has_reserved = true; 186 apicd->has_reserved = true;
187 irqd_set_can_reserve(irqd);
187 trace_vector_reserve(irqd->irq, 0); 188 trace_vector_reserve(irqd->irq, 0);
188 vector_assign_managed_shutdown(irqd); 189 vector_assign_managed_shutdown(irqd);
189} 190}
@@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
368 int ret; 369 int ret;
369 370
370 ret = assign_irq_vector_any_locked(irqd); 371 ret = assign_irq_vector_any_locked(irqd);
371 if (!ret) 372 if (!ret) {
372 apicd->has_reserved = false; 373 apicd->has_reserved = false;
374 /*
375 * Core might have disabled reservation mode after
376 * allocating the irq descriptor. Ideally this should
377 * happen before allocation time, but that would require
378 * completely convoluted ways of transporting that
379 * information.
380 */
381 if (!irqd_can_reserve(irqd))
382 apicd->can_reserve = false;
383 }
373 return ret; 384 return ret;
374} 385}
375 386
@@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
398} 409}
399 410
400static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, 411static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
401 bool early) 412 bool reserve)
402{ 413{
403 struct apic_chip_data *apicd = apic_chip_data(irqd); 414 struct apic_chip_data *apicd = apic_chip_data(irqd);
404 unsigned long flags; 415 unsigned long flags;
405 int ret = 0; 416 int ret = 0;
406 417
407 trace_vector_activate(irqd->irq, apicd->is_managed, 418 trace_vector_activate(irqd->irq, apicd->is_managed,
408 apicd->can_reserve, early); 419 apicd->can_reserve, reserve);
409 420
410 /* Nothing to do for fixed assigned vectors */ 421 /* Nothing to do for fixed assigned vectors */
411 if (!apicd->can_reserve && !apicd->is_managed) 422 if (!apicd->can_reserve && !apicd->is_managed)
412 return 0; 423 return 0;
413 424
414 raw_spin_lock_irqsave(&vector_lock, flags); 425 raw_spin_lock_irqsave(&vector_lock, flags);
415 if (early || irqd_is_managed_and_shutdown(irqd)) 426 if (reserve || irqd_is_managed_and_shutdown(irqd))
416 vector_assign_managed_shutdown(irqd); 427 vector_assign_managed_shutdown(irqd);
417 else if (apicd->is_managed) 428 else if (apicd->is_managed)
418 ret = activate_managed(irqd); 429 ret = activate_managed(irqd);
@@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
478 } else { 489 } else {
479 /* Release the vector */ 490 /* Release the vector */
480 apicd->can_reserve = true; 491 apicd->can_reserve = true;
492 irqd_set_can_reserve(irqd);
481 clear_irq_vector(irqd); 493 clear_irq_vector(irqd);
482 realloc = true; 494 realloc = true;
483 } 495 }
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 622f13ca8a94..8b04234e010b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
184 .apic_id_valid = x2apic_apic_id_valid, 184 .apic_id_valid = x2apic_apic_id_valid,
185 .apic_id_registered = x2apic_apic_id_registered, 185 .apic_id_registered = x2apic_apic_id_registered,
186 186
187 .irq_delivery_mode = dest_LowestPrio, 187 .irq_delivery_mode = dest_Fixed,
188 .irq_dest_mode = 1, /* logical */ 188 .irq_dest_mode = 1, /* logical */
189 189
190 .disable_esr = 0, 190 .disable_esr = 0,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8ea78275480d..76417a9aab73 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -17,6 +17,7 @@
17#include <asm/sigframe.h> 17#include <asm/sigframe.h>
18#include <asm/bootparam.h> 18#include <asm/bootparam.h>
19#include <asm/suspend.h> 19#include <asm/suspend.h>
20#include <asm/tlbflush.h>
20 21
21#ifdef CONFIG_XEN 22#ifdef CONFIG_XEN
22#include <xen/interface/xen.h> 23#include <xen/interface/xen.h>
@@ -93,4 +94,13 @@ void common(void) {
93 94
94 BLANK(); 95 BLANK();
95 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); 96 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
97
98 /* TLB state for the entry code */
99 OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
100
101 /* Layout info for cpu_entry_area */
102 OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
103 OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
104 OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
105 DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
96} 106}
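asm-offsets.c exists to export C structure layout to assembly: each OFFSET()/DEFINE() entry becomes a numeric constant the entry code can reference. A minimal illustration of what such an entry communicates, reusing the entry-stack types from this series; the real kernel derives these via compiler-generated asm, not printf, and the second constant name is made up:

#include <stddef.h>
#include <stdio.h>

struct entry_stack { unsigned long words[64]; };

struct entry_stack_page {
        struct entry_stack stack;
};

int main(void)
{
        /* What DEFINE(SIZEOF_entry_stack, ...) ultimately provides: */
        printf("#define SIZEOF_entry_stack %zu\n", sizeof(struct entry_stack));
        /* And an OFFSET()-style member offset: */
        printf("#define ENTRY_STACK_PAGE_stack %zu\n",
               offsetof(struct entry_stack_page, stack));
        return 0;
}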
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dedf428b20b6..fa1261eefa16 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
47 BLANK(); 47 BLANK();
48 48
49 /* Offset from the sysenter stack to tss.sp0 */ 49 /* Offset from the sysenter stack to tss.sp0 */
50 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - 50 DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
51 offsetofend(struct tss_struct, SYSENTER_stack)); 51 offsetofend(struct cpu_entry_area, entry_stack_page.stack));
52
53 /* Offset from cpu_tss to SYSENTER_stack */
54 OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
55 /* Size of SYSENTER_stack */
56 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
57 52
58#ifdef CONFIG_CC_STACKPROTECTOR 53#ifdef CONFIG_CC_STACKPROTECTOR
59 BLANK(); 54 BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9d..bf51e51d808d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
23#ifdef CONFIG_PARAVIRT 23#ifdef CONFIG_PARAVIRT
24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); 24 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 25 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
26#ifdef CONFIG_DEBUG_ENTRY
27 OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
28#endif
26 BLANK(); 29 BLANK();
27#endif 30#endif
28 31
@@ -63,6 +66,7 @@ int main(void)
63 66
64 OFFSET(TSS_ist, tss_struct, x86_tss.ist); 67 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
65 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); 68 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
69 OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
66 BLANK(); 70 BLANK();
67 71
68#ifdef CONFIG_CC_STACKPROTECTOR 72#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bcb75dc97d44..ea831c858195 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
829 set_cpu_cap(c, X86_FEATURE_K8); 829 set_cpu_cap(c, X86_FEATURE_K8);
830 830
831 if (cpu_has(c, X86_FEATURE_XMM2)) { 831 if (cpu_has(c, X86_FEATURE_XMM2)) {
832 /* MFENCE stops RDTSC speculation */ 832 unsigned long long val;
833 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); 833 int ret;
834
835 /*
836 * A serializing LFENCE has less overhead than MFENCE, so
837 * use it for execution serialization. On families which
838 * don't have that MSR, LFENCE is already serializing.
839 * msr_set_bit() uses the safe accessors, too, even if the MSR
840 * is not present.
841 */
842 msr_set_bit(MSR_F10H_DECFG,
843 MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
844
845 /*
846 * Verify that the MSR write was successful (could be running
847 * under a hypervisor) and only then assume that LFENCE is
848 * serializing.
849 */
850 ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
851 if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
852 /* A serializing LFENCE stops RDTSC speculation */
853 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
854 } else {
855 /* MFENCE stops RDTSC speculation */
856 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
857 }
834 } 858 }
835 859
836 /* 860 /*
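The hunk uses a set-then-verify pattern: write the MSR bit with the fault-safe accessor, read the MSR back, and only trust LFENCE if the bit actually stuck, because a hypervisor may silently discard the write. A toy model of the same control flow with the MSR replaced by a plain variable; the bit position (bit 1) is an assumption taken from the MSR definitions elsewhere in the tree:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MSR_F10H_DECFG_LFENCE_SERIALIZE (1ULL << 1)  /* assumed bit 1 */

static uint64_t fake_decfg_msr;  /* a hypervisor might ignore writes */

int main(void)
{
        /* msr_set_bit()-style write ... */
        fake_decfg_msr |= MSR_F10H_DECFG_LFENCE_SERIALIZE;

        /* ... then read back; only trust LFENCE if the bit stuck. */
        bool lfence_ok = fake_decfg_msr & MSR_F10H_DECFG_LFENCE_SERIALIZE;

        printf("RDTSC serialized with %s\n", lfence_ok ? "LFENCE" : "MFENCE");
        return 0;
}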
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ba0b2424c9b0..e4dc26185aa7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,10 @@
10 */ 10 */
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/utsname.h> 12#include <linux/utsname.h>
13#include <linux/cpu.h>
14
15#include <asm/nospec-branch.h>
16#include <asm/cmdline.h>
13#include <asm/bugs.h> 17#include <asm/bugs.h>
14#include <asm/processor.h> 18#include <asm/processor.h>
15#include <asm/processor-flags.h> 19#include <asm/processor-flags.h>
@@ -20,6 +24,8 @@
20#include <asm/pgtable.h> 24#include <asm/pgtable.h>
21#include <asm/set_memory.h> 25#include <asm/set_memory.h>
22 26
27static void __init spectre_v2_select_mitigation(void);
28
23void __init check_bugs(void) 29void __init check_bugs(void)
24{ 30{
25 identify_boot_cpu(); 31 identify_boot_cpu();
@@ -29,6 +35,9 @@ void __init check_bugs(void)
29 print_cpu_info(&boot_cpu_data); 35 print_cpu_info(&boot_cpu_data);
30 } 36 }
31 37
38 /* Select the proper spectre mitigation before patching alternatives */
39 spectre_v2_select_mitigation();
40
32#ifdef CONFIG_X86_32 41#ifdef CONFIG_X86_32
33 /* 42 /*
34 * Check whether we are able to run this kernel safely on SMP. 43 * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +69,179 @@ void __init check_bugs(void)
60 set_memory_4k((unsigned long)__va(0), 1); 69 set_memory_4k((unsigned long)__va(0), 1);
61#endif 70#endif
62} 71}
72
73/* The kernel command line selection */
74enum spectre_v2_mitigation_cmd {
75 SPECTRE_V2_CMD_NONE,
76 SPECTRE_V2_CMD_AUTO,
77 SPECTRE_V2_CMD_FORCE,
78 SPECTRE_V2_CMD_RETPOLINE,
79 SPECTRE_V2_CMD_RETPOLINE_GENERIC,
80 SPECTRE_V2_CMD_RETPOLINE_AMD,
81};
82
83static const char *spectre_v2_strings[] = {
84 [SPECTRE_V2_NONE] = "Vulnerable",
85 [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
86 [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
87 [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
88 [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
89};
90
91#undef pr_fmt
92#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
93
94static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
95
96static void __init spec2_print_if_insecure(const char *reason)
97{
98 if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
99 pr_info("%s\n", reason);
100}
101
102static void __init spec2_print_if_secure(const char *reason)
103{
104 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
105 pr_info("%s\n", reason);
106}
107
108static inline bool retp_compiler(void)
109{
110 return __is_defined(RETPOLINE);
111}
112
113static inline bool match_option(const char *arg, int arglen, const char *opt)
114{
115 int len = strlen(opt);
116
117 return len == arglen && !strncmp(arg, opt, len);
118}
119
120static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
121{
122 char arg[20];
123 int ret;
124
125 ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
126 sizeof(arg));
127 if (ret > 0) {
128 if (match_option(arg, ret, "off")) {
129 goto disable;
130 } else if (match_option(arg, ret, "on")) {
131 spec2_print_if_secure("force enabled on command line.");
132 return SPECTRE_V2_CMD_FORCE;
133 } else if (match_option(arg, ret, "retpoline")) {
134 spec2_print_if_insecure("retpoline selected on command line.");
135 return SPECTRE_V2_CMD_RETPOLINE;
136 } else if (match_option(arg, ret, "retpoline,amd")) {
137 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
138 pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
139 return SPECTRE_V2_CMD_AUTO;
140 }
141 spec2_print_if_insecure("AMD retpoline selected on command line.");
142 return SPECTRE_V2_CMD_RETPOLINE_AMD;
143 } else if (match_option(arg, ret, "retpoline,generic")) {
144 spec2_print_if_insecure("generic retpoline selected on command line.");
145 return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
146 } else if (match_option(arg, ret, "auto")) {
147 return SPECTRE_V2_CMD_AUTO;
148 }
149 }
150
151 if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
152 return SPECTRE_V2_CMD_AUTO;
153disable:
154 spec2_print_if_insecure("disabled on command line.");
155 return SPECTRE_V2_CMD_NONE;
156}
157
158static void __init spectre_v2_select_mitigation(void)
159{
160 enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
161 enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
162
163 /*
164 * If the CPU is not affected and the command line mode is NONE or AUTO
165 * then nothing to do.
166 */
167 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
168 (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
169 return;
170
171 switch (cmd) {
172 case SPECTRE_V2_CMD_NONE:
173 return;
174
175 case SPECTRE_V2_CMD_FORCE:
176 /* FALLTHRU */
177 case SPECTRE_V2_CMD_AUTO:
178 goto retpoline_auto;
179
180 case SPECTRE_V2_CMD_RETPOLINE_AMD:
181 if (IS_ENABLED(CONFIG_RETPOLINE))
182 goto retpoline_amd;
183 break;
184 case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
185 if (IS_ENABLED(CONFIG_RETPOLINE))
186 goto retpoline_generic;
187 break;
188 case SPECTRE_V2_CMD_RETPOLINE:
189 if (IS_ENABLED(CONFIG_RETPOLINE))
190 goto retpoline_auto;
191 break;
192 }
193 pr_err("kernel not compiled with retpoline; no mitigation available!\n");
194 return;
195
196retpoline_auto:
197 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
198 retpoline_amd:
199 if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
200 pr_err("LFENCE not serializing. Switching to generic retpoline\n");
201 goto retpoline_generic;
202 }
203 mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
204 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
205 setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
206 setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
207 } else {
208 retpoline_generic:
209 mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
210 SPECTRE_V2_RETPOLINE_MINIMAL;
211 setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
212 }
213
214 spectre_v2_enabled = mode;
215 pr_info("%s\n", spectre_v2_strings[mode]);
216}
217
218#undef pr_fmt
219
220#ifdef CONFIG_SYSFS
221ssize_t cpu_show_meltdown(struct device *dev,
222 struct device_attribute *attr, char *buf)
223{
224 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
225 return sprintf(buf, "Not affected\n");
226 if (boot_cpu_has(X86_FEATURE_PTI))
227 return sprintf(buf, "Mitigation: PTI\n");
228 return sprintf(buf, "Vulnerable\n");
229}
230
231ssize_t cpu_show_spectre_v1(struct device *dev,
232 struct device_attribute *attr, char *buf)
233{
234 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
235 return sprintf(buf, "Not affected\n");
236 return sprintf(buf, "Vulnerable\n");
237}
238
239ssize_t cpu_show_spectre_v2(struct device *dev,
240 struct device_attribute *attr, char *buf)
241{
242 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
243 return sprintf(buf, "Not affected\n");
244
245 return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
246}
247#endif
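match_option() exists because cmdline_find_option() hands back a length, not a NUL-terminated string, so the comparison must match both length and content. A stand-alone copy of the helper with a hand-rolled stand-in for the command-line lookup:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool match_option(const char *arg, int arglen, const char *opt)
{
        int len = strlen(opt);

        return len == arglen && !strncmp(arg, opt, len);
}

int main(void)
{
        /* Stand-in for cmdline_find_option(): locate the value by hand. */
        const char *cmdline = "spectre_v2=retpoline,amd quiet";
        const char *arg = cmdline + strlen("spectre_v2=");
        int arglen = strcspn(arg, " ");  /* 13 bytes: "retpoline,amd" */

        printf("%d\n", match_option(arg, arglen, "retpoline,amd"));  /* 1 */
        printf("%d\n", match_option(arg, arglen, "retpoline"));      /* 0 */
        return 0;
}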
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..ef29ad001991 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
476 return NULL; /* Not found */ 476 return NULL; /* Not found */
477} 477}
478 478
479__u32 cpu_caps_cleared[NCAPINTS]; 479__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
480__u32 cpu_caps_set[NCAPINTS]; 480__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
481 481
482void load_percpu_segment(int cpu) 482void load_percpu_segment(int cpu)
483{ 483{
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
490 load_stack_canary_segment(); 490 load_stack_canary_segment();
491} 491}
492 492
493/* Setup the fixmap mapping only once per-processor */ 493#ifdef CONFIG_X86_32
494static inline void setup_fixmap_gdt(int cpu) 494/* The 32-bit entry code needs to find cpu_entry_area. */
495{ 495DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
496#ifdef CONFIG_X86_64
497 /* On 64-bit systems, we use a read-only fixmap GDT. */
498 pgprot_t prot = PAGE_KERNEL_RO;
499#else
500 /*
501 * On native 32-bit systems, the GDT cannot be read-only because
502 * our double fault handler uses a task gate, and entering through
503 * a task gate needs to change an available TSS to busy. If the GDT
504 * is read-only, that will triple fault.
505 *
506 * On Xen PV, the GDT must be read-only because the hypervisor requires
507 * it.
508 */
509 pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
510 PAGE_KERNEL_RO : PAGE_KERNEL;
511#endif 496#endif
512 497
513 __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); 498#ifdef CONFIG_X86_64
514} 499/*
500 * Special IST stacks which the CPU switches to when it calls
501 * an IST-marked descriptor entry. Up to 7 stacks (hardware
502 * limit), all of them are 4K, except the debug stack which
503 * is 8K.
504 */
505static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
506 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
507 [DEBUG_STACK - 1] = DEBUG_STKSZ
508};
509#endif
515 510
516/* Load the original GDT from the per-cpu structure */ 511/* Load the original GDT from the per-cpu structure */
517void load_direct_gdt(int cpu) 512void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
747{ 742{
748 int i; 743 int i;
749 744
750 for (i = 0; i < NCAPINTS; i++) { 745 for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
751 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 746 c->x86_capability[i] &= ~cpu_caps_cleared[i];
752 c->x86_capability[i] |= cpu_caps_set[i]; 747 c->x86_capability[i] |= cpu_caps_set[i];
753 } 748 }
@@ -927,6 +922,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
927 } 922 }
928 923
929 setup_force_cpu_cap(X86_FEATURE_ALWAYS); 924 setup_force_cpu_cap(X86_FEATURE_ALWAYS);
925
926 if (c->x86_vendor != X86_VENDOR_AMD)
927 setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
928
929 setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
930 setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
931
930 fpu__init_system(c); 932 fpu__init_system(c);
931 933
932#ifdef CONFIG_X86_32 934#ifdef CONFIG_X86_32
@@ -1250,7 +1252,7 @@ void enable_sep_cpu(void)
1250 return; 1252 return;
1251 1253
1252 cpu = get_cpu(); 1254 cpu = get_cpu();
1253 tss = &per_cpu(cpu_tss, cpu); 1255 tss = &per_cpu(cpu_tss_rw, cpu);
1254 1256
1255 /* 1257 /*
1256 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- 1258 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1261,7 @@ void enable_sep_cpu(void)
1259 1261
1260 tss->x86_tss.ss1 = __KERNEL_CS; 1262 tss->x86_tss.ss1 = __KERNEL_CS;
1261 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); 1263 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
1262 1264 wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
1263 wrmsr(MSR_IA32_SYSENTER_ESP,
1264 (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
1265 0);
1266
1267 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); 1265 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
1268 1266
1269 put_cpu(); 1267 put_cpu();
@@ -1357,25 +1355,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
1357DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 1355DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1358EXPORT_PER_CPU_SYMBOL(__preempt_count); 1356EXPORT_PER_CPU_SYMBOL(__preempt_count);
1359 1357
1360/*
1361 * Special IST stacks which the CPU switches to when it calls
1362 * an IST-marked descriptor entry. Up to 7 stacks (hardware
1363 * limit), all of them are 4K, except the debug stack which
1364 * is 8K.
1365 */
1366static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1367 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1368 [DEBUG_STACK - 1] = DEBUG_STKSZ
1369};
1370
1371static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1372 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
1373
1374/* May not be marked __init: used by software suspend */ 1358/* May not be marked __init: used by software suspend */
1375void syscall_init(void) 1359void syscall_init(void)
1376{ 1360{
1361 extern char _entry_trampoline[];
1362 extern char entry_SYSCALL_64_trampoline[];
1363
1364 int cpu = smp_processor_id();
1365 unsigned long SYSCALL64_entry_trampoline =
1366 (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
1367 (entry_SYSCALL_64_trampoline - _entry_trampoline);
1368
1377 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); 1369 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1378 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); 1370 if (static_cpu_has(X86_FEATURE_PTI))
1371 wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
1372 else
1373 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
1379 1374
1380#ifdef CONFIG_IA32_EMULATION 1375#ifdef CONFIG_IA32_EMULATION
1381 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); 1376 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1381,7 @@ void syscall_init(void)
1386 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 1381 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
1387 */ 1382 */
1388 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 1383 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
1389 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); 1384 wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
1390 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); 1385 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
1391#else 1386#else
1392 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); 1387 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1525,7 @@ void cpu_init(void)
1530 if (cpu) 1525 if (cpu)
1531 load_ucode_ap(); 1526 load_ucode_ap();
1532 1527
1533 t = &per_cpu(cpu_tss, cpu); 1528 t = &per_cpu(cpu_tss_rw, cpu);
1534 oist = &per_cpu(orig_ist, cpu); 1529 oist = &per_cpu(orig_ist, cpu);
1535 1530
1536#ifdef CONFIG_NUMA 1531#ifdef CONFIG_NUMA
@@ -1569,7 +1564,7 @@ void cpu_init(void)
1569 * set up and load the per-CPU TSS 1564 * set up and load the per-CPU TSS
1570 */ 1565 */
1571 if (!oist->ist[0]) { 1566 if (!oist->ist[0]) {
1572 char *estacks = per_cpu(exception_stacks, cpu); 1567 char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
1573 1568
1574 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1569 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1575 estacks += exception_stack_sizes[v]; 1570 estacks += exception_stack_sizes[v];
@@ -1580,7 +1575,7 @@ void cpu_init(void)
1580 } 1575 }
1581 } 1576 }
1582 1577
1583 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1578 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1584 1579
1585 /* 1580 /*
1586 * <= is required because the CPU will access up to 1581 * <= is required because the CPU will access up to
@@ -1596,11 +1591,12 @@ void cpu_init(void)
1596 enter_lazy_tlb(&init_mm, me); 1591 enter_lazy_tlb(&init_mm, me);
1597 1592
1598 /* 1593 /*
1599 * Initialize the TSS. Don't bother initializing sp0, as the initial 1594 * Initialize the TSS. sp0 points to the entry trampoline stack
1600 * task never enters user mode. 1595 * regardless of what task is running.
1601 */ 1596 */
1602 set_tss_desc(cpu, t); 1597 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1603 load_TR_desc(); 1598 load_TR_desc();
1599 load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
1604 1600
1605 load_mm_ldt(&init_mm); 1601 load_mm_ldt(&init_mm);
1606 1602
@@ -1612,7 +1608,6 @@ void cpu_init(void)
1612 if (is_uv_system()) 1608 if (is_uv_system())
1613 uv_cpu_init(); 1609 uv_cpu_init();
1614 1610
1615 setup_fixmap_gdt(cpu);
1616 load_fixmap_gdt(cpu); 1611 load_fixmap_gdt(cpu);
1617} 1612}
1618 1613
@@ -1622,7 +1617,7 @@ void cpu_init(void)
1622{ 1617{
1623 int cpu = smp_processor_id(); 1618 int cpu = smp_processor_id();
1624 struct task_struct *curr = current; 1619 struct task_struct *curr = current;
1625 struct tss_struct *t = &per_cpu(cpu_tss, cpu); 1620 struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
1626 1621
1627 wait_for_master_cpu(cpu); 1622 wait_for_master_cpu(cpu);
1628 1623
@@ -1657,12 +1652,12 @@ void cpu_init(void)
1657 * Initialize the TSS. Don't bother initializing sp0, as the initial 1652 * Initialize the TSS. Don't bother initializing sp0, as the initial
1658 * task never enters user mode. 1653 * task never enters user mode.
1659 */ 1654 */
1660 set_tss_desc(cpu, t); 1655 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1661 load_TR_desc(); 1656 load_TR_desc();
1662 1657
1663 load_mm_ldt(&init_mm); 1658 load_mm_ldt(&init_mm);
1664 1659
1665 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1660 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1666 1661
1667#ifdef CONFIG_DOUBLEFAULT 1662#ifdef CONFIG_DOUBLEFAULT
1668 /* Set up doublefault TSS pointer in the GDT */ 1663 /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1669,6 @@ void cpu_init(void)
1674 1669
1675 fpu__init_cpu(); 1670 fpu__init_cpu();
1676 1671
1677 setup_fixmap_gdt(cpu);
1678 load_fixmap_gdt(cpu); 1672 load_fixmap_gdt(cpu);
1679} 1673}
1680#endif 1674#endif
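The SYSCALL64_entry_trampoline value computed in syscall_init() above is plain alias arithmetic: a symbol inside a section that has been mapped at a second virtual address sits at the same offset inside the alias. A standalone sketch of that calculation (names are illustrative, not kernel APIs):

#include <stdint.h>

/* Given a region originally mapped at orig_base and aliased at
 * alias_base, find where a symbol from the original lands in the alias.
 */
static uintptr_t alias_of(uintptr_t sym, uintptr_t orig_base,
			  uintptr_t alias_base)
{
	/* offset of the symbol inside the original mapping ... */
	uintptr_t off = sym - orig_base;

	/* ... is preserved in the aliased mapping */
	return alias_base + off;
}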
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..d9e460fc7a3b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
565} 565}
566#else 566#else
567 567
568/*
569 * Flush global tlb. We only do this in x86_64 where paging has been enabled
570 * already and PGE should be enabled as well.
571 */
572static inline void flush_tlb_early(void)
573{
574 __native_flush_tlb_global_irq_disabled();
575}
576
577static inline void print_ucode(struct ucode_cpu_info *uci) 568static inline void print_ucode(struct ucode_cpu_info *uci)
578{ 569{
579 struct microcode_intel *mc; 570 struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
602 if (rev != mc->hdr.rev) 593 if (rev != mc->hdr.rev)
603 return -1; 594 return -1;
604 595
605#ifdef CONFIG_X86_64
606 /* Flush global tlb. This is precaution. */
607 flush_tlb_early();
608#endif
609 uci->cpu_sig.rev = rev; 596 uci->cpu_sig.rev = rev;
610 597
611 if (early) 598 if (early)
@@ -923,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
923{ 910{
924 struct cpuinfo_x86 *c = &cpu_data(cpu); 911 struct cpuinfo_x86 *c = &cpu_data(cpu);
925 912
926 if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { 913 /*
927 pr_err_once("late loading on model 79 is disabled.\n"); 914 * Late loading on model 79 with microcode revision less than 0x0b000021
915 * may result in a system hang. This behavior is documented in item
916 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
917 */
918 if (c->x86 == 6 &&
919 c->x86_model == INTEL_FAM6_BROADWELL_X &&
920 c->x86_mask == 0x01 &&
921 c->microcode < 0x0b000021) {
922 pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
923 pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
928 return true; 924 return true;
929 } 925 }
930 926
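The widened is_blacklisted() test above reads as a single predicate over the CPU identity. A hedged standalone sketch (model 79 stands in for INTEL_FAM6_BROADWELL_X; the stepping and revision cutoff are copied from the hunk):

#include <stdbool.h>
#include <stdint.h>

struct cpu_id {
	uint8_t family;
	uint8_t model;
	uint8_t stepping;
	uint32_t microcode_rev;
};

/* Erratum BDF90: refuse late microcode loading on Broadwell-X stepping
 * 1 when the running revision is older than 0x0b000021.
 */
static bool late_load_blacklisted(const struct cpu_id *c)
{
	return c->family == 6 && c->model == 79 &&
	       c->stepping == 0x01 && c->microcode_rev < 0x0b000021;
}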
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0e662c55ae90..0b8cedb20d6d 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
50 cpu_relax(); 50 cpu_relax();
51} 51}
52 52
53struct tss_struct doublefault_tss __cacheline_aligned = { 53struct x86_hw_tss doublefault_tss __cacheline_aligned = {
54 .x86_tss = { 54 .sp0 = STACK_START,
55 .sp0 = STACK_START, 55 .ss0 = __KERNEL_DS,
56 .ss0 = __KERNEL_DS, 56 .ldt = 0,
57 .ldt = 0, 57 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
58 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, 58
59 59 .ip = (unsigned long) doublefault_fn,
60 .ip = (unsigned long) doublefault_fn, 60 /* 0x2 bit is always set */
61 /* 0x2 bit is always set */ 61 .flags = X86_EFLAGS_SF | 0x2,
62 .flags = X86_EFLAGS_SF | 0x2, 62 .sp = STACK_START,
63 .sp = STACK_START, 63 .es = __USER_DS,
64 .es = __USER_DS, 64 .cs = __KERNEL_CS,
65 .cs = __KERNEL_CS, 65 .ss = __KERNEL_DS,
66 .ss = __KERNEL_DS, 66 .ds = __USER_DS,
67 .ds = __USER_DS, 67 .fs = __KERNEL_PERCPU,
68 .fs = __KERNEL_PERCPU, 68
69 69 .__cr3 = __pa_nodebug(swapper_pg_dir),
70 .__cr3 = __pa_nodebug(swapper_pg_dir),
71 }
72}; 70};
73 71
74/* dummy for do_double_fault() call */ 72/* dummy for do_double_fault() call */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f13b4c00a5de..afbecff161d1 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,6 +18,7 @@
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/sysfs.h> 19#include <linux/sysfs.h>
20 20
21#include <asm/cpu_entry_area.h>
21#include <asm/stacktrace.h> 22#include <asm/stacktrace.h>
22#include <asm/unwind.h> 23#include <asm/unwind.h>
23 24
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
43 return true; 44 return true;
44} 45}
45 46
47bool in_entry_stack(unsigned long *stack, struct stack_info *info)
48{
49 struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
50
51 void *begin = ss;
52 void *end = ss + 1;
53
54 if ((void *)stack < begin || (void *)stack >= end)
55 return false;
56
57 info->type = STACK_TYPE_ENTRY;
58 info->begin = begin;
59 info->end = end;
60 info->next_sp = NULL;
61
62 return true;
63}
64
46static void printk_stack_address(unsigned long address, int reliable, 65static void printk_stack_address(unsigned long address, int reliable,
47 char *log_lvl) 66 char *log_lvl)
48{ 67{
@@ -50,6 +69,39 @@ static void printk_stack_address(unsigned long address, int reliable,
50 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); 69 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
51} 70}
52 71
72void show_iret_regs(struct pt_regs *regs)
73{
74 printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
75 printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
76 regs->sp, regs->flags);
77}
78
79static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
80 bool partial)
81{
82 /*
83 * These on_stack() checks aren't strictly necessary: the unwind code
84 * has already validated the 'regs' pointer. The checks are done for
85 * ordering reasons: if the registers are on the next stack, we don't
86 * want to print them out yet. Otherwise they'll be shown as part of
87 * the wrong stack. Later, when show_trace_log_lvl() switches to the
88 * next stack, this function will be called again with the same regs so
89 * they can be printed in the right context.
90 */
91 if (!partial && on_stack(info, regs, sizeof(*regs))) {
92 __show_regs(regs, 0);
93
94 } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
95 IRET_FRAME_SIZE)) {
96 /*
97 * When an interrupt or exception occurs in entry code, the
98 * full pt_regs might not have been saved yet. In that case
99 * just print the iret frame.
100 */
101 show_iret_regs(regs);
102 }
103}
104
53void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 105void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
54 unsigned long *stack, char *log_lvl) 106 unsigned long *stack, char *log_lvl)
55{ 107{
@@ -57,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
57 struct stack_info stack_info = {0}; 109 struct stack_info stack_info = {0};
58 unsigned long visit_mask = 0; 110 unsigned long visit_mask = 0;
59 int graph_idx = 0; 111 int graph_idx = 0;
112 bool partial;
60 113
61 printk("%sCall Trace:\n", log_lvl); 114 printk("%sCall Trace:\n", log_lvl);
62 115
63 unwind_start(&state, task, regs, stack); 116 unwind_start(&state, task, regs, stack);
64 stack = stack ? : get_stack_pointer(task, regs); 117 stack = stack ? : get_stack_pointer(task, regs);
118 regs = unwind_get_entry_regs(&state, &partial);
65 119
66 /* 120 /*
67 * Iterate through the stacks, starting with the current stack pointer. 121 * Iterate through the stacks, starting with the current stack pointer.
@@ -71,31 +125,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
71 * - task stack 125 * - task stack
72 * - interrupt stack 126 * - interrupt stack
73 * - HW exception stacks (double fault, nmi, debug, mce) 127 * - HW exception stacks (double fault, nmi, debug, mce)
128 * - entry stack
74 * 129 *
75 * x86-32 can have up to three stacks: 130 * x86-32 can have up to four stacks:
76 * - task stack 131 * - task stack
77 * - softirq stack 132 * - softirq stack
78 * - hardirq stack 133 * - hardirq stack
134 * - entry stack
79 */ 135 */
80 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { 136 for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
81 const char *stack_name; 137 const char *stack_name;
82 138
83 /* 139 if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
84 * If we overflowed the task stack into a guard page, jump back 140 /*
85 * to the bottom of the usable stack. 141 * We weren't on a valid stack. It's possible that
86 */ 142 * we overflowed a valid stack into a guard page.
87 if (task_stack_page(task) - (void *)stack < PAGE_SIZE) 143 * See if the next page up is valid so that we can
88 stack = task_stack_page(task); 144 * generate some kind of backtrace if this happens.
89 145 */
90 if (get_stack_info(stack, task, &stack_info, &visit_mask)) 146 stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
91 break; 147 if (get_stack_info(stack, task, &stack_info, &visit_mask))
148 break;
149 }
92 150
93 stack_name = stack_type_name(stack_info.type); 151 stack_name = stack_type_name(stack_info.type);
94 if (stack_name) 152 if (stack_name)
95 printk("%s <%s>\n", log_lvl, stack_name); 153 printk("%s <%s>\n", log_lvl, stack_name);
96 154
97 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 155 if (regs)
98 __show_regs(regs, 0); 156 show_regs_if_on_stack(&stack_info, regs, partial);
99 157
100 /* 158 /*
101 * Scan the stack, printing any text addresses we find. At the 159 * Scan the stack, printing any text addresses we find. At the
@@ -119,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
119 177
120 /* 178 /*
121 * Don't print regs->ip again if it was already printed 179 * Don't print regs->ip again if it was already printed
122 * by __show_regs() below. 180 * by show_regs_if_on_stack().
123 */ 181 */
124 if (regs && stack == &regs->ip) 182 if (regs && stack == &regs->ip)
125 goto next; 183 goto next;
@@ -154,9 +212,9 @@ next:
154 unwind_next_frame(&state); 212 unwind_next_frame(&state);
155 213
156 /* if the frame has entry regs, print them */ 214 /* if the frame has entry regs, print them */
157 regs = unwind_get_entry_regs(&state); 215 regs = unwind_get_entry_regs(&state, &partial);
158 if (regs && on_stack(&stack_info, regs, sizeof(*regs))) 216 if (regs)
159 __show_regs(regs, 0); 217 show_regs_if_on_stack(&stack_info, regs, partial);
160 } 218 }
161 219
162 if (stack_name) 220 if (stack_name)
@@ -252,11 +310,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
252 unsigned long sp; 310 unsigned long sp;
253#endif 311#endif
254 printk(KERN_DEFAULT 312 printk(KERN_DEFAULT
255 "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, 313 "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
256 IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", 314 IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
257 IS_ENABLED(CONFIG_SMP) ? " SMP" : "", 315 IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
258 debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", 316 debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
259 IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); 317 IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
318 IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
319 (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
260 320
261 if (notify_die(DIE_OOPS, str, regs, err, 321 if (notify_die(DIE_OOPS, str, regs, err,
262 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 322 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
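The bounds test in the new in_entry_stack() above leans on C pointer arithmetic: for a struct pointer ss, ss + 1 is the first byte past the object, so [ss, ss + 1) spans exactly one entry stack. A minimal sketch of the same check (the struct size here is illustrative):

#include <stdbool.h>

struct entry_stack {
	unsigned long words[512];	/* size is illustrative only */
};

static bool in_range(const void *p, const struct entry_stack *ss)
{
	const void *begin = ss;
	const void *end = ss + 1;	/* one past the whole stack */

	return p >= begin && p < end;
}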
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index daefae83a3aa..04170f63e3a1 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
26 if (type == STACK_TYPE_SOFTIRQ) 26 if (type == STACK_TYPE_SOFTIRQ)
27 return "SOFTIRQ"; 27 return "SOFTIRQ";
28 28
29 if (type == STACK_TYPE_ENTRY)
30 return "ENTRY_TRAMPOLINE";
31
29 return NULL; 32 return NULL;
30} 33}
31 34
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
93 if (task != current) 96 if (task != current)
94 goto unknown; 97 goto unknown;
95 98
99 if (in_entry_stack(stack, info))
100 goto recursion_check;
101
96 if (in_hardirq_stack(stack, info)) 102 if (in_hardirq_stack(stack, info))
97 goto recursion_check; 103 goto recursion_check;
98 104
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 88ce2ffdb110..563e28d14f2c 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
37 if (type == STACK_TYPE_IRQ) 37 if (type == STACK_TYPE_IRQ)
38 return "IRQ"; 38 return "IRQ";
39 39
40 if (type == STACK_TYPE_ENTRY) {
41 /*
42 * On 64-bit, we have a generic entry stack that we
43 * use for all the kernel entry points, including
44 * SYSENTER.
45 */
46 return "ENTRY_TRAMPOLINE";
47 }
48
40 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) 49 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
41 return exception_stack_names[type - STACK_TYPE_EXCEPTION]; 50 return exception_stack_names[type - STACK_TYPE_EXCEPTION];
42 51
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
115 if (in_irq_stack(stack, info)) 124 if (in_irq_stack(stack, info))
116 goto recursion_check; 125 goto recursion_check;
117 126
127 if (in_entry_stack(stack, info))
128 goto recursion_check;
129
118 goto unknown; 130 goto unknown;
119 131
120recursion_check: 132recursion_check:
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index b6c6468e10bc..4c8440de3355 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -8,6 +8,7 @@
8#include <asm/segment.h> 8#include <asm/segment.h>
9#include <asm/export.h> 9#include <asm/export.h>
10#include <asm/ftrace.h> 10#include <asm/ftrace.h>
11#include <asm/nospec-branch.h>
11 12
12#ifdef CC_USING_FENTRY 13#ifdef CC_USING_FENTRY
13# define function_hook __fentry__ 14# define function_hook __fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
197 movl 0x4(%ebp), %edx 198 movl 0x4(%ebp), %edx
198 subl $MCOUNT_INSN_SIZE, %eax 199 subl $MCOUNT_INSN_SIZE, %eax
199 200
200 call *ftrace_trace_function 201 movl ftrace_trace_function, %ecx
202 CALL_NOSPEC %ecx
201 203
202 popl %edx 204 popl %edx
203 popl %ecx 205 popl %ecx
@@ -241,5 +243,5 @@ return_to_handler:
241 movl %eax, %ecx 243 movl %eax, %ecx
242 popl %edx 244 popl %edx
243 popl %eax 245 popl %eax
244 jmp *%ecx 246 JMP_NOSPEC %ecx
245#endif 247#endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index c832291d948a..7cb8ba08beb9 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -7,7 +7,7 @@
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8#include <asm/ftrace.h> 8#include <asm/ftrace.h>
9#include <asm/export.h> 9#include <asm/export.h>
10 10#include <asm/nospec-branch.h>
11 11
12 .code64 12 .code64
13 .section .entry.text, "ax" 13 .section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
286 * ip and parent ip are used and the list function is called when 286 * ip and parent ip are used and the list function is called when
287 * function tracing is enabled. 287 * function tracing is enabled.
288 */ 288 */
289 call *ftrace_trace_function 289 movq ftrace_trace_function, %r8
290 290 CALL_NOSPEC %r8
291 restore_mcount_regs 291 restore_mcount_regs
292 292
293 jmp fgraph_trace 293 jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
329 movq 8(%rsp), %rdx 329 movq 8(%rsp), %rdx
330 movq (%rsp), %rax 330 movq (%rsp), %rax
331 addq $24, %rsp 331 addq $24, %rsp
332 jmp *%rdi 332 JMP_NOSPEC %rdi
333#endif 333#endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7dca675fe78d..04a625f0fcda 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
341 .balign PAGE_SIZE; \ 341 .balign PAGE_SIZE; \
342GLOBAL(name) 342GLOBAL(name)
343 343
344#ifdef CONFIG_PAGE_TABLE_ISOLATION
345/*
346 * Each PGD needs to be 8k long and 8k aligned. We do not
347 * ever go out to userspace with these, so we do not
348 * strictly *need* the second page, but this allows us to
349 * have a single set_pgd() implementation that does not
350 * need to worry about whether it has 4k or 8k to work
351 * with.
352 *
353 * This ensures PGDs are 8k long:
354 */
355#define PTI_USER_PGD_FILL 512
356/* This ensures they are 8k-aligned: */
357#define NEXT_PGD_PAGE(name) \
358 .balign 2 * PAGE_SIZE; \
359GLOBAL(name)
360#else
361#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
362#define PTI_USER_PGD_FILL 0
363#endif
364
344/* Automate the creation of 1 to 1 mapping pmd entries */ 365/* Automate the creation of 1 to 1 mapping pmd entries */
345#define PMDS(START, PERM, COUNT) \ 366#define PMDS(START, PERM, COUNT) \
346 i = 0 ; \ 367 i = 0 ; \
@@ -350,13 +371,14 @@ GLOBAL(name)
350 .endr 371 .endr
351 372
352 __INITDATA 373 __INITDATA
353NEXT_PAGE(early_top_pgt) 374NEXT_PGD_PAGE(early_top_pgt)
354 .fill 511,8,0 375 .fill 511,8,0
355#ifdef CONFIG_X86_5LEVEL 376#ifdef CONFIG_X86_5LEVEL
356 .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 377 .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
357#else 378#else
358 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 379 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
359#endif 380#endif
381 .fill PTI_USER_PGD_FILL,8,0
360 382
361NEXT_PAGE(early_dynamic_pgts) 383NEXT_PAGE(early_dynamic_pgts)
362 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 384 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
364 .data 386 .data
365 387
366#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) 388#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
367NEXT_PAGE(init_top_pgt) 389NEXT_PGD_PAGE(init_top_pgt)
368 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 390 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
369 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 391 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
370 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 392 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
371 .org init_top_pgt + PGD_START_KERNEL*8, 0 393 .org init_top_pgt + PGD_START_KERNEL*8, 0
372 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 394 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
373 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 395 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
396 .fill PTI_USER_PGD_FILL,8,0
374 397
375NEXT_PAGE(level3_ident_pgt) 398NEXT_PAGE(level3_ident_pgt)
376 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 399 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
381 */ 404 */
382 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) 405 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
383#else 406#else
384NEXT_PAGE(init_top_pgt) 407NEXT_PGD_PAGE(init_top_pgt)
385 .fill 512,8,0 408 .fill 512,8,0
409 .fill PTI_USER_PGD_FILL,8,0
386#endif 410#endif
387 411
388#ifdef CONFIG_X86_5LEVEL 412#ifdef CONFIG_X86_5LEVEL
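NEXT_PGD_PAGE and PTI_USER_PGD_FILL above arrange every top-level page table as an 8k, 8k-aligned pair: the kernel PGD in the first 4k page, the usermode PGD in the second. A sketch of the fixed-offset conversion this layout enables (the idea behind kernel_to_user_pgdp(); names here are illustrative):

#include <stdint.h>

#define PAGE_SIZE 4096UL

typedef struct { uint64_t pgd; } pgd_t;

/* With an 8k-aligned PGD pair, the user copy sits exactly one page
 * above the kernel copy, so no lookup is needed to convert.
 */
static pgd_t *user_pgd_of(pgd_t *kernel_pgd_page)
{
	return (pgd_t *)((uintptr_t)kernel_pgd_page + PAGE_SIZE);
}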
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 3feb648781c4..2f723301eb58 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
67 * because the ->io_bitmap_max value must match the bitmap 67 * because the ->io_bitmap_max value must match the bitmap
68 * contents: 68 * contents:
69 */ 69 */
70 tss = &per_cpu(cpu_tss, get_cpu()); 70 tss = &per_cpu(cpu_tss_rw, get_cpu());
71 71
72 if (turn_on) 72 if (turn_on)
73 bitmap_clear(t->io_bitmap_ptr, from, num); 73 bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 49cfd9fe7589..68e1867cca80 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
219 /* high bit used in ret_from_ code */ 219 /* high bit used in ret_from_ code */
220 unsigned vector = ~regs->orig_ax; 220 unsigned vector = ~regs->orig_ax;
221 221
222 /*
223 * NB: Unlike exception entries, IRQ entries do not reliably
224 * handle context tracking in the low-level entry code. This is
225 * because syscall entries execute briefly with IRQs on before
226 * updating context tracking state, so we can take an IRQ from
227 * kernel mode with CONTEXT_USER. The low-level entry code only
228 * updates the context if we came from user mode, so we won't
229 * switch to CONTEXT_KERNEL. We'll fix that once the syscall
230 * code is cleaned up enough that we can cleanly defer enabling
231 * IRQs.
232 */
233
234 entering_irq(); 222 entering_irq();
235 223
236 /* entering_irq() tells RCU that we're not quiescent. Check it. */ 224 /* entering_irq() tells RCU that we're not quiescent. Check it. */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a83b3346a0e1..c1bdbd3d3232 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -20,6 +20,7 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21 21
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/nospec-branch.h>
23 24
24#ifdef CONFIG_DEBUG_STACKOVERFLOW 25#ifdef CONFIG_DEBUG_STACKOVERFLOW
25 26
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
55static void call_on_stack(void *func, void *stack) 56static void call_on_stack(void *func, void *stack)
56{ 57{
57 asm volatile("xchgl %%ebx,%%esp \n" 58 asm volatile("xchgl %%ebx,%%esp \n"
58 "call *%%edi \n" 59 CALL_NOSPEC
59 "movl %%ebx,%%esp \n" 60 "movl %%ebx,%%esp \n"
60 : "=b" (stack) 61 : "=b" (stack)
61 : "0" (stack), 62 : "0" (stack),
62 "D"(func) 63 [thunk_target] "D"(func)
63 : "memory", "cc", "edx", "ecx", "eax"); 64 : "memory", "cc", "edx", "ecx", "eax");
64} 65}
65 66
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
95 call_on_stack(print_stack_overflow, isp); 96 call_on_stack(print_stack_overflow, isp);
96 97
97 asm volatile("xchgl %%ebx,%%esp \n" 98 asm volatile("xchgl %%ebx,%%esp \n"
98 "call *%%edi \n" 99 CALL_NOSPEC
99 "movl %%ebx,%%esp \n" 100 "movl %%ebx,%%esp \n"
100 : "=a" (arg1), "=b" (isp) 101 : "=a" (arg1), "=b" (isp)
101 : "0" (desc), "1" (isp), 102 : "0" (desc), "1" (isp),
102 "D" (desc->handle_irq) 103 [thunk_target] "D" (desc->handle_irq)
103 : "memory", "cc", "ecx"); 104 : "memory", "cc", "ecx");
104 return 1; 105 return 1;
105} 106}
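Both asm blocks above now route the indirect call through CALL_NOSPEC with the [thunk_target] constraint; with CONFIG_RETPOLINE that becomes a call through a per-register thunk. A freestanding sketch of the generic retpoline thunk sequence for the %rax case (the symbol name is illustrative, not the kernel's):

__asm__(
"	.text\n"
"	.globl	retpoline_rax_sketch\n"
"retpoline_rax_sketch:\n"
"	call	2f\n"			/* push &1f, jump below */
"1:	pause\n"			/* speculation is trapped here */
"	lfence\n"
"	jmp	1b\n"
"2:	mov	%rax, (%rsp)\n"		/* replace return address with target */
"	ret\n"				/* architecturally jumps to %rax */
);

The speculative return predicted from the call lands in the pause/lfence loop and goes nowhere, while the architectural path overwrites the saved return address and rets to the real target.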
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 020efbf5786b..d86e344f5b3d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
57 if (regs->sp >= estack_top && regs->sp <= estack_bottom) 57 if (regs->sp >= estack_top && regs->sp <= estack_bottom)
58 return; 58 return;
59 59
60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", 60 WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
61 current->comm, curbase, regs->sp, 61 current->comm, curbase, regs->sp,
62 irq_stack_top, irq_stack_bottom, 62 irq_stack_top, irq_stack_bottom,
63 estack_top, estack_bottom); 63 estack_top, estack_bottom, (void *)regs->ip);
64 64
65 if (sysctl_panic_on_stackoverflow) 65 if (sysctl_panic_on_stackoverflow)
66 panic("low stack detected by irq handler - check messages\n"); 66 panic("low stack detected by irq handler - check messages\n");
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 1c1eae961340..26d713ecad34 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
5 * Copyright (C) 2002 Andi Kleen 5 * Copyright (C) 2002 Andi Kleen
6 * 6 *
7 * This handles calls from both 32bit and 64bit mode. 7 * This handles calls from both 32bit and 64bit mode.
8 *
9 * Lock order:
 10 * context.ldt_usr_sem
11 * mmap_sem
12 * context.lock
8 */ 13 */
9 14
10#include <linux/errno.h> 15#include <linux/errno.h>
@@ -19,6 +24,7 @@
19#include <linux/uaccess.h> 24#include <linux/uaccess.h>
20 25
21#include <asm/ldt.h> 26#include <asm/ldt.h>
27#include <asm/tlb.h>
22#include <asm/desc.h> 28#include <asm/desc.h>
23#include <asm/mmu_context.h> 29#include <asm/mmu_context.h>
24#include <asm/syscalls.h> 30#include <asm/syscalls.h>
@@ -42,17 +48,15 @@ static void refresh_ldt_segments(void)
42#endif 48#endif
43} 49}
44 50
45/* context.lock is held for us, so we don't need any locking. */ 51/* context.lock is held by the task which issued the smp function call */
46static void flush_ldt(void *__mm) 52static void flush_ldt(void *__mm)
47{ 53{
48 struct mm_struct *mm = __mm; 54 struct mm_struct *mm = __mm;
49 mm_context_t *pc;
50 55
51 if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) 56 if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
52 return; 57 return;
53 58
54 pc = &mm->context; 59 load_mm_ldt(mm);
55 set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
56 60
57 refresh_ldt_segments(); 61 refresh_ldt_segments();
58} 62}
@@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
89 return NULL; 93 return NULL;
90 } 94 }
91 95
96 /* The new LDT isn't aliased for PTI yet. */
97 new_ldt->slot = -1;
98
92 new_ldt->nr_entries = num_entries; 99 new_ldt->nr_entries = num_entries;
93 return new_ldt; 100 return new_ldt;
94} 101}
95 102
103/*
104 * If PTI is enabled, this maps the LDT into the kernelmode and
105 * usermode tables for the given mm.
106 *
107 * There is no corresponding unmap function. Even if the LDT is freed, we
108 * leave the PTEs around until the slot is reused or the mm is destroyed.
109 * This is harmless: the LDT is always in ordinary memory, and no one will
110 * access the freed slot.
111 *
112 * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
113 * it useful, and the flush would slow down modify_ldt().
114 */
115static int
116map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
117{
118#ifdef CONFIG_PAGE_TABLE_ISOLATION
119 bool is_vmalloc, had_top_level_entry;
120 unsigned long va;
121 spinlock_t *ptl;
122 pgd_t *pgd;
123 int i;
124
125 if (!static_cpu_has(X86_FEATURE_PTI))
126 return 0;
127
128 /*
129 * Any given ldt_struct should have map_ldt_struct() called at most
130 * once.
131 */
132 WARN_ON(ldt->slot != -1);
133
134 /*
135 * Did we already have the top level entry allocated? We can't
 136 * use pgd_none() for this because it doesn't do anything on
137 * 4-level page table kernels.
138 */
139 pgd = pgd_offset(mm, LDT_BASE_ADDR);
140 had_top_level_entry = (pgd->pgd != 0);
141
142 is_vmalloc = is_vmalloc_addr(ldt->entries);
143
144 for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
145 unsigned long offset = i << PAGE_SHIFT;
146 const void *src = (char *)ldt->entries + offset;
147 unsigned long pfn;
148 pte_t pte, *ptep;
149
150 va = (unsigned long)ldt_slot_va(slot) + offset;
151 pfn = is_vmalloc ? vmalloc_to_pfn(src) :
152 page_to_pfn(virt_to_page(src));
153 /*
154 * Treat the PTI LDT range as a *userspace* range.
155 * get_locked_pte() will allocate all needed pagetables
156 * and account for them in this mm.
157 */
158 ptep = get_locked_pte(mm, va, &ptl);
159 if (!ptep)
160 return -ENOMEM;
161 /*
 162 * Map it RO so the easy-to-find address is not a primary
163 * target via some kernel interface which misses a
164 * permission check.
165 */
166 pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
167 set_pte_at(mm, va, ptep, pte);
168 pte_unmap_unlock(ptep, ptl);
169 }
170
171 if (mm->context.ldt) {
172 /*
173 * We already had an LDT. The top-level entry should already
174 * have been allocated and synchronized with the usermode
175 * tables.
176 */
177 WARN_ON(!had_top_level_entry);
178 if (static_cpu_has(X86_FEATURE_PTI))
179 WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
180 } else {
181 /*
182 * This is the first time we're mapping an LDT for this process.
183 * Sync the pgd to the usermode tables.
184 */
185 WARN_ON(had_top_level_entry);
186 if (static_cpu_has(X86_FEATURE_PTI)) {
187 WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
188 set_pgd(kernel_to_user_pgdp(pgd), *pgd);
189 }
190 }
191
192 va = (unsigned long)ldt_slot_va(slot);
193 flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
194
195 ldt->slot = slot;
196#endif
197 return 0;
198}
199
200static void free_ldt_pgtables(struct mm_struct *mm)
201{
202#ifdef CONFIG_PAGE_TABLE_ISOLATION
203 struct mmu_gather tlb;
204 unsigned long start = LDT_BASE_ADDR;
205 unsigned long end = start + (1UL << PGDIR_SHIFT);
206
207 if (!static_cpu_has(X86_FEATURE_PTI))
208 return;
209
210 tlb_gather_mmu(&tlb, mm, start, end);
211 free_pgd_range(&tlb, start, end, start, end);
212 tlb_finish_mmu(&tlb, start, end);
213#endif
214}
215
96/* After calling this, the LDT is immutable. */ 216/* After calling this, the LDT is immutable. */
97static void finalize_ldt_struct(struct ldt_struct *ldt) 217static void finalize_ldt_struct(struct ldt_struct *ldt)
98{ 218{
99 paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); 219 paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
100} 220}
101 221
102/* context.lock is held */ 222static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
103static void install_ldt(struct mm_struct *current_mm,
104 struct ldt_struct *ldt)
105{ 223{
224 mutex_lock(&mm->context.lock);
225
106 /* Synchronizes with READ_ONCE in load_mm_ldt. */ 226 /* Synchronizes with READ_ONCE in load_mm_ldt. */
107 smp_store_release(&current_mm->context.ldt, ldt); 227 smp_store_release(&mm->context.ldt, ldt);
108 228
 109 /* Activate the LDT for all CPUs using current_mm. */ 229 /* Activate the LDT for all CPUs using current's mm. */
110 on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); 230 on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
231
232 mutex_unlock(&mm->context.lock);
111} 233}
112 234
113static void free_ldt_struct(struct ldt_struct *ldt) 235static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
124} 246}
125 247
126/* 248/*
127 * we do not have to muck with descriptors here, that is 249 * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
128 * done in switch_mm() as needed. 250 * the new task is not running, so nothing can be installed.
129 */ 251 */
130int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) 252int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
131{ 253{
132 struct ldt_struct *new_ldt; 254 struct ldt_struct *new_ldt;
133 struct mm_struct *old_mm;
134 int retval = 0; 255 int retval = 0;
135 256
136 mutex_init(&mm->context.lock); 257 if (!old_mm)
137 old_mm = current->mm;
138 if (!old_mm) {
139 mm->context.ldt = NULL;
140 return 0; 258 return 0;
141 }
142 259
143 mutex_lock(&old_mm->context.lock); 260 mutex_lock(&old_mm->context.lock);
144 if (!old_mm->context.ldt) { 261 if (!old_mm->context.ldt)
145 mm->context.ldt = NULL;
146 goto out_unlock; 262 goto out_unlock;
147 }
148 263
149 new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); 264 new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
150 if (!new_ldt) { 265 if (!new_ldt) {
@@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
156 new_ldt->nr_entries * LDT_ENTRY_SIZE); 271 new_ldt->nr_entries * LDT_ENTRY_SIZE);
157 finalize_ldt_struct(new_ldt); 272 finalize_ldt_struct(new_ldt);
158 273
274 retval = map_ldt_struct(mm, new_ldt, 0);
275 if (retval) {
276 free_ldt_pgtables(mm);
277 free_ldt_struct(new_ldt);
278 goto out_unlock;
279 }
159 mm->context.ldt = new_ldt; 280 mm->context.ldt = new_ldt;
160 281
161out_unlock: 282out_unlock:
@@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm)
174 mm->context.ldt = NULL; 295 mm->context.ldt = NULL;
175} 296}
176 297
298void ldt_arch_exit_mmap(struct mm_struct *mm)
299{
300 free_ldt_pgtables(mm);
301}
302
177static int read_ldt(void __user *ptr, unsigned long bytecount) 303static int read_ldt(void __user *ptr, unsigned long bytecount)
178{ 304{
179 struct mm_struct *mm = current->mm; 305 struct mm_struct *mm = current->mm;
180 unsigned long entries_size; 306 unsigned long entries_size;
181 int retval; 307 int retval;
182 308
183 mutex_lock(&mm->context.lock); 309 down_read(&mm->context.ldt_usr_sem);
184 310
185 if (!mm->context.ldt) { 311 if (!mm->context.ldt) {
186 retval = 0; 312 retval = 0;
@@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
209 retval = bytecount; 335 retval = bytecount;
210 336
211out_unlock: 337out_unlock:
212 mutex_unlock(&mm->context.lock); 338 up_read(&mm->context.ldt_usr_sem);
213 return retval; 339 return retval;
214} 340}
215 341
@@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
269 ldt.avl = 0; 395 ldt.avl = 0;
270 } 396 }
271 397
272 mutex_lock(&mm->context.lock); 398 if (down_write_killable(&mm->context.ldt_usr_sem))
399 return -EINTR;
273 400
274 old_ldt = mm->context.ldt; 401 old_ldt = mm->context.ldt;
275 old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; 402 old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -286,12 +413,31 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
286 new_ldt->entries[ldt_info.entry_number] = ldt; 413 new_ldt->entries[ldt_info.entry_number] = ldt;
287 finalize_ldt_struct(new_ldt); 414 finalize_ldt_struct(new_ldt);
288 415
416 /*
417 * If we are using PTI, map the new LDT into the userspace pagetables.
418 * If there is already an LDT, use the other slot so that other CPUs
419 * will continue to use the old LDT until install_ldt() switches
420 * them over to the new LDT.
421 */
422 error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
423 if (error) {
424 /*
 425 * This can only fail for the first LDT setup. If an LDT is
426 * already installed then the PTE page is already
427 * populated. Mop up a half populated page table.
428 */
429 if (!WARN_ON_ONCE(old_ldt))
430 free_ldt_pgtables(mm);
431 free_ldt_struct(new_ldt);
432 goto out_unlock;
433 }
434
289 install_ldt(mm, new_ldt); 435 install_ldt(mm, new_ldt);
290 free_ldt_struct(old_ldt); 436 free_ldt_struct(old_ldt);
291 error = 0; 437 error = 0;
292 438
293out_unlock: 439out_unlock:
294 mutex_unlock(&mm->context.lock); 440 up_write(&mm->context.ldt_usr_sem);
295out: 441out:
296 return error; 442 return error;
297} 443}
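The slot handling in write_ldt() above alternates between the two PTI LDT slots so remote CPUs keep a live mapping until install_ldt() switches them over. A sketch of just the slot choice (slot -1 meaning "not mapped yet", as in alloc_ldt_struct()):

struct ldt_sketch {
	int slot;	/* -1 (unmapped), 0 or 1 */
};

static int pick_slot(const struct ldt_sketch *old_ldt)
{
	/* the first LDT takes slot 0; replacements take the other slot */
	return old_ldt ? !old_ldt->slot : 0;
}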
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 00bc751c861c..edfede768688 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -48,8 +48,6 @@ static void load_segments(void)
48 "\tmovl $"STR(__KERNEL_DS)",%%eax\n" 48 "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
49 "\tmovl %%eax,%%ds\n" 49 "\tmovl %%eax,%%ds\n"
50 "\tmovl %%eax,%%es\n" 50 "\tmovl %%eax,%%es\n"
51 "\tmovl %%eax,%%fs\n"
52 "\tmovl %%eax,%%gs\n"
53 "\tmovl %%eax,%%ss\n" 51 "\tmovl %%eax,%%ss\n"
54 : : : "eax", "memory"); 52 : : : "eax", "memory");
55#undef STR 53#undef STR
@@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
232 * The gdt & idt are now invalid. 230 * The gdt & idt are now invalid.
233 * If you want to load them you must set up your own idt & gdt. 231 * If you want to load them you must set up your own idt & gdt.
234 */ 232 */
235 set_gdt(phys_to_virt(0), 0);
236 idt_invalidate(phys_to_virt(0)); 233 idt_invalidate(phys_to_virt(0));
234 set_gdt(phys_to_virt(0), 0);
237 235
238 /* now call it */ 236 /* now call it */
239 image->start = relocate_kernel_ptr((unsigned long)image->head, 237 image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index ac0be8283325..9edadabf04f6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); 10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); 11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); 12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
13DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
14DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); 13DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
15 14
16DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); 15DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
60 PATCH_SITE(pv_mmu_ops, read_cr2); 59 PATCH_SITE(pv_mmu_ops, read_cr2);
61 PATCH_SITE(pv_mmu_ops, read_cr3); 60 PATCH_SITE(pv_mmu_ops, read_cr3);
62 PATCH_SITE(pv_mmu_ops, write_cr3); 61 PATCH_SITE(pv_mmu_ops, write_cr3);
63 PATCH_SITE(pv_mmu_ops, flush_tlb_single);
64 PATCH_SITE(pv_cpu_ops, wbinvd); 62 PATCH_SITE(pv_cpu_ops, wbinvd);
65#if defined(CONFIG_PARAVIRT_SPINLOCKS) 63#if defined(CONFIG_PARAVIRT_SPINLOCKS)
66 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): 64 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index bb988a24db92..832a6acd730f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
47 * section. Since TSS's are completely CPU-local, we want them 47 * section. Since TSS's are completely CPU-local, we want them
48 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 48 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
49 */ 49 */
50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { 50__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
51 .x86_tss = { 51 .x86_tss = {
52 /* 52 /*
53 * .sp0 is only used when entering ring 0 from a lower 53 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
56 * Poison it. 56 * Poison it.
57 */ 57 */
58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, 58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
59
60#ifdef CONFIG_X86_64
61 /*
62 * .sp1 is cpu_current_top_of_stack. The init task never
63 * runs user code, but cpu_current_top_of_stack should still
64 * be well defined before the first context switch.
65 */
66 .sp1 = TOP_OF_INIT_STACK,
67#endif
68
59#ifdef CONFIG_X86_32 69#ifdef CONFIG_X86_32
60 .ss0 = __KERNEL_DS, 70 .ss0 = __KERNEL_DS,
61 .ss1 = __KERNEL_CS, 71 .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
71 */ 81 */
72 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, 82 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
73#endif 83#endif
74#ifdef CONFIG_X86_32
75 .SYSENTER_stack_canary = STACK_END_MAGIC,
76#endif
77}; 84};
78EXPORT_PER_CPU_SYMBOL(cpu_tss); 85EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
79 86
80DEFINE_PER_CPU(bool, __tss_limit_invalid); 87DEFINE_PER_CPU(bool, __tss_limit_invalid);
81EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); 88EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
104 struct fpu *fpu = &t->fpu; 111 struct fpu *fpu = &t->fpu;
105 112
106 if (bp) { 113 if (bp) {
107 struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); 114 struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
108 115
109 t->io_bitmap_ptr = NULL; 116 t->io_bitmap_ptr = NULL;
110 clear_thread_flag(TIF_IO_BITMAP); 117 clear_thread_flag(TIF_IO_BITMAP);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 45bf0c5f93e1..5224c6099184 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
234 struct fpu *prev_fpu = &prev->fpu; 234 struct fpu *prev_fpu = &prev->fpu;
235 struct fpu *next_fpu = &next->fpu; 235 struct fpu *next_fpu = &next->fpu;
236 int cpu = smp_processor_id(); 236 int cpu = smp_processor_id();
237 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 237 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
238 238
239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 239 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
240 240
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c25..c75466232016 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
69 unsigned int fsindex, gsindex; 69 unsigned int fsindex, gsindex;
70 unsigned int ds, cs, es; 70 unsigned int ds, cs, es;
71 71
72 printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); 72 show_iret_regs(regs);
73 printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, 73
74 regs->sp, regs->flags);
75 if (regs->orig_ax != -1) 74 if (regs->orig_ax != -1)
76 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); 75 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
77 else 76 else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
88 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", 87 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
89 regs->r13, regs->r14, regs->r15); 88 regs->r13, regs->r14, regs->r15);
90 89
90 if (!all)
91 return;
92
91 asm("movl %%ds,%0" : "=r" (ds)); 93 asm("movl %%ds,%0" : "=r" (ds));
92 asm("movl %%cs,%0" : "=r" (cs)); 94 asm("movl %%cs,%0" : "=r" (cs));
93 asm("movl %%es,%0" : "=r" (es)); 95 asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
98 rdmsrl(MSR_GS_BASE, gs); 100 rdmsrl(MSR_GS_BASE, gs);
99 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 101 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
100 102
101 if (!all)
102 return;
103
104 cr0 = read_cr0(); 103 cr0 = read_cr0();
105 cr2 = read_cr2(); 104 cr2 = read_cr2();
106 cr3 = __read_cr3(); 105 cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
400 struct fpu *prev_fpu = &prev->fpu; 399 struct fpu *prev_fpu = &prev->fpu;
401 struct fpu *next_fpu = &next->fpu; 400 struct fpu *next_fpu = &next->fpu;
402 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
403 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 402 struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
404 403
405 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && 404 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
406 this_cpu_read(irq_count) != -1); 405 this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
462 * Switch the PDA and FPU contexts. 461 * Switch the PDA and FPU contexts.
463 */ 462 */
464 this_cpu_write(current_task, next_p); 463 this_cpu_write(current_task, next_p);
464 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
465 465
466 /* Reload sp0. */ 466 /* Reload sp0. */
467 update_sp0(next_p); 467 update_sp0(next_p);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 8af2e8d0c0a1..145810b0edf6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -906,9 +906,6 @@ void __init setup_arch(char **cmdline_p)
906 set_bit(EFI_BOOT, &efi.flags); 906 set_bit(EFI_BOOT, &efi.flags);
907 set_bit(EFI_64BIT, &efi.flags); 907 set_bit(EFI_64BIT, &efi.flags);
908 } 908 }
909
910 if (efi_enabled(EFI_BOOT))
911 efi_memblock_x86_reserve_range();
912#endif 909#endif
913 910
914 x86_init.oem.arch_setup(); 911 x86_init.oem.arch_setup();
@@ -962,6 +959,8 @@ void __init setup_arch(char **cmdline_p)
962 959
963 parse_early_param(); 960 parse_early_param();
964 961
962 if (efi_enabled(EFI_BOOT))
963 efi_memblock_x86_reserve_range();
965#ifdef CONFIG_MEMORY_HOTPLUG 964#ifdef CONFIG_MEMORY_HOTPLUG
966 /* 965 /*
967 * Memory used by the kernel cannot be hot-removed because Linux 966 * Memory used by the kernel cannot be hot-removed because Linux
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 35cb20994e32..ed556d50d7ed 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -126,14 +126,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
126 spin_lock_irqsave(&rtc_lock, flags); 126 spin_lock_irqsave(&rtc_lock, flags);
127 CMOS_WRITE(0xa, 0xf); 127 CMOS_WRITE(0xa, 0xf);
128 spin_unlock_irqrestore(&rtc_lock, flags); 128 spin_unlock_irqrestore(&rtc_lock, flags);
129 local_flush_tlb();
130 pr_debug("1.\n");
131 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = 129 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
132 start_eip >> 4; 130 start_eip >> 4;
133 pr_debug("2.\n");
134 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 131 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
135 start_eip & 0xf; 132 start_eip & 0xf;
136 pr_debug("3.\n");
137} 133}
138 134
139static inline void smpboot_restore_warm_reset_vector(void) 135static inline void smpboot_restore_warm_reset_vector(void)
@@ -141,11 +137,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
141 unsigned long flags; 137 unsigned long flags;
142 138
143 /* 139 /*
144 * Install writable page 0 entry to set BIOS data area.
145 */
146 local_flush_tlb();
147
148 /*
149 * Paranoid: Set warm reset code and vector here back 140 * Paranoid: Set warm reset code and vector here back
150 * to default values. 141 * to default values.
151 */ 142 */
@@ -932,12 +923,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
932 initial_code = (unsigned long)start_secondary; 923 initial_code = (unsigned long)start_secondary;
933 initial_stack = idle->thread.sp; 924 initial_stack = idle->thread.sp;
934 925
935 /* 926 /* Enable the espfix hack for this CPU */
936 * Enable the espfix hack for this CPU
937 */
938#ifdef CONFIG_X86_ESPFIX64
939 init_espfix_ap(cpu); 927 init_espfix_ap(cpu);
940#endif
941 928
942 /* So we see what's up */ 929 /* So we see what's up */
943 announce_cpu(cpu, apicid); 930 announce_cpu(cpu, apicid);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 77835bc021c7..093f2ea5dd56 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
102 for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); 102 for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
103 unwind_next_frame(&state)) { 103 unwind_next_frame(&state)) {
104 104
105 regs = unwind_get_entry_regs(&state); 105 regs = unwind_get_entry_regs(&state, NULL);
106 if (regs) { 106 if (regs) {
107 /* 107 /*
108 * Kernel mode registers on the stack indicate an 108 * Kernel mode registers on the stack indicate an
@@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk,
164{ 164{
165 int ret; 165 int ret;
166 166
167 /*
168 * If the task doesn't have a stack (e.g., a zombie), the stack is
169 * "reliably" empty.
170 */
167 if (!try_get_task_stack(tsk)) 171 if (!try_get_task_stack(tsk))
168 return -EINVAL; 172 return 0;
169 173
170 ret = __save_stack_trace_reliable(trace, tsk); 174 ret = __save_stack_trace_reliable(trace, tsk);
171 175
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index a4eb27918ceb..a2486f444073 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
138 return -1; 138 return -1;
139 set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); 139 set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
140 pte_unmap(pte); 140 pte_unmap(pte);
141
142 /*
143 * PTI poisons low addresses in the kernel page tables in the
144 * name of making them unusable for userspace. To execute
145 * code at such a low address, the poison must be cleared.
146 *
147 * Note: 'pgd' actually gets set in p4d_alloc() _or_
148 * pud_alloc() depending on 4/5-level paging.
149 */
150 pgd->pgd &= ~_PAGE_NX;
151
141 return 0; 152 return 0;
142} 153}
143 154
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 9a9c9b076955..a5b802a12212 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
93 cpu = get_cpu(); 93 cpu = get_cpu();
94 94
95 while (n-- > 0) { 95 while (n-- > 0) {
96 if (LDT_empty(info) || LDT_zero(info)) { 96 if (LDT_empty(info) || LDT_zero(info))
97 memset(desc, 0, sizeof(*desc)); 97 memset(desc, 0, sizeof(*desc));
98 } else { 98 else
99 fill_ldt(desc, info); 99 fill_ldt(desc, info);
100
101 /*
102 * Always set the accessed bit so that the CPU
103 * doesn't try to write to the (read-only) GDT.
104 */
105 desc->type |= 1;
106 }
107 ++info; 100 ++info;
108 ++desc; 101 ++desc;
109 } 102 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55..446c9ef8cfc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
51#include <asm/traps.h> 51#include <asm/traps.h>
52#include <asm/desc.h> 52#include <asm/desc.h>
53#include <asm/fpu/internal.h> 53#include <asm/fpu/internal.h>
54#include <asm/cpu_entry_area.h>
54#include <asm/mce.h> 55#include <asm/mce.h>
55#include <asm/fixmap.h> 56#include <asm/fixmap.h>
56#include <asm/mach_traps.h> 57#include <asm/mach_traps.h>
@@ -348,23 +349,42 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
348 349
349 /* 350 /*
350 * If IRET takes a non-IST fault on the espfix64 stack, then we 351 * If IRET takes a non-IST fault on the espfix64 stack, then we
351 * end up promoting it to a doublefault. In that case, modify 352 * end up promoting it to a doublefault. In that case, take
352 * the stack to make it look like we just entered the #GP 353 * advantage of the fact that we're not using the normal (TSS.sp0)
353 * handler from user space, similar to bad_iret. 354 * stack right now. We can write a fake #GP(0) frame at TSS.sp0
355 * and then modify our own IRET frame so that, when we return,
356 * we land directly at the #GP(0) vector with the stack already
357 * set up according to its expectations.
358 *
359 * The net result is that our #GP handler will think that we
360 * entered from usermode with the bad user context.
354 * 361 *
355 * No need for ist_enter here because we don't use RCU. 362 * No need for ist_enter here because we don't use RCU.
356 */ 363 */
357 if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && 364 if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
358 regs->cs == __KERNEL_CS && 365 regs->cs == __KERNEL_CS &&
359 regs->ip == (unsigned long)native_irq_return_iret) 366 regs->ip == (unsigned long)native_irq_return_iret)
360 { 367 {
361 struct pt_regs *normal_regs = task_pt_regs(current); 368 struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
362 369
363 /* Fake a #GP(0) from userspace. */ 370 /*
364 memmove(&normal_regs->ip, (void *)regs->sp, 5*8); 371 * regs->sp points to the failing IRET frame on the
365 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ 372 * ESPFIX64 stack. Copy it to the entry stack. This fills
373 * in gpregs->ss through gpregs->ip.
374 *
375 */
376 memmove(&gpregs->ip, (void *)regs->sp, 5*8);
377 gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
378
379 /*
380 * Adjust our frame so that we return straight to the #GP
381 * vector with the expected RSP value. This is safe because
 382 * we won't enable interrupts or schedule before we invoke
383 * general_protection, so nothing will clobber the stack
384 * frame we just set up.
385 */
366 regs->ip = (unsigned long)general_protection; 386 regs->ip = (unsigned long)general_protection;
367 regs->sp = (unsigned long)&normal_regs->orig_ax; 387 regs->sp = (unsigned long)&gpregs->orig_ax;
368 388
369 return; 389 return;
370 } 390 }
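
The 5*8 copied by the memmove above is the hardware exception frame that the failing IRET was consuming: five 8-byte slots at the tail of struct pt_regs. A sketch of just that tail (field order as pushed on x86-64; illustrative, not the kernel's definition):

    /*
     * The last five quadwords of struct pt_regs, in ascending address
     * order. &gpregs->ip is the start of this region, so a single
     * memmove of 5*8 bytes fills ip, cs, flags, sp and ss.
     */
    struct iret_frame {
            unsigned long ip;
            unsigned long cs;
            unsigned long flags;
            unsigned long sp;
            unsigned long ss;
    };
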
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
389 * 409 *
390 * Processors update CR2 whenever a page fault is detected. If a 410 * Processors update CR2 whenever a page fault is detected. If a
391 * second page fault occurs while an earlier page fault is being 411 * second page fault occurs while an earlier page fault is being
392 * deliv- ered, the faulting linear address of the second fault will 412 * delivered, the faulting linear address of the second fault will
393 * overwrite the contents of CR2 (replacing the previous 413 * overwrite the contents of CR2 (replacing the previous
394 * address). These updates to CR2 occur even if the page fault 414 * address). These updates to CR2 occur even if the page fault
395 * results in a double fault or occurs during the delivery of a 415 * results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
605 625
606#ifdef CONFIG_X86_64 626#ifdef CONFIG_X86_64
607/* 627/*
608 * Help handler running on IST stack to switch off the IST stack if the 628 * Help handler running on a per-cpu (IST or entry trampoline) stack
609 * interrupted code was in user mode. The actual stack switch is done in 629 * to switch to the normal thread stack if the interrupted code was in
610 * entry_64.S 630 * user mode. The actual stack switch is done in entry_64.S
611 */ 631 */
612asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) 632asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
613{ 633{
614 struct pt_regs *regs = task_pt_regs(current); 634 struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
615 *regs = *eregs; 635 if (regs != eregs)
636 *regs = *eregs;
616 return regs; 637 return regs;
617} 638}
618NOKPROBE_SYMBOL(sync_regs); 639NOKPROBE_SYMBOL(sync_regs);
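
The `- 1` in the new sync_regs() is ordinary C pointer arithmetic: subtracting one element from a struct pt_regs pointer steps back sizeof(struct pt_regs) bytes, so the expression names the register frame sitting at the very top of the thread stack. The same computation as a standalone sketch (illustrative helper; the kernel writes the expression inline):

    static struct pt_regs *regs_at_top(unsigned long top_of_stack)
    {
            /* One pt_regs-sized step down from the top of the stack. */
            return (struct pt_regs *)top_of_stack - 1;
    }
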
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
628 /* 649 /*
629 * This is called from entry_64.S early in handling a fault 650 * This is called from entry_64.S early in handling a fault
630 * caused by a bad iret to user mode. To handle the fault 651 * caused by a bad iret to user mode. To handle the fault
631 * correctly, we want move our stack frame to task_pt_regs 652 * correctly, we want to move our stack frame to where it would
632 * and we want to pretend that the exception came from the 653 * be had we entered directly on the entry stack (rather than
633 * iret target. 654 * just below the IRET frame) and we want to pretend that the
655 * exception came from the IRET target.
634 */ 656 */
635 struct bad_iret_stack *new_stack = 657 struct bad_iret_stack *new_stack =
636 container_of(task_pt_regs(current), 658 (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
637 struct bad_iret_stack, regs);
638 659
639 /* Copy the IRET target to the new stack. */ 660 /* Copy the IRET target to the new stack. */
640 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); 661 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
795 debug_stack_usage_dec(); 816 debug_stack_usage_dec();
796 817
797exit: 818exit:
798#if defined(CONFIG_X86_32)
799 /*
800 * This is the most likely code path that involves non-trivial use
801 * of the SYSENTER stack. Check that we haven't overrun it.
802 */
803 WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
804 "Overran or corrupted SYSENTER stack\n");
805#endif
806 ist_exit(regs); 819 ist_exit(regs);
807} 820}
808NOKPROBE_SYMBOL(do_debug); 821NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
929 942
930void __init trap_init(void) 943void __init trap_init(void)
931{ 944{
945 /* Init cpu_entry_area before IST entries are set up */
946 setup_cpu_entry_areas();
947
932 idt_setup_traps(); 948 idt_setup_traps();
933 949
934 /* 950 /*
@@ -936,8 +952,9 @@ void __init trap_init(void)
936 * "sidt" instruction will not leak the location of the kernel, and 952 * "sidt" instruction will not leak the location of the kernel, and
937 * to defend the IDT against arbitrary memory write vulnerabilities. 953 * to defend the IDT against arbitrary memory write vulnerabilities.
938 * It will be reloaded in cpu_init() */ 954 * It will be reloaded in cpu_init() */
939 __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); 955 cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
940 idt_descr.address = fix_to_virt(FIX_RO_IDT); 956 PAGE_KERNEL_RO);
957 idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
941 958
942 /* 959 /*
943 * Should be a barrier for any external CPU state: 960 * Should be a barrier for any external CPU state:
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a3f973b2c97a..be86a865087a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
253 return NULL; 253 return NULL;
254} 254}
255 255
256static bool stack_access_ok(struct unwind_state *state, unsigned long addr, 256static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
257 size_t len) 257 size_t len)
258{ 258{
259 struct stack_info *info = &state->stack_info; 259 struct stack_info *info = &state->stack_info;
260 void *addr = (void *)_addr;
260 261
261 /* 262 if (!on_stack(info, addr, len) &&
262 * If the address isn't on the current stack, switch to the next one. 263 (get_stack_info(addr, state->task, info, &state->stack_mask)))
263 * 264 return false;
264 * We may have to traverse multiple stacks to deal with the possibility
265 * that info->next_sp could point to an empty stack and the address
266 * could be on a subsequent stack.
267 */
268 while (!on_stack(info, (void *)addr, len))
269 if (get_stack_info(info->next_sp, state->task, info,
270 &state->stack_mask))
271 return false;
272 265
273 return true; 266 return true;
274} 267}
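
The rewritten check no longer loops over next_sp links: the access is accepted if the range is already on the tracked stack, and otherwise get_stack_info() is asked to classify the stack containing the address directly. The control flow in sketch form (get_stack_info() returns 0 on success):

    if (on_stack(info, addr, len))
            return true;    /* still on the stack we already know about */
    /* Otherwise, try to identify the stack that addr actually sits on. */
    return get_stack_info(addr, state->task, info, &state->stack_mask) == 0;
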
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
283 return true; 276 return true;
284} 277}
285 278
286#define REGS_SIZE (sizeof(struct pt_regs))
287#define SP_OFFSET (offsetof(struct pt_regs, sp))
288#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
289#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
290
291static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, 279static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
292 unsigned long *ip, unsigned long *sp, bool full) 280 unsigned long *ip, unsigned long *sp)
293{ 281{
294 size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; 282 struct pt_regs *regs = (struct pt_regs *)addr;
295 size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
296 struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
297
298 if (IS_ENABLED(CONFIG_X86_64)) {
299 if (!stack_access_ok(state, addr, regs_size))
300 return false;
301 283
302 *ip = regs->ip; 284 /* x86-32 support will be more complicated due to the &regs->sp hack */
303 *sp = regs->sp; 285 BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
304 286
305 return true; 287 if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
306 }
307
308 if (!stack_access_ok(state, addr, sp_offset))
309 return false; 288 return false;
310 289
311 *ip = regs->ip; 290 *ip = regs->ip;
291 *sp = regs->sp;
292 return true;
293}
312 294
313 if (user_mode(regs)) { 295static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
314 if (!stack_access_ok(state, addr + sp_offset, 296 unsigned long *ip, unsigned long *sp)
315 REGS_SIZE - SP_OFFSET)) 297{
316 return false; 298 struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
317 299
318 *sp = regs->sp; 300 if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
319 } else 301 return false;
320 *sp = (unsigned long)&regs->sp;
321 302
303 *ip = regs->ip;
304 *sp = regs->sp;
322 return true; 305 return true;
323} 306}
324 307
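
deref_stack_iret_regs() relies on two constants introduced elsewhere in this series; assuming their asm/ptrace.h definitions, sketched below, backing up by IRET_FRAME_OFFSET yields a struct pt_regs pointer whose ip field lands exactly at addr, while only the trailing IRET_FRAME_SIZE bytes (ip through ss) are range-checked and read:

    /* Assumed definitions, matching asm/ptrace.h in this series: */
    #define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
    #define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
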
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
327 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; 310 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
328 enum stack_type prev_type = state->stack_info.type; 311 enum stack_type prev_type = state->stack_info.type;
329 struct orc_entry *orc; 312 struct orc_entry *orc;
330 struct pt_regs *ptregs;
331 bool indirect = false; 313 bool indirect = false;
332 314
333 if (unwind_done(state)) 315 if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
435 break; 417 break;
436 418
437 case ORC_TYPE_REGS: 419 case ORC_TYPE_REGS:
438 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { 420 if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
439 orc_warn("can't dereference registers at %p for ip %pB\n", 421 orc_warn("can't dereference registers at %p for ip %pB\n",
440 (void *)sp, (void *)orig_ip); 422 (void *)sp, (void *)orig_ip);
441 goto done; 423 goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
447 break; 429 break;
448 430
449 case ORC_TYPE_REGS_IRET: 431 case ORC_TYPE_REGS_IRET:
450 if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { 432 if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
451 orc_warn("can't dereference iret registers at %p for ip %pB\n", 433 orc_warn("can't dereference iret registers at %p for ip %pB\n",
452 (void *)sp, (void *)orig_ip); 434 (void *)sp, (void *)orig_ip);
453 goto done; 435 goto done;
454 } 436 }
455 437
456 ptregs = container_of((void *)sp, struct pt_regs, ip); 438 state->regs = (void *)sp - IRET_FRAME_OFFSET;
457 if ((unsigned long)ptregs >= prev_sp && 439 state->full_regs = false;
458 on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
459 state->regs = ptregs;
460 state->full_regs = false;
461 } else
462 state->regs = NULL;
463
464 state->signal = true; 440 state->signal = true;
465 break; 441 break;
466 442
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
553 } 529 }
554 530
555 if (get_stack_info((unsigned long *)state->sp, state->task, 531 if (get_stack_info((unsigned long *)state->sp, state->task,
556 &state->stack_info, &state->stack_mask)) 532 &state->stack_info, &state->stack_mask)) {
557 return; 533 /*
534 * We weren't on a valid stack. It's possible that
535 * we overflowed a valid stack into a guard page.
536 * See if the next page up is valid so that we can
537 * generate some kind of backtrace if this happens.
538 */
539 void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
540 if (get_stack_info(next_page, state->task, &state->stack_info,
541 &state->stack_mask))
542 return;
543 }
558 544
559 /* 545 /*
560 * The caller can provide the address of the first frame directly 546 * The caller can provide the address of the first frame directly
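
PAGE_ALIGN() rounds its argument up to the next page boundary, so when the saved stack pointer has overflowed into a guard page, the probe lands on the first byte of the page above it, which is the stack that was overrun. A worked example with 4 KiB pages (the addresses are made up):

    unsigned long sp = 0xffffc90000003ff8;  /* 8 bytes into a guard page */
    void *next_page  = (void *)PAGE_ALIGN(sp);
    /* next_page == 0xffffc90000004000: the base of the page above,
     * i.e. the bottom of the stack we fell off of. */
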
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a4009fb9be87..1e413a9326aa 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
61 . = ALIGN(HPAGE_SIZE); \ 61 . = ALIGN(HPAGE_SIZE); \
62 __end_rodata_hpage_align = .; 62 __end_rodata_hpage_align = .;
63 63
64#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
65#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
66
64#else 67#else
65 68
66#define X64_ALIGN_RODATA_BEGIN 69#define X64_ALIGN_RODATA_BEGIN
67#define X64_ALIGN_RODATA_END 70#define X64_ALIGN_RODATA_END
68 71
72#define ALIGN_ENTRY_TEXT_BEGIN
73#define ALIGN_ENTRY_TEXT_END
74
69#endif 75#endif
70 76
71PHDRS { 77PHDRS {
@@ -102,11 +108,22 @@ SECTIONS
102 CPUIDLE_TEXT 108 CPUIDLE_TEXT
103 LOCK_TEXT 109 LOCK_TEXT
104 KPROBES_TEXT 110 KPROBES_TEXT
111 ALIGN_ENTRY_TEXT_BEGIN
105 ENTRY_TEXT 112 ENTRY_TEXT
106 IRQENTRY_TEXT 113 IRQENTRY_TEXT
114 ALIGN_ENTRY_TEXT_END
107 SOFTIRQENTRY_TEXT 115 SOFTIRQENTRY_TEXT
108 *(.fixup) 116 *(.fixup)
109 *(.gnu.warning) 117 *(.gnu.warning)
118
119#ifdef CONFIG_X86_64
120 . = ALIGN(PAGE_SIZE);
121 _entry_trampoline = .;
122 *(.entry_trampoline)
123 . = ALIGN(PAGE_SIZE);
124 ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
125#endif
126
110 /* End of text section */ 127 /* End of text section */
111 _etext = .; 128 _etext = .;
112 } :text = 0x9090 129 } :text = 0x9090
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index abe74f779f9d..b514b2b2845a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2390} 2390}
2391 2391
2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, 2392static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2393 u64 cr0, u64 cr4) 2393 u64 cr0, u64 cr3, u64 cr4)
2394{ 2394{
2395 int bad; 2395 int bad;
2396 u64 pcid;
2397
2398 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2399 pcid = 0;
2400 if (cr4 & X86_CR4_PCIDE) {
2401 pcid = cr3 & 0xfff;
2402 cr3 &= ~0xfff;
2403 }
2404
2405 bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2406 if (bad)
2407 return X86EMUL_UNHANDLEABLE;
2396 2408
2397 /* 2409 /*
2398 * First enable PAE, long mode needs it before CR0.PG = 1 is set. 2410 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2411 bad = ctxt->ops->set_cr(ctxt, 4, cr4); 2423 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2412 if (bad) 2424 if (bad)
2413 return X86EMUL_UNHANDLEABLE; 2425 return X86EMUL_UNHANDLEABLE;
2426 if (pcid) {
2427 bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2428 if (bad)
2429 return X86EMUL_UNHANDLEABLE;
2430 }
2431
2414 } 2432 }
2415 2433
2416 return X86EMUL_CONTINUE; 2434 return X86EMUL_CONTINUE;
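
The split CR3 restore works around an architectural rule: a MOV to CR4 that sets PCIDE raises #GP if CR3[11:0] is not zero. Hence the ordering sketched here (error handling elided, purely illustrative):

    set_cr(3, cr3 & ~0xfffull);   /* 1: load CR3 with the PCID field clear */
    set_cr(4, cr4);               /* 2: CR4.PCIDE may now be set safely    */
    if (cr4 & X86_CR4_PCIDE)
            set_cr(3, cr3);       /* 3: reinstall the original PCID        */
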
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2421 struct desc_struct desc; 2439 struct desc_struct desc;
2422 struct desc_ptr dt; 2440 struct desc_ptr dt;
2423 u16 selector; 2441 u16 selector;
2424 u32 val, cr0, cr4; 2442 u32 val, cr0, cr3, cr4;
2425 int i; 2443 int i;
2426 2444
2427 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); 2445 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2428 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); 2446 cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
2429 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; 2447 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2430 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); 2448 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2431 2449
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2467 2485
2468 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); 2486 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2469 2487
2470 return rsm_enter_protected_mode(ctxt, cr0, cr4); 2488 return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2471} 2489}
2472 2490
2473static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) 2491static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2474{ 2492{
2475 struct desc_struct desc; 2493 struct desc_struct desc;
2476 struct desc_ptr dt; 2494 struct desc_ptr dt;
2477 u64 val, cr0, cr4; 2495 u64 val, cr0, cr3, cr4;
2478 u32 base3; 2496 u32 base3;
2479 u16 selector; 2497 u16 selector;
2480 int i, r; 2498 int i, r;
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2491 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); 2509 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2492 2510
2493 cr0 = GET_SMSTATE(u64, smbase, 0x7f58); 2511 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2494 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); 2512 cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
2495 cr4 = GET_SMSTATE(u64, smbase, 0x7f48); 2513 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2496 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); 2514 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2497 val = GET_SMSTATE(u64, smbase, 0x7ed0); 2515 val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2519 dt.address = GET_SMSTATE(u64, smbase, 0x7e68); 2537 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2520 ctxt->ops->set_gdt(ctxt, &dt); 2538 ctxt->ops->set_gdt(ctxt, &dt);
2521 2539
2522 r = rsm_enter_protected_mode(ctxt, cr0, cr4); 2540 r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2523 if (r != X86EMUL_CONTINUE) 2541 if (r != X86EMUL_CONTINUE)
2524 return r; 2542 return r;
2525 2543
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c6640..2b8eb4da4d08 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3395 spin_lock(&vcpu->kvm->mmu_lock); 3395 spin_lock(&vcpu->kvm->mmu_lock);
3396 if(make_mmu_pages_available(vcpu) < 0) { 3396 if(make_mmu_pages_available(vcpu) < 0) {
3397 spin_unlock(&vcpu->kvm->mmu_lock); 3397 spin_unlock(&vcpu->kvm->mmu_lock);
3398 return 1; 3398 return -ENOSPC;
3399 } 3399 }
3400 sp = kvm_mmu_get_page(vcpu, 0, 0, 3400 sp = kvm_mmu_get_page(vcpu, 0, 0,
3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); 3401 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
3410 spin_lock(&vcpu->kvm->mmu_lock); 3410 spin_lock(&vcpu->kvm->mmu_lock);
3411 if (make_mmu_pages_available(vcpu) < 0) { 3411 if (make_mmu_pages_available(vcpu) < 0) {
3412 spin_unlock(&vcpu->kvm->mmu_lock); 3412 spin_unlock(&vcpu->kvm->mmu_lock);
3413 return 1; 3413 return -ENOSPC;
3414 } 3414 }
3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), 3415 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); 3416 i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3450 spin_lock(&vcpu->kvm->mmu_lock); 3450 spin_lock(&vcpu->kvm->mmu_lock);
3451 if (make_mmu_pages_available(vcpu) < 0) { 3451 if (make_mmu_pages_available(vcpu) < 0) {
3452 spin_unlock(&vcpu->kvm->mmu_lock); 3452 spin_unlock(&vcpu->kvm->mmu_lock);
3453 return 1; 3453 return -ENOSPC;
3454 } 3454 }
3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 3455 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); 3456 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3487 spin_lock(&vcpu->kvm->mmu_lock); 3487 spin_lock(&vcpu->kvm->mmu_lock);
3488 if (make_mmu_pages_available(vcpu) < 0) { 3488 if (make_mmu_pages_available(vcpu) < 0) {
3489 spin_unlock(&vcpu->kvm->mmu_lock); 3489 spin_unlock(&vcpu->kvm->mmu_lock);
3490 return 1; 3490 return -ENOSPC;
3491 } 3491 }
3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 3492 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
3493 0, ACC_ALL); 3493 0, ACC_ALL);
@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
3781bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) 3781bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
3782{ 3782{
3783 if (unlikely(!lapic_in_kernel(vcpu) || 3783 if (unlikely(!lapic_in_kernel(vcpu) ||
3784 kvm_event_needs_reinjection(vcpu))) 3784 kvm_event_needs_reinjection(vcpu) ||
3785 vcpu->arch.exception.pending))
3785 return false; 3786 return false;
3786 3787
3787 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) 3788 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
5465 5466
5466int kvm_mmu_module_init(void) 5467int kvm_mmu_module_init(void)
5467{ 5468{
5469 int ret = -ENOMEM;
5470
5468 kvm_mmu_clear_all_pte_masks(); 5471 kvm_mmu_clear_all_pte_masks();
5469 5472
5470 pte_list_desc_cache = kmem_cache_create("pte_list_desc", 5473 pte_list_desc_cache = kmem_cache_create("pte_list_desc",
5471 sizeof(struct pte_list_desc), 5474 sizeof(struct pte_list_desc),
5472 0, SLAB_ACCOUNT, NULL); 5475 0, SLAB_ACCOUNT, NULL);
5473 if (!pte_list_desc_cache) 5476 if (!pte_list_desc_cache)
5474 goto nomem; 5477 goto out;
5475 5478
5476 mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", 5479 mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
5477 sizeof(struct kvm_mmu_page), 5480 sizeof(struct kvm_mmu_page),
5478 0, SLAB_ACCOUNT, NULL); 5481 0, SLAB_ACCOUNT, NULL);
5479 if (!mmu_page_header_cache) 5482 if (!mmu_page_header_cache)
5480 goto nomem; 5483 goto out;
5481 5484
5482 if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) 5485 if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
5483 goto nomem; 5486 goto out;
5484 5487
5485 register_shrinker(&mmu_shrinker); 5488 ret = register_shrinker(&mmu_shrinker);
5489 if (ret)
5490 goto out;
5486 5491
5487 return 0; 5492 return 0;
5488 5493
5489nomem: 5494out:
5490 mmu_destroy_caches(); 5495 mmu_destroy_caches();
5491 return -ENOMEM; 5496 return ret;
5492} 5497}
5493 5498
5494/* 5499/*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index eb714f1cdf7e..f40d0da1f1d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -45,6 +45,7 @@
45#include <asm/debugreg.h> 45#include <asm/debugreg.h>
46#include <asm/kvm_para.h> 46#include <asm/kvm_para.h>
47#include <asm/irq_remapping.h> 47#include <asm/irq_remapping.h>
48#include <asm/nospec-branch.h>
48 49
49#include <asm/virtext.h> 50#include <asm/virtext.h>
50#include "trace.h" 51#include "trace.h"
@@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
361{ 362{
362 struct vmcb_control_area *c, *h; 363 struct vmcb_control_area *c, *h;
363 struct nested_state *g; 364 struct nested_state *g;
364 u32 h_intercept_exceptions;
365 365
366 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 366 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
367 367
@@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
372 h = &svm->nested.hsave->control; 372 h = &svm->nested.hsave->control;
373 g = &svm->nested; 373 g = &svm->nested;
374 374
375 /* No need to intercept #UD if L1 doesn't intercept it */
376 h_intercept_exceptions =
377 h->intercept_exceptions & ~(1U << UD_VECTOR);
378
379 c->intercept_cr = h->intercept_cr | g->intercept_cr; 375 c->intercept_cr = h->intercept_cr | g->intercept_cr;
380 c->intercept_dr = h->intercept_dr | g->intercept_dr; 376 c->intercept_dr = h->intercept_dr | g->intercept_dr;
381 c->intercept_exceptions = 377 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
382 h_intercept_exceptions | g->intercept_exceptions;
383 c->intercept = h->intercept | g->intercept; 378 c->intercept = h->intercept | g->intercept;
384} 379}
385 380
@@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm)
2202{ 2197{
2203 int er; 2198 int er;
2204 2199
2205 WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
2206 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); 2200 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
2207 if (er == EMULATE_USER_EXIT) 2201 if (er == EMULATE_USER_EXIT)
2208 return 0; 2202 return 0;
@@ -4986,6 +4980,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
4986 "mov %%r14, %c[r14](%[svm]) \n\t" 4980 "mov %%r14, %c[r14](%[svm]) \n\t"
4987 "mov %%r15, %c[r15](%[svm]) \n\t" 4981 "mov %%r15, %c[r15](%[svm]) \n\t"
4988#endif 4982#endif
4983 /*
4984 * Clear host registers marked as clobbered to prevent
4985 * speculative use.
4986 */
4987 "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
4988 "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
4989 "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
4990 "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
4991 "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
4992#ifdef CONFIG_X86_64
4993 "xor %%r8, %%r8 \n\t"
4994 "xor %%r9, %%r9 \n\t"
4995 "xor %%r10, %%r10 \n\t"
4996 "xor %%r11, %%r11 \n\t"
4997 "xor %%r12, %%r12 \n\t"
4998 "xor %%r13, %%r13 \n\t"
4999 "xor %%r14, %%r14 \n\t"
5000 "xor %%r15, %%r15 \n\t"
5001#endif
4989 "pop %%" _ASM_BP 5002 "pop %%" _ASM_BP
4990 : 5003 :
4991 : [svm]"a"(svm), 5004 : [svm]"a"(svm),
@@ -5015,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5015#endif 5028#endif
5016 ); 5029 );
5017 5030
5031 /* Eliminate branch target predictions from guest mode */
5032 vmexit_fill_RSB();
5033
5018#ifdef CONFIG_X86_64 5034#ifdef CONFIG_X86_64
5019 wrmsrl(MSR_GS_BASE, svm->host.gs_base); 5035 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
5020#else 5036#else
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8eba631c4dbd..c829d89e2e63 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,7 @@
50#include <asm/apic.h> 50#include <asm/apic.h>
51#include <asm/irq_remapping.h> 51#include <asm/irq_remapping.h>
52#include <asm/mmu_context.h> 52#include <asm/mmu_context.h>
53#include <asm/nospec-branch.h>
53 54
54#include "trace.h" 55#include "trace.h"
55#include "pmu.h" 56#include "pmu.h"
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
899{ 900{
900 BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); 901 BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
901 902
902 if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || 903 if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
903 vmcs_field_to_offset_table[field] == 0) 904 return -ENOENT;
905
906 /*
907 * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
908 * generic mechanism.
909 */
910 asm("lfence");
911
912 if (vmcs_field_to_offset_table[field] == 0)
904 return -ENOENT; 913 return -ENOENT;
905 914
906 return vmcs_field_to_offset_table[field]; 915 return vmcs_field_to_offset_table[field];
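
This is the bounds-check serialization pattern for Spectre variant 1 (CVE-2017-5753): the lfence keeps the CPU from speculatively running past the index check and loading from an attacker-steered table slot. The shape of the mitigation, in sketch form:

    if (index >= table_size)
            return -ENOENT;
    /* Serialize: no speculative load may pass the bounds check. */
    asm volatile("lfence" ::: "memory");
    return table[index];
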
@@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1887{ 1896{
1888 u32 eb; 1897 u32 eb;
1889 1898
1890 eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | 1899 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
1891 (1u << DB_VECTOR) | (1u << AC_VECTOR); 1900 (1u << DB_VECTOR) | (1u << AC_VECTOR);
1892 if ((vcpu->guest_debug & 1901 if ((vcpu->guest_debug &
1893 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == 1902 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1905 */ 1914 */
1906 if (is_guest_mode(vcpu)) 1915 if (is_guest_mode(vcpu))
1907 eb |= get_vmcs12(vcpu)->exception_bitmap; 1916 eb |= get_vmcs12(vcpu)->exception_bitmap;
1908 else
1909 eb |= 1u << UD_VECTOR;
1910 1917
1911 vmcs_write32(EXCEPTION_BITMAP, eb); 1918 vmcs_write32(EXCEPTION_BITMAP, eb);
1912} 1919}
@@ -2302,7 +2309,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2302 * processors. See 22.2.4. 2309 * processors. See 22.2.4.
2303 */ 2310 */
2304 vmcs_writel(HOST_TR_BASE, 2311 vmcs_writel(HOST_TR_BASE,
2305 (unsigned long)this_cpu_ptr(&cpu_tss)); 2312 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2306 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ 2313 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2307 2314
2308 /* 2315 /*
@@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
5917 return 1; /* already handled by vmx_vcpu_run() */ 5924 return 1; /* already handled by vmx_vcpu_run() */
5918 5925
5919 if (is_invalid_opcode(intr_info)) { 5926 if (is_invalid_opcode(intr_info)) {
5920 WARN_ON_ONCE(is_guest_mode(vcpu));
5921 er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); 5927 er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
5922 if (er == EMULATE_USER_EXIT) 5928 if (er == EMULATE_USER_EXIT)
5923 return 0; 5929 return 0;
@@ -9415,6 +9421,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9415 /* Save guest registers, load host registers, keep flags */ 9421 /* Save guest registers, load host registers, keep flags */
9416 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" 9422 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
9417 "pop %0 \n\t" 9423 "pop %0 \n\t"
9424 "setbe %c[fail](%0)\n\t"
9418 "mov %%" _ASM_AX ", %c[rax](%0) \n\t" 9425 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
9419 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" 9426 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
9420 __ASM_SIZE(pop) " %c[rcx](%0) \n\t" 9427 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -9431,12 +9438,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9431 "mov %%r13, %c[r13](%0) \n\t" 9438 "mov %%r13, %c[r13](%0) \n\t"
9432 "mov %%r14, %c[r14](%0) \n\t" 9439 "mov %%r14, %c[r14](%0) \n\t"
9433 "mov %%r15, %c[r15](%0) \n\t" 9440 "mov %%r15, %c[r15](%0) \n\t"
9441 "xor %%r8d, %%r8d \n\t"
9442 "xor %%r9d, %%r9d \n\t"
9443 "xor %%r10d, %%r10d \n\t"
9444 "xor %%r11d, %%r11d \n\t"
9445 "xor %%r12d, %%r12d \n\t"
9446 "xor %%r13d, %%r13d \n\t"
9447 "xor %%r14d, %%r14d \n\t"
9448 "xor %%r15d, %%r15d \n\t"
9434#endif 9449#endif
9435 "mov %%cr2, %%" _ASM_AX " \n\t" 9450 "mov %%cr2, %%" _ASM_AX " \n\t"
9436 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" 9451 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
9437 9452
9453 "xor %%eax, %%eax \n\t"
9454 "xor %%ebx, %%ebx \n\t"
9455 "xor %%esi, %%esi \n\t"
9456 "xor %%edi, %%edi \n\t"
9438 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" 9457 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
9439 "setbe %c[fail](%0) \n\t"
9440 ".pushsection .rodata \n\t" 9458 ".pushsection .rodata \n\t"
9441 ".global vmx_return \n\t" 9459 ".global vmx_return \n\t"
9442 "vmx_return: " _ASM_PTR " 2b \n\t" 9460 "vmx_return: " _ASM_PTR " 2b \n\t"
@@ -9473,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9473#endif 9491#endif
9474 ); 9492 );
9475 9493
9494 /* Eliminate branch target predictions from guest mode */
9495 vmexit_fill_RSB();
9496
9476 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ 9497 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
9477 if (debugctlmsr) 9498 if (debugctlmsr)
9478 update_debugctlmsr(debugctlmsr); 9499 update_debugctlmsr(debugctlmsr);
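
One detail of the register-clearing block above: on x86-64, writing a 32-bit register zero-extends into the full 64-bit register, so `xor %r8d, %r8d` clears all of %r8. A minimal demonstration of that architectural rule (illustrative inline asm, not kernel code):

    unsigned long v = ~0UL;
    /* 32-bit xor of the low half; the upper 32 bits are cleared too. */
    asm volatile("xorl %k0, %k0" : "+r"(v));
    /* v == 0 here */
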
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index faf843c9b916..1cec2c62a0b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4384 addr, n, v)) 4384 addr, n, v))
4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) 4385 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
4386 break; 4386 break;
4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); 4387 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
4388 handled += n; 4388 handled += n;
4389 addr += n; 4389 addr += n;
4390 len -= n; 4390 len -= n;
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4643{ 4643{
4644 if (vcpu->mmio_read_completed) { 4644 if (vcpu->mmio_read_completed) {
4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, 4645 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4646 vcpu->mmio_fragments[0].gpa, *(u64 *)val); 4646 vcpu->mmio_fragments[0].gpa, val);
4647 vcpu->mmio_read_completed = 0; 4647 vcpu->mmio_read_completed = 0;
4648 return 1; 4648 return 1;
4649 } 4649 }
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4665 4665
4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) 4666static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4667{ 4667{
4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); 4668 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
4669 return vcpu_mmio_write(vcpu, gpa, bytes, val); 4669 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4670} 4670}
4671 4671
4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, 4672static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4673 void *val, int bytes) 4673 void *val, int bytes)
4674{ 4674{
4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 4675 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
4676 return X86EMUL_IO_NEEDED; 4676 return X86EMUL_IO_NEEDED;
4677} 4677}
4678 4678
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
7264 7264
7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 7265int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7266{ 7266{
7267 struct fpu *fpu = &current->thread.fpu;
7268 int r; 7267 int r;
7269 7268
7270 fpu__initialize(fpu);
7271
7272 kvm_sigset_activate(vcpu); 7269 kvm_sigset_activate(vcpu);
7273 7270
7271 kvm_load_guest_fpu(vcpu);
7272
7274 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 7273 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
7275 if (kvm_run->immediate_exit) { 7274 if (kvm_run->immediate_exit) {
7276 r = -EINTR; 7275 r = -EINTR;
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7296 } 7295 }
7297 } 7296 }
7298 7297
7299 kvm_load_guest_fpu(vcpu);
7300
7301 if (unlikely(vcpu->arch.complete_userspace_io)) { 7298 if (unlikely(vcpu->arch.complete_userspace_io)) {
7302 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; 7299 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
7303 vcpu->arch.complete_userspace_io = NULL; 7300 vcpu->arch.complete_userspace_io = NULL;
7304 r = cui(vcpu); 7301 r = cui(vcpu);
7305 if (r <= 0) 7302 if (r <= 0)
7306 goto out_fpu; 7303 goto out;
7307 } else 7304 } else
7308 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); 7305 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
7309 7306
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7312 else 7309 else
7313 r = vcpu_run(vcpu); 7310 r = vcpu_run(vcpu);
7314 7311
7315out_fpu:
7316 kvm_put_guest_fpu(vcpu);
7317out: 7312out:
7313 kvm_put_guest_fpu(vcpu);
7318 post_kvm_run_save(vcpu); 7314 post_kvm_run_save(vcpu);
7319 kvm_sigset_deactivate(vcpu); 7315 kvm_sigset_deactivate(vcpu);
7320 7316
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7384#endif 7380#endif
7385 7381
7386 kvm_rip_write(vcpu, regs->rip); 7382 kvm_rip_write(vcpu, regs->rip);
7387 kvm_set_rflags(vcpu, regs->rflags); 7383 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
7388 7384
7389 vcpu->arch.exception.pending = false; 7385 vcpu->arch.exception.pending = false;
7390 7386
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
7498} 7494}
7499EXPORT_SYMBOL_GPL(kvm_task_switch); 7495EXPORT_SYMBOL_GPL(kvm_task_switch);
7500 7496
7497int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
7498{
7499 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
7500 /*
7501 * When EFER.LME and CR0.PG are set, the processor is in
7502 * 64-bit mode (though maybe in a 32-bit code segment).
7503 * CR4.PAE and EFER.LMA must be set.
7504 */
7505 if (!(sregs->cr4 & X86_CR4_PAE)
7506 || !(sregs->efer & EFER_LMA))
7507 return -EINVAL;
7508 } else {
7509 /*
7510 * Not in 64-bit mode: EFER.LMA is clear and the code
7511 * segment cannot be 64-bit.
7512 */
7513 if (sregs->efer & EFER_LMA || sregs->cs.l)
7514 return -EINVAL;
7515 }
7516
7517 return 0;
7518}
7519
7501int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 7520int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7502 struct kvm_sregs *sregs) 7521 struct kvm_sregs *sregs)
7503{ 7522{
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7510 (sregs->cr4 & X86_CR4_OSXSAVE)) 7529 (sregs->cr4 & X86_CR4_OSXSAVE))
7511 return -EINVAL; 7530 return -EINVAL;
7512 7531
7532 if (kvm_valid_sregs(vcpu, sregs))
7533 return -EINVAL;
7534
7513 apic_base_msr.data = sregs->apic_base; 7535 apic_base_msr.data = sregs->apic_base;
7514 apic_base_msr.host_initiated = true; 7536 apic_base_msr.host_initiated = true;
7515 if (kvm_set_apic_base(vcpu, &apic_base_msr)) 7537 if (kvm_set_apic_base(vcpu, &apic_base_msr))
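
As a usage illustration, here is a register combination the new check rejects: long mode requested but PAE left clear, a state real hardware cannot reach. (Hypothetical snippet against a KVM_SET_SREGS payload; field names as in struct kvm_sregs.)

    struct kvm_sregs sregs;
    /* ... previously fetched via KVM_GET_SREGS ... */
    sregs.efer |= EFER_LME | EFER_LMA;
    sregs.cr0  |= X86_CR0_PG;
    sregs.cr4  &= ~X86_CR4_PAE;  /* inconsistent: kvm_valid_sregs() -> -EINVAL */
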
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 7b181b61170e..f23934bbaf4e 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
26lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o 26lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
27lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o 27lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
29lib-$(CONFIG_RETPOLINE) += retpoline.o
29 30
30obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o 31obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
31 32
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4d34bb548b41..46e71a74e612 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -29,7 +29,8 @@
29#include <asm/errno.h> 29#include <asm/errno.h>
30#include <asm/asm.h> 30#include <asm/asm.h>
31#include <asm/export.h> 31#include <asm/export.h>
32 32#include <asm/nospec-branch.h>
33
33/* 34/*
34 * computes a partial checksum, e.g. for TCP/UDP fragments 35 * computes a partial checksum, e.g. for TCP/UDP fragments
35 */ 36 */
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
156 negl %ebx 157 negl %ebx
157 lea 45f(%ebx,%ebx,2), %ebx 158 lea 45f(%ebx,%ebx,2), %ebx
158 testl %esi, %esi 159 testl %esi, %esi
159 jmp *%ebx 160 JMP_NOSPEC %ebx
160 161
161 # Handle 2-byte-aligned regions 162 # Handle 2-byte-aligned regions
16220: addw (%esi), %ax 16320: addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
439 andl $-32,%edx 440 andl $-32,%edx
440 lea 3f(%ebx,%ebx), %ebx 441 lea 3f(%ebx,%ebx), %ebx
441 testl %esi, %esi 442 testl %esi, %esi
442 jmp *%ebx 443 JMP_NOSPEC %ebx
4431: addl $64,%esi 4441: addl $64,%esi
444 addl $64,%edi 445 addl $64,%edi
445 SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) 446 SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd23cc4..4846eff7e4c8 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops); 107 delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
108 108
109 /* 109 /*
110 * Use cpu_tss as a cacheline-aligned, seldom 110 * Use cpu_tss_rw as a cacheline-aligned, seldom
111 * accessed per-cpu variable as the monitor target. 111 * accessed per-cpu variable as the monitor target.
112 */ 112 */
113 __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); 113 __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
114 114
115 /* 115 /*
116 * AMD, like Intel, supports the EAX hint and EAX=0xf 116 * AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 000000000000..cb45c6cb465f
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,48 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#include <linux/stringify.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6#include <asm/cpufeatures.h>
7#include <asm/alternative-asm.h>
8#include <asm/export.h>
9#include <asm/nospec-branch.h>
10
11.macro THUNK reg
12 .section .text.__x86.indirect_thunk.\reg
13
14ENTRY(__x86_indirect_thunk_\reg)
15 CFI_STARTPROC
16 JMP_NOSPEC %\reg
17 CFI_ENDPROC
18ENDPROC(__x86_indirect_thunk_\reg)
19.endm
20
21/*
22 * Despite being an assembler file we can't just use .irp here
23 * because __KSYM_DEPS__ only uses the C preprocessor and would
24 * only see one instance of "__x86_indirect_thunk_\reg" rather
25 * than one per register with the correct names. So we do it
26 * the simple and nasty way...
27 */
28#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
29#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
30
31GENERATE_THUNK(_ASM_AX)
32GENERATE_THUNK(_ASM_BX)
33GENERATE_THUNK(_ASM_CX)
34GENERATE_THUNK(_ASM_DX)
35GENERATE_THUNK(_ASM_SI)
36GENERATE_THUNK(_ASM_DI)
37GENERATE_THUNK(_ASM_BP)
38GENERATE_THUNK(_ASM_SP)
39#ifdef CONFIG_64BIT
40GENERATE_THUNK(r8)
41GENERATE_THUNK(r9)
42GENERATE_THUNK(r10)
43GENERATE_THUNK(r11)
44GENERATE_THUNK(r12)
45GENERATE_THUNK(r13)
46GENERATE_THUNK(r14)
47GENERATE_THUNK(r15)
48#endif
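
For reference, with retpolines enabled JMP_NOSPEC %reg resolves to the call/trap/ret sequence of the published retpoline technique; a sketch of one generated thunk (reconstructed from the technique, not copied from the header):

    __x86_indirect_thunk_rax:
            call    1f              /* push 1f as the predicted return    */
    2:      pause                   /* speculation lands here and spins   */
            lfence
            jmp     2b
    1:      mov     %rax, (%rsp)    /* replace return address with target */
            ret                     /* "return" to the real destination   */
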
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8e13b8cc6bed..27e9e90a8d35 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
10endif 10endif
11 11
12obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 12obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
13 pat.o pgtable.o physaddr.o setup_nx.o tlb.o 13 pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
14 14
15# Make sure __phys_addr has no stackprotector 15# Make sure __phys_addr has no stackprotector
16nostackp := $(call cc-option, -fno-stack-protector) 16nostackp := $(call cc-option, -fno-stack-protector)
@@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
41obj-$(CONFIG_ACPI_NUMA) += srat.o 41obj-$(CONFIG_ACPI_NUMA) += srat.o
42obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 42obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
43 43
44obj-$(CONFIG_X86_INTEL_MPX) += mpx.o 44obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
45obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o 45obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
46obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o 46obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
47obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
47 48
48obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o 49obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
49obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o 50obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..b9283cc27622
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,166 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/spinlock.h>
4#include <linux/percpu.h>
5
6#include <asm/cpu_entry_area.h>
7#include <asm/pgtable.h>
8#include <asm/fixmap.h>
9#include <asm/desc.h>
10
11static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
12
13#ifdef CONFIG_X86_64
14static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
15 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
16#endif
17
18struct cpu_entry_area *get_cpu_entry_area(int cpu)
19{
20 unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
21 BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
22
23 return (struct cpu_entry_area *) va;
24}
25EXPORT_SYMBOL(get_cpu_entry_area);
26
27void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
28{
29 unsigned long va = (unsigned long) cea_vaddr;
30
31 set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
32}
33
34static void __init
35cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
36{
37 for ( ; pages; pages--, cea_vaddr += PAGE_SIZE, ptr += PAGE_SIZE)
38 cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
39}
40
41static void percpu_setup_debug_store(int cpu)
42{
43#ifdef CONFIG_CPU_SUP_INTEL
44 int npages;
45 void *cea;
46
47 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
48 return;
49
50 cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
51 npages = sizeof(struct debug_store) / PAGE_SIZE;
52 BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
53 cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
54 PAGE_KERNEL);
55
56 cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
57 /*
58 * Force the population of PMDs for not yet allocated per cpu
59 * memory like debug store buffers.
60 */
61 npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
62 for (; npages; npages--, cea += PAGE_SIZE)
63 cea_set_pte(cea, 0, PAGE_NONE);
64#endif
65}
66
67/* Set up the fixmap mappings only once per processor */
68static void __init setup_cpu_entry_area(int cpu)
69{
70#ifdef CONFIG_X86_64
71 extern char _entry_trampoline[];
72
73 /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
74 pgprot_t gdt_prot = PAGE_KERNEL_RO;
75 pgprot_t tss_prot = PAGE_KERNEL_RO;
76#else
77 /*
78 * On native 32-bit systems, the GDT cannot be read-only because
79 * our double fault handler uses a task gate, and entering through
80 * a task gate needs to change an available TSS to busy. If the
81 * GDT is read-only, that will triple fault. The TSS cannot be
82 * read-only because the CPU writes to it on task switches.
83 *
84 * On Xen PV, the GDT must be read-only because the hypervisor
85 * requires it.
86 */
87 pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
88 PAGE_KERNEL_RO : PAGE_KERNEL;
89 pgprot_t tss_prot = PAGE_KERNEL;
90#endif
91
92 cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
93 gdt_prot);
94
95 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
96 per_cpu_ptr(&entry_stack_storage, cpu), 1,
97 PAGE_KERNEL);
98
99 /*
100 * The Intel SDM says (Volume 3, 7.2.1):
101 *
102 * Avoid placing a page boundary in the part of the TSS that the
103 * processor reads during a task switch (the first 104 bytes). The
104 * processor may not correctly perform address translations if a
105 * boundary occurs in this area. During a task switch, the processor
106 * reads and writes into the first 104 bytes of each TSS (using
107 * contiguous physical addresses beginning with the physical address
108 * of the first byte of the TSS). So, after TSS access begins, if
109 * part of the 104 bytes is not physically contiguous, the processor
110 * will access incorrect information without generating a page-fault
111 * exception.
112 *
113 * There are also a lot of errata involving the TSS spanning a page
114 * boundary. Assert that we're not doing that.
115 */
116 BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
117 offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
118 BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
119 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
120 &per_cpu(cpu_tss_rw, cpu),
121 sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
122
123#ifdef CONFIG_X86_32
124 per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
125#endif
126
127#ifdef CONFIG_X86_64
128 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
129 BUILD_BUG_ON(sizeof(exception_stacks) !=
130 sizeof(((struct cpu_entry_area *)0)->exception_stacks));
131 cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
132 &per_cpu(exception_stacks, cpu),
133 sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
134
135 cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
136 __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
137#endif
138 percpu_setup_debug_store(cpu);
139}
140
141static __init void setup_cpu_entry_area_ptes(void)
142{
143#ifdef CONFIG_X86_32
144 unsigned long start, end;
145
146 BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
147 BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
148
149 start = CPU_ENTRY_AREA_BASE;
150 end = start + CPU_ENTRY_AREA_MAP_SIZE;
151
152 /* Careful here: start + PMD_SIZE might wrap around */
153 for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
154 populate_extra_pte(start);
155#endif
156}
157
158void __init setup_cpu_entry_areas(void)
159{
160 unsigned int cpu;
161
162 setup_cpu_entry_area_ptes();
163
164 for_each_possible_cpu(cpu)
165 setup_cpu_entry_area(cpu);
166}
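
Because every CPU's entry area occupies one fixed-size stride of virtual space, the lookup in get_cpu_entry_area() is pure arithmetic: no table and no allocation at lookup time. Equivalently, as a sketch of the same expression:

    /* The area for CPU n starts n strides above the per-CPU base. */
    static unsigned long entry_area_va(unsigned int cpu)
    {
            return CPU_ENTRY_AREA_PER_CPU +
                   (unsigned long)cpu * CPU_ENTRY_AREA_SIZE;
    }
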
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index bfcffdf6c577..421f2664ffa0 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -5,7 +5,7 @@
5 5
6static int ptdump_show(struct seq_file *m, void *v) 6static int ptdump_show(struct seq_file *m, void *v)
7{ 7{
8 ptdump_walk_pgd_level(m, NULL); 8 ptdump_walk_pgd_level_debugfs(m, NULL, false);
9 return 0; 9 return 0;
10} 10}
11 11
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
22 .release = single_release, 22 .release = single_release,
23}; 23};
24 24
25static struct dentry *pe; 25static int ptdump_show_curknl(struct seq_file *m, void *v)
26{
27 if (current->mm->pgd) {
28 down_read(&current->mm->mmap_sem);
29 ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
30 up_read(&current->mm->mmap_sem);
31 }
32 return 0;
33}
34
35static int ptdump_open_curknl(struct inode *inode, struct file *filp)
36{
37 return single_open(filp, ptdump_show_curknl, NULL);
38}
39
40static const struct file_operations ptdump_curknl_fops = {
41 .owner = THIS_MODULE,
42 .open = ptdump_open_curknl,
43 .read = seq_read,
44 .llseek = seq_lseek,
45 .release = single_release,
46};
47
48#ifdef CONFIG_PAGE_TABLE_ISOLATION
49static struct dentry *pe_curusr;
50
51static int ptdump_show_curusr(struct seq_file *m, void *v)
52{
53 if (current->mm->pgd) {
54 down_read(&current->mm->mmap_sem);
55 ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
56 up_read(&current->mm->mmap_sem);
57 }
58 return 0;
59}
60
61static int ptdump_open_curusr(struct inode *inode, struct file *filp)
62{
63 return single_open(filp, ptdump_show_curusr, NULL);
64}
65
66static const struct file_operations ptdump_curusr_fops = {
67 .owner = THIS_MODULE,
68 .open = ptdump_open_curusr,
69 .read = seq_read,
70 .llseek = seq_lseek,
71 .release = single_release,
72};
73#endif
74
75static struct dentry *dir, *pe_knl, *pe_curknl;
26 76
27static int __init pt_dump_debug_init(void) 77static int __init pt_dump_debug_init(void)
28{ 78{
29 pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL, 79 dir = debugfs_create_dir("page_tables", NULL);
30 &ptdump_fops); 80 if (!dir)
31 if (!pe)
32 return -ENOMEM; 81 return -ENOMEM;
33 82
83 pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
84 &ptdump_fops);
85 if (!pe_knl)
86 goto err;
87
88 pe_curknl = debugfs_create_file("current_kernel", 0400,
89 dir, NULL, &ptdump_curknl_fops);
90 if (!pe_curknl)
91 goto err;
92
93#ifdef CONFIG_PAGE_TABLE_ISOLATION
94 pe_curusr = debugfs_create_file("current_user", 0400,
95 dir, NULL, &ptdump_curusr_fops);
96 if (!pe_curusr)
97 goto err;
98#endif
34 return 0; 99 return 0;
100err:
101 debugfs_remove_recursive(dir);
102 return -ENOMEM;
35} 103}
36 104
37static void __exit pt_dump_debug_exit(void) 105static void __exit pt_dump_debug_exit(void)
38{ 106{
39 debugfs_remove_recursive(pe); 107 debugfs_remove_recursive(dir);
40} 108}
41 109
42module_init(pt_dump_debug_init); 110module_init(pt_dump_debug_init);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e3ac6fe6c9e..2a4849e92831 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -44,68 +44,97 @@ struct addr_marker {
44 unsigned long max_lines; 44 unsigned long max_lines;
45}; 45};
46 46
47/* indices for address_markers; keep sync'd w/ address_markers below */ 47/* Address space markers hints */
48
49#ifdef CONFIG_X86_64
50
48enum address_markers_idx { 51enum address_markers_idx {
49 USER_SPACE_NR = 0, 52 USER_SPACE_NR = 0,
50#ifdef CONFIG_X86_64
51 KERNEL_SPACE_NR, 53 KERNEL_SPACE_NR,
52 LOW_KERNEL_NR, 54 LOW_KERNEL_NR,
55#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
56 LDT_NR,
57#endif
53 VMALLOC_START_NR, 58 VMALLOC_START_NR,
54 VMEMMAP_START_NR, 59 VMEMMAP_START_NR,
55#ifdef CONFIG_KASAN 60#ifdef CONFIG_KASAN
56 KASAN_SHADOW_START_NR, 61 KASAN_SHADOW_START_NR,
57 KASAN_SHADOW_END_NR, 62 KASAN_SHADOW_END_NR,
58#endif 63#endif
59# ifdef CONFIG_X86_ESPFIX64 64 CPU_ENTRY_AREA_NR,
65#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
66 LDT_NR,
67#endif
68#ifdef CONFIG_X86_ESPFIX64
60 ESPFIX_START_NR, 69 ESPFIX_START_NR,
61# endif 70#endif
71#ifdef CONFIG_EFI
72 EFI_END_NR,
73#endif
62 HIGH_KERNEL_NR, 74 HIGH_KERNEL_NR,
63 MODULES_VADDR_NR, 75 MODULES_VADDR_NR,
64 MODULES_END_NR, 76 MODULES_END_NR,
65#else 77 FIXADDR_START_NR,
78 END_OF_SPACE_NR,
79};
80
81static struct addr_marker address_markers[] = {
82 [USER_SPACE_NR] = { 0, "User Space" },
83 [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
84 [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
85 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
86 [VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
87#ifdef CONFIG_KASAN
88 [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
89 [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
90#endif
91#ifdef CONFIG_MODIFY_LDT_SYSCALL
92 [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
93#endif
94 [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
95#ifdef CONFIG_X86_ESPFIX64
96 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
97#endif
98#ifdef CONFIG_EFI
99 [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
100#endif
101 [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
102 [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
103 [MODULES_END_NR] = { MODULES_END, "End Modules" },
104 [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
105 [END_OF_SPACE_NR] = { -1, NULL }
106};
107
108#else /* CONFIG_X86_64 */
109
110enum address_markers_idx {
111 USER_SPACE_NR = 0,
66 KERNEL_SPACE_NR, 112 KERNEL_SPACE_NR,
67 VMALLOC_START_NR, 113 VMALLOC_START_NR,
68 VMALLOC_END_NR, 114 VMALLOC_END_NR,
69# ifdef CONFIG_HIGHMEM 115#ifdef CONFIG_HIGHMEM
70 PKMAP_BASE_NR, 116 PKMAP_BASE_NR,
71# endif
72 FIXADDR_START_NR,
73#endif 117#endif
118 CPU_ENTRY_AREA_NR,
119 FIXADDR_START_NR,
120 END_OF_SPACE_NR,
74}; 121};
75 122
76/* Address space markers hints */
77static struct addr_marker address_markers[] = { 123static struct addr_marker address_markers[] = {
78 { 0, "User Space" }, 124 [USER_SPACE_NR] = { 0, "User Space" },
79#ifdef CONFIG_X86_64 125 [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
80 { 0x8000000000000000UL, "Kernel Space" }, 126 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
81 { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, 127 [VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
82 { 0/* VMALLOC_START */, "vmalloc() Area" }, 128#ifdef CONFIG_HIGHMEM
83 { 0/* VMEMMAP_START */, "Vmemmap" }, 129 [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
84#ifdef CONFIG_KASAN
85 { KASAN_SHADOW_START, "KASAN shadow" },
86 { KASAN_SHADOW_END, "KASAN shadow end" },
87#endif 130#endif
88# ifdef CONFIG_X86_ESPFIX64 131 [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
89 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, 132 [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
90# endif 133 [END_OF_SPACE_NR] = { -1, NULL }
91# ifdef CONFIG_EFI
92 { EFI_VA_END, "EFI Runtime Services" },
93# endif
94 { __START_KERNEL_map, "High Kernel Mapping" },
95 { MODULES_VADDR, "Modules" },
96 { MODULES_END, "End Modules" },
97#else
98 { PAGE_OFFSET, "Kernel Mapping" },
99 { 0/* VMALLOC_START */, "vmalloc() Area" },
100 { 0/*VMALLOC_END*/, "vmalloc() End" },
101# ifdef CONFIG_HIGHMEM
102 { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
103# endif
104 { 0/*FIXADDR_START*/, "Fixmap Area" },
105#endif
106 { -1, NULL } /* End of list */
107}; 134};
108 135
136#endif /* !CONFIG_X86_64 */
137
109/* Multipliers for offsets within the PTEs */ 138/* Multipliers for offsets within the PTEs */
110#define PTE_LEVEL_MULT (PAGE_SIZE) 139#define PTE_LEVEL_MULT (PAGE_SIZE)
111#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) 140#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
140 static const char * const level_name[] = 169 static const char * const level_name[] =
141 { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; 170 { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
142 171
143 if (!pgprot_val(prot)) { 172 if (!(pr & _PAGE_PRESENT)) {
144 /* Not present */ 173 /* Not present */
145 pt_dump_cont_printf(m, dmsg, " "); 174 pt_dump_cont_printf(m, dmsg, " ");
146 } else { 175 } else {
@@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
447} 476}
448 477
449static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, 478static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
450 bool checkwx) 479 bool checkwx, bool dmesg)
451{ 480{
452#ifdef CONFIG_X86_64 481#ifdef CONFIG_X86_64
453 pgd_t *start = (pgd_t *) &init_top_pgt; 482 pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
460 489
461 if (pgd) { 490 if (pgd) {
462 start = pgd; 491 start = pgd;
463 st.to_dmesg = true; 492 st.to_dmesg = dmesg;
464 } 493 }
465 494
466 st.check_wx = checkwx; 495 st.check_wx = checkwx;
@@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
498 527
499void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) 528void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
500{ 529{
501 ptdump_walk_pgd_level_core(m, pgd, false); 530 ptdump_walk_pgd_level_core(m, pgd, false, true);
531}
532
533void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
534{
535#ifdef CONFIG_PAGE_TABLE_ISOLATION
536 if (user && static_cpu_has(X86_FEATURE_PTI))
537 pgd = kernel_to_user_pgdp(pgd);
538#endif
539 ptdump_walk_pgd_level_core(m, pgd, false, false);
540}
541EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
542
543static void ptdump_walk_user_pgd_level_checkwx(void)
544{
545#ifdef CONFIG_PAGE_TABLE_ISOLATION
546 pgd_t *pgd = (pgd_t *) &init_top_pgt;
547
548 if (!static_cpu_has(X86_FEATURE_PTI))
549 return;
550
551 pr_info("x86/mm: Checking user space page tables\n");
552 pgd = kernel_to_user_pgdp(pgd);
553 ptdump_walk_pgd_level_core(NULL, pgd, true, false);
554#endif
502} 555}
503EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
504 556
505void ptdump_walk_pgd_level_checkwx(void) 557void ptdump_walk_pgd_level_checkwx(void)
506{ 558{
507 ptdump_walk_pgd_level_core(NULL, NULL, true); 559 ptdump_walk_pgd_level_core(NULL, NULL, true, false);
560 ptdump_walk_user_pgd_level_checkwx();
508} 561}
509 562
510static int __init pt_dump_init(void) 563static int __init pt_dump_init(void)
@@ -525,8 +578,8 @@ static int __init pt_dump_init(void)
525 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; 578 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
526# endif 579# endif
527 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; 580 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
581 address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
528#endif 582#endif
529
530 return 0; 583 return 0;
531} 584}
532__initcall(pt_dump_init); 585__initcall(pt_dump_init);
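For reference, a minimal userspace model of the kernel/user PGD pair that the new ptdump_walk_pgd_level_debugfs() selects between via kernel_to_user_pgdp(). The order-1 allocation and the bit-12 toggle are assumptions of this sketch based on the PTI layout, not something shown in this hunk:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

typedef struct { uint64_t pgd; } pgd_t;

/* Model of kernel_to_user_pgdp(): the two 4 KiB PGDs are assumed to
 * occupy one 8 KiB block, so setting bit 12 of the address selects
 * the user copy. */
static pgd_t *kernel_to_user_pgdp_model(pgd_t *pgdp)
{
	return (pgd_t *)((uintptr_t)pgdp | PAGE_SIZE);
}

int main(void)
{
	/* One 8 KiB-aligned block: kernel PGD first, user PGD second. */
	pgd_t *kernel_pgd = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);

	if (!kernel_pgd)
		return 1;
	printf("kernel PGD %p, user PGD %p\n", (void *)kernel_pgd,
	       (void *)kernel_to_user_pgdp_model(kernel_pgd));
	free(kernel_pgd);
	return 0;
}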
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index febf6980e653..06fe3d51d385 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
860 if (!printk_ratelimit()) 860 if (!printk_ratelimit())
861 return; 861 return;
862 862
863 printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", 863 printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 864 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
865 tsk->comm, task_pid_nr(tsk), address, 865 tsk->comm, task_pid_nr(tsk), address,
866 (void *)regs->ip, (void *)regs->sp, error_code); 866 (void *)regs->ip, (void *)regs->sp, error_code);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6fdf91ef130a..82f5252c723a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -20,6 +20,7 @@
20#include <asm/kaslr.h> 20#include <asm/kaslr.h>
21#include <asm/hypervisor.h> 21#include <asm/hypervisor.h>
22#include <asm/cpufeature.h> 22#include <asm/cpufeature.h>
23#include <asm/pti.h>
23 24
24/* 25/*
25 * We need to define the tracepoints somewhere, and tlb.c 26 * We need to define the tracepoints somewhere, and tlb.c
@@ -160,6 +161,12 @@ struct map_range {
160 161
161static int page_size_mask; 162static int page_size_mask;
162 163
164static void enable_global_pages(void)
165{
166 if (!static_cpu_has(X86_FEATURE_PTI))
167 __supported_pte_mask |= _PAGE_GLOBAL;
168}
169
163static void __init probe_page_size_mask(void) 170static void __init probe_page_size_mask(void)
164{ 171{
165 /* 172 /*
@@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void)
177 cr4_set_bits_and_update_boot(X86_CR4_PSE); 184 cr4_set_bits_and_update_boot(X86_CR4_PSE);
178 185
179 /* Enable PGE if available */ 186 /* Enable PGE if available */
187 __supported_pte_mask &= ~_PAGE_GLOBAL;
180 if (boot_cpu_has(X86_FEATURE_PGE)) { 188 if (boot_cpu_has(X86_FEATURE_PGE)) {
181 cr4_set_bits_and_update_boot(X86_CR4_PGE); 189 cr4_set_bits_and_update_boot(X86_CR4_PGE);
182 __supported_pte_mask |= _PAGE_GLOBAL; 190 enable_global_pages();
183 } else 191 }
184 __supported_pte_mask &= ~_PAGE_GLOBAL;
185 192
186 /* Enable 1 GB linear kernel mappings if available: */ 193 /* Enable 1 GB linear kernel mappings if available: */
187 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { 194 if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void)
194 201
195static void setup_pcid(void) 202static void setup_pcid(void)
196{ 203{
197#ifdef CONFIG_X86_64 204 if (!IS_ENABLED(CONFIG_X86_64))
198 if (boot_cpu_has(X86_FEATURE_PCID)) { 205 return;
199 if (boot_cpu_has(X86_FEATURE_PGE)) { 206
200 /* 207 if (!boot_cpu_has(X86_FEATURE_PCID))
201 * This can't be cr4_set_bits_and_update_boot() -- 208 return;
202 * the trampoline code can't handle CR4.PCIDE and 209
203 * it wouldn't do any good anyway. Despite the name, 210 if (boot_cpu_has(X86_FEATURE_PGE)) {
204 * cr4_set_bits_and_update_boot() doesn't actually 211 /*
205 * cause the bits in question to remain set all the 212 * This can't be cr4_set_bits_and_update_boot() -- the
206 * way through the secondary boot asm. 213 * trampoline code can't handle CR4.PCIDE and it wouldn't
207 * 214 * do any good anyway. Despite the name,
208 * Instead, we brute-force it and set CR4.PCIDE 215 * cr4_set_bits_and_update_boot() doesn't actually cause
209 * manually in start_secondary(). 216 * the bits in question to remain set all the way through
210 */ 217 * the secondary boot asm.
211 cr4_set_bits(X86_CR4_PCIDE); 218 *
212 } else { 219 * Instead, we brute-force it and set CR4.PCIDE manually in
213 /* 220 * start_secondary().
214 * flush_tlb_all(), as currently implemented, won't 221 */
215 * work if PCID is on but PGE is not. Since that 222 cr4_set_bits(X86_CR4_PCIDE);
216 * combination doesn't exist on real hardware, there's 223
217 * no reason to try to fully support it, but it's 224 /*
218 * polite to avoid corrupting data if we're on 225 * INVPCID's single-context modes (2/3) only work if we set
 219 * an improperly configured VM. 226 * X86_CR4_PCIDE, *and* we have INVPCID support. It's unusable
220 */ 227 * on systems that have X86_CR4_PCIDE clear, or that have
221 setup_clear_cpu_cap(X86_FEATURE_PCID); 228 * no INVPCID support at all.
222 } 229 */
230 if (boot_cpu_has(X86_FEATURE_INVPCID))
231 setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
232 } else {
233 /*
234 * flush_tlb_all(), as currently implemented, won't work if
235 * PCID is on but PGE is not. Since that combination
236 * doesn't exist on real hardware, there's no reason to try
237 * to fully support it, but it's polite to avoid corrupting
238 * data if we're on an improperly configured VM.
239 */
240 setup_clear_cpu_cap(X86_FEATURE_PCID);
223 } 241 }
224#endif
225} 242}
226 243
227#ifdef CONFIG_X86_32 244#ifdef CONFIG_X86_32
@@ -622,6 +639,7 @@ void __init init_mem_mapping(void)
622{ 639{
623 unsigned long end; 640 unsigned long end;
624 641
642 pti_check_boottime_disable();
625 probe_page_size_mask(); 643 probe_page_size_mask();
626 setup_pcid(); 644 setup_pcid();
627 645
@@ -845,12 +863,12 @@ void __init zone_sizes_init(void)
845 free_area_init_nodes(max_zone_pfns); 863 free_area_init_nodes(max_zone_pfns);
846} 864}
847 865
848DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { 866__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
849 .loaded_mm = &init_mm, 867 .loaded_mm = &init_mm,
850 .next_asid = 1, 868 .next_asid = 1,
851 .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ 869 .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
852}; 870};
853EXPORT_SYMBOL_GPL(cpu_tlbstate); 871EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
854 872
855void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) 873void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
856{ 874{
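The setup_pcid()/probe_page_size_mask() rework above encodes three dependent decisions: global pages only without PTI, PCID only together with PGE, and single-context INVPCID only on top of both. A standalone sketch of that decision tree, where the flags stand in for boot_cpu_has() results:

#include <stdbool.h>
#include <stdio.h>

struct cpu { bool pge, pcid, invpcid, pti; };

static void configure(const struct cpu *c, bool *global_pages,
		      bool *use_pcid, bool *invpcid_single)
{
	/* _PAGE_GLOBAL stays off under PTI: global TLB entries would
	 * survive the user/kernel CR3 switch. */
	*global_pages = c->pge && !c->pti;

	/* PCID without PGE is unsupported; mirror the
	 * setup_clear_cpu_cap(X86_FEATURE_PCID) path. */
	*use_pcid = c->pcid && c->pge;

	/* Single-context INVPCID needs CR4.PCIDE plus INVPCID itself. */
	*invpcid_single = *use_pcid && c->invpcid;
}

int main(void)
{
	struct cpu c = { .pge = true, .pcid = true,
			 .invpcid = true, .pti = true };
	bool g, p, s;

	configure(&c, &g, &p, &s);
	printf("global=%d pcid=%d invpcid_single=%d\n", g, p, s);
	return 0;
}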
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8a64a6f2848d..135c9a7898c7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/set_memory.h> 51#include <asm/set_memory.h>
52#include <asm/page_types.h> 52#include <asm/page_types.h>
53#include <asm/cpu_entry_area.h>
53#include <asm/init.h> 54#include <asm/init.h>
54 55
55#include "mm_internal.h" 56#include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
766 mem_init_print_info(NULL); 767 mem_init_print_info(NULL);
767 printk(KERN_INFO "virtual kernel memory layout:\n" 768 printk(KERN_INFO "virtual kernel memory layout:\n"
768 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 769 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
770 " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
769#ifdef CONFIG_HIGHMEM 771#ifdef CONFIG_HIGHMEM
770 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 772 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
771#endif 773#endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
777 FIXADDR_START, FIXADDR_TOP, 779 FIXADDR_START, FIXADDR_TOP,
778 (FIXADDR_TOP - FIXADDR_START) >> 10, 780 (FIXADDR_TOP - FIXADDR_START) >> 10,
779 781
782 CPU_ENTRY_AREA_BASE,
783 CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
784 CPU_ENTRY_AREA_MAP_SIZE >> 10,
785
780#ifdef CONFIG_HIGHMEM 786#ifdef CONFIG_HIGHMEM
781 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, 787 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
782 (LAST_PKMAP*PAGE_SIZE) >> 10, 788 (LAST_PKMAP*PAGE_SIZE) >> 10,
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 99dfed6dfef8..47388f0c0e59 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,7 @@
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/sections.h> 16#include <asm/sections.h>
17#include <asm/pgtable.h> 17#include <asm/pgtable.h>
18#include <asm/cpu_entry_area.h>
18 19
19extern struct range pfn_mapped[E820_MAX_ENTRIES]; 20extern struct range pfn_mapped[E820_MAX_ENTRIES];
20 21
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
277void __init kasan_init(void) 278void __init kasan_init(void)
278{ 279{
279 int i; 280 int i;
281 void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
280 282
281#ifdef CONFIG_KASAN_INLINE 283#ifdef CONFIG_KASAN_INLINE
282 register_die_notifier(&kasan_die_notifier); 284 register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
321 map_range(&pfn_mapped[i]); 323 map_range(&pfn_mapped[i]);
322 } 324 }
323 325
326 shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
327 shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
328 shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
329 PAGE_SIZE);
330
331 shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
332 CPU_ENTRY_AREA_MAP_SIZE);
333 shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
334 shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
335 PAGE_SIZE);
336
324 kasan_populate_zero_shadow( 337 kasan_populate_zero_shadow(
325 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), 338 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
326 kasan_mem_to_shadow((void *)__START_KERNEL_map)); 339 shadow_cpu_entry_begin);
340
341 kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
342 (unsigned long)shadow_cpu_entry_end, 0);
343
344 kasan_populate_zero_shadow(shadow_cpu_entry_end,
345 kasan_mem_to_shadow((void *)__START_KERNEL_map));
327 346
328 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), 347 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
329 (unsigned long)kasan_mem_to_shadow(_end), 348 (unsigned long)kasan_mem_to_shadow(_end),
330 early_pfn_to_nid(__pa(_stext))); 349 early_pfn_to_nid(__pa(_stext)));
331 350
332 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), 351 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
333 (void *)KASAN_SHADOW_END); 352 (void *)KASAN_SHADOW_END);
334 353
335 load_cr3(init_top_pgt); 354 load_cr3(init_top_pgt);
336 __flush_tlb_all(); 355 __flush_tlb_all();
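The kasan_init() hunk above derives the shadow range for the CPU entry area by scaling the addresses into shadow space and page-aligning the result, rounding the start down and the end up. A userspace sketch of that arithmetic; the base, size, and offset constants are illustrative, only the >>3 scaling (one shadow byte per 8 bytes) matches generic KASAN:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE		 4096ULL
#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_OFFSET	 0xdffffc0000000000ULL	/* illustrative */

static uint64_t mem_to_shadow(uint64_t addr)
{
	return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

static uint64_t round_down_page(uint64_t x) { return x & ~(PAGE_SIZE - 1); }
static uint64_t round_up_page(uint64_t x)
{
	return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

int main(void)
{
	uint64_t base = 0xfffffe0000000000ULL;	/* illustrative */
	uint64_t size = 4ULL << 20;		/* illustrative */
	uint64_t begin = round_down_page(mem_to_shadow(base));
	uint64_t end = round_up_page(mem_to_shadow(base + size));

	printf("shadow [%#llx, %#llx)\n", (unsigned long long)begin,
	       (unsigned long long)end);
	return 0;
}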
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 879ef930e2c2..aedebd2ebf1e 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,25 +34,14 @@
34#define TB_SHIFT 40 34#define TB_SHIFT 40
35 35
36/* 36/*
37 * Virtual address start and end range for randomization. The end changes base 37 * Virtual address start and end range for randomization.
38 * on configuration to have the highest amount of space for randomization.
39 * It increases the possible random position for each randomized region.
40 * 38 *
41 * You need to add an if/def entry if you introduce a new memory region 39 * The end address could depend on more configuration options to make the
42 * compatible with KASLR. Your entry must be in logical order with memory 40 * highest amount of space for randomization available, but that's too hard
43 * layout. For example, ESPFIX is before EFI because its virtual address is 41 * to keep straight and caused issues already.
44 * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
45 * ensure that this order is correct and won't be changed.
46 */ 42 */
47static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; 43static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
48 44static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
49#if defined(CONFIG_X86_ESPFIX64)
50static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
51#elif defined(CONFIG_EFI)
52static const unsigned long vaddr_end = EFI_VA_END;
53#else
54static const unsigned long vaddr_end = __START_KERNEL_map;
55#endif
56 45
57/* Default values */ 46/* Default values */
58unsigned long page_offset_base = __PAGE_OFFSET_BASE; 47unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void)
101 unsigned long remain_entropy; 90 unsigned long remain_entropy;
102 91
103 /* 92 /*
104 * All these BUILD_BUG_ON checks ensures the memory layout is 93 * These BUILD_BUG_ON checks ensure the memory layout is consistent
105 * consistent with the vaddr_start/vaddr_end variables. 94 * with the vaddr_start/vaddr_end variables. These checks are very
95 * limited....
106 */ 96 */
107 BUILD_BUG_ON(vaddr_start >= vaddr_end); 97 BUILD_BUG_ON(vaddr_start >= vaddr_end);
108 BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && 98 BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
109 vaddr_end >= EFI_VA_END);
110 BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
111 IS_ENABLED(CONFIG_EFI)) &&
112 vaddr_end >= __START_KERNEL_map);
113 BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); 99 BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
114 100
115 if (!kaslr_memory_enabled()) 101 if (!kaslr_memory_enabled())
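The simplified BUILD_BUG_ON()s above pin vaddr_end to CPU_ENTRY_AREA_BASE. The same style of compile-time layout check can be written with C11 _Static_assert; the constants below are illustrative stand-ins, not the kernel's:

#define PAGE_OFFSET_BASE	0xffff880000000000ULL	/* illustrative */
#define CPU_ENTRY_AREA_BASE_	0xfffffe0000000000ULL	/* illustrative */
#define START_KERNEL_MAP	0xffffffff80000000ULL	/* illustrative */

_Static_assert(PAGE_OFFSET_BASE < CPU_ENTRY_AREA_BASE_,
	       "randomization range must be non-empty");
_Static_assert(CPU_ENTRY_AREA_BASE_ <= START_KERNEL_MAP,
	       "randomized regions must end below the kernel text map");

int main(void) { return 0; }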
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d9a9e9fc75dd..391b13402e40 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -405,13 +405,13 @@ bool sme_active(void)
405{ 405{
406 return sme_me_mask && !sev_enabled; 406 return sme_me_mask && !sev_enabled;
407} 407}
408EXPORT_SYMBOL_GPL(sme_active); 408EXPORT_SYMBOL(sme_active);
409 409
410bool sev_active(void) 410bool sev_active(void)
411{ 411{
412 return sme_me_mask && sev_enabled; 412 return sme_me_mask && sev_enabled;
413} 413}
414EXPORT_SYMBOL_GPL(sev_active); 414EXPORT_SYMBOL(sev_active);
415 415
416static const struct dma_map_ops sev_dma_ops = { 416static const struct dma_map_ops sev_dma_ops = {
417 .alloc = sev_alloc, 417 .alloc = sev_alloc,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 96d456a94b03..004abf9ebf12 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
355 kmem_cache_free(pgd_cache, pgd); 355 kmem_cache_free(pgd_cache, pgd);
356} 356}
357#else 357#else
358
358static inline pgd_t *_pgd_alloc(void) 359static inline pgd_t *_pgd_alloc(void)
359{ 360{
360 return (pgd_t *)__get_free_page(PGALLOC_GFP); 361 return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
361} 362}
362 363
363static inline void _pgd_free(pgd_t *pgd) 364static inline void _pgd_free(pgd_t *pgd)
364{ 365{
365 free_page((unsigned long)pgd); 366 free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
366} 367}
367#endif /* CONFIG_X86_PAE */ 368#endif /* CONFIG_X86_PAE */
368 369
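The _pgd_alloc()/_pgd_free() change above switches from a single page to an order-sized allocation so PTI can keep the kernel and user PGDs adjacent. A userspace model of the order arithmetic; the PTI-implies-order-1 rule is an assumption of this sketch:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

/* Model of _pgd_alloc(): 2^order naturally aligned pages. */
static void *pgd_alloc_model(unsigned int order)
{
	size_t bytes = PAGE_SIZE << order;

	return aligned_alloc(bytes, bytes);
}

int main(void)
{
	int pti_enabled = 1;			/* assumption for the demo */
	unsigned int order = pti_enabled ? 1 : 0;
	void *pgd = pgd_alloc_model(order);

	if (!pgd)
		return 1;
	printf("allocated %lu bytes for the PGD at %p\n",
	       PAGE_SIZE << order, pgd);
	free(pgd);
	return 0;
}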
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6b9bf023a700..c3c5274410a9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,6 +10,7 @@
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12 12
13#include <asm/cpu_entry_area.h>
13#include <asm/pgtable.h> 14#include <asm/pgtable.h>
14#include <asm/pgalloc.h> 15#include <asm/pgalloc.h>
15#include <asm/fixmap.h> 16#include <asm/fixmap.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
new file mode 100644
index 000000000000..ce38f165489b
--- /dev/null
+++ b/arch/x86/mm/pti.c
@@ -0,0 +1,368 @@
1/*
2 * Copyright(c) 2017 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * This code is based in part on work published here:
14 *
15 * https://github.com/IAIK/KAISER
16 *
 17 * The original work was written and signed off for the Linux
18 * kernel by:
19 *
20 * Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
21 * Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
22 * Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
23 * Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
24 *
25 * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
26 * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
 27 * Andy Lutomirski <luto@amacapital.net>
28 */
29#include <linux/kernel.h>
30#include <linux/errno.h>
31#include <linux/string.h>
32#include <linux/types.h>
33#include <linux/bug.h>
34#include <linux/init.h>
35#include <linux/spinlock.h>
36#include <linux/mm.h>
37#include <linux/uaccess.h>
38
39#include <asm/cpufeature.h>
40#include <asm/hypervisor.h>
41#include <asm/vsyscall.h>
42#include <asm/cmdline.h>
43#include <asm/pti.h>
44#include <asm/pgtable.h>
45#include <asm/pgalloc.h>
46#include <asm/tlbflush.h>
47#include <asm/desc.h>
48
49#undef pr_fmt
50#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
51
52/* Backporting helper */
53#ifndef __GFP_NOTRACK
54#define __GFP_NOTRACK 0
55#endif
56
57static void __init pti_print_if_insecure(const char *reason)
58{
59 if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
60 pr_info("%s\n", reason);
61}
62
63static void __init pti_print_if_secure(const char *reason)
64{
65 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
66 pr_info("%s\n", reason);
67}
68
69void __init pti_check_boottime_disable(void)
70{
71 char arg[5];
72 int ret;
73
74 if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
75 pti_print_if_insecure("disabled on XEN PV.");
76 return;
77 }
78
79 ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
80 if (ret > 0) {
81 if (ret == 3 && !strncmp(arg, "off", 3)) {
82 pti_print_if_insecure("disabled on command line.");
83 return;
84 }
85 if (ret == 2 && !strncmp(arg, "on", 2)) {
86 pti_print_if_secure("force enabled on command line.");
87 goto enable;
88 }
89 if (ret == 4 && !strncmp(arg, "auto", 4))
90 goto autosel;
91 }
92
93 if (cmdline_find_option_bool(boot_command_line, "nopti")) {
94 pti_print_if_insecure("disabled on command line.");
95 return;
96 }
97
98autosel:
99 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
100 return;
101enable:
102 setup_force_cpu_cap(X86_FEATURE_PTI);
103}
104
105pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
106{
107 /*
108 * Changes to the high (kernel) portion of the kernelmode page
109 * tables are not automatically propagated to the usermode tables.
110 *
111 * Users should keep in mind that, unlike the kernelmode tables,
112 * there is no vmalloc_fault equivalent for the usermode tables.
113 * Top-level entries added to init_mm's usermode pgd after boot
114 * will not be automatically propagated to other mms.
115 */
116 if (!pgdp_maps_userspace(pgdp))
117 return pgd;
118
119 /*
120 * The user page tables get the full PGD, accessible from
121 * userspace:
122 */
123 kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
124
125 /*
126 * If this is normal user memory, make it NX in the kernel
127 * pagetables so that, if we somehow screw up and return to
128 * usermode with the kernel CR3 loaded, we'll get a page fault
129 * instead of allowing user code to execute with the wrong CR3.
130 *
131 * As exceptions, we don't set NX if:
132 * - _PAGE_USER is not set. This could be an executable
133 * EFI runtime mapping or something similar, and the kernel
134 * may execute from it
135 * - we don't have NX support
136 * - we're clearing the PGD (i.e. the new pgd is not present).
137 */
138 if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
139 (__supported_pte_mask & _PAGE_NX))
140 pgd.pgd |= _PAGE_NX;
141
142 /* return the copy of the PGD we want the kernel to use: */
143 return pgd;
144}
145
146/*
147 * Walk the user copy of the page tables (optionally) trying to allocate
148 * page table pages on the way down.
149 *
150 * Returns a pointer to a P4D on success, or NULL on failure.
151 */
152static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
153{
154 pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
155 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
156
157 if (address < PAGE_OFFSET) {
158 WARN_ONCE(1, "attempt to walk user address\n");
159 return NULL;
160 }
161
162 if (pgd_none(*pgd)) {
163 unsigned long new_p4d_page = __get_free_page(gfp);
164 if (!new_p4d_page)
165 return NULL;
166
167 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
168 }
169 BUILD_BUG_ON(pgd_large(*pgd) != 0);
170
171 return p4d_offset(pgd, address);
172}
173
174/*
175 * Walk the user copy of the page tables (optionally) trying to allocate
176 * page table pages on the way down.
177 *
178 * Returns a pointer to a PMD on success, or NULL on failure.
179 */
180static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
181{
182 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
183 p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
184 pud_t *pud;
185
186 BUILD_BUG_ON(p4d_large(*p4d) != 0);
187 if (p4d_none(*p4d)) {
188 unsigned long new_pud_page = __get_free_page(gfp);
189 if (!new_pud_page)
190 return NULL;
191
192 set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
193 }
194
195 pud = pud_offset(p4d, address);
196 /* The user page tables do not use large mappings: */
197 if (pud_large(*pud)) {
198 WARN_ON(1);
199 return NULL;
200 }
201 if (pud_none(*pud)) {
202 unsigned long new_pmd_page = __get_free_page(gfp);
203 if (!new_pmd_page)
204 return NULL;
205
206 set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
207 }
208
209 return pmd_offset(pud, address);
210}
211
212#ifdef CONFIG_X86_VSYSCALL_EMULATION
213/*
214 * Walk the shadow copy of the page tables (optionally) trying to allocate
215 * page table pages on the way down. Does not support large pages.
216 *
217 * Note: this is only used when mapping *new* kernel data into the
218 * user/shadow page tables. It is never used for userspace data.
219 *
220 * Returns a pointer to a PTE on success, or NULL on failure.
221 */
222static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
223{
224 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
225 pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
226 pte_t *pte;
227
228 /* We can't do anything sensible if we hit a large mapping. */
229 if (pmd_large(*pmd)) {
230 WARN_ON(1);
231 return NULL;
232 }
233
234 if (pmd_none(*pmd)) {
235 unsigned long new_pte_page = __get_free_page(gfp);
236 if (!new_pte_page)
237 return NULL;
238
239 set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
240 }
241
242 pte = pte_offset_kernel(pmd, address);
243 if (pte_flags(*pte) & _PAGE_USER) {
244 WARN_ONCE(1, "attempt to walk to user pte\n");
245 return NULL;
246 }
247 return pte;
248}
249
250static void __init pti_setup_vsyscall(void)
251{
252 pte_t *pte, *target_pte;
253 unsigned int level;
254
255 pte = lookup_address(VSYSCALL_ADDR, &level);
256 if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
257 return;
258
259 target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
260 if (WARN_ON(!target_pte))
261 return;
262
263 *target_pte = *pte;
264 set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
265}
266#else
267static void __init pti_setup_vsyscall(void) { }
268#endif
269
270static void __init
271pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
272{
273 unsigned long addr;
274
275 /*
276 * Clone the populated PMDs which cover start to end. These PMD areas
277 * can have holes.
278 */
279 for (addr = start; addr < end; addr += PMD_SIZE) {
280 pmd_t *pmd, *target_pmd;
281 pgd_t *pgd;
282 p4d_t *p4d;
283 pud_t *pud;
284
285 pgd = pgd_offset_k(addr);
286 if (WARN_ON(pgd_none(*pgd)))
287 return;
288 p4d = p4d_offset(pgd, addr);
289 if (WARN_ON(p4d_none(*p4d)))
290 return;
291 pud = pud_offset(p4d, addr);
292 if (pud_none(*pud))
293 continue;
294 pmd = pmd_offset(pud, addr);
295 if (pmd_none(*pmd))
296 continue;
297
298 target_pmd = pti_user_pagetable_walk_pmd(addr);
299 if (WARN_ON(!target_pmd))
300 return;
301
302 /*
303 * Copy the PMD. That is, the kernelmode and usermode
304 * tables will share the last-level page tables of this
 305 * address range.
306 */
307 *target_pmd = pmd_clear_flags(*pmd, clear);
308 }
309}
310
311/*
312 * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
 313 * next-level entry on 5-level systems).
314 */
315static void __init pti_clone_p4d(unsigned long addr)
316{
317 p4d_t *kernel_p4d, *user_p4d;
318 pgd_t *kernel_pgd;
319
320 user_p4d = pti_user_pagetable_walk_p4d(addr);
321 kernel_pgd = pgd_offset_k(addr);
322 kernel_p4d = p4d_offset(kernel_pgd, addr);
323 *user_p4d = *kernel_p4d;
324}
325
326/*
327 * Clone the CPU_ENTRY_AREA into the user space visible page table.
328 */
329static void __init pti_clone_user_shared(void)
330{
331 pti_clone_p4d(CPU_ENTRY_AREA_BASE);
332}
333
334/*
 335 * Clone the ESPFIX P4D into the user space visible page table.
336 */
337static void __init pti_setup_espfix64(void)
338{
339#ifdef CONFIG_X86_ESPFIX64
340 pti_clone_p4d(ESPFIX_BASE_ADDR);
341#endif
342}
343
344/*
345 * Clone the populated PMDs of the entry and irqentry text and force it RO.
346 */
347static void __init pti_clone_entry_text(void)
348{
349 pti_clone_pmds((unsigned long) __entry_text_start,
350 (unsigned long) __irqentry_text_end,
351 _PAGE_RW | _PAGE_GLOBAL);
352}
353
354/*
355 * Initialize kernel page table isolation
356 */
357void __init pti_init(void)
358{
359 if (!static_cpu_has(X86_FEATURE_PTI))
360 return;
361
362 pr_info("enabled\n");
363
364 pti_clone_user_shared();
365 pti_clone_entry_text();
366 pti_setup_espfix64();
367 pti_setup_vsyscall();
368}
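A standalone model of the pti= / nopti command-line handling introduced in pti_check_boottime_disable() above. The token matching is simplified (strstr() rather than the kernel's cmdline_find_option()), so treat it as a sketch of the precedence only:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

enum pti_mode { PTI_AUTO, PTI_FORCE_ON, PTI_FORCE_OFF };

static enum pti_mode parse_pti(const char *cmdline)
{
	/* Simplified matching: strstr() instead of an exact token
	 * scan, so "pti=" embedded in another word would also match. */
	const char *p = strstr(cmdline, "pti=");

	if (p) {
		p += 4;
		if (!strncmp(p, "off", 3))
			return PTI_FORCE_OFF;
		if (!strncmp(p, "on", 2))
			return PTI_FORCE_ON;
		/* "auto" and unknown values fall through */
	}
	if (strstr(cmdline, "nopti"))
		return PTI_FORCE_OFF;
	return PTI_AUTO;
}

static bool pti_enabled(enum pti_mode mode, bool cpu_has_meltdown)
{
	if (mode == PTI_FORCE_OFF)
		return false;
	if (mode == PTI_FORCE_ON)
		return true;
	return cpu_has_meltdown;	/* auto: enable only where needed */
}

int main(void)
{
	printf("pti=%d\n",
	       pti_enabled(parse_pti("root=/dev/sda pti=auto"), true));
	return 0;
}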
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3118392cdf75..a1561957dccb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,38 @@
28 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi 28 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
29 */ 29 */
30 30
31/*
32 * We get here when we do something requiring a TLB invalidation
33 * but could not go invalidate all of the contexts. We do the
34 * necessary invalidation by clearing out the 'ctx_id' which
35 * forces a TLB flush when the context is loaded.
36 */
37void clear_asid_other(void)
38{
39 u16 asid;
40
41 /*
42 * This is only expected to be set if we have disabled
43 * kernel _PAGE_GLOBAL pages.
44 */
45 if (!static_cpu_has(X86_FEATURE_PTI)) {
46 WARN_ON_ONCE(1);
47 return;
48 }
49
50 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
51 /* Do not need to flush the current asid */
52 if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
53 continue;
54 /*
55 * Make sure the next time we go to switch to
56 * this asid, we do a flush:
57 */
58 this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
59 }
60 this_cpu_write(cpu_tlbstate.invalidate_other, false);
61}
62
31atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); 63atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
32 64
33 65
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
42 return; 74 return;
43 } 75 }
44 76
77 if (this_cpu_read(cpu_tlbstate.invalidate_other))
78 clear_asid_other();
79
45 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { 80 for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
46 if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != 81 if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
47 next->context.ctx_id) 82 next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
65 *need_flush = true; 100 *need_flush = true;
66} 101}
67 102
103static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
104{
105 unsigned long new_mm_cr3;
106
107 if (need_flush) {
108 invalidate_user_asid(new_asid);
109 new_mm_cr3 = build_cr3(pgdir, new_asid);
110 } else {
111 new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
112 }
113
114 /*
115 * Caution: many callers of this function expect
116 * that load_cr3() is serializing and orders TLB
117 * fills with respect to the mm_cpumask writes.
118 */
119 write_cr3(new_mm_cr3);
120}
121
68void leave_mm(int cpu) 122void leave_mm(int cpu)
69{ 123{
70 struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); 124 struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
128 * isn't free. 182 * isn't free.
129 */ 183 */
130#ifdef CONFIG_DEBUG_VM 184#ifdef CONFIG_DEBUG_VM
131 if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { 185 if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
132 /* 186 /*
133 * If we were to BUG here, we'd be very likely to kill 187 * If we were to BUG here, we'd be very likely to kill
134 * the system so hard that we don't see the call trace. 188 * the system so hard that we don't see the call trace.
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
195 if (need_flush) { 249 if (need_flush) {
196 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); 250 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
197 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); 251 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
198 write_cr3(build_cr3(next, new_asid)); 252 load_new_mm_cr3(next->pgd, new_asid, true);
199 253
200 /* 254 /*
201 * NB: This gets called via leave_mm() in the idle path 255 * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
208 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); 262 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
209 } else { 263 } else {
210 /* The new ASID is already up to date. */ 264 /* The new ASID is already up to date. */
211 write_cr3(build_cr3_noflush(next, new_asid)); 265 load_new_mm_cr3(next->pgd, new_asid, false);
212 266
213 /* See above wrt _rcuidle. */ 267 /* See above wrt _rcuidle. */
214 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); 268 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void)
288 !(cr4_read_shadow() & X86_CR4_PCIDE)); 342 !(cr4_read_shadow() & X86_CR4_PCIDE));
289 343
290 /* Force ASID 0 and force a TLB flush. */ 344 /* Force ASID 0 and force a TLB flush. */
291 write_cr3(build_cr3(mm, 0)); 345 write_cr3(build_cr3(mm->pgd, 0));
292 346
293 /* Reinitialize tlbstate. */ 347 /* Reinitialize tlbstate. */
294 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); 348 this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info)
551 605
552 /* flush range by one by one 'invlpg' */ 606 /* flush range by one by one 'invlpg' */
553 for (addr = f->start; addr < f->end; addr += PAGE_SIZE) 607 for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
554 __flush_tlb_single(addr); 608 __flush_tlb_one(addr);
555} 609}
556 610
557void flush_tlb_kernel_range(unsigned long start, unsigned long end) 611void flush_tlb_kernel_range(unsigned long start, unsigned long end)
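load_new_mm_cr3() above picks between a flushing and a non-flushing CR3 load. A userspace sketch of the CR3 encoding it relies on; the ASID+1 mapping onto hardware PCIDs and bit 63 as the no-flush bit are assumptions of this model:

#include <stdint.h>
#include <stdio.h>

#define CR3_NOFLUSH (1ULL << 63)

/* Kernel ASID n is assumed to map to hardware PCID n + 1, keeping
 * PCID 0 for the pre-PCID boot state. */
static uint64_t build_cr3_model(uint64_t pgd_pa, uint16_t asid)
{
	return pgd_pa | (uint64_t)(asid + 1);
}

static uint64_t build_cr3_noflush_model(uint64_t pgd_pa, uint16_t asid)
{
	return build_cr3_model(pgd_pa, asid) | CR3_NOFLUSH;
}

int main(void)
{
	uint64_t pgd_pa = 0x1234000;	/* illustrative physical address */

	printf("flush:   %#llx\n",
	       (unsigned long long)build_cr3_model(pgd_pa, 1));
	printf("noflush: %#llx\n",
	       (unsigned long long)build_cr3_noflush_model(pgd_pa, 1));
	return 0;
}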
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 7a5350d08cef..563049c483a1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str)
594 } else if (!strcmp(str, "nocrs")) { 594 } else if (!strcmp(str, "nocrs")) {
595 pci_probe |= PCI_ROOT_NO_CRS; 595 pci_probe |= PCI_ROOT_NO_CRS;
596 return NULL; 596 return NULL;
597#ifdef CONFIG_PHYS_ADDR_T_64BIT
598 } else if (!strcmp(str, "big_root_window")) {
599 pci_probe |= PCI_BIG_ROOT_WINDOW;
600 return NULL;
601#endif
597 } else if (!strcmp(str, "earlydump")) { 602 } else if (!strcmp(str, "earlydump")) {
598 pci_early_dump_regs = 1; 603 pci_early_dump_regs = 1;
599 return NULL; 604 return NULL;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index e663d6bf1328..f6a26e3cb476 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -662,10 +662,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
662 */ 662 */
663static void pci_amd_enable_64bit_bar(struct pci_dev *dev) 663static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
664{ 664{
665 unsigned i;
666 u32 base, limit, high; 665 u32 base, limit, high;
667 struct resource *res, *conflict;
668 struct pci_dev *other; 666 struct pci_dev *other;
667 struct resource *res;
668 unsigned i;
669 int r;
670
671 if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
672 return;
669 673
670 /* Check that we are the only device of that type */ 674 /* Check that we are the only device of that type */
671 other = pci_get_device(dev->vendor, dev->device, NULL); 675 other = pci_get_device(dev->vendor, dev->device, NULL);
@@ -699,22 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
699 if (!res) 703 if (!res)
700 return; 704 return;
701 705
706 /*
707 * Allocate a 256GB window directly below the 0xfd00000000 hardware
708 * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
709 */
702 res->name = "PCI Bus 0000:00"; 710 res->name = "PCI Bus 0000:00";
703 res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM | 711 res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
704 IORESOURCE_MEM_64 | IORESOURCE_WINDOW; 712 IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
705 res->start = 0x100000000ull; 713 res->start = 0xbd00000000ull;
706 res->end = 0xfd00000000ull - 1; 714 res->end = 0xfd00000000ull - 1;
707 715
708 /* Just grab the free area behind system memory for this */ 716 r = request_resource(&iomem_resource, res);
709 while ((conflict = request_resource_conflict(&iomem_resource, res))) { 717 if (r) {
710 if (conflict->end >= res->end) { 718 kfree(res);
711 kfree(res); 719 return;
712 return;
713 }
714 res->start = conflict->end + 1;
715 } 720 }
716 721
717 dev_info(&dev->dev, "adding root bus resource %pR\n", res); 722 dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
723 res);
724 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
718 725
719 base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | 726 base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
720 AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK; 727 AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
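A quick check of the window arithmetic in pci_amd_enable_64bit_bar() above: a 256 GiB window ending at the 0xfd00000000 hardware limit starts at 0xbd00000000.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t limit = 0xfd00000000ULL;	/* hardware MMIO limit */
	uint64_t size = 256ULL << 30;		/* 256 GiB window */
	uint64_t start = limit - size;

	/* prints "window [0xbd00000000, 0xfd00000000)" */
	printf("window [%#llx, %#llx)\n", (unsigned long long)start,
	       (unsigned long long)limit);
	return 0;
}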
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6a151ce70e86..2dd15e967c3f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void)
135 pud[j] = *pud_offset(p4d_k, vaddr); 135 pud[j] = *pud_offset(p4d_k, vaddr);
136 } 136 }
137 } 137 }
138 pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
138 } 139 }
140
139out: 141out:
140 __flush_tlb_all(); 142 __flush_tlb_all();
141 143
@@ -196,6 +198,9 @@ static pgd_t *efi_pgd;
196 * because we want to avoid inserting EFI region mappings (EFI_VA_END 198 * because we want to avoid inserting EFI region mappings (EFI_VA_END
197 * to EFI_VA_START) into the standard kernel page tables. Everything 199 * to EFI_VA_START) into the standard kernel page tables. Everything
198 * else can be shared, see efi_sync_low_kernel_mappings(). 200 * else can be shared, see efi_sync_low_kernel_mappings().
201 *
202 * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
203 * allocation.
199 */ 204 */
200int __init efi_alloc_page_tables(void) 205int __init efi_alloc_page_tables(void)
201{ 206{
@@ -208,7 +213,7 @@ int __init efi_alloc_page_tables(void)
208 return 0; 213 return 0;
209 214
210 gfp_mask = GFP_KERNEL | __GFP_ZERO; 215 gfp_mask = GFP_KERNEL | __GFP_ZERO;
211 efi_pgd = (pgd_t *)__get_free_page(gfp_mask); 216 efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
212 if (!efi_pgd) 217 if (!efi_pgd)
213 return -ENOMEM; 218 return -ENOMEM;
214 219
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 8a99a2e96537..5b513ccffde4 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
592 /* 592 /*
593 * Update the first page pointer to skip over the CSH header. 593 * Update the first page pointer to skip over the CSH header.
594 */ 594 */
595 cap_info->pages[0] += csh->headersize; 595 cap_info->phys[0] += csh->headersize;
596
597 /*
598 * cap_info->capsule should point at a virtual mapping of the entire
599 * capsule, starting at the capsule header. Our image has the Quark
600 * security header prepended, so we cannot rely on the default vmap()
601 * mapping created by the generic capsule code.
602 * Given that the Quark firmware does not appear to care about the
603 * virtual mapping, let's just point cap_info->capsule at our copy
604 * of the capsule header.
605 */
606 cap_info->capsule = &cap_info->header;
596 607
597 return 1; 608 return 1;
598} 609}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index dc036e511f48..5a0483e7bf66 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
60 return 0; 60 return 0;
61} 61}
62 62
63static const struct bt_sfi_data tng_bt_sfi_data __initdata = { 63static struct bt_sfi_data tng_bt_sfi_data __initdata = {
64 .setup = tng_bt_sfi_setup, 64 .setup = tng_bt_sfi_setup,
65}; 65};
66 66
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index f44c0bc95aa2..8538a6723171 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
299 local_flush_tlb(); 299 local_flush_tlb();
300 stat->d_alltlb++; 300 stat->d_alltlb++;
301 } else { 301 } else {
302 __flush_tlb_one(msg->address); 302 __flush_tlb_single(msg->address);
303 stat->d_onetlb++; 303 stat->d_onetlb++;
304 } 304 }
305 stat->d_requestee++; 305 stat->d_requestee++;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 5f6fd860820a..e4cb9f4cde8a 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
128 * on the specified blade to allow the sending of MSIs to the specified CPU. 128 * on the specified blade to allow the sending of MSIs to the specified CPU.
129 */ 129 */
130static int uv_domain_activate(struct irq_domain *domain, 130static int uv_domain_activate(struct irq_domain *domain,
131 struct irq_data *irq_data, bool early) 131 struct irq_data *irq_data, bool reserve)
132{ 132{
133 uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); 133 uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
134 return 0; 134 return 0;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 36a28eddb435..a7d966964c6f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -152,17 +152,19 @@ static void do_fpu_end(void)
152static void fix_processor_context(void) 152static void fix_processor_context(void)
153{ 153{
154 int cpu = smp_processor_id(); 154 int cpu = smp_processor_id();
155 struct tss_struct *t = &per_cpu(cpu_tss, cpu);
156#ifdef CONFIG_X86_64 155#ifdef CONFIG_X86_64
157 struct desc_struct *desc = get_cpu_gdt_rw(cpu); 156 struct desc_struct *desc = get_cpu_gdt_rw(cpu);
158 tss_desc tss; 157 tss_desc tss;
159#endif 158#endif
160 set_tss_desc(cpu, t); /* 159
161 * This just modifies memory; should not be 160 /*
162 * necessary. But... This is necessary, because 161 * We need to reload TR, which requires that we change the
163 * 386 hardware has concept of busy TSS or some 162 * GDT entry to indicate "available" first.
164 * similar stupidity. 163 *
165 */ 164 * XXX: This could probably all be replaced by a call to
165 * force_reload_TR().
166 */
167 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
166 168
167#ifdef CONFIG_X86_64 169#ifdef CONFIG_X86_64
168 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); 170 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d669e9d89001..c9081c6671f0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,8 +1,12 @@
1#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
2#include <linux/bootmem.h>
3#endif
1#include <linux/cpu.h> 4#include <linux/cpu.h>
2#include <linux/kexec.h> 5#include <linux/kexec.h>
3 6
4#include <xen/features.h> 7#include <xen/features.h>
5#include <xen/page.h> 8#include <xen/page.h>
9#include <xen/interface/memory.h>
6 10
7#include <asm/xen/hypercall.h> 11#include <asm/xen/hypercall.h>
8#include <asm/xen/hypervisor.h> 12#include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
331} 335}
332EXPORT_SYMBOL(xen_arch_unregister_cpu); 336EXPORT_SYMBOL(xen_arch_unregister_cpu);
333#endif 337#endif
338
339#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
340void __init arch_xen_balloon_init(struct resource *hostmem_resource)
341{
342 struct xen_memory_map memmap;
343 int rc;
344 unsigned int i, last_guest_ram;
345 phys_addr_t max_addr = PFN_PHYS(max_pfn);
346 struct e820_table *xen_e820_table;
347 const struct e820_entry *entry;
348 struct resource *res;
349
350 if (!xen_initial_domain())
351 return;
352
353 xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
354 if (!xen_e820_table)
355 return;
356
357 memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
358 set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
359 rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
360 if (rc) {
361 pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
362 goto out;
363 }
364
365 last_guest_ram = 0;
366 for (i = 0; i < memmap.nr_entries; i++) {
367 if (xen_e820_table->entries[i].addr >= max_addr)
368 break;
369 if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
370 last_guest_ram = i;
371 }
372
373 entry = &xen_e820_table->entries[last_guest_ram];
374 if (max_addr >= entry->addr + entry->size)
375 goto out; /* No unallocated host RAM. */
376
377 hostmem_resource->start = max_addr;
378 hostmem_resource->end = entry->addr + entry->size;
379
380 /*
381 * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
382 * as unavailable. The rest of that region can be used for hotplug-based
383 * ballooning.
384 */
385 for (; i < memmap.nr_entries; i++) {
386 entry = &xen_e820_table->entries[i];
387
388 if (entry->type == E820_TYPE_RAM)
389 continue;
390
391 if (entry->addr >= hostmem_resource->end)
392 break;
393
394 res = kzalloc(sizeof(*res), GFP_KERNEL);
395 if (!res)
396 goto out;
397
398 res->name = "Unavailable host RAM";
399 res->start = entry->addr;
400 res->end = (entry->addr + entry->size < hostmem_resource->end) ?
401 entry->addr + entry->size : hostmem_resource->end;
402 rc = insert_resource(hostmem_resource, res);
403 if (rc) {
404 pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
405 __func__, res->start, res->end, rc);
406 kfree(res);
407 goto out;
408 }
409 }
410
411 out:
412 kfree(xen_e820_table);
413}
414#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f2414c6c5e7c..c047f42552e1 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,8 @@
88#include "multicalls.h" 88#include "multicalls.h"
89#include "pmu.h" 89#include "pmu.h"
90 90
91#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
92
91void *xen_initial_gdt; 93void *xen_initial_gdt;
92 94
93static int xen_cpu_up_prepare_pv(unsigned int cpu); 95static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -826,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
826 mcs = xen_mc_entry(0); 828 mcs = xen_mc_entry(0);
827 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); 829 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
828 xen_mc_issue(PARAVIRT_LAZY_CPU); 830 xen_mc_issue(PARAVIRT_LAZY_CPU);
829 this_cpu_write(cpu_tss.x86_tss.sp0, sp0); 831 this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
830} 832}
831 833
832void xen_set_iopl_mask(unsigned mask) 834void xen_set_iopl_mask(unsigned mask)
@@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
1258 __userpte_alloc_gfp &= ~__GFP_HIGHMEM; 1260 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
1259 1261
1260 /* Work out if we support NX */ 1262 /* Work out if we support NX */
1263 get_cpu_cap(&boot_cpu_data);
1261 x86_configure_nx(); 1264 x86_configure_nx();
1262 1265
1263 /* Get mfn list */ 1266 /* Get mfn list */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index fc048ec686e7..d85076223a69 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1325{ 1325{
1326 struct { 1326 struct {
1327 struct mmuext_op op; 1327 struct mmuext_op op;
1328#ifdef CONFIG_SMP
1329 DECLARE_BITMAP(mask, num_processors);
1330#else
1331 DECLARE_BITMAP(mask, NR_CPUS); 1328 DECLARE_BITMAP(mask, NR_CPUS);
1332#endif
1333 } *args; 1329 } *args;
1334 struct multicall_space mcs; 1330 struct multicall_space mcs;
1331 const size_t mc_entry_size = sizeof(args->op) +
1332 sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
1335 1333
1336 trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end); 1334 trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
1337 1335
1338 if (cpumask_empty(cpus)) 1336 if (cpumask_empty(cpus))
1339 return; /* nothing to do */ 1337 return; /* nothing to do */
1340 1338
1341 mcs = xen_mc_entry(sizeof(*args)); 1339 mcs = xen_mc_entry(mc_entry_size);
1342 args = mcs.args; 1340 args = mcs.args;
1343 args->op.arg2.vcpumask = to_cpumask(args->mask); 1341 args->op.arg2.vcpumask = to_cpumask(args->mask);
1344 1342
@@ -1902,6 +1900,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1902 /* Graft it onto L4[511][510] */ 1900 /* Graft it onto L4[511][510] */
1903 copy_page(level2_kernel_pgt, l2); 1901 copy_page(level2_kernel_pgt, l2);
1904 1902
1903 /*
1904 * Zap execute permission from the ident map. Due to the sharing of
1905 * L1 entries we need to do this in the L2.
1906 */
1907 if (__supported_pte_mask & _PAGE_NX) {
1908 for (i = 0; i < PTRS_PER_PMD; ++i) {
1909 if (pmd_none(level2_ident_pgt[i]))
1910 continue;
1911 level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
1912 }
1913 }
1914
1905 /* Copy the initial P->M table mappings if necessary. */ 1915 /* Copy the initial P->M table mappings if necessary. */
1906 i = pgd_index(xen_start_info->mfn_list); 1916 i = pgd_index(xen_start_info->mfn_list);
1907 if (i && i < pgd_index(__START_KERNEL_map)) 1917 if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2261 2271
2262 switch (idx) { 2272 switch (idx) {
2263 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: 2273 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
2264 case FIX_RO_IDT:
2265#ifdef CONFIG_X86_32 2274#ifdef CONFIG_X86_32
2266 case FIX_WP_TEST: 2275 case FIX_WP_TEST:
2267# ifdef CONFIG_HIGHMEM 2276# ifdef CONFIG_HIGHMEM
@@ -2272,7 +2281,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2272#endif 2281#endif
2273 case FIX_TEXT_POKE0: 2282 case FIX_TEXT_POKE0:
2274 case FIX_TEXT_POKE1: 2283 case FIX_TEXT_POKE1:
2275 case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
2276 /* All local page mappings */ 2284 /* All local page mappings */
2277 pte = pfn_pte(phys, prot); 2285 pte = pfn_pte(phys, prot);
2278 break; 2286 break;
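The xen_flush_tlb_others() hunk above stops reserving a NR_CPUS-sized bitmap in every multicall entry and instead sizes the entry from BITS_TO_LONGS(num_possible_cpus()). A sketch of that sizing, with the struct stubbed out:

#include <stdio.h>

#define BITS_PER_LONG	 (8 * sizeof(long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct mmuext_op_model { int cmd; };	/* stand-in for struct mmuext_op */

int main(void)
{
	unsigned int possible_cpus = 12;	/* illustrative */
	size_t entry_size = sizeof(struct mmuext_op_model) +
			    sizeof(long) * BITS_TO_LONGS(possible_cpus);

	printf("multicall entry size: %zu bytes\n", entry_size);
	return 0;
}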
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c114ca767b3b..6e0d2086eacb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
808 addr = xen_e820_table.entries[0].addr; 808 addr = xen_e820_table.entries[0].addr;
809 size = xen_e820_table.entries[0].size; 809 size = xen_e820_table.entries[0].size;
810 while (i < xen_e820_table.nr_entries) { 810 while (i < xen_e820_table.nr_entries) {
811 bool discard = false;
812 811
813 chunk_size = size; 812 chunk_size = size;
814 type = xen_e820_table.entries[i].type; 813 type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
824 xen_add_extra_mem(pfn_s, n_pfns); 823 xen_add_extra_mem(pfn_s, n_pfns);
825 xen_max_p2m_pfn = pfn_s + n_pfns; 824 xen_max_p2m_pfn = pfn_s + n_pfns;
826 } else 825 } else
827 discard = true; 826 type = E820_TYPE_UNUSABLE;
828 } 827 }
829 828
830 if (!discard) 829 xen_align_and_add_e820_region(addr, chunk_size, type);
831 xen_align_and_add_e820_region(addr, chunk_size, type);
832 830
833 addr += chunk_size; 831 addr += chunk_size;
834 size -= chunk_size; 832 size -= chunk_size;
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 75011b80660f..3b34745d0a52 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -72,7 +72,7 @@ u64 xen_clocksource_read(void);
72void xen_setup_cpu_clockevents(void); 72void xen_setup_cpu_clockevents(void);
73void xen_save_time_memory_area(void); 73void xen_save_time_memory_area(void);
74void xen_restore_time_memory_area(void); 74void xen_restore_time_memory_area(void);
75void __init xen_init_time_ops(void); 75void __ref xen_init_time_ops(void);
76void __init xen_hvm_init_time_ops(void); 76void __init xen_hvm_init_time_ops(void);
77 77
78irqreturn_t xen_debug_interrupt(int irq, void *dev_id); 78irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
diff --git a/block/bio.c b/block/bio.c
index 8bfdea58159b..9ef6cf3addb3 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
599 bio->bi_disk = bio_src->bi_disk; 599 bio->bi_disk = bio_src->bi_disk;
600 bio->bi_partno = bio_src->bi_partno; 600 bio->bi_partno = bio_src->bi_partno;
601 bio_set_flag(bio, BIO_CLONED); 601 bio_set_flag(bio, BIO_CLONED);
602 if (bio_flagged(bio_src, BIO_THROTTLED))
603 bio_set_flag(bio, BIO_THROTTLED);
602 bio->bi_opf = bio_src->bi_opf; 604 bio->bi_opf = bio_src->bi_opf;
603 bio->bi_write_hint = bio_src->bi_write_hint; 605 bio->bi_write_hint = bio_src->bi_write_hint;
604 bio->bi_iter = bio_src->bi_iter; 606 bio->bi_iter = bio_src->bi_iter;
diff --git a/block/blk-core.c b/block/blk-core.c
index b8881750a3ac..3ba4326a63b5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -562,6 +562,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
562 } 562 }
563} 563}
564 564
565void blk_drain_queue(struct request_queue *q)
566{
567 spin_lock_irq(q->queue_lock);
568 __blk_drain_queue(q, true);
569 spin_unlock_irq(q->queue_lock);
570}
571
565/** 572/**
566 * blk_queue_bypass_start - enter queue bypass mode 573 * blk_queue_bypass_start - enter queue bypass mode
567 * @q: queue of interest 574 * @q: queue of interest
@@ -689,8 +696,6 @@ void blk_cleanup_queue(struct request_queue *q)
689 */ 696 */
690 blk_freeze_queue(q); 697 blk_freeze_queue(q);
691 spin_lock_irq(lock); 698 spin_lock_irq(lock);
692 if (!q->mq_ops)
693 __blk_drain_queue(q, true);
694 queue_flag_set(QUEUE_FLAG_DEAD, q); 699 queue_flag_set(QUEUE_FLAG_DEAD, q);
695 spin_unlock_irq(lock); 700 spin_unlock_irq(lock);
696 701
diff --git a/block/blk-map.c b/block/blk-map.c
index b21f8e86f120..d3a94719f03f 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -12,22 +12,29 @@
12#include "blk.h" 12#include "blk.h"
13 13
14/* 14/*
15 * Append a bio to a passthrough request. Only works can be merged into 15 * Append a bio to a passthrough request. Only works if the bio can be merged
16 * the request based on the driver constraints. 16 * into the request based on the driver constraints.
17 */ 17 */
18int blk_rq_append_bio(struct request *rq, struct bio *bio) 18int blk_rq_append_bio(struct request *rq, struct bio **bio)
19{ 19{
20 blk_queue_bounce(rq->q, &bio); 20 struct bio *orig_bio = *bio;
21
22 blk_queue_bounce(rq->q, bio);
21 23
22 if (!rq->bio) { 24 if (!rq->bio) {
23 blk_rq_bio_prep(rq->q, rq, bio); 25 blk_rq_bio_prep(rq->q, rq, *bio);
24 } else { 26 } else {
25 if (!ll_back_merge_fn(rq->q, rq, bio)) 27 if (!ll_back_merge_fn(rq->q, rq, *bio)) {
28 if (orig_bio != *bio) {
29 bio_put(*bio);
30 *bio = orig_bio;
31 }
26 return -EINVAL; 32 return -EINVAL;
33 }
27 34
28 rq->biotail->bi_next = bio; 35 rq->biotail->bi_next = *bio;
29 rq->biotail = bio; 36 rq->biotail = *bio;
30 rq->__data_len += bio->bi_iter.bi_size; 37 rq->__data_len += (*bio)->bi_iter.bi_size;
31 } 38 }
32 39
33 return 0; 40 return 0;
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
73 * We link the bounce buffer in and could have to traverse it 80 * We link the bounce buffer in and could have to traverse it
74 * later so we have to get a ref to prevent it from being freed 81 * later so we have to get a ref to prevent it from being freed
75 */ 82 */
76 ret = blk_rq_append_bio(rq, bio); 83 ret = blk_rq_append_bio(rq, &bio);
77 bio_get(bio);
78 if (ret) { 84 if (ret) {
79 bio_endio(bio);
80 __blk_rq_unmap_user(orig_bio); 85 __blk_rq_unmap_user(orig_bio);
81 bio_put(bio);
82 return ret; 86 return ret;
83 } 87 }
88 bio_get(bio);
84 89
85 return 0; 90 return 0;
86} 91}
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
213 int reading = rq_data_dir(rq) == READ; 218 int reading = rq_data_dir(rq) == READ;
214 unsigned long addr = (unsigned long) kbuf; 219 unsigned long addr = (unsigned long) kbuf;
215 int do_copy = 0; 220 int do_copy = 0;
216 struct bio *bio; 221 struct bio *bio, *orig_bio;
217 int ret; 222 int ret;
218 223
219 if (len > (queue_max_hw_sectors(q) << 9)) 224 if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
236 if (do_copy) 241 if (do_copy)
237 rq->rq_flags |= RQF_COPY_USER; 242 rq->rq_flags |= RQF_COPY_USER;
238 243
239 ret = blk_rq_append_bio(rq, bio); 244 orig_bio = bio;
245 ret = blk_rq_append_bio(rq, &bio);
240 if (unlikely(ret)) { 246 if (unlikely(ret)) {
241 /* request is too big */ 247 /* request is too big */
242 bio_put(bio); 248 bio_put(orig_bio);
243 return ret; 249 return ret;
244 } 250 }
245 251
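Editorial note: the blk-map.c hunks above switch blk_rq_append_bio() to take a struct bio ** because the bounce code may substitute a bounce clone for the caller's bio; passing the pointer by reference lets the caller track the substitution, and on a failed merge the clone is dropped and the original handed back. A minimal userspace sketch of that ownership pattern follows; all names are illustrative, not kernel APIs.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf { char data[64]; int bounced; };

/* May replace *b with a copy, like blk_queue_bounce() replacing a bio. */
static void maybe_bounce(struct buf **b)
{
	struct buf *copy = malloc(sizeof(*copy));

	memcpy(copy, *b, sizeof(*copy));
	copy->bounced = 1;
	*b = copy;		/* caller's pointer now tracks the copy */
}

static int append(struct buf **b, int fail_merge)
{
	struct buf *orig = *b;

	maybe_bounce(b);
	if (fail_merge) {
		if (orig != *b) {	/* drop the copy, restore the original */
			free(*b);
			*b = orig;
		}
		return -1;
	}
	return 0;
}

int main(void)
{
	struct buf *b = calloc(1, sizeof(*b));

	if (append(&b, 1) < 0)
		printf("failed, caller still owns original (bounced=%d)\n", b->bounced);
	free(b);
	return 0;
}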
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 11097477eeab..3d3797327491 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -161,6 +161,8 @@ void blk_freeze_queue(struct request_queue *q)
 	 * exported to drivers as the only user for unfreeze is blk_mq.
 	 */
 	blk_freeze_queue_start(q);
+	if (!q->mq_ops)
+		blk_drain_queue(q);
 	blk_mq_freeze_queue_wait(q);
 }
 
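Editorial note: together with the blk-core.c hunk above, this moves the legacy (!mq_ops) drain out of blk_cleanup_queue() and into the freeze path, so ordering becomes: start the freeze, drain already-queued work for legacy queues, then wait for in-flight work. A toy single-threaded model of that ordering, under invented names:

#include <stdbool.h>
#include <stdio.h>

struct queue { bool frozen; int queued; int in_flight; };

static void freeze_start(struct queue *q) { q->frozen = true; }

static void drain(struct queue *q)
{
	while (q->queued) {		/* dispatch everything already queued */
		q->queued--;
		q->in_flight++;
	}
}

static void wait_for_completion(struct queue *q)
{
	while (q->in_flight)		/* stand-in for a blocking wait */
		q->in_flight--;
}

static void freeze(struct queue *q, bool legacy)
{
	freeze_start(q);
	if (legacy)			/* mirrors the !q->mq_ops case */
		drain(q);
	wait_for_completion(q);
}

int main(void)
{
	struct queue q = { .queued = 3, .in_flight = 2 };

	freeze(&q, true);
	printf("frozen=%d queued=%d in_flight=%d\n", q.frozen, q.queued, q.in_flight);
	return 0;
}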
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 825bc29767e6..d19f416d6101 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2226,13 +2226,7 @@ again:
 out_unlock:
 	spin_unlock_irq(q->queue_lock);
 out:
-	/*
-	 * As multiple blk-throtls may stack in the same issue path, we
-	 * don't want bios to leave with the flag set. Clear the flag if
-	 * being issued.
-	 */
-	if (!throttled)
-		bio_clear_flag(bio, BIO_THROTTLED);
+	bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (throttled || !td->track_bio_latency)
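Editorial note: this hunk inverts the BIO_THROTTLED convention: instead of clearing the flag on issue, a bio is marked once it has passed the throttler, and the bio.c hunk earlier propagates the flag to clones, so a bio is charged at most once. A small sketch of flag-based "charge once" idempotence, with invented names:

#include <stdio.h>

#define FLAG_THROTTLED (1u << 0)

struct io { unsigned int flags; };

static int throttle(struct io *io)
{
	if (io->flags & FLAG_THROTTLED)
		return 0;		/* already charged, skip */
	io->flags |= FLAG_THROTTLED;
	return 1;			/* charged this time */
}

static void clone_io(struct io *dst, const struct io *src)
{
	/* mirrors __bio_clone_fast() propagating BIO_THROTTLED */
	dst->flags = src->flags & FLAG_THROTTLED;
}

int main(void)
{
	struct io a = { 0 }, b;

	printf("first pass charged: %d\n", throttle(&a));
	clone_io(&b, &a);
	printf("clone charged:      %d\n", throttle(&b));
	return 0;
}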
diff --git a/block/blk.h b/block/blk.h
index 3f1446937aec..442098aa9463 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -330,4 +330,6 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
 }
 #endif /* CONFIG_BOUNCE */
 
+extern void blk_drain_queue(struct request_queue *q);
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index fceb1a96480b..1d05c422c932 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	unsigned i = 0;
 	bool bounce = false;
 	int sectors = 0;
+	bool passthrough = bio_is_passthrough(*bio_orig);
 
 	bio_for_each_segment(from, *bio_orig, iter) {
 		if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	if (!bounce)
 		return;
 
-	if (sectors < bio_sectors(*bio_orig)) {
+	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
 		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
 		bio_chain(bio, *bio_orig);
 		generic_make_request(*bio_orig);
 		*bio_orig = bio;
 	}
-	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
+	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+			bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
 		struct page *page = to->bv_page;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b4df317c2916..f95c60774ce8 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
 	unsigned int cur_domain;
 	unsigned int batching;
 	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
+			     void *key);
+
 static int rq_sched_domain(const struct request *rq)
 {
 	unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
 		INIT_LIST_HEAD(&khd->rqs[i]);
+		init_waitqueue_func_entry(&khd->domain_wait[i],
+					  kyber_domain_wake);
+		khd->domain_wait[i].private = hctx;
 		INIT_LIST_HEAD(&khd->domain_wait[i].entry);
 		atomic_set(&khd->wait_index[i], 0);
 	}
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 	int nr;
 
 	nr = __sbitmap_queue_get(domain_tokens);
-	if (nr >= 0)
-		return nr;
 
 	/*
 	 * If we failed to get a domain token, make sure the hardware queue is
 	 * run when one becomes available. Note that this is serialized on
 	 * khd->lock, but we still need to be careful about the waker.
 	 */
-	if (list_empty_careful(&wait->entry)) {
-		init_waitqueue_func_entry(wait, kyber_domain_wake);
-		wait->private = hctx;
+	if (nr < 0 && list_empty_careful(&wait->entry)) {
 		ws = sbq_wait_ptr(domain_tokens,
 				  &khd->wait_index[sched_domain]);
+		khd->domain_ws[sched_domain] = ws;
 		add_wait_queue(&ws->wait, wait);
 
 		/*
 		 * Try again in case a token was freed before we got on the wait
-		 * queue. The waker may have already removed the entry from the
-		 * wait queue, but list_del_init() is okay with that.
+		 * queue.
 		 */
 		nr = __sbitmap_queue_get(domain_tokens);
-		if (nr >= 0) {
-			unsigned long flags;
+	}
 
-			spin_lock_irqsave(&ws->wait.lock, flags);
-			list_del_init(&wait->entry);
-			spin_unlock_irqrestore(&ws->wait.lock, flags);
-		}
+	/*
+	 * If we got a token while we were on the wait queue, remove ourselves
+	 * from the wait queue to ensure that all wake ups make forward
+	 * progress. It's possible that the waker already deleted the entry
+	 * between the !list_empty_careful() check and us grabbing the lock, but
+	 * list_del_init() is okay with that.
+	 */
+	if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+		ws = khd->domain_ws[sched_domain];
+		spin_lock_irq(&ws->wait.lock);
+		list_del_init(&wait->entry);
+		spin_unlock_irq(&ws->wait.lock);
 	}
+
 	return nr;
 }
 
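Editorial note: the kyber hunk above is a lost-wakeup fix built on a common pattern: try to allocate, enqueue yourself on the wait list, retry (a token may have been freed in between), and if the retry wins, take the wait-list lock and unhook yourself so later wakeups reach other waiters. A userspace sketch of that shape under hypothetical names (this is not the kernel sbitmap API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static int on_wait_list;
static int tokens = 1;

static int try_get_token(void)
{
	if (tokens > 0) {
		tokens--;
		return 1;
	}
	return 0;
}

static int get_token(void)
{
	int got = try_get_token();

	if (!got && !on_wait_list) {
		pthread_mutex_lock(&wait_lock);
		on_wait_list = 1;	/* add_wait_queue() stand-in */
		pthread_mutex_unlock(&wait_lock);

		/* retry: a token may have been freed before we enqueued */
		got = try_get_token();
	}

	if (got && on_wait_list) {
		/* won the retry: unhook so wakeups make forward progress */
		pthread_mutex_lock(&wait_lock);
		on_wait_list = 0;	/* list_del_init() stand-in */
		pthread_mutex_unlock(&wait_lock);
	}
	return got;
}

int main(void)
{
	printf("got token: %d, still waiting: %d\n", get_token(), on_wait_list);
	return 0;
}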
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 415a54ced4d6..35d4dcea381f 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
 	unsigned int i;
 
 	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
-		ctx->rcvused -= rsgl->sg_num_bytes;
+		atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused);
 		af_alg_free_sg(&rsgl->sgl);
 		list_del(&rsgl->list);
 		if (rsgl != &areq->first_rsgl)
@@ -1138,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
 		if (!af_alg_readable(sk))
 			break;
 
-		if (!ctx->used) {
-			err = af_alg_wait_for_data(sk, flags);
-			if (err)
-				return err;
-		}
-
 		seglen = min_t(size_t, (maxsize - len),
 			       msg_data_left(msg));
 
@@ -1169,7 +1163,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
 
 		areq->last_rsgl = rsgl;
 		len += err;
-		ctx->rcvused += err;
+		atomic_add(err, &ctx->rcvused);
 		rsgl->sg_num_bytes = err;
 		iov_iter_advance(&msg->msg_iter, err);
 	}
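Editorial note: the rcvused counter above moves from a plain integer to atomic_t because it is updated from contexts that do not all hold the same lock. A C11 sketch of the same accounting idea, with illustrative names:

#include <stdatomic.h>
#include <stdio.h>

struct ctx { atomic_int rcvused; };

static void account_add(struct ctx *c, int n)
{
	atomic_fetch_add(&c->rcvused, n);	/* atomic_add() analogue */
}

static void account_sub(struct ctx *c, int n)
{
	atomic_fetch_sub(&c->rcvused, n);	/* atomic_sub() analogue */
}

int main(void)
{
	struct ctx c;

	atomic_init(&c.rcvused, 0);
	account_add(&c, 128);
	account_sub(&c, 64);
	printf("rcvused = %d\n", atomic_load(&c.rcvused));
	return 0;
}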
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 60d7366ed343..9a636f961572 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 
 			spawn->alg = NULL;
 			spawns = &inst->alg.cra_users;
+
+			/*
+			 * We may encounter an unregistered instance here, since
+			 * an instance's spawns are set up prior to the instance
+			 * being registered. An unregistered instance will have
+			 * NULL ->cra_users.next, since ->cra_users isn't
+			 * properly initialized until registration. But an
+			 * unregistered instance cannot have any users, so treat
+			 * it the same as ->cra_users being empty.
+			 */
+			if (spawns->next == NULL)
+				break;
 		}
 	} while ((spawns = crypto_more_spawns(alg, &stack, &top,
 					      &secondary_spawns)));
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 48b34e9c6834..e9885a35ef6e 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	size_t usedpages = 0;		/* [in]  RX bufs to be used from user */
 	size_t processed = 0;		/* [in]  TX bufs to be consumed */
 
+	if (!ctx->used) {
+		err = af_alg_wait_for_data(sk, flags);
+		if (err)
+			return err;
+	}
+
 	/*
 	 * Data length provided by caller via sendmsg/sendpage that has not
 	 * yet been processed.
@@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		/* AIO operation */
 		sock_hold(sk);
 		areq->iocb = msg->msg_iocb;
+
+		/* Remember output size that will be generated. */
+		areq->outlen = outlen;
+
 		aead_request_set_callback(&areq->cra_u.aead_req,
 					  CRYPTO_TFM_REQ_MAY_BACKLOG,
 					  af_alg_async_cb, areq);
@@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		crypto_aead_decrypt(&areq->cra_u.aead_req);
 
 		/* AIO operation in progress */
-		if (err == -EINPROGRESS || err == -EBUSY) {
-			/* Remember output size that will be generated. */
-			areq->outlen = outlen;
-
+		if (err == -EINPROGRESS || err == -EBUSY)
 			return -EIOCBQUEUED;
-		}
 
 		sock_put(sk);
 	} else {
@@ -565,7 +571,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
 	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
-	ctx->rcvused = 0;
+	atomic_set(&ctx->rcvused, 0);
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 30cff827dd8f..c5c47b680152 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 	int err = 0;
 	size_t len = 0;
 
+	if (!ctx->used) {
+		err = af_alg_wait_for_data(sk, flags);
+		if (err)
+			return err;
+	}
+
 	/* Allocate cipher request for current operation. */
 	areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
 				 crypto_skcipher_reqsize(tfm));
@@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 		/* AIO operation */
 		sock_hold(sk);
 		areq->iocb = msg->msg_iocb;
+
+		/* Remember output size that will be generated. */
+		areq->outlen = len;
+
 		skcipher_request_set_callback(&areq->cra_u.skcipher_req,
 					      CRYPTO_TFM_REQ_MAY_SLEEP,
 					      af_alg_async_cb, areq);
@@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 		crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
 
 		/* AIO operation in progress */
-		if (err == -EINPROGRESS || err == -EBUSY) {
-			/* Remember output size that will be generated. */
-			areq->outlen = len;
-
+		if (err == -EINPROGRESS || err == -EBUSY)
 			return -EIOCBQUEUED;
-		}
 
 		sock_put(sk);
 	} else {
@@ -384,7 +390,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
-	ctx->rcvused = 0;
+	atomic_set(&ctx->rcvused, 0);
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index db1bc3147bc4..600afa99941f 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 					 algt->mask));
 	if (IS_ERR(poly))
 		return PTR_ERR(poly);
+	poly_hash = __crypto_hash_alg_common(poly);
+
+	err = -EINVAL;
+	if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+		goto out_put_poly;
 
 	err = -ENOMEM;
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
@@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 
 	ctx = aead_instance_ctx(inst);
 	ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
-	poly_hash = __crypto_hash_alg_common(poly);
 	err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
 				      aead_crypto_instance(inst));
 	if (err)
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index 4e6472658852..eca04d3729b3 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
81 pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); 81 pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
82 crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); 82 crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
83 INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); 83 INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
84 spin_lock_init(&cpu_queue->q_lock);
84 } 85 }
85 return 0; 86 return 0;
86} 87}
@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
104 int cpu, err; 105 int cpu, err;
105 struct mcryptd_cpu_queue *cpu_queue; 106 struct mcryptd_cpu_queue *cpu_queue;
106 107
107 cpu = get_cpu(); 108 cpu_queue = raw_cpu_ptr(queue->cpu_queue);
108 cpu_queue = this_cpu_ptr(queue->cpu_queue); 109 spin_lock(&cpu_queue->q_lock);
109 rctx->tag.cpu = cpu; 110 cpu = smp_processor_id();
111 rctx->tag.cpu = smp_processor_id();
110 112
111 err = crypto_enqueue_request(&cpu_queue->queue, request); 113 err = crypto_enqueue_request(&cpu_queue->queue, request);
112 pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", 114 pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
113 cpu, cpu_queue, request); 115 cpu, cpu_queue, request);
116 spin_unlock(&cpu_queue->q_lock);
114 queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); 117 queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
115 put_cpu();
116 118
117 return err; 119 return err;
118} 120}
@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
161 cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); 163 cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
162 i = 0; 164 i = 0;
163 while (i < MCRYPTD_BATCH || single_task_running()) { 165 while (i < MCRYPTD_BATCH || single_task_running()) {
164 /* 166
165 * preempt_disable/enable is used to prevent 167 spin_lock_bh(&cpu_queue->q_lock);
166 * being preempted by mcryptd_enqueue_request()
167 */
168 local_bh_disable();
169 preempt_disable();
170 backlog = crypto_get_backlog(&cpu_queue->queue); 168 backlog = crypto_get_backlog(&cpu_queue->queue);
171 req = crypto_dequeue_request(&cpu_queue->queue); 169 req = crypto_dequeue_request(&cpu_queue->queue);
172 preempt_enable(); 170 spin_unlock_bh(&cpu_queue->q_lock);
173 local_bh_enable();
174 171
175 if (!req) { 172 if (!req) {
176 mcryptd_opportunistic_flush(); 173 mcryptd_opportunistic_flush();
@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
185 ++i; 182 ++i;
186 } 183 }
187 if (cpu_queue->queue.qlen) 184 if (cpu_queue->queue.qlen)
188 queue_work(kcrypto_wq, &cpu_queue->work); 185 queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
189} 186}
190 187
191void mcryptd_flusher(struct work_struct *__work) 188void mcryptd_flusher(struct work_struct *__work)
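Editorial note: the mcryptd hunks above replace a "disable preemption so producer and consumer cannot interleave" scheme with an explicit per-queue spinlock, which still works when the producer runs on a different CPU from the queue's worker. A pthread sketch of serializing a queue behind its own lock, with invented names:

#include <pthread.h>
#include <stdio.h>

struct cpu_queue {
	pthread_mutex_t q_lock;
	int qlen;
};

static int enqueue(struct cpu_queue *q)
{
	pthread_mutex_lock(&q->q_lock);		/* spin_lock(&q_lock) analogue */
	q->qlen++;
	pthread_mutex_unlock(&q->q_lock);
	return 0;
}

static int dequeue(struct cpu_queue *q)
{
	int had_work;

	pthread_mutex_lock(&q->q_lock);		/* spin_lock_bh(&q_lock) analogue */
	had_work = q->qlen > 0;
	if (had_work)
		q->qlen--;
	pthread_mutex_unlock(&q->q_lock);
	return had_work;
}

int main(void)
{
	struct cpu_queue q = { PTHREAD_MUTEX_INITIALIZER, 0 };

	enqueue(&q);
	printf("dequeued something: %d\n", dequeue(&q));
	return 0;
}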
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index ee9cfb99fe25..f8ec3d4ba4a8 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm)
 	crypto_free_aead(ctx->child);
 }
 
+static void pcrypt_free(struct aead_instance *inst)
+{
+	struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
+
+	crypto_drop_aead(&ctx->spawn);
+	kfree(inst);
+}
+
 static int pcrypt_init_instance(struct crypto_instance *inst,
 				struct crypto_alg *alg)
 {
@@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->alg.encrypt = pcrypt_aead_encrypt;
 	inst->alg.decrypt = pcrypt_aead_decrypt;
 
+	inst->free = pcrypt_free;
+
 	err = aead_register_instance(tmpl, inst);
 	if (err)
 		goto out_drop_aead;
@@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb)
 	return -EINVAL;
 }
 
-static void pcrypt_free(struct crypto_instance *inst)
-{
-	struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
-
-	crypto_drop_aead(&ctx->spawn);
-	kfree(inst);
-}
-
 static int pcrypt_cpumask_change_notify(struct notifier_block *self,
 					unsigned long val, void *data)
 {
@@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
 static struct crypto_template pcrypt_tmpl = {
 	.name = "pcrypt",
 	.create = pcrypt_create,
-	.free = pcrypt_free,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 778e0ff42bfa..11af5fd6a443 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
 
 	walk->total = req->cryptlen;
 	walk->nbytes = 0;
+	walk->iv = req->iv;
+	walk->oiv = req->iv;
 
 	if (unlikely(!walk->total))
 		return 0;
@@ -456,9 +458,6 @@
 	scatterwalk_start(&walk->in, req->src);
 	scatterwalk_start(&walk->out, req->dst);
 
-	walk->iv = req->iv;
-	walk->oiv = req->iv;
-
 	walk->flags &= ~SKCIPHER_WALK_SLEEP;
 	walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
 		       SKCIPHER_WALK_SLEEP : 0;
@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
 	int err;
 
 	walk->nbytes = 0;
+	walk->iv = req->iv;
+	walk->oiv = req->iv;
 
 	if (unlikely(!walk->total))
 		return 0;
@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
 	scatterwalk_done(&walk->in, 0, walk->total);
 	scatterwalk_done(&walk->out, 0, walk->total);
 
-	walk->iv = req->iv;
-	walk->oiv = req->iv;
-
 	if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
 		walk->flags |= SKCIPHER_WALK_SLEEP;
 	else
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6742f6c68034..9bff853e85f3 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1007,7 +1007,7 @@ skip:
 	/* The record may be cleared by others, try read next record */
 	if (len == -ENOENT)
 		goto skip;
-	else if (len < sizeof(*rcd)) {
+	else if (len < 0 || len < sizeof(*rcd)) {
 		rc = -EIO;
 		goto out;
 	}
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 30e84cc600ae..06ea4749ebd9 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
 	struct cpc_register_resource *desired_reg;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
-	struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+	struct cppc_pcc_data *pcc_ss_data;
 	int ret = 0;
 
 	if (!cpc_desc || pcc_ss_id < 0) {
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index ff2580e7611d..abeb4df4f22e 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 				dev_name(&adev_dimm->dev));
 		return -ENXIO;
 	}
+	/*
+	 * Record nfit_mem for the notification path to track back to
+	 * the nfit sysfs attributes for this dimm device object.
+	 */
+	dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
 	/*
 	 * Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
 			sysfs_put(nfit_mem->flags_attr);
 			nfit_mem->flags_attr = NULL;
 		}
-		if (adev_dimm)
+		if (adev_dimm) {
 			acpi_remove_notify_handler(adev_dimm->handle,
 					ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+			dev_set_drvdata(&adev_dimm->dev, NULL);
+		}
 	}
 	mutex_unlock(&acpi_desc->init_mutex);
 }
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index bccec9de0533..a7ecfde66b7b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -482,7 +482,8 @@ enum binder_deferred_state {
  * @tsk                   task_struct for group_leader of process
  *                        (invariant after initialized)
  * @files                 files_struct for process
- *                        (invariant after initialized)
+ *                        (protected by @files_lock)
+ * @files_lock            mutex to protect @files
  * @deferred_work_node:   element for binder_deferred_list
  *                        (protected by binder_deferred_lock)
  * @deferred_work:        bitmap of deferred work to perform
@@ -530,6 +531,7 @@ struct binder_proc {
 	int pid;
 	struct task_struct *tsk;
 	struct files_struct *files;
+	struct mutex files_lock;
 	struct hlist_node deferred_work_node;
 	int deferred_work;
 	bool is_dead;
@@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
 
 static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 {
-	struct files_struct *files = proc->files;
 	unsigned long rlim_cur;
 	unsigned long irqs;
+	int ret;
 
-	if (files == NULL)
-		return -ESRCH;
-
-	if (!lock_task_sighand(proc->tsk, &irqs))
-		return -EMFILE;
-
+	mutex_lock(&proc->files_lock);
+	if (proc->files == NULL) {
+		ret = -ESRCH;
+		goto err;
+	}
+	if (!lock_task_sighand(proc->tsk, &irqs)) {
+		ret = -EMFILE;
+		goto err;
+	}
 	rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
 	unlock_task_sighand(proc->tsk, &irqs);
 
-	return __alloc_fd(files, 0, rlim_cur, flags);
+	ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
+err:
+	mutex_unlock(&proc->files_lock);
+	return ret;
 }
 
 /*
@@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 static void task_fd_install(
 	struct binder_proc *proc, unsigned int fd, struct file *file)
 {
+	mutex_lock(&proc->files_lock);
 	if (proc->files)
 		__fd_install(proc->files, fd, file);
+	mutex_unlock(&proc->files_lock);
 }
 
 /*
@@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
 {
 	int retval;
 
-	if (proc->files == NULL)
-		return -ESRCH;
-
+	mutex_lock(&proc->files_lock);
+	if (proc->files == NULL) {
+		retval = -ESRCH;
+		goto err;
+	}
 	retval = __close_fd(proc->files, fd);
 	/* can't restart close syscall because file table entry was cleared */
 	if (unlikely(retval == -ERESTARTSYS ||
@@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
 		     retval == -ERESTARTNOHAND ||
 		     retval == -ERESTART_RESTARTBLOCK))
 		retval = -EINTR;
-
+err:
+	mutex_unlock(&proc->files_lock);
 	return retval;
 }
 
@@ -4627,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 	ret = binder_alloc_mmap_handler(&proc->alloc, vma);
 	if (ret)
 		return ret;
+	mutex_lock(&proc->files_lock);
 	proc->files = get_files_struct(current);
+	mutex_unlock(&proc->files_lock);
 	return 0;
 
 err_bad_arg:
@@ -4651,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
 	spin_lock_init(&proc->outer_lock);
 	get_task_struct(current->group_leader);
 	proc->tsk = current->group_leader;
+	mutex_init(&proc->files_lock);
 	INIT_LIST_HEAD(&proc->todo);
 	proc->default_priority = task_nice(current);
 	binder_dev = container_of(filp->private_data, struct binder_device,
@@ -4903,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work)
 
 	files = NULL;
 	if (defer & BINDER_DEFERRED_PUT_FILES) {
+		mutex_lock(&proc->files_lock);
 		files = proc->files;
 		if (files)
 			proc->files = NULL;
+		mutex_unlock(&proc->files_lock);
 	}
 
 	if (defer & BINDER_DEFERRED_FLUSH)
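Editorial note: the binder hunks above close a use-after-free by making every test and use of proc->files happen under a dedicated mutex, so a concurrent teardown cannot clear and free the pointer between the NULL check and the dereference. A userspace model of the check-then-use-under-lock pattern, with illustrative names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct proc {
	pthread_mutex_t files_lock;
	int *files;			/* stand-in for files_struct */
};

static int use_files(struct proc *p)
{
	int ret;

	pthread_mutex_lock(&p->files_lock);
	if (p->files == NULL) {
		ret = -1;		/* -ESRCH analogue */
		goto out;
	}
	ret = *p->files;		/* safe: cannot be freed while locked */
out:
	pthread_mutex_unlock(&p->files_lock);
	return ret;
}

static void put_files(struct proc *p)
{
	pthread_mutex_lock(&p->files_lock);
	free(p->files);
	p->files = NULL;
	pthread_mutex_unlock(&p->files_lock);
}

int main(void)
{
	struct proc p = { PTHREAD_MUTEX_INITIALIZER, malloc(sizeof(int)) };

	*p.files = 42;
	printf("before put: %d\n", use_files(&p));
	put_files(&p);
	printf("after put:  %d\n", use_files(&p));
	return 0;
}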
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 1e71d70cdf3f..49fd50fccd48 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -236,6 +236,9 @@ config GENERIC_CPU_DEVICES
 config GENERIC_CPU_AUTOPROBE
 	bool
 
+config GENERIC_CPU_VULNERABILITIES
+	bool
+
 config SOC_BUS
 	bool
 	select GLOB
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index eb3af2739537..07532d83be0b 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf)
 		this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
 }
 
+static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+{
+	return of_property_read_bool(this_leaf->of_node, "cache-unified");
+}
+
 static void cache_of_override_properties(unsigned int cpu)
 {
 	int index;
@@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu)
 
 	for (index = 0; index < cache_leaves(cpu); index++) {
 		this_leaf = this_cpu_ci->info_list + index;
+		/*
+		 * init_cache_level must setup the cache level correctly
+		 * overriding the architecturally specified levels, so
+		 * if type is NONE at this stage, it should be unified
+		 */
+		if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+		    cache_node_is_unified(this_leaf))
+			this_leaf->type = CACHE_TYPE_UNIFIED;
 		cache_size(this_leaf);
 		cache_get_line_size(this_leaf);
 		cache_nr_sets(this_leaf);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 58a9b608d821..d99038487a0d 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -511,10 +511,58 @@ static void __init cpu_dev_register_generic(void)
 #endif
 }
 
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+	&dev_attr_meltdown.attr,
+	&dev_attr_spectre_v1.attr,
+	&dev_attr_spectre_v2.attr,
+	NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+	.name  = "vulnerabilities",
+	.attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+	if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+			       &cpu_root_vulnerabilities_group))
+		pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
 void __init cpu_dev_init(void)
 {
 	if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
 		panic("Failed to register CPU subsystem");
 
 	cpu_dev_register_generic();
+	cpu_register_vulnerabilities();
 }
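Editorial note: the cpu.c hunk above relies on weak symbols: generic code supplies a "Not affected" default, and an architecture that knows better provides a strong definition that replaces it at link time. A minimal single-file sketch of the mechanism (GCC/Clang attribute; the file split into generic and arch code is hypothetical):

#include <stdio.h>

/* generic code: weak default, used only if no strong definition exists */
__attribute__((weak)) const char *show_meltdown(void)
{
	return "Not affected\n";
}

/*
 * An arch file would provide a strong override, e.g.:
 *
 *	const char *show_meltdown(void)
 *	{
 *		return "Mitigation: PTI\n";
 *	}
 */

int main(void)
{
	fputs(show_meltdown(), stdout);
	return 0;
}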
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bc8e61506968..d5fe720cf149 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1581,9 +1581,8 @@ out:
 	return err;
 }
 
-static void lo_release(struct gendisk *disk, fmode_t mode)
+static void __lo_release(struct loop_device *lo)
 {
-	struct loop_device *lo = disk->private_data;
 	int err;
 
 	if (atomic_dec_return(&lo->lo_refcnt))
@@ -1610,6 +1609,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 	mutex_unlock(&lo->lo_ctl_mutex);
 }
 
+static void lo_release(struct gendisk *disk, fmode_t mode)
+{
+	mutex_lock(&loop_index_mutex);
+	__lo_release(disk->private_data);
+	mutex_unlock(&loop_index_mutex);
+}
+
 static const struct block_device_operations lo_fops = {
 	.owner =	THIS_MODULE,
 	.open =		lo_open,
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ccb9975a97fa..ad0477ae820f 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
 struct nullb_cmd {
 	struct list_head list;
 	struct llist_node ll_list;
-	call_single_data_t csd;
+	struct __call_single_data csd;
 	struct request *rq;
 	struct bio *bio;
 	unsigned int tag;
+	blk_status_t error;
 	struct nullb_queue *nq;
 	struct hrtimer timer;
-	blk_status_t error;
 };
 
 struct nullb_queue {
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 38fc5f397fde..cc93522a6d41 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3047,13 +3047,21 @@ static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf)
 	mutex_unlock(&rbd_dev->watch_mutex);
 }
 
+static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
+{
+	struct rbd_client_id cid = rbd_get_cid(rbd_dev);
+
+	strcpy(rbd_dev->lock_cookie, cookie);
+	rbd_set_owner_cid(rbd_dev, &cid);
+	queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
+}
+
 /*
  * lock_rwsem must be held for write
  */
 static int rbd_lock(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
-	struct rbd_client_id cid = rbd_get_cid(rbd_dev);
 	char cookie[32];
 	int ret;
 
@@ -3068,9 +3076,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
 		return ret;
 
 	rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
-	strcpy(rbd_dev->lock_cookie, cookie);
-	rbd_set_owner_cid(rbd_dev, &cid);
-	queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
+	__rbd_lock(rbd_dev, cookie);
 	return 0;
 }
 
@@ -3856,7 +3862,7 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
 		queue_delayed_work(rbd_dev->task_wq,
 				   &rbd_dev->lock_dwork, 0);
 	} else {
-		strcpy(rbd_dev->lock_cookie, cookie);
+		__rbd_lock(rbd_dev, cookie);
 	}
 }
 
@@ -4381,7 +4387,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	segment_size = rbd_obj_bytes(&rbd_dev->header);
 	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
 	q->limits.max_sectors = queue_max_hw_sectors(q);
-	blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
+	blk_queue_max_segments(q, USHRT_MAX);
 	blk_queue_max_segment_size(q, segment_size);
 	blk_queue_io_min(q, segment_size);
 	blk_queue_io_opt(q, segment_size);
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index 328ca93781cf..1b76d9585902 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = {
 	.match = sunxi_rsb_device_match,
 	.probe = sunxi_rsb_device_probe,
 	.remove = sunxi_rsb_device_remove,
+	.uevent = of_device_uevent_modalias,
 };
 
 static void sunxi_rsb_dev_release(struct device *dev)
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 647d056df88c..b56c11f51baf 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core)
 
 	ret = core->ops->is_enabled(core->hw);
 done:
-	clk_pm_runtime_put(core);
+	if (core->dev)
+		pm_runtime_put(core->dev);
 
 	return ret;
 }
@@ -1564,6 +1565,9 @@ static void clk_change_rate(struct clk_core *core)
 		best_parent_rate = core->parent->rate;
 	}
 
+	if (clk_pm_runtime_get(core))
+		return;
+
 	if (core->flags & CLK_SET_RATE_UNGATE) {
 		unsigned long flags;
 
@@ -1634,6 +1638,8 @@ static void clk_change_rate(struct clk_core *core)
 	/* handle the new child who might not be in core->children yet */
 	if (core->new_child)
 		clk_change_rate(core->new_child);
+
+	clk_pm_runtime_put(core);
 }
 
 static int clk_core_set_rate_nolock(struct clk_core *core,
diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c
index a1a634253d6f..f00d8758ba24 100644
--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
+++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
@@ -16,6 +16,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev,
 	return 0;
 }
 
+static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
+				 unsigned long id)
+{
+	sun9i_mmc_reset_assert(rcdev, id);
+	udelay(10);
+	sun9i_mmc_reset_deassert(rcdev, id);
+
+	return 0;
+}
+
 static const struct reset_control_ops sun9i_mmc_reset_ops = {
 	.assert    = sun9i_mmc_reset_assert,
 	.deassert  = sun9i_mmc_reset_deassert,
+	.reset     = sun9i_mmc_reset_reset,
 };
 
 static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
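Editorial note: the new .reset callback above is just a composition of the existing assert and deassert callbacks around a short delay, giving consumers a one-call "pulse the reset line" operation. A standalone sketch of that composition; usleep() stands in for udelay(), and all names are invented:

#include <stdio.h>
#include <unistd.h>

struct ctrl { int line_asserted; };

static int reset_assert(struct ctrl *c)   { c->line_asserted = 1; return 0; }
static int reset_deassert(struct ctrl *c) { c->line_asserted = 0; return 0; }

/* composed operation: pulse the reset line */
static int reset_reset(struct ctrl *c)
{
	reset_assert(c);
	usleep(10);			/* udelay(10) analogue */
	reset_deassert(c);
	return 0;
}

int main(void)
{
	struct ctrl c = { 0 };

	reset_reset(&c);
	printf("asserted after pulse: %d\n", c.line_asserted);
	return 0;
}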
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 58d4f4e1ad6a..ca38229b045a 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -22,6 +22,8 @@
 
 #include "cpufreq_governor.h"
 
+#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL	(2 * TICK_NSEC / NSEC_PER_USEC)
+
 static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
 
 static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct policy_dbs_info *policy_dbs;
+	unsigned int sampling_interval;
 	int ret;
-	ret = sscanf(buf, "%u", &dbs_data->sampling_rate);
-	if (ret != 1)
+
+	ret = sscanf(buf, "%u", &sampling_interval);
+	if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
 		return -EINVAL;
 
+	dbs_data->sampling_rate = sampling_interval;
+
 	/*
 	 * We are operating under dbs_data->mutex and so the list and its
 	 * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
 	if (ret)
 		goto free_policy_dbs_info;
 
-	dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy);
+	/*
+	 * The sampling interval should not be less than the transition latency
+	 * of the CPU and it also cannot be too small for dbs_update() to work
+	 * correctly.
+	 */
+	dbs_data->sampling_rate = max_t(unsigned int,
+					CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
+					cpufreq_policy_transition_delay_us(policy));
 
 	if (!have_governor_per_policy())
 		gov->gdbs_data = dbs_data;
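Editorial note: the governor change above enforces a floor on the sampling interval in two places: the initial value is clamped with max_t(), and user writes below the floor are rejected. A small sketch of the same clamp-and-validate logic; the constant is invented for illustration:

#include <stdio.h>

#define MIN_SAMPLING_INTERVAL_US 2000u	/* stand-in for 2 * TICK_NSEC / NSEC_PER_USEC */

static unsigned int effective_rate(unsigned int transition_delay_us)
{
	/* max_t() analogue: never go below the floor */
	return transition_delay_us > MIN_SAMPLING_INTERVAL_US ?
	       transition_delay_us : MIN_SAMPLING_INTERVAL_US;
}

static int store_rate(unsigned int *rate, unsigned int user_val)
{
	if (user_val < MIN_SAMPLING_INTERVAL_US)
		return -1;		/* -EINVAL analogue */
	*rate = user_val;
	return 0;
}

int main(void)
{
	unsigned int rate = effective_rate(500);

	printf("initial rate: %u\n", rate);
	printf("store 100:    %d\n", store_rate(&rate, 100));
	printf("store 5000:   %d, rate now %u\n", store_rate(&rate, 5000), rate);
	return 0;
}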
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 628fe899cb48..d9b2c2de49c4 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
 	val >>= OCOTP_CFG3_SPEED_SHIFT;
 	val &= 0x3;
 
-	if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
-	     of_machine_is_compatible("fsl,imx6q"))
-		if (dev_pm_opp_disable(dev, 1200000000))
-			dev_warn(dev, "failed to disable 1.2GHz OPP\n");
 	if (val < OCOTP_CFG3_SPEED_996MHZ)
 		if (dev_pm_opp_disable(dev, 996000000))
 			dev_warn(dev, "failed to disable 996MHz OPP\n");
-	if (of_machine_is_compatible("fsl,imx6q")) {
+
+	if (of_machine_is_compatible("fsl,imx6q") ||
+	    of_machine_is_compatible("fsl,imx6qp")) {
 		if (val != OCOTP_CFG3_SPEED_852MHZ)
 			if (dev_pm_opp_disable(dev, 852000000))
 				dev_warn(dev, "failed to disable 852MHz OPP\n");
+		if (val != OCOTP_CFG3_SPEED_1P2GHZ)
+			if (dev_pm_opp_disable(dev, 1200000000))
+				dev_warn(dev, "failed to disable 1.2GHz OPP\n");
 	}
 	iounmap(base);
 put_node:
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index 3e104f5aa0c2..b56b3f711d94 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -5,6 +5,7 @@ config CRYPTO_DEV_CHELSIO
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
 	select CRYPTO_AUTHENC
+	select CRYPTO_GF128MUL
 	---help---
 	  The Chelsio Crypto Co-processor driver for T6 adapters.
 
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 89ba9e85c0f3..4bcef78a08aa 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -607,6 +607,7 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv
 		ndesc = ctx->handle_result(priv, ring, sreq->req,
 					   &should_complete, &ret);
 		if (ndesc < 0) {
+			kfree(sreq);
 			dev_err(priv->dev, "failed to handle result (%d)", ndesc);
 			return;
 		}
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index 5438552bc6d7..fcc0a606d748 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -14,6 +14,7 @@
14 14
15#include <crypto/aes.h> 15#include <crypto/aes.h>
16#include <crypto/skcipher.h> 16#include <crypto/skcipher.h>
17#include <crypto/internal/skcipher.h>
17 18
18#include "safexcel.h" 19#include "safexcel.h"
19 20
@@ -33,6 +34,10 @@ struct safexcel_cipher_ctx {
33 unsigned int key_len; 34 unsigned int key_len;
34}; 35};
35 36
37struct safexcel_cipher_req {
38 bool needs_inv;
39};
40
36static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx, 41static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx,
37 struct crypto_async_request *async, 42 struct crypto_async_request *async,
38 struct safexcel_command_desc *cdesc, 43 struct safexcel_command_desc *cdesc,
@@ -126,9 +131,9 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
126 return 0; 131 return 0;
127} 132}
128 133
129static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, 134static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
130 struct crypto_async_request *async, 135 struct crypto_async_request *async,
131 bool *should_complete, int *ret) 136 bool *should_complete, int *ret)
132{ 137{
133 struct skcipher_request *req = skcipher_request_cast(async); 138 struct skcipher_request *req = skcipher_request_cast(async);
134 struct safexcel_result_desc *rdesc; 139 struct safexcel_result_desc *rdesc;
@@ -265,7 +270,6 @@ static int safexcel_aes_send(struct crypto_async_request *async,
265 spin_unlock_bh(&priv->ring[ring].egress_lock); 270 spin_unlock_bh(&priv->ring[ring].egress_lock);
266 271
267 request->req = &req->base; 272 request->req = &req->base;
268 ctx->base.handle_result = safexcel_handle_result;
269 273
270 *commands = n_cdesc; 274 *commands = n_cdesc;
271 *results = n_rdesc; 275 *results = n_rdesc;
@@ -341,8 +345,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
341 345
342 ring = safexcel_select_ring(priv); 346 ring = safexcel_select_ring(priv);
343 ctx->base.ring = ring; 347 ctx->base.ring = ring;
344 ctx->base.needs_inv = false;
345 ctx->base.send = safexcel_aes_send;
346 348
347 spin_lock_bh(&priv->ring[ring].queue_lock); 349 spin_lock_bh(&priv->ring[ring].queue_lock);
348 enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); 350 enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
@@ -359,6 +361,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
359 return ndesc; 361 return ndesc;
360} 362}
361 363
364static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
365 struct crypto_async_request *async,
366 bool *should_complete, int *ret)
367{
368 struct skcipher_request *req = skcipher_request_cast(async);
369 struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
370 int err;
371
372 if (sreq->needs_inv) {
373 sreq->needs_inv = false;
374 err = safexcel_handle_inv_result(priv, ring, async,
375 should_complete, ret);
376 } else {
377 err = safexcel_handle_req_result(priv, ring, async,
378 should_complete, ret);
379 }
380
381 return err;
382}
383
 static int safexcel_cipher_send_inv(struct crypto_async_request *async,
 				    int ring, struct safexcel_request *request,
 				    int *commands, int *results)
@@ -368,8 +390,6 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async,
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret;
 
-	ctx->base.handle_result = safexcel_handle_inv_result;
-
 	ret = safexcel_invalidate_cache(async, &ctx->base, priv,
 					ctx->base.ctxr_dma, ring, request);
 	if (unlikely(ret))
@@ -381,28 +401,46 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async,
 	return 0;
 }
 
+static int safexcel_send(struct crypto_async_request *async,
+			 int ring, struct safexcel_request *request,
+			 int *commands, int *results)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+	int ret;
+
+	if (sreq->needs_inv)
+		ret = safexcel_cipher_send_inv(async, ring, request,
+					       commands, results);
+	else
+		ret = safexcel_aes_send(async, ring, request,
+					commands, results);
+	return ret;
+}
+
 static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	struct skcipher_request req;
+	SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm));
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
 	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
-	memset(&req, 0, sizeof(struct skcipher_request));
+	memset(req, 0, sizeof(struct skcipher_request));
 
 	/* create invalidation request */
 	init_completion(&result.completion);
-	skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				      safexcel_inv_complete, &result);
 
-	skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm));
-	ctx = crypto_tfm_ctx(req.base.tfm);
+	skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm));
+	ctx = crypto_tfm_ctx(req->base.tfm);
 	ctx->base.exit_inv = true;
-	ctx->base.send = safexcel_cipher_send_inv;
+	sreq->needs_inv = true;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
-	crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+	crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
 	if (!priv->ring[ring].need_dequeue)
@@ -424,19 +462,21 @@ static int safexcel_aes(struct skcipher_request *req,
 			enum safexcel_cipher_direction dir, u32 mode)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret, ring;
 
+	sreq->needs_inv = false;
 	ctx->direction = dir;
 	ctx->mode = mode;
 
 	if (ctx->base.ctxr) {
-		if (ctx->base.needs_inv)
-			ctx->base.send = safexcel_cipher_send_inv;
+		if (ctx->base.needs_inv) {
+			sreq->needs_inv = true;
+			ctx->base.needs_inv = false;
+		}
 	} else {
 		ctx->base.ring = safexcel_select_ring(priv);
-		ctx->base.send = safexcel_aes_send;
-
 		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
 						 EIP197_GFP_FLAGS(req->base),
 						 &ctx->base.ctxr_dma);
@@ -476,6 +516,11 @@ static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm)
 			     alg.skcipher.base);
 
 	ctx->priv = tmpl->priv;
+	ctx->base.send = safexcel_send;
+	ctx->base.handle_result = safexcel_handle_result;
+
+	crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
+				    sizeof(struct safexcel_cipher_req));
 
 	return 0;
 }
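The cipher-side change above reads most naturally as a shift from mutating a shared function pointer (ctx->base.send) per request to a per-request flag consumed by one fixed dispatcher registered at init time. A minimal, self-contained C sketch of that pattern follows; the names and types are illustrative stand-ins, not the driver's own.

#include <stdbool.h>
#include <stdio.h>

/* Per-request state replaces per-context function-pointer swapping. */
struct request {
	bool needs_inv;	/* set once when the request is queued */
};

static int send_invalidation(struct request *r)
{
	return printf("send: context invalidation\n");
}

static int send_payload(struct request *r)
{
	return printf("send: normal crypto operation\n");
}

/* Registered once, like ctx->base.send = safexcel_send in cra_init. */
static int dispatch_send(struct request *r)
{
	return r->needs_inv ? send_invalidation(r) : send_payload(r);
}

int main(void)
{
	struct request inv = { .needs_inv = true };
	struct request aes = { .needs_inv = false };

	dispatch_send(&inv);
	dispatch_send(&aes);
	return 0;
}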
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 74feb6227101..0c5a5820b06e 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -32,9 +32,10 @@ struct safexcel_ahash_req {
 	bool last_req;
 	bool finish;
 	bool hmac;
+	bool needs_inv;
 
 	u8 state_sz;    /* expected sate size, only set once */
-	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32));
 
 	u64 len;
 	u64 processed;
@@ -119,15 +120,15 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
 	}
 }
 
-static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
 				  struct crypto_async_request *async,
 				  bool *should_complete, int *ret)
 {
 	struct safexcel_result_desc *rdesc;
 	struct ahash_request *areq = ahash_request_cast(async);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_req *sreq = ahash_request_ctx(areq);
-	int cache_len, result_sz = sreq->state_sz;
+	int cache_len;
 
 	*ret = 0;
 
@@ -148,8 +149,8 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
 	if (sreq->finish)
-		result_sz = crypto_ahash_digestsize(ahash);
-	memcpy(sreq->state, areq->result, result_sz);
+		memcpy(areq->result, sreq->state,
+		       crypto_ahash_digestsize(ahash));
 
 	dma_unmap_sg(priv->dev, areq->src,
 		     sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE);
@@ -165,9 +166,9 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
 	return 1;
 }
 
-static int safexcel_ahash_send(struct crypto_async_request *async, int ring,
-			       struct safexcel_request *request, int *commands,
-			       int *results)
+static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
+				   struct safexcel_request *request,
+				   int *commands, int *results)
 {
 	struct ahash_request *areq = ahash_request_cast(async);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
@@ -273,7 +274,7 @@ send_command:
 	/* Add the token */
 	safexcel_hash_token(first_cdesc, len, req->state_sz);
 
-	ctx->base.result_dma = dma_map_single(priv->dev, areq->result,
+	ctx->base.result_dma = dma_map_single(priv->dev, req->state,
 					      req->state_sz, DMA_FROM_DEVICE);
 	if (dma_mapping_error(priv->dev, ctx->base.result_dma)) {
 		ret = -EINVAL;
@@ -292,7 +293,6 @@ send_command:
 
 	req->processed += len;
 	request->req = &areq->base;
-	ctx->base.handle_result = safexcel_handle_result;
 
 	*commands = n_cdesc;
 	*results = 1;
@@ -374,8 +374,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 
 	ring = safexcel_select_ring(priv);
 	ctx->base.ring = ring;
-	ctx->base.needs_inv = false;
-	ctx->base.send = safexcel_ahash_send;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
 	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
@@ -392,6 +390,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 	return 1;
 }
 
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+				  struct crypto_async_request *async,
+				  bool *should_complete, int *ret)
+{
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	int err;
+
+	if (req->needs_inv) {
+		req->needs_inv = false;
+		err = safexcel_handle_inv_result(priv, ring, async,
+						 should_complete, ret);
+	} else {
+		err = safexcel_handle_req_result(priv, ring, async,
+						 should_complete, ret);
+	}
+
+	return err;
+}
+
 static int safexcel_ahash_send_inv(struct crypto_async_request *async,
 				   int ring, struct safexcel_request *request,
 				   int *commands, int *results)
@@ -400,7 +418,6 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async,
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	int ret;
 
-	ctx->base.handle_result = safexcel_handle_inv_result;
 	ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv,
 					ctx->base.ctxr_dma, ring, request);
 	if (unlikely(ret))
@@ -412,28 +429,46 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async,
 	return 0;
 }
 
+static int safexcel_ahash_send(struct crypto_async_request *async,
+			       int ring, struct safexcel_request *request,
+			       int *commands, int *results)
+{
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	int ret;
+
+	if (req->needs_inv)
+		ret = safexcel_ahash_send_inv(async, ring, request,
+					      commands, results);
+	else
+		ret = safexcel_ahash_send_req(async, ring, request,
+					      commands, results);
+	return ret;
+}
+
 static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	struct ahash_request req;
+	AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm));
+	struct safexcel_ahash_req *rctx = ahash_request_ctx(req);
 	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
-	memset(&req, 0, sizeof(struct ahash_request));
+	memset(req, 0, sizeof(struct ahash_request));
 
 	/* create invalidation request */
 	init_completion(&result.completion);
-	ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				   safexcel_inv_complete, &result);
 
-	ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm));
-	ctx = crypto_tfm_ctx(req.base.tfm);
+	ahash_request_set_tfm(req, __crypto_ahash_cast(tfm));
+	ctx = crypto_tfm_ctx(req->base.tfm);
 	ctx->base.exit_inv = true;
-	ctx->base.send = safexcel_ahash_send_inv;
+	rctx->needs_inv = true;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
-	crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+	crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
 	if (!priv->ring[ring].need_dequeue)
@@ -481,14 +516,16 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret, ring;
 
-	ctx->base.send = safexcel_ahash_send;
+	req->needs_inv = false;
 
 	if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
 		ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
 
 	if (ctx->base.ctxr) {
-		if (ctx->base.needs_inv)
-			ctx->base.send = safexcel_ahash_send_inv;
+		if (ctx->base.needs_inv) {
+			ctx->base.needs_inv = false;
+			req->needs_inv = true;
+		}
 	} else {
 		ctx->base.ring = safexcel_select_ring(priv);
 		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
@@ -622,6 +659,8 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm)
 				     struct safexcel_alg_template, alg.ahash);
 
 	ctx->priv = tmpl->priv;
+	ctx->base.send = safexcel_ahash_send;
+	ctx->base.handle_result = safexcel_handle_result;
 
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct safexcel_ahash_req));
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 48de52cf2ecc..662e709812cc 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1625,6 +1625,7 @@ static int queue_cache_init(void)
 					  CWQ_ENTRY_SIZE, 0, NULL);
 	if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
 		kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+		queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
 		return -ENOMEM;
 	}
 	return 0;
@@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void)
 {
 	kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
 	kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+	queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
+	queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL;
 }
 
 static long spu_queue_register_workfn(void *arg)
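The n2_core fix is a general teardown idiom: clear the cached pointer right after destroying what it points to, so a failed init can be retried (or destroy reached twice) without touching a stale pointer. kmem_cache_destroy(NULL), like free(NULL), is a no-op, so the teardown path stays unconditional. A rough userspace sketch of the same idiom, with hypothetical names:

#include <stdlib.h>

static void *queue_cache[2];

/* Safe to call repeatedly: free(NULL) is a no-op, and each slot is
 * reset so a later init/destroy cycle never sees a dangling pointer. */
static void queue_cache_destroy(void)
{
	free(queue_cache[0]);
	free(queue_cache[1]);
	queue_cache[0] = NULL;
	queue_cache[1] = NULL;
}

int main(void)
{
	queue_cache[0] = malloc(64);
	queue_cache[1] = malloc(64);
	queue_cache_destroy();
	queue_cache_destroy();	/* idempotent after the fix */
	return 0;
}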
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index ec8ac5c4dd84..055e2e8f985a 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -20,10 +20,6 @@
 
 #define NO_FURTHER_WRITE_ACTION -1
 
-#ifndef phys_to_page
-#define phys_to_page(x)		pfn_to_page((x) >> PAGE_SHIFT)
-#endif
-
 /**
  * efi_free_all_buff_pages - free all previous allocated buffer pages
  * @cap_info: pointer to current instance of capsule_info structure
@@ -35,7 +31,7 @@
 static void efi_free_all_buff_pages(struct capsule_info *cap_info)
 {
 	while (cap_info->index > 0)
-		__free_page(phys_to_page(cap_info->pages[--cap_info->index]));
+		__free_page(cap_info->pages[--cap_info->index]);
 
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
 }
@@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info)
 
 	cap_info->pages = temp_page;
 
+	temp_page = krealloc(cap_info->phys,
+			     pages_needed * sizeof(phys_addr_t *),
+			     GFP_KERNEL | __GFP_ZERO);
+	if (!temp_page)
+		return -ENOMEM;
+
+	cap_info->phys = temp_page;
+
 	return 0;
 }
 
@@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
  **/
 static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
 {
+	bool do_vunmap = false;
 	int ret;
 
-	ret = efi_capsule_update(&cap_info->header, cap_info->pages);
+	/*
+	 * cap_info->capsule may have been assigned already by a quirk
+	 * handler, so only overwrite it if it is NULL
+	 */
+	if (!cap_info->capsule) {
+		cap_info->capsule = vmap(cap_info->pages, cap_info->index,
+					 VM_MAP, PAGE_KERNEL);
+		if (!cap_info->capsule)
+			return -ENOMEM;
+		do_vunmap = true;
+	}
+
+	ret = efi_capsule_update(cap_info->capsule, cap_info->phys);
+	if (do_vunmap)
+		vunmap(cap_info->capsule);
 	if (ret) {
 		pr_err("capsule update failed\n");
 		return ret;
@@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
 			goto failed;
 		}
 
-		cap_info->pages[cap_info->index++] = page_to_phys(page);
+		cap_info->pages[cap_info->index] = page;
+		cap_info->phys[cap_info->index] = page_to_phys(page);
 		cap_info->page_bytes_remain = PAGE_SIZE;
+		cap_info->index++;
 	} else {
-		page = phys_to_page(cap_info->pages[cap_info->index - 1]);
+		page = cap_info->pages[cap_info->index - 1];
 	}
 
 	kbuff = kmap(page);
@@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file)
 	struct capsule_info *cap_info = file->private_data;
 
 	kfree(cap_info->pages);
+	kfree(cap_info->phys);
 	kfree(file->private_data);
 	file->private_data = NULL;
 	return 0;
@@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 	}
 
+	cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+	if (!cap_info->phys) {
+		kfree(cap_info->pages);
+		kfree(cap_info);
+		return -ENOMEM;
+	}
+
 	file->private_data = cap_info;
 
 	return 0;
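The capsule-loader change keeps two arrays in lockstep: the struct page pointers (used for kmap/vmap and freeing) and their physical addresses (what the firmware interface consumes). A rough userspace sketch of growing such parallel arrays safely follows; all names are hypothetical stand-ins for the driver's types.

#include <stdlib.h>

struct capsule_buf {
	void **pages;		/* stand-in for struct page * entries */
	unsigned long *phys;	/* stand-in for phys_addr_t entries */
	int index;
};

/* Grow both arrays together; on failure the old arrays stay valid. */
static int capsule_buf_grow(struct capsule_buf *c, int pages_needed)
{
	void *tmp;

	tmp = realloc(c->pages, pages_needed * sizeof(*c->pages));
	if (!tmp)
		return -1;
	c->pages = tmp;

	tmp = realloc(c->phys, pages_needed * sizeof(*c->phys));
	if (!tmp)
		return -1;	/* c->pages remains owned and freeable */
	c->phys = tmp;
	return 0;
}

int main(void)
{
	struct capsule_buf c = { 0 };
	return capsule_buf_grow(&c, 8) ? 1 : 0;
}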
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index dfcf56ee3c61..76861a00bb92 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = {
  * category than their parents, so it won't report false recursion.
  */
 static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
 
 static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
 				 irq_hw_number_t hwirq)
@@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
 	ret = irq_set_chip_data(irq, d->host_data);
 	if (ret < 0)
 		return ret;
-	irq_set_lockdep_class(irq, &gpio_lock_class);
+	irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class);
 	irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq);
 	irq_set_noprobe(irq);
 
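This and the following GPIO hunks all feed the same API change: irq_set_lockdep_class() now takes a second lock_class_key so lockdep can track the irq_desc spinlock and the irq request mutex as separate classes. The driver-side shape, sketched for a hypothetical driver (kernel context assumed; this shows the call's shape, not new driver code):

#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/irqdomain.h>

/* One static key per class keeps lockdep from conflating nested
 * GPIO interrupts with their parent interrupt's locks. */
static struct lock_class_key demo_gpio_lock_class;
static struct lock_class_key demo_gpio_request_class;

static int demo_gpio_irq_map(struct irq_domain *d, unsigned int irq,
			     irq_hw_number_t hwirq)
{
	/* first key: irq_desc->lock; second key: the request mutex */
	irq_set_lockdep_class(irq, &demo_gpio_lock_class,
			      &demo_gpio_request_class);
	irq_set_noprobe(irq);
	return 0;
}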
diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
index 545d43a587b7..bb4f8cf18bd9 100644
--- a/drivers/gpio/gpio-brcmstb.c
+++ b/drivers/gpio/gpio-brcmstb.c
@@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank(
  * category than their parents, so it won't report false recursion.
  */
 static struct lock_class_key brcmstb_gpio_irq_lock_class;
+static struct lock_class_key brcmstb_gpio_irq_request_class;
 
 
 static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
@@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
 	ret = irq_set_chip_data(irq, &bank->gc);
 	if (ret < 0)
 		return ret;
-	irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class);
+	irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class,
+			      &brcmstb_gpio_irq_request_class);
 	irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq);
 	irq_set_noprobe(irq);
 	return 0;
diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c
index 23e771dba4c1..e85903eddc68 100644
--- a/drivers/gpio/gpio-reg.c
+++ b/drivers/gpio/gpio-reg.c
@@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset)
 	struct gpio_reg *r = to_gpio_reg(gc);
 	int irq = r->irqs[offset];
 
-	if (irq >= 0 && r->irq.domain)
-		irq = irq_find_mapping(r->irq.domain, irq);
+	if (irq >= 0 && r->irqdomain)
+		irq = irq_find_mapping(r->irqdomain, irq);
 
 	return irq;
 }
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 8db47f671708..02fa8fe2292a 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
  * than their parents, so it won't report false recursion.
  */
 static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
 
 static int tegra_gpio_probe(struct platform_device *pdev)
 {
@@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev)
 
 		bank = &tgi->bank_info[GPIO_BANK(gpio)];
 
-		irq_set_lockdep_class(irq, &gpio_lock_class);
+		irq_set_lockdep_class(irq, &gpio_lock_class,
+				      &gpio_request_class);
 		irq_set_chip_data(irq, bank);
 		irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
 	}
diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c
index 2313af82fad3..acd59113e08b 100644
--- a/drivers/gpio/gpio-xgene-sb.c
+++ b/drivers/gpio/gpio-xgene-sb.c
@@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio)
 
 static int xgene_gpio_sb_domain_activate(struct irq_domain *d,
 					 struct irq_data *irq_data,
-					 bool early)
+					 bool reserve)
 {
 	struct xgene_gpio_sb *priv = d->host_data;
 	u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq);
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index eb4528c87c0b..d6f3d9ee1350 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
 	}
 
 	if (!chip->names)
-		devprop_gpiochip_set_names(chip);
+		devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));
 
 	acpi_gpiochip_request_regions(acpi_gpio);
 	acpi_gpiochip_scan_gpios(acpi_gpio);
diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c
index 27f383bda7d9..f748aa3e77f7 100644
--- a/drivers/gpio/gpiolib-devprop.c
+++ b/drivers/gpio/gpiolib-devprop.c
@@ -19,30 +19,27 @@
 /**
  * devprop_gpiochip_set_names - Set GPIO line names using device properties
  * @chip: GPIO chip whose lines should be named, if possible
+ * @fwnode: Property Node containing the gpio-line-names property
  *
  * Looks for device property "gpio-line-names" and if it exists assigns
  * GPIO line names for the chip. The memory allocated for the assigned
  * names belong to the underlying firmware node and should not be released
  * by the caller.
  */
-void devprop_gpiochip_set_names(struct gpio_chip *chip)
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+				const struct fwnode_handle *fwnode)
 {
 	struct gpio_device *gdev = chip->gpiodev;
 	const char **names;
 	int ret, i;
 
-	if (!chip->parent) {
-		dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
-		return;
-	}
-
-	ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+	ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
 						NULL, 0);
 	if (ret < 0)
 		return;
 
 	if (ret != gdev->ngpio) {
-		dev_warn(chip->parent,
+		dev_warn(&gdev->dev,
 			 "names %d do not match number of GPIOs %d\n", ret,
 			 gdev->ngpio);
 		return;
@@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
 	if (!names)
 		return;
 
-	ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+	ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
 						names, gdev->ngpio);
 	if (ret < 0) {
-		dev_warn(chip->parent, "failed to read GPIO line names\n");
+		dev_warn(&gdev->dev, "failed to read GPIO line names\n");
 		kfree(names);
 		return;
 	}
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index e0d59e61b52f..72a0695d2ac3 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)
 
 	/* If the chip defines names itself, these take precedence */
 	if (!chip->names)
-		devprop_gpiochip_set_names(chip);
+		devprop_gpiochip_set_names(chip,
+					   of_fwnode_handle(chip->of_node));
 
 	of_node_get(chip->of_node);
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index aad84a6306c4..14532d9576e4 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices);
 
 static void gpiochip_free_hogs(struct gpio_chip *chip);
 static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-				struct lock_class_key *key);
+				struct lock_class_key *lock_key,
+				struct lock_class_key *request_key);
 static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
 static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip);
 static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip);
@@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void)
 }
 
 int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
-			       struct lock_class_key *key)
+			       struct lock_class_key *lock_key,
+			       struct lock_class_key *request_key)
 {
 	unsigned long flags;
 	int status = 0;
@@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 	if (status)
 		goto err_remove_from_list;
 
-	status = gpiochip_add_irqchip(chip, key);
+	status = gpiochip_add_irqchip(chip, lock_key, request_key);
 	if (status)
 		goto err_remove_chip;
 
@@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
 	 * This lock class tells lockdep that GPIO irqs are in a different
 	 * category than their parents, so it won't report false recursion.
 	 */
-	irq_set_lockdep_class(irq, chip->irq.lock_key);
+	irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key);
 	irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler);
 	/* Chips that use nested thread handlers have them marked */
 	if (chip->irq.threaded)
@@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
 /**
  * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip
  * @gpiochip: the GPIO chip to add the IRQ chip to
- * @lock_key: lockdep class
+ * @lock_key: lockdep class for IRQ lock
+ * @request_key: lockdep class for IRQ request
  */
 static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-				struct lock_class_key *lock_key)
+				struct lock_class_key *lock_key,
+				struct lock_class_key *request_key)
 {
 	struct irq_chip *irqchip = gpiochip->irq.chip;
 	const struct irq_domain_ops *ops;
@@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
 	gpiochip->to_irq = gpiochip_to_irq;
 	gpiochip->irq.default_type = type;
 	gpiochip->irq.lock_key = lock_key;
+	gpiochip->irq.request_key = request_key;
 
 	if (gpiochip->irq.domain_ops)
 		ops = gpiochip->irq.domain_ops;
@@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
  * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE
  * to have the core avoid setting up any default type in the hardware.
  * @threaded: whether this irqchip uses a nested thread handler
- * @lock_key: lockdep class
+ * @lock_key: lockdep class for IRQ lock
+ * @request_key: lockdep class for IRQ request
 *
 * This function closely associates a certain irqchip with a certain
 * gpiochip, providing an irq domain to translate the local IRQs to
@@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
 			     irq_flow_handler_t handler,
 			     unsigned int type,
 			     bool threaded,
-			     struct lock_class_key *lock_key)
+			     struct lock_class_key *lock_key,
+			     struct lock_class_key *request_key)
 {
 	struct device_node *of_node;
 
@@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
 	gpiochip->irq.default_type = type;
 	gpiochip->to_irq = gpiochip_to_irq;
 	gpiochip->irq.lock_key = lock_key;
+	gpiochip->irq.request_key = request_key;
 	gpiochip->irq.domain = irq_domain_add_simple(of_node,
 			gpiochip->ngpio, first_irq,
 			&gpiochip_domain_ops, gpiochip);
@@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key);
 #else /* CONFIG_GPIOLIB_IRQCHIP */
 
 static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-				       struct lock_class_key *key)
+				       struct lock_class_key *lock_key,
+				       struct lock_class_key *request_key)
 {
 	return 0;
 }
@@ -2884,6 +2893,27 @@ void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 EXPORT_SYMBOL_GPL(gpiod_set_raw_value);
 
 /**
+ * gpiod_set_value_nocheck() - set a GPIO line value without checking
+ * @desc: the descriptor to set the value on
+ * @value: value to set
+ *
+ * This sets the value of a GPIO line backing a descriptor, applying
+ * different semantic quirks like active low and open drain/source
+ * handling.
+ */
+static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value)
+{
+	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+		value = !value;
+	if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
+		gpio_set_open_drain_value_commit(desc, value);
+	else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
+		gpio_set_open_source_value_commit(desc, value);
+	else
+		gpiod_set_raw_value_commit(desc, value);
+}
+
+/**
 * gpiod_set_value() - assign a gpio's value
 * @desc: gpio whose value will be assigned
 * @value: value to assign
@@ -2897,16 +2927,8 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_value);
 void gpiod_set_value(struct gpio_desc *desc, int value)
 {
 	VALIDATE_DESC_VOID(desc);
-	/* Should be using gpiod_set_value_cansleep() */
 	WARN_ON(desc->gdev->chip->can_sleep);
-	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
-		value = !value;
-	if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
-		gpio_set_open_drain_value_commit(desc, value);
-	else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
-		gpio_set_open_source_value_commit(desc, value);
-	else
-		gpiod_set_raw_value_commit(desc, value);
+	gpiod_set_value_nocheck(desc, value);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_value);
 
@@ -3234,9 +3256,7 @@ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 {
 	might_sleep_if(extra_checks);
 	VALIDATE_DESC_VOID(desc);
-	if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
-		value = !value;
-	gpiod_set_raw_value_commit(desc, value);
+	gpiod_set_value_nocheck(desc, value);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
 
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index af48322839c3..6c44d1652139 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
 	return desc - &desc->gdev->descs[0];
 }
 
-void devprop_gpiochip_set_names(struct gpio_chip *chip);
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+				const struct fwnode_handle *fwnode);
 
 /* With descriptor prefix */
 
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index 59849f02e2ad..1402c0e71b03 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 
 	mutex_lock(&dev->mode_config.idr_mutex);
 
-	/* Insert the new lessee into the tree */
-	id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
-	if (id < 0) {
-		error = id;
-		goto out_lessee;
-	}
-
-	lessee->lessee_id = id;
-	lessee->lessor = drm_master_get(lessor);
-	list_add_tail(&lessee->lessee_list, &lessor->lessees);
-
 	idr_for_each_entry(leases, entry, object) {
 		error = 0;
 		if (!idr_find(&dev->mode_config.crtc_idr, object))
@@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 		}
 	}
 
+	/* Insert the new lessee into the tree */
+	id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
+	if (id < 0) {
+		error = id;
+		goto out_lessee;
+	}
+
+	lessee->lessee_id = id;
+	lessee->lessor = drm_master_get(lessor);
+	list_add_tail(&lessee->lessee_list, &lessor->lessees);
+
 	/* Move the leases over */
 	lessee->leases = *leases;
 	DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor);
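The drm_lease reorder appears to follow the check-then-publish rule: validate every leased object before allocating the lessee id and linking it into the lessor's tree, so the error path taken for an invalid object has no half-published lessee to unwind. A tiny userspace sketch of the rule, with hypothetical names:

#include <stdio.h>

static int object_valid(int obj) { return obj > 0; }

/* Check-then-publish: nothing to unwind if validation fails. */
static int lease_create(const int *objs, int n)
{
	for (int i = 0; i < n; i++)
		if (!object_valid(objs[i]))
			return -1;	/* no id allocated, no list entry */

	/* only now allocate the id and link the lessee (idr_alloc etc.) */
	return printf("lessee published\n") > 0 ? 0 : -1;
}

int main(void)
{
	int good[] = { 1, 2 }, bad[] = { 1, -2 };

	lease_create(good, 2);
	return lease_create(bad, 2) ? 0 : 1;
}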
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 37a93cdffb4a..2c90519576a3 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format)
 }
 
 /*
- * setplane_internal - setplane handler for internal callers
+ * __setplane_internal - setplane handler for internal callers
 *
- * Note that we assume an extra reference has already been taken on fb. If the
- * update fails, this reference will be dropped before return; if it succeeds,
- * the previous framebuffer (if any) will be unreferenced instead.
+ * This function will take a reference on the new fb for the plane
+ * on success.
 *
 * src_{x,y,w,h} are provided in 16.16 fixed point format
 */
@@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane,
 	if (!ret) {
 		plane->crtc = crtc;
 		plane->fb = fb;
-		fb = NULL;
+		drm_framebuffer_get(plane->fb);
 	} else {
 		plane->old_fb = NULL;
 	}
 
 out:
-	if (fb)
-		drm_framebuffer_put(fb);
 	if (plane->old_fb)
 		drm_framebuffer_put(plane->old_fb);
 	plane->old_fb = NULL;
@@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 	struct drm_plane *plane;
 	struct drm_crtc *crtc = NULL;
 	struct drm_framebuffer *fb = NULL;
+	int ret;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
@@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 		}
 	}
 
-	/*
-	 * setplane_internal will take care of deref'ing either the old or new
-	 * framebuffer depending on success.
-	 */
-	return setplane_internal(plane, crtc, fb,
-				 plane_req->crtc_x, plane_req->crtc_y,
-				 plane_req->crtc_w, plane_req->crtc_h,
-				 plane_req->src_x, plane_req->src_y,
-				 plane_req->src_w, plane_req->src_h);
+	ret = setplane_internal(plane, crtc, fb,
+				plane_req->crtc_x, plane_req->crtc_y,
+				plane_req->crtc_w, plane_req->crtc_h,
+				plane_req->src_x, plane_req->src_y,
+				plane_req->src_w, plane_req->src_h);
+
+	if (fb)
+		drm_framebuffer_put(fb);
+
+	return ret;
 }
 
 static int drm_mode_cursor_universal(struct drm_crtc *crtc,
@@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
 		src_h = fb->height << 16;
 	}
 
-	/*
-	 * setplane_internal will take care of deref'ing either the old or new
-	 * framebuffer depending on success.
-	 */
 	ret = __setplane_internal(crtc->cursor, crtc, fb,
 				  crtc_x, crtc_y, crtc_w, crtc_h,
 				  0, 0, src_w, src_h, ctx);
+
+	if (fb)
+		drm_framebuffer_put(fb);
 
 	/* Update successful; save new cursor position, if necessary */
 	if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
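The drm_plane change swaps reference-ownership conventions: instead of __setplane_internal consuming the caller's framebuffer reference on every path, it now takes its own reference on success and every caller unconditionally drops the reference it looked up. A tiny counter-based sketch of that convention, with hypothetical names:

#include <assert.h>

static int fb_refs;

static void fb_get(void) { fb_refs++; }
static void fb_put(void) { fb_refs--; }

/* Callee no longer consumes the caller's reference; it takes its
 * own on success, mirroring drm_framebuffer_get(plane->fb). */
static int setplane(int will_succeed)
{
	if (!will_succeed)
		return -1;
	fb_get();
	return 0;
}

int main(void)
{
	fb_get();		/* caller's lookup reference */
	int ret = setplane(1);
	fb_put();		/* caller always drops its own reference */

	assert(ret == 0 && fb_refs == 1);	/* plane still holds one */
	return 0;
}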
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 131695915acd..0b7b0d1ad2d5 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -399,23 +399,6 @@ static const struct file_operations drm_syncobj_file_fops = {
 	.release = drm_syncobj_file_release,
 };
 
-static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
-{
-	struct file *file = anon_inode_getfile("syncobj_file",
-					       &drm_syncobj_file_fops,
-					       syncobj, 0);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	drm_syncobj_get(syncobj);
-	if (cmpxchg(&syncobj->file, NULL, file)) {
-		/* lost the race */
-		fput(file);
-	}
-
-	return 0;
-}
-
 /**
  * drm_syncobj_get_fd - get a file descriptor from a syncobj
  * @syncobj: Sync object to export
@@ -427,21 +410,24 @@ static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
 */
 int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
 {
-	int ret;
+	struct file *file;
 	int fd;
 
 	fd = get_unused_fd_flags(O_CLOEXEC);
 	if (fd < 0)
 		return fd;
 
-	if (!syncobj->file) {
-		ret = drm_syncobj_alloc_file(syncobj);
-		if (ret) {
-			put_unused_fd(fd);
-			return ret;
-		}
-	}
-	fd_install(fd, syncobj->file);
+	file = anon_inode_getfile("syncobj_file",
+				  &drm_syncobj_file_fops,
+				  syncobj, 0);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		return PTR_ERR(file);
+	}
+
+	drm_syncobj_get(syncobj);
+	fd_install(fd, file);
+
 	*p_fd = fd;
 	return 0;
 }
@@ -461,31 +447,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
 	return ret;
 }
 
-static struct drm_syncobj *drm_syncobj_fdget(int fd)
-{
-	struct file *file = fget(fd);
-
-	if (!file)
-		return NULL;
-	if (file->f_op != &drm_syncobj_file_fops)
-		goto err;
-
-	return file->private_data;
-err:
-	fput(file);
-	return NULL;
-};
-
 static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
 				    int fd, u32 *handle)
 {
-	struct drm_syncobj *syncobj = drm_syncobj_fdget(fd);
+	struct drm_syncobj *syncobj;
+	struct file *file;
 	int ret;
 
-	if (!syncobj)
+	file = fget(fd);
+	if (!file)
 		return -EINVAL;
 
+	if (file->f_op != &drm_syncobj_file_fops) {
+		fput(file);
+		return -EINVAL;
+	}
+
 	/* take a reference to put in the idr */
+	syncobj = file->private_data;
 	drm_syncobj_get(syncobj);
 
 	idr_preload(GFP_KERNEL);
@@ -494,12 +473,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
 	spin_unlock(&file_private->syncobj_table_lock);
 	idr_preload_end();
 
-	if (ret < 0) {
-		fput(syncobj->file);
-		return ret;
-	}
-	*handle = ret;
-	return 0;
+	if (ret > 0) {
+		*handle = ret;
+		ret = 0;
+	} else
+		drm_syncobj_put(syncobj);
+
+	fput(file);
+	return ret;
 }
 
 static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
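Two things change in drm_syncobj: exporting no longer caches a struct file on the syncobj (each export gets its own anon-inode file, which removes the cmpxchg race and the stale-file error paths), and importing follows the standard fget / f_op-check / fput discipline. A condensed sketch of the import side (kernel context assumed, hypothetical names; grab() stands in for taking the object's own reference before the file is released):

#include <linux/file.h>
#include <linux/fs.h>

static const struct file_operations demo_fops;

/* Returns the private object behind fd iff the fd really is ours. */
static void *demo_obj_from_fd(int fd, int (*grab)(void *))
{
	struct file *file = fget(fd);
	void *obj = NULL;

	if (!file)
		return NULL;

	if (file->f_op == &demo_fops) {	/* prove the fd's type first */
		obj = file->private_data;
		if (grab(obj))		/* take our own reference ... */
			obj = NULL;
	}

	fput(file);			/* ... before dropping the file */
	return obj;
}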
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index edec15d19538..c8454ac43fae 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2819,12 +2819,12 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 }
 
 static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt,
-		unsigned int opcode, int rings)
+		unsigned int opcode, unsigned long rings)
 {
 	struct cmd_info *info = NULL;
 	unsigned int ring;
 
-	for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) {
+	for_each_set_bit(ring, &rings, I915_NUM_ENGINES) {
 		info = find_cmd_entry(gvt, opcode, ring);
 		if (info)
 			break;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index c4f752eeadcc..a529d2bd393c 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1410,12 +1410,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(
 			return ret;
 	} else {
 		if (!test_bit(index, spt->post_shadow_bitmap)) {
+			int type = spt->shadow_page.type;
+
 			ppgtt_get_shadow_entry(spt, &se, index);
 			ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
 			if (ret)
 				return ret;
+			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
+			ppgtt_set_shadow_entry(spt, &se, index);
 		}
-
 		ppgtt_set_post_shadow(spt, index);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ba9f67c256f4..8bc3283484be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -467,7 +467,7 @@ static void __fence_set_priority(struct dma_fence *fence, int prio)
 	struct drm_i915_gem_request *rq;
 	struct intel_engine_cs *engine;
 
-	if (!dma_fence_is_i915(fence))
+	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
 		return;
 
 	rq = to_request(fence);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 41285bec8fc0..505c605eff98 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7029,6 +7029,7 @@ enum {
 #define  RESET_PCH_HANDSHAKE_ENABLE	(1<<4)
 
 #define GEN8_CHICKEN_DCPR_1		_MMIO(0x46430)
+#define   SKL_SELECT_ALTERNATE_DC_EXIT	(1<<30)
 #define   MASK_WAKEMEM			(1<<13)
 
 #define SKL_DFSM			_MMIO(0x51000)
@@ -7078,6 +7079,8 @@ enum {
 #define GEN9_SLICE_COMMON_ECO_CHICKEN0	_MMIO(0x7308)
 #define  DISABLE_PIXEL_MASK_CAMMING	(1<<14)
 
+#define GEN9_SLICE_COMMON_ECO_CHICKEN1	_MMIO(0x731c)
+
 #define GEN7_L3SQCREG1			_MMIO(0xB010)
 #define  VLV_B0_WA_L3SQCREG1_VALUE	0x00D30000
 
@@ -8585,6 +8588,7 @@ enum skl_power_gate {
 #define  BXT_CDCLK_CD2X_DIV_SEL_2	(2<<22)
 #define  BXT_CDCLK_CD2X_DIV_SEL_4	(3<<22)
 #define  BXT_CDCLK_CD2X_PIPE(pipe)	((pipe)<<20)
+#define  CDCLK_DIVMUX_CD_OVERRIDE	(1<<19)
 #define  BXT_CDCLK_CD2X_PIPE_NONE	BXT_CDCLK_CD2X_PIPE(3)
 #define  BXT_CDCLK_SSA_PRECHARGE_ENABLE	(1<<16)
 #define  CDCLK_FREQ_DECIMAL_MASK	(0x7ff)
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 9c5ceb98d48f..d77e2bec1e29 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -931,16 +931,10 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv,
 
 static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco)
 {
-	int min_cdclk = skl_calc_cdclk(0, vco);
 	u32 val;
 
 	WARN_ON(vco != 8100000 && vco != 8640000);
 
-	/* select the minimum CDCLK before enabling DPLL 0 */
-	val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk);
-	I915_WRITE(CDCLK_CTL, val);
-	POSTING_READ(CDCLK_CTL);
-
 	/*
 	 * We always enable DPLL0 with the lowest link rate possible, but still
 	 * taking into account the VCO required to operate the eDP panel at the
@@ -994,7 +988,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 {
 	int cdclk = cdclk_state->cdclk;
 	int vco = cdclk_state->vco;
-	u32 freq_select;
+	u32 freq_select, cdclk_ctl;
 	int ret;
 
 	mutex_lock(&dev_priv->pcu_lock);
@@ -1009,7 +1003,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 		return;
 	}
 
-	/* set CDCLK_CTL */
+	/* Choose frequency for this cdclk */
 	switch (cdclk) {
 	default:
 		WARN_ON(cdclk != dev_priv->cdclk.hw.ref);
@@ -1036,10 +1030,33 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 	    dev_priv->cdclk.hw.vco != vco)
 		skl_dpll0_disable(dev_priv);
 
+	cdclk_ctl = I915_READ(CDCLK_CTL);
+
+	if (dev_priv->cdclk.hw.vco != vco) {
+		/* Wa Display #1183: skl,kbl,cfl */
+		cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+		cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk);
+		I915_WRITE(CDCLK_CTL, cdclk_ctl);
+	}
+
+	/* Wa Display #1183: skl,kbl,cfl */
+	cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE;
+	I915_WRITE(CDCLK_CTL, cdclk_ctl);
+	POSTING_READ(CDCLK_CTL);
+
 	if (dev_priv->cdclk.hw.vco != vco)
 		skl_dpll0_enable(dev_priv, vco);
 
-	I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk));
+	/* Wa Display #1183: skl,kbl,cfl */
+	cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+	I915_WRITE(CDCLK_CTL, cdclk_ctl);
+
+	cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk);
+	I915_WRITE(CDCLK_CTL, cdclk_ctl);
+
+	/* Wa Display #1183: skl,kbl,cfl */
+	cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE;
+	I915_WRITE(CDCLK_CTL, cdclk_ctl);
 	POSTING_READ(CDCLK_CTL);
 
 	/* inform PCU of the change */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ebdcbcbacb3c..6bb51a502b8b 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1338,6 +1338,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
+	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+	ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+	if (ret)
+		return ret;
+
 	/* WaToEnableHwFixForPushConstHWBug:glk */
 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 739c33b07c59..7ece2f061b9e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1002,6 +1002,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
+	if (i915_gem_request_completed(request))
+		return;
+
 	if (prio <= READ_ONCE(request->priotree.priority))
 		return;
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index db9d57f39534..d758da6156a8 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -600,6 +600,11 @@ void gen9_enable_dc5(struct drm_i915_private *dev_priv)
 
 	DRM_DEBUG_KMS("Enabling DC5\n");
 
+	/* Wa Display #1183: skl,kbl,cfl */
+	if (IS_GEN9_BC(dev_priv))
+		I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+			   SKL_SELECT_ALTERNATE_DC_EXIT);
+
 	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
 }
 
@@ -627,6 +632,11 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv)
 {
 	DRM_DEBUG_KMS("Disabling DC6\n");
 
+	/* Wa Display #1183: skl,kbl,cfl */
+	if (IS_GEN9_BC(dev_priv))
+		I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+			   SKL_SELECT_ALTERNATE_DC_EXIT);
+
 	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index ce328edee7a1..41e7f2927443 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 		/* Determine if we can get a cache-coherent map, forcing
 		 * uncached mapping if we can't.
 		 */
-		if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
+		if (!nouveau_drm_use_coherent_gpu_mapping(drm))
 			nvbo->force_coherent = true;
 	}
 
@@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 		if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
 		    (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
 			continue;
-		if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host)
+		if ((flags & TTM_PL_FLAG_TT) &&
+		    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
 			continue;
 
 		/* Select this page size if it's the first that supports
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 8d4a5be3b913..56fe261b6268 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
152 work->cli = cli; 152 work->cli = cli;
153 mutex_lock(&cli->lock); 153 mutex_lock(&cli->lock);
154 list_add_tail(&work->head, &cli->worker); 154 list_add_tail(&work->head, &cli->worker);
155 mutex_unlock(&cli->lock);
156 if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence)) 155 if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence))
157 nouveau_cli_work_fence(fence, &work->cb); 156 nouveau_cli_work_fence(fence, &work->cb);
157 mutex_unlock(&cli->lock);
158} 158}
159 159
160static void 160static void
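
The nouveau_drm.c hunk moves the mutex_unlock() so the fence callback is registered while cli->lock is still held; presumably this keeps the callback, which can run immediately if the fence has already signalled, from racing with the list insertion. A small userspace sketch of the pattern, with pthreads standing in for the kernel mutex and all names invented for illustration:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

struct work { int queued; };            /* stand-in for the queued work item */

static void fence_callback(struct work *w)
{
    /* May fire as soon as it is registered; it must only ever see the
     * work item after it has been fully linked in. */
    printf("callback sees queued=%d\n", w->queued);
}

static int add_callback(void (*cb)(struct work *), struct work *w)
{
    (void)cb; (void)w;
    return 1;                           /* pretend the fence already signalled */
}

static void queue_work(struct work *w)
{
    pthread_mutex_lock(&lock);
    w->queued = 1;                      /* list_add_tail() in the driver */
    if (add_callback(fence_callback, w))
        fence_callback(w);              /* fence already done: run it now */
    pthread_mutex_unlock(&lock);        /* unlock only after registration */
}

int main(void)
{
    struct work w = { 0 };
    queue_work(&w);
    return 0;
}
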
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 3331e82ae9e7..96f6bd8aee5d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -157,8 +157,8 @@ struct nouveau_drm {
157 struct nvif_object copy; 157 struct nvif_object copy;
158 int mtrr; 158 int mtrr;
159 int type_vram; 159 int type_vram;
160 int type_host; 160 int type_host[2];
161 int type_ncoh; 161 int type_ncoh[2];
162 } ttm; 162 } ttm;
163 163
164 /* GEM interface support */ 164 /* GEM interface support */
@@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev)
217 return dev->dev_private; 217 return dev->dev_private;
218} 218}
219 219
220static inline bool
221nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm)
222{
223 struct nvif_mmu *mmu = &drm->client.mmu;
224 return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED);
225}
226
220int nouveau_pmops_suspend(struct device *); 227int nouveau_pmops_suspend(struct device *);
221int nouveau_pmops_resume(struct device *); 228int nouveau_pmops_resume(struct device *);
222bool nouveau_pmops_runtime(void); 229bool nouveau_pmops_runtime(void);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 45a4572cd2fb..ee5d1dc2eaf5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -421,7 +421,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
421 drm_fb_helper_unregister_fbi(&fbcon->helper); 421 drm_fb_helper_unregister_fbi(&fbcon->helper);
422 drm_fb_helper_fini(&fbcon->helper); 422 drm_fb_helper_fini(&fbcon->helper);
423 423
424 if (nouveau_fb->nvbo) { 424 if (nouveau_fb && nouveau_fb->nvbo) {
425 nouveau_vma_del(&nouveau_fb->vma); 425 nouveau_vma_del(&nouveau_fb->vma);
426 nouveau_bo_unmap(nouveau_fb->nvbo); 426 nouveau_bo_unmap(nouveau_fb->nvbo);
427 nouveau_bo_unpin(nouveau_fb->nvbo); 427 nouveau_bo_unpin(nouveau_fb->nvbo);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 589a9621db76..c002f8968507 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt)
103 u8 type; 103 u8 type;
104 int ret; 104 int ret;
105 105
106 if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) 106 if (!nouveau_drm_use_coherent_gpu_mapping(drm))
107 type = drm->ttm.type_ncoh; 107 type = drm->ttm.type_ncoh[!!mem->kind];
108 else 108 else
109 type = drm->ttm.type_host; 109 type = drm->ttm.type_host[0];
110 110
111 if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) 111 if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND))
112 mem->comp = mem->kind = 0; 112 mem->comp = mem->kind = 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 08b974b30482..dff51a0ee028 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
235 drm->ttm.mem_global_ref.release = NULL; 235 drm->ttm.mem_global_ref.release = NULL;
236} 236}
237 237
238int 238static int
239nouveau_ttm_init(struct nouveau_drm *drm) 239nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind)
240{ 240{
241 struct nvkm_device *device = nvxx_device(&drm->client.device);
242 struct nvkm_pci *pci = device->pci;
243 struct nvif_mmu *mmu = &drm->client.mmu; 241 struct nvif_mmu *mmu = &drm->client.mmu;
244 struct drm_device *dev = drm->dev; 242 int typei;
245 int typei, ret;
246 243
247 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | 244 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
248 NVIF_MEM_COHERENT); 245 kind | NVIF_MEM_COHERENT);
249 if (typei < 0) 246 if (typei < 0)
250 return -ENOSYS; 247 return -ENOSYS;
251 248
252 drm->ttm.type_host = typei; 249 drm->ttm.type_host[!!kind] = typei;
253 250
254 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); 251 typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind);
255 if (typei < 0) 252 if (typei < 0)
256 return -ENOSYS; 253 return -ENOSYS;
257 254
258 drm->ttm.type_ncoh = typei; 255 drm->ttm.type_ncoh[!!kind] = typei;
256 return 0;
257}
258
259int
260nouveau_ttm_init(struct nouveau_drm *drm)
261{
262 struct nvkm_device *device = nvxx_device(&drm->client.device);
263 struct nvkm_pci *pci = device->pci;
264 struct nvif_mmu *mmu = &drm->client.mmu;
265 struct drm_device *dev = drm->dev;
266 int typei, ret;
267
268 ret = nouveau_ttm_init_host(drm, 0);
269 if (ret)
270 return ret;
271
272 if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
273 drm->client.device.info.chipset != 0x50) {
274 ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND);
275 if (ret)
276 return ret;
277 }
259 278
260 if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && 279 if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC &&
261 drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { 280 drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index 9e2628dd8e4d..f5371d96b003 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma)
67 nvif_vmm_put(&vma->vmm->vmm, &tmp); 67 nvif_vmm_put(&vma->vmm->vmm, &tmp);
68 } 68 }
69 list_del(&vma->head); 69 list_del(&vma->head);
70 *pvma = NULL;
71 kfree(*pvma); 70 kfree(*pvma);
71 *pvma = NULL;
72 } 72 }
73} 73}
74 74
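
The nouveau_vmm.c fix is a pure ordering bug: the old code cleared *pvma before calling kfree(*pvma), so kfree() received NULL and the object leaked. The patch frees first, then clears the caller's pointer. A userspace reproduction with malloc/free standing in for kzalloc/kfree:

#include <stdlib.h>
#include <stdio.h>

struct vma { int dummy; };

/* Buggy order: *pvma is NULLed first, so free() gets NULL (a no-op)
 * and the allocation leaks. */
static void del_buggy(struct vma **pvma)
{
    *pvma = NULL;
    free(*pvma);
}

/* Fixed order: free the object, then clear the caller's pointer. */
static void del_fixed(struct vma **pvma)
{
    free(*pvma);
    *pvma = NULL;
}

int main(void)
{
    struct vma *a = malloc(sizeof(*a));
    struct vma *b = malloc(sizeof(*b));
    del_buggy(&a);                      /* leaks a */
    del_fixed(&b);                      /* frees b */
    printf("a=%p b=%p\n", (void *)a, (void *)b);
    return 0;
}
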
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index e14643615698..00eeaaffeae5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2369,7 +2369,7 @@ nv13b_chipset = {
2369 .imem = gk20a_instmem_new, 2369 .imem = gk20a_instmem_new,
2370 .ltc = gp100_ltc_new, 2370 .ltc = gp100_ltc_new,
2371 .mc = gp10b_mc_new, 2371 .mc = gp10b_mc_new,
2372 .mmu = gf100_mmu_new, 2372 .mmu = gp10b_mmu_new,
2373 .secboot = gp10b_secboot_new, 2373 .secboot = gp10b_secboot_new,
2374 .pmu = gm20b_pmu_new, 2374 .pmu = gm20b_pmu_new,
2375 .timer = gk20a_timer_new, 2375 .timer = gk20a_timer_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
index a2978a37b4f3..700fc754f28a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -174,6 +174,7 @@ gf119_sor = {
174 .links = gf119_sor_dp_links, 174 .links = gf119_sor_dp_links,
175 .power = g94_sor_dp_power, 175 .power = g94_sor_dp_power,
176 .pattern = gf119_sor_dp_pattern, 176 .pattern = gf119_sor_dp_pattern,
177 .drive = gf119_sor_dp_drive,
177 .vcpi = gf119_sor_dp_vcpi, 178 .vcpi = gf119_sor_dp_vcpi,
178 .audio = gf119_sor_dp_audio, 179 .audio = gf119_sor_dp_audio,
179 .audio_sym = gf119_sor_dp_audio_sym, 180 .audio_sym = gf119_sor_dp_audio_sym,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index 972370ed36f0..7c7efa4ea0d0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
36 if (data) { 36 if (data) {
37 *ver = nvbios_rd08(bios, data + 0x00); 37 *ver = nvbios_rd08(bios, data + 0x00);
38 switch (*ver) { 38 switch (*ver) {
39 case 0x20:
39 case 0x21: 40 case 0x21:
40 case 0x30: 41 case 0x30:
41 case 0x40: 42 case 0x40:
@@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
63 if (data && idx < *cnt) { 64 if (data && idx < *cnt) {
64 u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len); 65 u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len);
65 switch (*ver * !!outp) { 66 switch (*ver * !!outp) {
67 case 0x20:
66 case 0x21: 68 case 0x21:
67 case 0x30: 69 case 0x30:
68 *hdr = nvbios_rd08(bios, data + 0x04); 70 *hdr = nvbios_rd08(bios, data + 0x04);
@@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
96 info->type = nvbios_rd16(bios, data + 0x00); 98 info->type = nvbios_rd16(bios, data + 0x00);
97 info->mask = nvbios_rd16(bios, data + 0x02); 99 info->mask = nvbios_rd16(bios, data + 0x02);
98 switch (*ver) { 100 switch (*ver) {
101 case 0x20:
102 info->mask |= 0x00c0; /* match any link */
103 /* fall-through */
99 case 0x21: 104 case 0x21:
100 case 0x30: 105 case 0x30:
101 info->flags = nvbios_rd08(bios, data + 0x05); 106 info->flags = nvbios_rd08(bios, data + 0x05);
102 info->script[0] = nvbios_rd16(bios, data + 0x06); 107 info->script[0] = nvbios_rd16(bios, data + 0x06);
103 info->script[1] = nvbios_rd16(bios, data + 0x08); 108 info->script[1] = nvbios_rd16(bios, data + 0x08);
104 info->lnkcmp = nvbios_rd16(bios, data + 0x0a); 109 if (*len >= 0x0c)
110 info->lnkcmp = nvbios_rd16(bios, data + 0x0a);
105 if (*len >= 0x0f) { 111 if (*len >= 0x0f) {
106 info->script[2] = nvbios_rd16(bios, data + 0x0c); 112 info->script[2] = nvbios_rd16(bios, data + 0x0c);
107 info->script[3] = nvbios_rd16(bios, data + 0x0e); 113 info->script[3] = nvbios_rd16(bios, data + 0x0e);
@@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
170 memset(info, 0x00, sizeof(*info)); 176 memset(info, 0x00, sizeof(*info));
171 if (data) { 177 if (data) {
172 switch (*ver) { 178 switch (*ver) {
179 case 0x20:
173 case 0x21: 180 case 0x21:
174 info->dc = nvbios_rd08(bios, data + 0x02); 181 info->dc = nvbios_rd08(bios, data + 0x02);
175 info->pe = nvbios_rd08(bios, data + 0x03); 182 info->pe = nvbios_rd08(bios, data + 0x03);
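
The bios/dp.c hunks add DP table version 0x20 support, and because those entries are shorter, lnkcmp is now read only when the entry length covers it (*len >= 0x0c), mirroring the existing guard for script[2]/script[3]. A minimal sketch of length-checked table parsing; the struct layout and offsets below are invented for illustration:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct dpout { uint16_t lnkcmp; uint16_t script2; };

/* Parse an entry of 'len' bytes; only read fields the entry is
 * actually long enough to contain. */
static void parse_entry(const uint8_t *e, size_t len, struct dpout *out)
{
    memset(out, 0, sizeof(*out));
    if (len >= 0x0c)
        memcpy(&out->lnkcmp, e + 0x0a, 2);
    if (len >= 0x0f)
        memcpy(&out->script2, e + 0x0c, 2);
}

int main(void)
{
    uint8_t short_entry[0x0b] = { 0 };  /* a v2.0-style short entry */
    struct dpout out;

    parse_entry(short_entry, sizeof(short_entry), &out);
    printf("lnkcmp=%u\n", out.lnkcmp);  /* left at 0, no overread */
    return 0;
}
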
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
index 1ba7289684aa..db48a1daca0c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
@@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
249 iobj->base.memory.ptrs = &nv50_instobj_fast; 249 iobj->base.memory.ptrs = &nv50_instobj_fast;
250 else 250 else
251 iobj->base.memory.ptrs = &nv50_instobj_slow; 251 iobj->base.memory.ptrs = &nv50_instobj_slow;
252 refcount_inc(&iobj->maps); 252 refcount_set(&iobj->maps, 1);
253 } 253 }
254 254
255 mutex_unlock(&imem->subdev.mutex); 255 mutex_unlock(&imem->subdev.mutex);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index b1b1f3626b96..deb96de54b00 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
136 return ret; 136 return ret;
137 137
138 pci->irq = pdev->irq; 138 pci->irq = pdev->irq;
139
140 /* Ensure MSI interrupts are armed, for the case where there are
141 * already interrupts pending (for whatever reason) at load time.
142 */
143 if (pci->msi)
144 pci->func->msi_rearm(pci);
145
139 return ret; 146 return ret;
140} 147}
141 148
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c
index e626eddf24d5..23db74ae1826 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c
@@ -78,6 +78,8 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core)
78 78
79 /* then read the message */ 79 /* then read the message */
80 msg.len = cnt & 0xf; 80 msg.len = cnt & 0xf;
81 if (msg.len > CEC_MAX_MSG_SIZE - 2)
82 msg.len = CEC_MAX_MSG_SIZE - 2;
81 msg.msg[0] = hdmi_read_reg(core->base, 83 msg.msg[0] = hdmi_read_reg(core->base,
82 HDMI_CEC_RX_CMD_HEADER); 84 HDMI_CEC_RX_CMD_HEADER);
83 msg.msg[1] = hdmi_read_reg(core->base, 85 msg.msg[1] = hdmi_read_reg(core->base,
@@ -104,26 +106,6 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core)
104 } 106 }
105} 107}
106 108
107static void hdmi_cec_transmit_fifo_empty(struct hdmi_core_data *core, u32 stat1)
108{
109 if (stat1 & 2) {
110 u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3);
111
112 cec_transmit_done(core->adap,
113 CEC_TX_STATUS_NACK |
114 CEC_TX_STATUS_MAX_RETRIES,
115 0, (dbg3 >> 4) & 7, 0, 0);
116 } else if (stat1 & 1) {
117 cec_transmit_done(core->adap,
118 CEC_TX_STATUS_ARB_LOST |
119 CEC_TX_STATUS_MAX_RETRIES,
120 0, 0, 0, 0);
121 } else if (stat1 == 0) {
122 cec_transmit_done(core->adap, CEC_TX_STATUS_OK,
123 0, 0, 0, 0);
124 }
125}
126
127void hdmi4_cec_irq(struct hdmi_core_data *core) 109void hdmi4_cec_irq(struct hdmi_core_data *core)
128{ 110{
129 u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); 111 u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0);
@@ -132,27 +114,21 @@ void hdmi4_cec_irq(struct hdmi_core_data *core)
132 hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_0, stat0); 114 hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_0, stat0);
133 hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, stat1); 115 hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, stat1);
134 116
135 if (stat0 & 0x40) 117 if (stat0 & 0x20) {
118 cec_transmit_done(core->adap, CEC_TX_STATUS_OK,
119 0, 0, 0, 0);
136 REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); 120 REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
137 else if (stat0 & 0x24) 121 } else if (stat1 & 0x02) {
138 hdmi_cec_transmit_fifo_empty(core, stat1);
139 if (stat1 & 2) {
140 u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); 122 u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3);
141 123
142 cec_transmit_done(core->adap, 124 cec_transmit_done(core->adap,
143 CEC_TX_STATUS_NACK | 125 CEC_TX_STATUS_NACK |
144 CEC_TX_STATUS_MAX_RETRIES, 126 CEC_TX_STATUS_MAX_RETRIES,
145 0, (dbg3 >> 4) & 7, 0, 0); 127 0, (dbg3 >> 4) & 7, 0, 0);
146 } else if (stat1 & 1) { 128 REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
147 cec_transmit_done(core->adap,
148 CEC_TX_STATUS_ARB_LOST |
149 CEC_TX_STATUS_MAX_RETRIES,
150 0, 0, 0, 0);
151 } 129 }
152 if (stat0 & 0x02) 130 if (stat0 & 0x02)
153 hdmi_cec_received_msg(core); 131 hdmi_cec_received_msg(core);
154 if (stat1 & 0x3)
155 REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
156} 132}
157 133
158static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) 134static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap)
@@ -231,18 +207,14 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable)
231 /* 207 /*
232 * Enable CEC interrupts: 208 * Enable CEC interrupts:
233 * Transmit Buffer Full/Empty Change event 209 * Transmit Buffer Full/Empty Change event
234 * Transmitter FIFO Empty event
235 * Receiver FIFO Not Empty event 210 * Receiver FIFO Not Empty event
236 */ 211 */
237 hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x26); 212 hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x22);
238 /* 213 /*
239 * Enable CEC interrupts: 214 * Enable CEC interrupts:
240 * RX FIFO Overrun Error event
241 * Short Pulse Detected event
242 * Frame Retransmit Count Exceeded event 215 * Frame Retransmit Count Exceeded event
243 * Start Bit Irregularity event
244 */ 216 */
245 hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x0f); 217 hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x02);
246 218
247 /* cec calibration enable (self clearing) */ 219 /* cec calibration enable (self clearing) */
248 hdmi_write_reg(core->base, HDMI_CEC_SETUP, 0x03); 220 hdmi_write_reg(core->base, HDMI_CEC_SETUP, 0x03);
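
The clamp added in hdmi_cec_received_msg() keeps the register-read loop from indexing past msg.msg[]: the two header bytes come from separate registers, so at most CEC_MAX_MSG_SIZE - 2 payload bytes can follow. A generic sketch of the bounds clamp, with the register read faked:

#include <stdio.h>

#define CEC_MAX_MSG_SIZE 16

static unsigned int read_len_reg(void) { return 0x1f; }  /* bogus hardware value */

int main(void)
{
    unsigned char msg[CEC_MAX_MSG_SIZE];
    unsigned int len = read_len_reg() & 0xf;

    /* Two bytes (header + opcode) are read separately, so only
     * CEC_MAX_MSG_SIZE - 2 payload bytes may follow them. */
    if (len > CEC_MAX_MSG_SIZE - 2)
        len = CEC_MAX_MSG_SIZE - 2;

    for (unsigned int i = 0; i < len; i++)
        msg[2 + i] = 0;                 /* safe: 2 + len <= CEC_MAX_MSG_SIZE */

    printf("payload bytes: %u\n", len);
    return 0;
}
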
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index dda904ec0534..500b6fb3e028 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder,
175 writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG); 175 writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG);
176} 176}
177 177
178static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder,
179 const struct drm_display_mode *mode)
180{
181 struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder);
182 unsigned long rate = mode->clock * 1000;
183 unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */
184 long rounded_rate;
185
186 /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */
187 if (rate > 165000000)
188 return MODE_CLOCK_HIGH;
189 rounded_rate = clk_round_rate(hdmi->tmds_clk, rate);
190 if (rounded_rate > 0 &&
191 max_t(unsigned long, rounded_rate, rate) -
192 min_t(unsigned long, rounded_rate, rate) < diff)
193 return MODE_OK;
194 return MODE_NOCLOCK;
195}
196
178static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = { 197static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
179 .atomic_check = sun4i_hdmi_atomic_check, 198 .atomic_check = sun4i_hdmi_atomic_check,
180 .disable = sun4i_hdmi_disable, 199 .disable = sun4i_hdmi_disable,
181 .enable = sun4i_hdmi_enable, 200 .enable = sun4i_hdmi_enable,
182 .mode_set = sun4i_hdmi_mode_set, 201 .mode_set = sun4i_hdmi_mode_set,
202 .mode_valid = sun4i_hdmi_mode_valid,
183}; 203};
184 204
185static const struct drm_encoder_funcs sun4i_hdmi_funcs = { 205static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
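
In the new sun4i_hdmi_mode_valid(), diff = rate / 200 is the ±0.5% tolerance the HDMI spec allows (1/200 = 0.005): the mode is accepted only if the clock the TMDS PLL can actually produce lands within that window of the requested pixel clock. A worked userspace version of the same check, with a toy round-rate function standing in for clk_round_rate():

#include <stdio.h>

/* Pretend PLL: rounds to a multiple of 250 kHz. */
static long round_rate(unsigned long rate)
{
    return (long)(rate / 250000 * 250000);
}

static int mode_ok(unsigned long rate)
{
    unsigned long diff = rate / 200;    /* 1/200 == 0.5% tolerance */
    long rounded = round_rate(rate);
    unsigned long delta = rounded > (long)rate ? rounded - rate
                                               : rate - rounded;
    return rounded > 0 && delta < diff;
}

int main(void)
{
    /* 74.25 MHz (720p): the fake PLL hits it exactly, so delta is 0. */
    printf("74.25 MHz: %s\n", mode_ok(74250000) ? "MODE_OK" : "MODE_NOCLOCK");
    return 0;
}
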
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index a897f82d9e66..b78fed809992 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -946,7 +946,7 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
946 if (IS_ERR(tcon->crtc)) { 946 if (IS_ERR(tcon->crtc)) {
947 dev_err(dev, "Couldn't create our CRTC\n"); 947 dev_err(dev, "Couldn't create our CRTC\n");
948 ret = PTR_ERR(tcon->crtc); 948 ret = PTR_ERR(tcon->crtc);
949 goto err_free_clocks; 949 goto err_free_dotclock;
950 } 950 }
951 951
952 /* 952 /*
@@ -965,7 +965,7 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
965 of_node_put(remote); 965 of_node_put(remote);
966 966
967 if (ret < 0) 967 if (ret < 0)
968 goto err_free_clocks; 968 goto err_free_dotclock;
969 969
970 if (tcon->quirks->needs_de_be_mux) { 970 if (tcon->quirks->needs_de_be_mux) {
971 /* 971 /*
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 4be9edf9c6fe..7d2a955fc515 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3048,6 +3048,7 @@ static int tegra_sor_probe(struct platform_device *pdev)
3048 goto remove; 3048 goto remove;
3049 } 3049 }
3050 } else { 3050 } else {
3051 /* fall back to the module clock on SOR0 (eDP/LVDS only) */
3051 sor->clk_out = sor->clk; 3052 sor->clk_out = sor->clk;
3052 } 3053 }
3053 3054
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index f1a3d55ead83..79854ab3bc47 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -1007,6 +1007,8 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
1007 pr_info("Initializing pool allocator\n"); 1007 pr_info("Initializing pool allocator\n");
1008 1008
1009 _manager = kzalloc(sizeof(*_manager), GFP_KERNEL); 1009 _manager = kzalloc(sizeof(*_manager), GFP_KERNEL);
1010 if (!_manager)
1011 return -ENOMEM;
1010 1012
1011 ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 0); 1013 ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 0);
1012 1014
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index 26eddbb62893..3dd62d75f531 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -209,9 +209,6 @@ vc4_irq_postinstall(struct drm_device *dev)
209{ 209{
210 struct vc4_dev *vc4 = to_vc4_dev(dev); 210 struct vc4_dev *vc4 = to_vc4_dev(dev);
211 211
212 /* Undo the effects of a previous vc4_irq_uninstall. */
213 enable_irq(dev->irq);
214
215 /* Enable both the render done and out of memory interrupts. */ 212 /* Enable both the render done and out of memory interrupts. */
216 V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); 213 V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
217 214
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 622cd43840b8..493f392b3a0a 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -327,6 +327,9 @@ static int vc4_v3d_runtime_resume(struct device *dev)
327 return ret; 327 return ret;
328 328
329 vc4_v3d_init_hw(vc4->dev); 329 vc4_v3d_init_hw(vc4->dev);
330
331 /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */
332 enable_irq(vc4->dev->irq);
330 vc4_irq_postinstall(vc4->dev); 333 vc4_irq_postinstall(vc4->dev);
331 334
332 return 0; 335 return 0;
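
The vc4 pair of hunks rebalances the IRQ depth: disable_irq()/enable_irq() nest, so every disable needs exactly one matching enable. Calling enable_irq() in postinstall was wrong on the probe path (nothing had been disabled yet), so the enable moves to runtime_resume, which pairs with the disable done at suspend. A userspace counter sketch of the invariant:

#include <assert.h>
#include <stdio.h>

static int irq_depth;                   /* 0 = enabled, >0 = disabled (nests) */

static void disable_irq(void) { irq_depth++; }
static void enable_irq(void)  { assert(irq_depth > 0); irq_depth--; }

static void suspend(void)     { disable_irq(); }   /* irq_uninstall path */
static void resume(void)      { enable_irq(); }    /* runtime_resume path */
static void postinstall(void) { /* no enable_irq() here any more */ }

int main(void)
{
    postinstall();                      /* probe: depth stays 0, no underflow */
    suspend();
    resume();
    postinstall();
    printf("depth=%d\n", irq_depth);    /* 0: balanced */
    return 0;
}
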
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index b700667f6f0b..c9d5cc237124 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2731,6 +2731,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
2731 } 2731 }
2732 2732
2733 view_type = vmw_view_cmd_to_type(header->id); 2733 view_type = vmw_view_cmd_to_type(header->id);
2734 if (view_type == vmw_view_max)
2735 return -EINVAL;
2734 cmd = container_of(header, typeof(*cmd), header); 2736 cmd = container_of(header, typeof(*cmd), header);
2735 ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, 2737 ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
2736 user_surface_converter, 2738 user_surface_converter,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index a2a93d7e2a04..87a443013cbf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -683,7 +683,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
683 vps->pinned = 0; 683 vps->pinned = 0;
684 684
685 /* Mapping is managed by prepare_fb/cleanup_fb */ 685 /* Mapping is managed by prepare_fb/cleanup_fb */
686 memset(&vps->guest_map, 0, sizeof(vps->guest_map));
687 memset(&vps->host_map, 0, sizeof(vps->host_map)); 686 memset(&vps->host_map, 0, sizeof(vps->host_map));
688 vps->cpp = 0; 687 vps->cpp = 0;
689 688
@@ -746,11 +745,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
746 745
747 746
748 /* Should have been freed by cleanup_fb */ 747 /* Should have been freed by cleanup_fb */
749 if (vps->guest_map.virtual) {
750 DRM_ERROR("Guest mapping not freed\n");
751 ttm_bo_kunmap(&vps->guest_map);
752 }
753
754 if (vps->host_map.virtual) { 748 if (vps->host_map.virtual) {
755 DRM_ERROR("Host mapping not freed\n"); 749 DRM_ERROR("Host mapping not freed\n");
756 ttm_bo_kunmap(&vps->host_map); 750 ttm_bo_kunmap(&vps->host_map);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index ff9c8389ff21..cd9da2dd79af 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -175,7 +175,7 @@ struct vmw_plane_state {
175 int pinned; 175 int pinned;
176 176
177 /* For CPU Blit */ 177 /* For CPU Blit */
178 struct ttm_bo_kmap_obj host_map, guest_map; 178 struct ttm_bo_kmap_obj host_map;
179 unsigned int cpp; 179 unsigned int cpp;
180}; 180};
181 181
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 90b5437fd787..b68d74888ab1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -114,7 +114,7 @@ struct vmw_screen_target_display_unit {
114 bool defined; 114 bool defined;
115 115
116 /* For CPU Blit */ 116 /* For CPU Blit */
117 struct ttm_bo_kmap_obj host_map, guest_map; 117 struct ttm_bo_kmap_obj host_map;
118 unsigned int cpp; 118 unsigned int cpp;
119}; 119};
120 120
@@ -695,7 +695,8 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
695 s32 src_pitch, dst_pitch; 695 s32 src_pitch, dst_pitch;
696 u8 *src, *dst; 696 u8 *src, *dst;
697 bool not_used; 697 bool not_used;
698 698 struct ttm_bo_kmap_obj guest_map;
699 int ret;
699 700
700 if (!dirty->num_hits) 701 if (!dirty->num_hits)
701 return; 702 return;
@@ -706,6 +707,13 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
706 if (width == 0 || height == 0) 707 if (width == 0 || height == 0)
707 return; 708 return;
708 709
710 ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
711 &guest_map);
712 if (ret) {
713 DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
714 ret);
715 goto out_cleanup;
716 }
709 717
710 /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */ 718 /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
711 src_pitch = stdu->display_srf->base_size.width * stdu->cpp; 719 src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
@@ -713,7 +721,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
713 src += ddirty->top * src_pitch + ddirty->left * stdu->cpp; 721 src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
714 722
715 dst_pitch = ddirty->pitch; 723 dst_pitch = ddirty->pitch;
716 dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used); 724 dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
717 dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp; 725 dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
718 726
719 727
@@ -772,6 +780,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
772 vmw_fifo_commit(dev_priv, sizeof(*cmd)); 780 vmw_fifo_commit(dev_priv, sizeof(*cmd));
773 } 781 }
774 782
783 ttm_bo_kunmap(&guest_map);
775out_cleanup: 784out_cleanup:
776 ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX; 785 ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
777 ddirty->right = ddirty->bottom = S32_MIN; 786 ddirty->right = ddirty->bottom = S32_MIN;
@@ -1109,9 +1118,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
1109{ 1118{
1110 struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); 1119 struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
1111 1120
1112 if (vps->guest_map.virtual)
1113 ttm_bo_kunmap(&vps->guest_map);
1114
1115 if (vps->host_map.virtual) 1121 if (vps->host_map.virtual)
1116 ttm_bo_kunmap(&vps->host_map); 1122 ttm_bo_kunmap(&vps->host_map);
1117 1123
@@ -1277,33 +1283,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
1277 */ 1283 */
1278 if (vps->content_fb_type == SEPARATE_DMA && 1284 if (vps->content_fb_type == SEPARATE_DMA &&
1279 !(dev_priv->capabilities & SVGA_CAP_3D)) { 1285 !(dev_priv->capabilities & SVGA_CAP_3D)) {
1280
1281 struct vmw_framebuffer_dmabuf *new_vfbd;
1282
1283 new_vfbd = vmw_framebuffer_to_vfbd(new_fb);
1284
1285 ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false,
1286 NULL);
1287 if (ret)
1288 goto out_srf_unpin;
1289
1290 ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0,
1291 new_vfbd->buffer->base.num_pages,
1292 &vps->guest_map);
1293
1294 ttm_bo_unreserve(&new_vfbd->buffer->base);
1295
1296 if (ret) {
1297 DRM_ERROR("Failed to map content buffer to CPU\n");
1298 goto out_srf_unpin;
1299 }
1300
1301 ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0, 1286 ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
1302 vps->surf->res.backup->base.num_pages, 1287 vps->surf->res.backup->base.num_pages,
1303 &vps->host_map); 1288 &vps->host_map);
1304 if (ret) { 1289 if (ret) {
1305 DRM_ERROR("Failed to map display buffer to CPU\n"); 1290 DRM_ERROR("Failed to map display buffer to CPU\n");
1306 ttm_bo_kunmap(&vps->guest_map);
1307 goto out_srf_unpin; 1291 goto out_srf_unpin;
1308 } 1292 }
1309 1293
@@ -1350,7 +1334,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
1350 stdu->display_srf = vps->surf; 1334 stdu->display_srf = vps->surf;
1351 stdu->content_fb_type = vps->content_fb_type; 1335 stdu->content_fb_type = vps->content_fb_type;
1352 stdu->cpp = vps->cpp; 1336 stdu->cpp = vps->cpp;
1353 memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map));
1354 memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map)); 1337 memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
1355 1338
1356 if (!stdu->defined) 1339 if (!stdu->defined)
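
The vmwgfx stdu changes replace a long-lived guest mapping cached in the plane state with one that is mapped just for the duration of the CPU blit and unmapped on the way out, so there is no persistent mapping to leak or double-free across atomic updates. A minimal sketch of that scoped acquire/use/release shape, with invented stand-ins for ttm_bo_kmap()/ttm_bo_kunmap():

#include <stdio.h>
#include <stdlib.h>

struct map { void *ptr; };

static int  bo_kmap(struct map *m)   { m->ptr = malloc(64); return m->ptr ? 0 : -1; }
static void bo_kunmap(struct map *m) { free(m->ptr); m->ptr = NULL; }

static void commit_blit(void)
{
    struct map guest_map;               /* local: lives only for this blit */

    if (bo_kmap(&guest_map)) {
        fprintf(stderr, "map failed\n");
        return;
    }
    /* ... CPU blit using guest_map.ptr ... */
    bo_kunmap(&guest_map);              /* always released before return */
}

int main(void) { commit_blit(); return 0; }
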
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f3fcb836a1f9..0c3f608131cf 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -551,7 +551,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item)
551 ret = hid_add_field(parser, HID_FEATURE_REPORT, data); 551 ret = hid_add_field(parser, HID_FEATURE_REPORT, data);
552 break; 552 break;
553 default: 553 default:
554 hid_err(parser->device, "unknown main item tag 0x%x\n", item->tag); 554 hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag);
555 ret = 0; 555 ret = 0;
556 } 556 }
557 557
diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
index 68cdc962265b..271f31461da4 100644
--- a/drivers/hid/hid-cp2112.c
+++ b/drivers/hid/hid-cp2112.c
@@ -696,8 +696,16 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
696 (u8 *)&word, 2); 696 (u8 *)&word, 2);
697 break; 697 break;
698 case I2C_SMBUS_I2C_BLOCK_DATA: 698 case I2C_SMBUS_I2C_BLOCK_DATA:
699 size = I2C_SMBUS_BLOCK_DATA; 699 if (read_write == I2C_SMBUS_READ) {
700 /* fallthrough */ 700 read_length = data->block[0];
701 count = cp2112_write_read_req(buf, addr, read_length,
702 command, NULL, 0);
703 } else {
704 count = cp2112_write_req(buf, addr, command,
705 data->block + 1,
706 data->block[0]);
707 }
708 break;
701 case I2C_SMBUS_BLOCK_DATA: 709 case I2C_SMBUS_BLOCK_DATA:
702 if (I2C_SMBUS_READ == read_write) { 710 if (I2C_SMBUS_READ == read_write) {
703 count = cp2112_write_read_req(buf, addr, 711 count = cp2112_write_read_req(buf, addr,
@@ -785,6 +793,9 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
785 case I2C_SMBUS_WORD_DATA: 793 case I2C_SMBUS_WORD_DATA:
786 data->word = le16_to_cpup((__le16 *)buf); 794 data->word = le16_to_cpup((__le16 *)buf);
787 break; 795 break;
796 case I2C_SMBUS_I2C_BLOCK_DATA:
797 memcpy(data->block + 1, buf, read_length);
798 break;
788 case I2C_SMBUS_BLOCK_DATA: 799 case I2C_SMBUS_BLOCK_DATA:
789 if (read_length > I2C_SMBUS_BLOCK_MAX) { 800 if (read_length > I2C_SMBUS_BLOCK_MAX) {
790 ret = -EPROTO; 801 ret = -EPROTO;
diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c
index 9325545fc3ae..edc0f64bb584 100644
--- a/drivers/hid/hid-holtekff.c
+++ b/drivers/hid/hid-holtekff.c
@@ -32,10 +32,6 @@
32 32
33#ifdef CONFIG_HOLTEK_FF 33#ifdef CONFIG_HOLTEK_FF
34 34
35MODULE_LICENSE("GPL");
36MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
37MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
38
39/* 35/*
40 * These commands and parameters are currently known: 36 * These commands and parameters are currently known:
41 * 37 *
@@ -223,3 +219,7 @@ static struct hid_driver holtek_driver = {
223 .probe = holtek_probe, 219 .probe = holtek_probe,
224}; 220};
225module_hid_driver(holtek_driver); 221module_hid_driver(holtek_driver);
222
223MODULE_LICENSE("GPL");
224MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
225MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 76ed9a216f10..610223f0e945 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1378,6 +1378,8 @@ void vmbus_device_unregister(struct hv_device *device_obj)
1378 pr_debug("child device %s unregistered\n", 1378 pr_debug("child device %s unregistered\n",
1379 dev_name(&device_obj->device)); 1379 dev_name(&device_obj->device));
1380 1380
1381 kset_unregister(device_obj->channels_kset);
1382
1381 /* 1383 /*
1382 * Kick off the process of unregistering the device. 1384 * Kick off the process of unregistering the device.
1383 * This will call vmbus_remove() and eventually vmbus_device_release() 1385 * This will call vmbus_remove() and eventually vmbus_device_release()
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index c9790e2c3440..af5123042990 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev,
143 struct hwmon_device *hwdev, int index) 143 struct hwmon_device *hwdev, int index)
144{ 144{
145 struct hwmon_thermal_data *tdata; 145 struct hwmon_thermal_data *tdata;
146 struct thermal_zone_device *tzd;
146 147
147 tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); 148 tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
148 if (!tdata) 149 if (!tdata)
@@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev,
151 tdata->hwdev = hwdev; 152 tdata->hwdev = hwdev;
152 tdata->index = index; 153 tdata->index = index;
153 154
154 devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, 155 tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
155 &hwmon_thermal_ops); 156 &hwmon_thermal_ops);
157 /*
158 * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
159 * so ignore that error but forward any other error.
160 */
161 if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
162 return PTR_ERR(tzd);
156 163
157 return 0; 164 return 0;
158} 165}
@@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
621 if (!chip->ops->is_visible(drvdata, hwmon_temp, 628 if (!chip->ops->is_visible(drvdata, hwmon_temp,
622 hwmon_temp_input, j)) 629 hwmon_temp_input, j))
623 continue; 630 continue;
624 if (info[i]->config[j] & HWMON_T_INPUT) 631 if (info[i]->config[j] & HWMON_T_INPUT) {
625 hwmon_thermal_add_sensor(dev, hwdev, j); 632 err = hwmon_thermal_add_sensor(dev,
633 hwdev, j);
634 if (err)
635 goto free_device;
636 }
626 } 637 }
627 } 638 }
628 } 639 }
629 640
630 return hdev; 641 return hdev;
631 642
643free_device:
644 device_unregister(hdev);
632free_hwmon: 645free_hwmon:
633 kfree(hwdev); 646 kfree(hwdev);
634ida_remove: 647ida_remove:
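
As the new hwmon comment notes, devm_thermal_zone_of_sensor_register() returns -ENODEV when CONFIG_THERMAL_OF is compiled out; the caller treats that one error as "optional feature absent" and succeeds, while propagating anything else. A sketch of that error-filtering pattern:

#include <errno.h>
#include <stdio.h>

/* Stand-in: returns -ENODEV when the optional subsystem is absent. */
static int register_sensor(int subsystem_present)
{
    return subsystem_present ? 0 : -ENODEV;
}

static int add_sensor(int subsystem_present)
{
    int err = register_sensor(subsystem_present);

    /* -ENODEV just means the feature is compiled out: not fatal. */
    if (err && err != -ENODEV)
        return err;
    return 0;
}

int main(void)
{
    printf("with subsystem:    %d\n", add_sensor(1));  /* 0 */
    printf("without subsystem: %d\n", add_sensor(0));  /* still 0 */
    return 0;
}
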
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index a1d687a664f8..66f0268f37a6 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -314,7 +314,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
314} 314}
315#endif 315#endif
316 316
317struct ib_device *__ib_device_get_by_index(u32 ifindex); 317struct ib_device *ib_device_get_by_index(u32 ifindex);
318/* RDMA device netlink */ 318/* RDMA device netlink */
319void nldev_init(void); 319void nldev_init(void);
320void nldev_exit(void); 320void nldev_exit(void);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 30914f3baa5f..465520627e4b 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -134,7 +134,7 @@ static int ib_device_check_mandatory(struct ib_device *device)
134 return 0; 134 return 0;
135} 135}
136 136
137struct ib_device *__ib_device_get_by_index(u32 index) 137static struct ib_device *__ib_device_get_by_index(u32 index)
138{ 138{
139 struct ib_device *device; 139 struct ib_device *device;
140 140
@@ -145,6 +145,22 @@ struct ib_device *__ib_device_get_by_index(u32 index)
145 return NULL; 145 return NULL;
146} 146}
147 147
148/*
 149 * Caller is responsible for releasing the reference by calling put_device()
150 */
151struct ib_device *ib_device_get_by_index(u32 index)
152{
153 struct ib_device *device;
154
155 down_read(&lists_rwsem);
156 device = __ib_device_get_by_index(index);
157 if (device)
158 get_device(&device->dev);
159
160 up_read(&lists_rwsem);
161 return device;
162}
163
148static struct ib_device *__ib_device_get_by_name(const char *name) 164static struct ib_device *__ib_device_get_by_name(const char *name)
149{ 165{
150 struct ib_device *device; 166 struct ib_device *device;
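
The new ib_device_get_by_index() takes its reference inside the same critical section that protects the device list, so the device cannot be freed between being found and being pinned; the caller then owns a reference it must drop with put_device(), as the nldev.c error paths below now do. A userspace analogue of the lookup-and-pin pattern, with all names invented for illustration:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

struct dev { int refcount; int index; };
static struct dev the_dev = { .refcount = 1, .index = 7 };

static struct dev *find_locked(int index)
{
    return index == the_dev.index ? &the_dev : NULL;
}

/* Lookup and pin in one critical section; caller must dev_put(). */
static struct dev *dev_get_by_index(int index)
{
    struct dev *d;

    pthread_mutex_lock(&list_lock);
    d = find_locked(index);
    if (d)
        d->refcount++;                  /* get_device() in the real code */
    pthread_mutex_unlock(&list_lock);
    return d;
}

static void dev_put(struct dev *d) { d->refcount--; }

int main(void)
{
    struct dev *d = dev_get_by_index(7);

    if (d) {
        printf("found dev %d, ref=%d\n", d->index, d->refcount);
        dev_put(d);
    }
    return 0;
}
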
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 9a05245a1acf..0dcd1aa6f683 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -142,27 +142,34 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
142 142
143 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 143 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
144 144
145 device = __ib_device_get_by_index(index); 145 device = ib_device_get_by_index(index);
146 if (!device) 146 if (!device)
147 return -EINVAL; 147 return -EINVAL;
148 148
149 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 149 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
150 if (!msg) 150 if (!msg) {
151 return -ENOMEM; 151 err = -ENOMEM;
152 goto err;
153 }
152 154
153 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 155 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
154 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 156 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
155 0, 0); 157 0, 0);
156 158
157 err = fill_dev_info(msg, device); 159 err = fill_dev_info(msg, device);
158 if (err) { 160 if (err)
159 nlmsg_free(msg); 161 goto err_free;
160 return err;
161 }
162 162
163 nlmsg_end(msg, nlh); 163 nlmsg_end(msg, nlh);
164 164
165 put_device(&device->dev);
165 return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); 166 return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
167
168err_free:
169 nlmsg_free(msg);
170err:
171 put_device(&device->dev);
172 return err;
166} 173}
167 174
168static int _nldev_get_dumpit(struct ib_device *device, 175static int _nldev_get_dumpit(struct ib_device *device,
@@ -220,31 +227,40 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
220 return -EINVAL; 227 return -EINVAL;
221 228
222 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 229 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
223 device = __ib_device_get_by_index(index); 230 device = ib_device_get_by_index(index);
224 if (!device) 231 if (!device)
225 return -EINVAL; 232 return -EINVAL;
226 233
227 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 234 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
228 if (!rdma_is_port_valid(device, port)) 235 if (!rdma_is_port_valid(device, port)) {
229 return -EINVAL; 236 err = -EINVAL;
237 goto err;
238 }
230 239
231 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 240 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
232 if (!msg) 241 if (!msg) {
233 return -ENOMEM; 242 err = -ENOMEM;
243 goto err;
244 }
234 245
235 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 246 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
236 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 247 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
237 0, 0); 248 0, 0);
238 249
239 err = fill_port_info(msg, device, port); 250 err = fill_port_info(msg, device, port);
240 if (err) { 251 if (err)
241 nlmsg_free(msg); 252 goto err_free;
242 return err;
243 }
244 253
245 nlmsg_end(msg, nlh); 254 nlmsg_end(msg, nlh);
255 put_device(&device->dev);
246 256
247 return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); 257 return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
258
259err_free:
260 nlmsg_free(msg);
261err:
262 put_device(&device->dev);
263 return err;
248} 264}
249 265
250static int nldev_port_get_dumpit(struct sk_buff *skb, 266static int nldev_port_get_dumpit(struct sk_buff *skb,
@@ -265,7 +281,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
265 return -EINVAL; 281 return -EINVAL;
266 282
267 ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 283 ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
268 device = __ib_device_get_by_index(ifindex); 284 device = ib_device_get_by_index(ifindex);
269 if (!device) 285 if (!device)
270 return -EINVAL; 286 return -EINVAL;
271 287
@@ -299,7 +315,9 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
299 nlmsg_end(skb, nlh); 315 nlmsg_end(skb, nlh);
300 } 316 }
301 317
302out: cb->args[0] = idx; 318out:
319 put_device(&device->dev);
320 cb->args[0] = idx;
303 return skb->len; 321 return skb->len;
304} 322}
305 323
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index feafdb961c48..59b2f96d986a 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
386 if (ret) 386 if (ret)
387 return ret; 387 return ret;
388 388
389 if (!qp->qp_sec)
390 return 0;
391
389 mutex_lock(&real_qp->qp_sec->mutex); 392 mutex_lock(&real_qp->qp_sec->mutex);
390 ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys, 393 ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
391 qp->qp_sec); 394 qp->qp_sec);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index d0202bb176a4..840b24096690 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2074,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
2074 return -EOPNOTSUPP; 2074 return -EOPNOTSUPP;
2075 2075
2076 if (ucore->inlen > sizeof(cmd)) { 2076 if (ucore->inlen > sizeof(cmd)) {
2077 if (ib_is_udata_cleared(ucore, sizeof(cmd), 2077 if (!ib_is_udata_cleared(ucore, sizeof(cmd),
2078 ucore->inlen - sizeof(cmd))) 2078 ucore->inlen - sizeof(cmd)))
2079 return -EOPNOTSUPP; 2079 return -EOPNOTSUPP;
2080 } 2080 }
2081 2081
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 3fb8fb6cc824..e36d27ed4daa 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp)
1438 spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); 1438 spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
1439 1439
1440 atomic_dec(&real_qp->usecnt); 1440 atomic_dec(&real_qp->usecnt);
1441 ib_close_shared_qp_security(qp->qp_sec); 1441 if (qp->qp_sec)
1442 ib_close_shared_qp_security(qp->qp_sec);
1442 kfree(qp); 1443 kfree(qp);
1443 1444
1444 return 0; 1445 return 0;
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index b7bfc536e00f..6f2b26126c64 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -395,7 +395,7 @@ next_cqe:
395 395
396static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) 396static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
397{ 397{
398 if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) { 398 if (DRAIN_CQE(cqe)) {
399 WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid); 399 WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
400 return 0; 400 return 0;
401 } 401 }
@@ -494,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
494 /* 494 /*
495 * Special cqe for drain WR completions... 495 * Special cqe for drain WR completions...
496 */ 496 */
497 if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { 497 if (DRAIN_CQE(hw_cqe)) {
498 *cookie = CQE_DRAIN_COOKIE(hw_cqe); 498 *cookie = CQE_DRAIN_COOKIE(hw_cqe);
499 *cqe = *hw_cqe; 499 *cqe = *hw_cqe;
500 goto skip_cqe; 500 goto skip_cqe;
@@ -571,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
571 ret = -EAGAIN; 571 ret = -EAGAIN;
572 goto skip_cqe; 572 goto skip_cqe;
573 } 573 }
574 if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { 574 if (unlikely(!CQE_STATUS(hw_cqe) &&
575 CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
575 t4_set_wq_in_error(wq); 576 t4_set_wq_in_error(wq);
576 hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN)); 577 hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
577 goto proc_cqe;
578 } 578 }
579 goto proc_cqe; 579 goto proc_cqe;
580 } 580 }
@@ -748,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
748 c4iw_invalidate_mr(qhp->rhp, 748 c4iw_invalidate_mr(qhp->rhp,
749 CQE_WRID_FR_STAG(&cqe)); 749 CQE_WRID_FR_STAG(&cqe));
750 break; 750 break;
751 case C4IW_DRAIN_OPCODE:
752 wc->opcode = IB_WC_SEND;
753 break;
754 default: 751 default:
755 pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", 752 pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
756 CQE_OPCODE(&cqe), CQE_QPID(&cqe)); 753 CQE_OPCODE(&cqe), CQE_QPID(&cqe));
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 470f97a79ebb..65dd3726ca02 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state)
693 return IB_QPS_ERR; 693 return IB_QPS_ERR;
694} 694}
695 695
696#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN
697
698static inline u32 c4iw_ib_to_tpt_access(int a) 696static inline u32 c4iw_ib_to_tpt_access(int a)
699{ 697{
700 return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | 698 return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 38bddd02a943..d5c92fc520d6 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
790 return 0; 790 return 0;
791} 791}
792 792
793static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) 793static int ib_to_fw_opcode(int ib_opcode)
794{
795 int opcode;
796
797 switch (ib_opcode) {
798 case IB_WR_SEND_WITH_INV:
799 opcode = FW_RI_SEND_WITH_INV;
800 break;
801 case IB_WR_SEND:
802 opcode = FW_RI_SEND;
803 break;
804 case IB_WR_RDMA_WRITE:
805 opcode = FW_RI_RDMA_WRITE;
806 break;
807 case IB_WR_RDMA_READ:
808 case IB_WR_RDMA_READ_WITH_INV:
809 opcode = FW_RI_READ_REQ;
810 break;
811 case IB_WR_REG_MR:
812 opcode = FW_RI_FAST_REGISTER;
813 break;
814 case IB_WR_LOCAL_INV:
815 opcode = FW_RI_LOCAL_INV;
816 break;
817 default:
818 opcode = -EINVAL;
819 }
820 return opcode;
821}
822
823static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
794{ 824{
795 struct t4_cqe cqe = {}; 825 struct t4_cqe cqe = {};
796 struct c4iw_cq *schp; 826 struct c4iw_cq *schp;
797 unsigned long flag; 827 unsigned long flag;
798 struct t4_cq *cq; 828 struct t4_cq *cq;
829 int opcode;
799 830
800 schp = to_c4iw_cq(qhp->ibqp.send_cq); 831 schp = to_c4iw_cq(qhp->ibqp.send_cq);
801 cq = &schp->cq; 832 cq = &schp->cq;
802 833
834 opcode = ib_to_fw_opcode(wr->opcode);
835 if (opcode < 0)
836 return opcode;
837
803 cqe.u.drain_cookie = wr->wr_id; 838 cqe.u.drain_cookie = wr->wr_id;
804 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | 839 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
805 CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | 840 CQE_OPCODE_V(opcode) |
806 CQE_TYPE_V(1) | 841 CQE_TYPE_V(1) |
807 CQE_SWCQE_V(1) | 842 CQE_SWCQE_V(1) |
843 CQE_DRAIN_V(1) |
808 CQE_QPID_V(qhp->wq.sq.qid)); 844 CQE_QPID_V(qhp->wq.sq.qid));
809 845
810 spin_lock_irqsave(&schp->lock, flag); 846 spin_lock_irqsave(&schp->lock, flag);
@@ -819,6 +855,23 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
819 schp->ibcq.cq_context); 855 schp->ibcq.cq_context);
820 spin_unlock_irqrestore(&schp->comp_handler_lock, flag); 856 spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
821 } 857 }
858 return 0;
859}
860
861static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
862 struct ib_send_wr **bad_wr)
863{
864 int ret = 0;
865
866 while (wr) {
867 ret = complete_sq_drain_wr(qhp, wr);
868 if (ret) {
869 *bad_wr = wr;
870 break;
871 }
872 wr = wr->next;
873 }
874 return ret;
822} 875}
823 876
824static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) 877static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
@@ -833,9 +886,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
833 886
834 cqe.u.drain_cookie = wr->wr_id; 887 cqe.u.drain_cookie = wr->wr_id;
835 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | 888 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
836 CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | 889 CQE_OPCODE_V(FW_RI_SEND) |
837 CQE_TYPE_V(0) | 890 CQE_TYPE_V(0) |
838 CQE_SWCQE_V(1) | 891 CQE_SWCQE_V(1) |
892 CQE_DRAIN_V(1) |
839 CQE_QPID_V(qhp->wq.sq.qid)); 893 CQE_QPID_V(qhp->wq.sq.qid));
840 894
841 spin_lock_irqsave(&rchp->lock, flag); 895 spin_lock_irqsave(&rchp->lock, flag);
@@ -852,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
852 } 906 }
853} 907}
854 908
909static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
910{
911 while (wr) {
912 complete_rq_drain_wr(qhp, wr);
913 wr = wr->next;
914 }
915}
916
855int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 917int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
856 struct ib_send_wr **bad_wr) 918 struct ib_send_wr **bad_wr)
857{ 919{
@@ -875,7 +937,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
875 */ 937 */
876 if (qhp->wq.flushed) { 938 if (qhp->wq.flushed) {
877 spin_unlock_irqrestore(&qhp->lock, flag); 939 spin_unlock_irqrestore(&qhp->lock, flag);
878 complete_sq_drain_wr(qhp, wr); 940 err = complete_sq_drain_wrs(qhp, wr, bad_wr);
879 return err; 941 return err;
880 } 942 }
881 num_wrs = t4_sq_avail(&qhp->wq); 943 num_wrs = t4_sq_avail(&qhp->wq);
@@ -1023,7 +1085,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1023 */ 1085 */
1024 if (qhp->wq.flushed) { 1086 if (qhp->wq.flushed) {
1025 spin_unlock_irqrestore(&qhp->lock, flag); 1087 spin_unlock_irqrestore(&qhp->lock, flag);
1026 complete_rq_drain_wr(qhp, wr); 1088 complete_rq_drain_wrs(qhp, wr);
1027 return err; 1089 return err;
1028 } 1090 }
1029 num_wrs = t4_rq_avail(&qhp->wq); 1091 num_wrs = t4_rq_avail(&qhp->wq);
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e9ea94268d51..79e8ee12c391 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -197,6 +197,11 @@ struct t4_cqe {
197#define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M) 197#define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
198#define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S) 198#define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S)
199 199
200#define CQE_DRAIN_S 10
201#define CQE_DRAIN_M 0x1
202#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
203#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S)
204
200#define CQE_STATUS_S 5 205#define CQE_STATUS_S 5
201#define CQE_STATUS_M 0x1F 206#define CQE_STATUS_M 0x1F
202#define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M) 207#define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
@@ -213,6 +218,7 @@ struct t4_cqe {
213#define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S) 218#define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S)
214 219
215#define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header))) 220#define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header)))
221#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header)))
216#define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header))) 222#define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header)))
217#define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header))) 223#define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header)))
218#define SQ_TYPE(x) (CQE_TYPE((x))) 224#define SQ_TYPE(x) (CQE_TYPE((x)))
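
The new CQE_DRAIN macros in t4.h follow the file's existing _S/_M/_G/_V convention: shift, mask, get (extract the field from a header word), and value (place the field). DRAIN occupies bit 10 of the CQE header, which is stored big-endian on the wire, hence the be32_to_cpu() in DRAIN_CQE(). A worked sketch of the macros on a host-order word:

#include <stdint.h>
#include <stdio.h>

#define CQE_DRAIN_S 10
#define CQE_DRAIN_M 0x1
#define CQE_DRAIN_G(x) (((x) >> CQE_DRAIN_S) & CQE_DRAIN_M)
#define CQE_DRAIN_V(x) ((x) << CQE_DRAIN_S)

int main(void)
{
    uint32_t header = 0;

    header |= CQE_DRAIN_V(1);                      /* mark as a drain CQE */
    printf("header=0x%08x drain=%u\n",
           (unsigned)header, (unsigned)CQE_DRAIN_G(header));  /* 0x00000400, 1 */
    return 0;
}
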
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4a9b4d7efe63..8ce9118d4a7f 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1131,7 +1131,6 @@ struct hfi1_devdata {
1131 u16 pcie_lnkctl; 1131 u16 pcie_lnkctl;
1132 u16 pcie_devctl2; 1132 u16 pcie_devctl2;
1133 u32 pci_msix0; 1133 u32 pci_msix0;
1134 u32 pci_lnkctl3;
1135 u32 pci_tph2; 1134 u32 pci_tph2;
1136 1135
1137 /* 1136 /*
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 09e50fd2a08f..8c7e7a60b715 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd)
411 if (ret) 411 if (ret)
412 goto error; 412 goto error;
413 413
414 ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, 414 if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
415 dd->pci_lnkctl3); 415 ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
416 if (ret) 416 dd->pci_tph2);
417 goto error; 417 if (ret)
418 418 goto error;
419 ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); 419 }
420 if (ret)
421 goto error;
422
423 return 0; 420 return 0;
424 421
425error: 422error:
@@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd)
469 if (ret) 466 if (ret)
470 goto error; 467 goto error;
471 468
472 ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, 469 if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
473 &dd->pci_lnkctl3); 470 ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
474 if (ret) 471 &dd->pci_tph2);
475 goto error; 472 if (ret)
476 473 goto error;
477 ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); 474 }
478 if (ret)
479 goto error;
480
481 return 0; 475 return 0;
482 476
483error: 477error:
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 313bfb9ccb71..4975f3e6596e 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -642,7 +642,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
642 goto err_free_mr; 642 goto err_free_mr;
643 643
644 mr->max_pages = max_num_sg; 644 mr->max_pages = max_num_sg;
645
646 err = mlx4_mr_enable(dev->dev, &mr->mmr); 645 err = mlx4_mr_enable(dev->dev, &mr->mmr);
647 if (err) 646 if (err)
648 goto err_free_pl; 647 goto err_free_pl;
@@ -653,6 +652,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
653 return &mr->ibmr; 652 return &mr->ibmr;
654 653
655err_free_pl: 654err_free_pl:
655 mr->ibmr.device = pd->device;
656 mlx4_free_priv_pages(mr); 656 mlx4_free_priv_pages(mr);
657err_free_mr: 657err_free_mr:
658 (void) mlx4_mr_free(dev->dev, &mr->mmr); 658 (void) mlx4_mr_free(dev->dev, &mr->mmr);
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 470995fa38d2..6f6712f87a73 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
47 return err; 47 return err;
48} 48}
49 49
50int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
51 bool reset, void *out, int out_size)
52{
53 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
54
55 MLX5_SET(query_cong_statistics_in, in, opcode,
56 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
57 MLX5_SET(query_cong_statistics_in, in, clear, reset);
58 return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
59}
60
61int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, 50int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
62 void *out, int out_size) 51 void *out, int out_size)
63{ 52{
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index af4c24596274..78ffded7cc2c 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,8 +37,6 @@
37#include <linux/mlx5/driver.h> 37#include <linux/mlx5/driver.h>
38 38
39int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); 39int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
40int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
41 bool reset, void *out, int out_size);
42int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, 40int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
43 void *out, int out_size); 41 void *out, int out_size);
44int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, 42int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 543d0a4c8bf3..8ac50de2b242 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1463 } 1463 }
1464 1464
1465 INIT_LIST_HEAD(&context->vma_private_list); 1465 INIT_LIST_HEAD(&context->vma_private_list);
1466 mutex_init(&context->vma_private_list_mutex);
1466 INIT_LIST_HEAD(&context->db_page_list); 1467 INIT_LIST_HEAD(&context->db_page_list);
1467 mutex_init(&context->db_page_mutex); 1468 mutex_init(&context->db_page_mutex);
1468 1469
@@ -1624,7 +1625,9 @@ static void mlx5_ib_vma_close(struct vm_area_struct *area)
1624 * mlx5_ib_disassociate_ucontext(). 1625 * mlx5_ib_disassociate_ucontext().
1625 */ 1626 */
1626 mlx5_ib_vma_priv_data->vma = NULL; 1627 mlx5_ib_vma_priv_data->vma = NULL;
1628 mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
1627 list_del(&mlx5_ib_vma_priv_data->list); 1629 list_del(&mlx5_ib_vma_priv_data->list);
1630 mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
1628 kfree(mlx5_ib_vma_priv_data); 1631 kfree(mlx5_ib_vma_priv_data);
1629} 1632}
1630 1633
@@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
1644 return -ENOMEM; 1647 return -ENOMEM;
1645 1648
1646 vma_prv->vma = vma; 1649 vma_prv->vma = vma;
1650 vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
1647 vma->vm_private_data = vma_prv; 1651 vma->vm_private_data = vma_prv;
1648 vma->vm_ops = &mlx5_ib_vm_ops; 1652 vma->vm_ops = &mlx5_ib_vm_ops;
1649 1653
1654 mutex_lock(&ctx->vma_private_list_mutex);
1650 list_add(&vma_prv->list, vma_head); 1655 list_add(&vma_prv->list, vma_head);
1656 mutex_unlock(&ctx->vma_private_list_mutex);
1651 1657
1652 return 0; 1658 return 0;
1653} 1659}
@@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1690 * mlx5_ib_vma_close. 1696 * mlx5_ib_vma_close.
1691 */ 1697 */
1692 down_write(&owning_mm->mmap_sem); 1698 down_write(&owning_mm->mmap_sem);
1699 mutex_lock(&context->vma_private_list_mutex);
1693 list_for_each_entry_safe(vma_private, n, &context->vma_private_list, 1700 list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
1694 list) { 1701 list) {
1695 vma = vma_private->vma; 1702 vma = vma_private->vma;
@@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1704 list_del(&vma_private->list); 1711 list_del(&vma_private->list);
1705 kfree(vma_private); 1712 kfree(vma_private);
1706 } 1713 }
1714 mutex_unlock(&context->vma_private_list_mutex);
1707 up_write(&owning_mm->mmap_sem); 1715 up_write(&owning_mm->mmap_sem);
1708 mmput(owning_mm); 1716 mmput(owning_mm);
1709 put_task_struct(owning_process); 1717 put_task_struct(owning_process);
@@ -3737,34 +3745,6 @@ free:
3737 return ret; 3745 return ret;
3738} 3746}
3739 3747
3740static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
3741 struct mlx5_ib_port *port,
3742 struct rdma_hw_stats *stats)
3743{
3744 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
3745 void *out;
3746 int ret, i;
3747 int offset = port->cnts.num_q_counters;
3748
3749 out = kvzalloc(outlen, GFP_KERNEL);
3750 if (!out)
3751 return -ENOMEM;
3752
3753 ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
3754 if (ret)
3755 goto free;
3756
3757 for (i = 0; i < port->cnts.num_cong_counters; i++) {
3758 stats->value[i + offset] =
3759 be64_to_cpup((__be64 *)(out +
3760 port->cnts.offsets[i + offset]));
3761 }
3762
3763free:
3764 kvfree(out);
3765 return ret;
3766}
3767
3768static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 3748static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
3769 struct rdma_hw_stats *stats, 3749 struct rdma_hw_stats *stats,
3770 u8 port_num, int index) 3750 u8 port_num, int index)
@@ -3782,7 +3762,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
3782 num_counters = port->cnts.num_q_counters; 3762 num_counters = port->cnts.num_q_counters;
3783 3763
3784 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 3764 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
3785 ret = mlx5_ib_query_cong_counters(dev, port, stats); 3765 ret = mlx5_lag_query_cong_counters(dev->mdev,
3766 stats->value +
3767 port->cnts.num_q_counters,
3768 port->cnts.num_cong_counters,
3769 port->cnts.offsets +
3770 port->cnts.num_q_counters);
3786 if (ret) 3771 if (ret)
3787 return ret; 3772 return ret;
3788 num_counters += port->cnts.num_cong_counters; 3773 num_counters += port->cnts.num_cong_counters;
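
Note: two independent mlx5 fixes in this file. First, the per-ucontext vma_private_list gains a mutex so that mlx5_ib_vma_close() (invoked on munmap) and mlx5_ib_disassociate_ucontext() can no longer race on list membership; matching fields are added in mlx5_ib.h below. Second, the congestion counter query moves into the mlx5 core as mlx5_lag_query_cong_counters(), matching the removal of mlx5_cmd_query_cong_counter() from cmd.c/cmd.h above, presumably so bonded (LAG) configurations read counters from both ports. Condensing the locking hunks, both list writers now pair up like this:

        /* mlx5_ib_vma_close() */
        mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
        list_del(&mlx5_ib_vma_priv_data->list);
        mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
        kfree(mlx5_ib_vma_priv_data);

        /* mlx5_ib_disassociate_ucontext() */
        mutex_lock(&context->vma_private_list_mutex);
        list_for_each_entry_safe(vma_private, n,
                                 &context->vma_private_list, list) {
                /* zap the vma, then: */
                list_del(&vma_private->list);
                kfree(vma_private);
        }
        mutex_unlock(&context->vma_private_list_mutex);
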
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6dd8cac78de2..2c5f3533bbc9 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -115,6 +115,8 @@ enum {
115struct mlx5_ib_vma_private_data { 115struct mlx5_ib_vma_private_data {
116 struct list_head list; 116 struct list_head list;
117 struct vm_area_struct *vma; 117 struct vm_area_struct *vma;
118 /* protect vma_private_list add/del */
119 struct mutex *vma_private_list_mutex;
118}; 120};
119 121
120struct mlx5_ib_ucontext { 122struct mlx5_ib_ucontext {
@@ -129,6 +131,8 @@ struct mlx5_ib_ucontext {
129 /* Transport Domain number */ 131 /* Transport Domain number */
130 u32 tdn; 132 u32 tdn;
131 struct list_head vma_private_list; 133 struct list_head vma_private_list;
134 /* protect vma_private_list add/del */
135 struct mutex vma_private_list_mutex;
132 136
133 unsigned long upd_xlt_page; 137 unsigned long upd_xlt_page;
134 /* protect ODP/KSM */ 138 /* protect ODP/KSM */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ee0ee1f9994b..d109fe8290a7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1637 MLX5_SET(mkc, mkc, access_mode, mr->access_mode); 1637 MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
1638 MLX5_SET(mkc, mkc, umr_en, 1); 1638 MLX5_SET(mkc, mkc, umr_en, 1);
1639 1639
1640 mr->ibmr.device = pd->device;
1640 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); 1641 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1641 if (err) 1642 if (err)
1642 goto err_destroy_psv; 1643 goto err_destroy_psv;
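
Note: this is the same early ibmr.device assignment as the mlx4 hunk above, for the same reason: the error path after a failed mkey creation runs destroy helpers that may dereference the device pointer, so it must be valid before the first failure point:

        mr->ibmr.device = pd->device;   /* set before anything can fail */
        err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
        if (err)
                goto err_destroy_psv;   /* cleanup can rely on ibmr.device */
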
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 63bc2efc34eb..4f7bd3b6a315 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -94,7 +94,7 @@ struct pvrdma_cq {
94 u32 cq_handle; 94 u32 cq_handle;
95 bool is_kernel; 95 bool is_kernel;
96 atomic_t refcnt; 96 atomic_t refcnt;
97 wait_queue_head_t wait; 97 struct completion free;
98}; 98};
99 99
100struct pvrdma_id_table { 100struct pvrdma_id_table {
@@ -175,7 +175,7 @@ struct pvrdma_srq {
175 u32 srq_handle; 175 u32 srq_handle;
176 int npages; 176 int npages;
177 refcount_t refcnt; 177 refcount_t refcnt;
178 wait_queue_head_t wait; 178 struct completion free;
179}; 179};
180 180
181struct pvrdma_qp { 181struct pvrdma_qp {
@@ -197,7 +197,7 @@ struct pvrdma_qp {
197 bool is_kernel; 197 bool is_kernel;
198 struct mutex mutex; /* QP state mutex. */ 198 struct mutex mutex; /* QP state mutex. */
199 atomic_t refcnt; 199 atomic_t refcnt;
200 wait_queue_head_t wait; 200 struct completion free;
201}; 201};
202 202
203struct pvrdma_dev { 203struct pvrdma_dev {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 3562c0c30492..e529622cefad 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
179 pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); 179 pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
180 180
181 atomic_set(&cq->refcnt, 1); 181 atomic_set(&cq->refcnt, 1);
182 init_waitqueue_head(&cq->wait); 182 init_completion(&cq->free);
183 spin_lock_init(&cq->cq_lock); 183 spin_lock_init(&cq->cq_lock);
184 184
185 memset(cmd, 0, sizeof(*cmd)); 185 memset(cmd, 0, sizeof(*cmd));
@@ -230,8 +230,9 @@ err_cq:
230 230
231static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) 231static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
232{ 232{
233 atomic_dec(&cq->refcnt); 233 if (atomic_dec_and_test(&cq->refcnt))
234 wait_event(cq->wait, !atomic_read(&cq->refcnt)); 234 complete(&cq->free);
235 wait_for_completion(&cq->free);
235 236
236 if (!cq->is_kernel) 237 if (!cq->is_kernel)
237 ib_umem_release(cq->umem); 238 ib_umem_release(cq->umem);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 1f4e18717a00..e92681878c93 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
346 ibqp->event_handler(&e, ibqp->qp_context); 346 ibqp->event_handler(&e, ibqp->qp_context);
347 } 347 }
348 if (qp) { 348 if (qp) {
349 atomic_dec(&qp->refcnt); 349 if (atomic_dec_and_test(&qp->refcnt))
350 if (atomic_read(&qp->refcnt) == 0) 350 complete(&qp->free);
351 wake_up(&qp->wait);
352 } 351 }
353} 352}
354 353
@@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
373 ibcq->event_handler(&e, ibcq->cq_context); 372 ibcq->event_handler(&e, ibcq->cq_context);
374 } 373 }
375 if (cq) { 374 if (cq) {
376 atomic_dec(&cq->refcnt); 375 if (atomic_dec_and_test(&cq->refcnt))
377 if (atomic_read(&cq->refcnt) == 0) 376 complete(&cq->free);
378 wake_up(&cq->wait);
379 } 377 }
380} 378}
381 379
@@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
404 } 402 }
405 if (srq) { 403 if (srq) {
406 if (refcount_dec_and_test(&srq->refcnt)) 404 if (refcount_dec_and_test(&srq->refcnt))
407 wake_up(&srq->wait); 405 complete(&srq->free);
408 } 406 }
409} 407}
410 408
@@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
539 if (cq && cq->ibcq.comp_handler) 537 if (cq && cq->ibcq.comp_handler)
540 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 538 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
541 if (cq) { 539 if (cq) {
542 atomic_dec(&cq->refcnt); 540 if (atomic_dec_and_test(&cq->refcnt))
543 if (atomic_read(&cq->refcnt)) 541 complete(&cq->free);
544 wake_up(&cq->wait);
545 } 542 }
546 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 543 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
547 } 544 }
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 10420a18d02f..4059308e1454 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
246 spin_lock_init(&qp->rq.lock); 246 spin_lock_init(&qp->rq.lock);
247 mutex_init(&qp->mutex); 247 mutex_init(&qp->mutex);
248 atomic_set(&qp->refcnt, 1); 248 atomic_set(&qp->refcnt, 1);
249 init_waitqueue_head(&qp->wait); 249 init_completion(&qp->free);
250 250
251 qp->state = IB_QPS_RESET; 251 qp->state = IB_QPS_RESET;
252 252
@@ -428,8 +428,16 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
428 428
429 pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); 429 pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
430 430
431 atomic_dec(&qp->refcnt); 431 if (atomic_dec_and_test(&qp->refcnt))
432 wait_event(qp->wait, !atomic_read(&qp->refcnt)); 432 complete(&qp->free);
433 wait_for_completion(&qp->free);
434
435 if (!qp->is_kernel) {
436 if (qp->rumem)
437 ib_umem_release(qp->rumem);
438 if (qp->sumem)
439 ib_umem_release(qp->sumem);
440 }
433 441
434 pvrdma_page_dir_cleanup(dev, &qp->pdir); 442 pvrdma_page_dir_cleanup(dev, &qp->pdir);
435 443
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 826ccb864596..5acebb1ef631 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
149 149
150 spin_lock_init(&srq->lock); 150 spin_lock_init(&srq->lock);
151 refcount_set(&srq->refcnt, 1); 151 refcount_set(&srq->refcnt, 1);
152 init_waitqueue_head(&srq->wait); 152 init_completion(&srq->free);
153 153
154 dev_dbg(&dev->pdev->dev, 154 dev_dbg(&dev->pdev->dev,
155 "create shared receive queue from user space\n"); 155 "create shared receive queue from user space\n");
@@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
236 dev->srq_tbl[srq->srq_handle] = NULL; 236 dev->srq_tbl[srq->srq_handle] = NULL;
237 spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); 237 spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
238 238
239 refcount_dec(&srq->refcnt); 239 if (refcount_dec_and_test(&srq->refcnt))
240 wait_event(srq->wait, !refcount_read(&srq->refcnt)); 240 complete(&srq->free);
241 wait_for_completion(&srq->free);
241 242
242 /* There is no support for kernel clients, so this is safe. */ 243 /* There is no support for kernel clients, so this is safe. */
243 ib_umem_release(srq->umem); 244 ib_umem_release(srq->umem);
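
Note: the pvrdma hunks above all convert one teardown idiom. The old code did atomic_dec() followed by wait_event(wait, !atomic_read(&refcnt)), which can miss a wakeup, and the interrupt-handler variant even had the test inverted (it called wake_up() only while the count was still nonzero). The replacement pairs atomic_dec_and_test() with a completion so exactly one path signals and the destroyer sleeps until it does; pvrdma_free_qp() additionally releases the user umems only after the wait. A condensed sketch of the pattern, with obj standing in for the cq/qp/srq structures:

        #include <linux/atomic.h>
        #include <linux/completion.h>

        struct obj {
                atomic_t refcnt;                /* set to 1 at create time */
                struct completion free;         /* init_completion() at create time */
        };

        static void obj_put(struct obj *o)
        {
                if (atomic_dec_and_test(&o->refcnt))
                        complete(&o->free);
        }

        static void obj_destroy(struct obj *o)
        {
                obj_put(o);                     /* drop the creation reference */
                wait_for_completion(&o->free);  /* wait out in-flight users */
                /* now safe to release rings, umems, page dirs */
        }
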
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 3b96cdaf9a83..e6151a29c412 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1236 ipoib_ib_dev_down(dev); 1236 ipoib_ib_dev_down(dev);
1237 1237
1238 if (level == IPOIB_FLUSH_HEAVY) { 1238 if (level == IPOIB_FLUSH_HEAVY) {
1239 rtnl_lock();
1240 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) 1239 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
1241 ipoib_ib_dev_stop(dev); 1240 ipoib_ib_dev_stop(dev);
1242 1241
1243 result = ipoib_ib_dev_open(dev); 1242 if (ipoib_ib_dev_open(dev))
1244 rtnl_unlock();
1245 if (result)
1246 return; 1243 return;
1247 1244
1248 if (netif_queue_stopped(dev)) 1245 if (netif_queue_stopped(dev))
@@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
1282 struct ipoib_dev_priv *priv = 1279 struct ipoib_dev_priv *priv =
1283 container_of(work, struct ipoib_dev_priv, flush_heavy); 1280 container_of(work, struct ipoib_dev_priv, flush_heavy);
1284 1281
1282 rtnl_lock();
1285 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); 1283 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
1284 rtnl_unlock();
1286} 1285}
1287 1286
1288void ipoib_ib_dev_cleanup(struct net_device *dev) 1287void ipoib_ib_dev_cleanup(struct net_device *dev)
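
Note: rtnl_lock() moves out of __ipoib_ib_dev_flush() and into the workqueue entry point, so the whole stop/reopen sequence of a heavy flush runs under one RTNL critical section rather than taking the lock mid-flush. The resulting work item reads:

        void ipoib_ib_dev_flush_heavy(struct work_struct *work)
        {
                struct ipoib_dev_priv *priv =
                        container_of(work, struct ipoib_dev_priv, flush_heavy);

                rtnl_lock();
                __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
                rtnl_unlock();
        }
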
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 12b7f911f0e5..8880351df179 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -902,8 +902,8 @@ static int path_rec_start(struct net_device *dev,
902 return 0; 902 return 0;
903} 903}
904 904
905static void neigh_add_path(struct sk_buff *skb, u8 *daddr, 905static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
906 struct net_device *dev) 906 struct net_device *dev)
907{ 907{
908 struct ipoib_dev_priv *priv = ipoib_priv(dev); 908 struct ipoib_dev_priv *priv = ipoib_priv(dev);
909 struct rdma_netdev *rn = netdev_priv(dev); 909 struct rdma_netdev *rn = netdev_priv(dev);
@@ -917,7 +917,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
917 spin_unlock_irqrestore(&priv->lock, flags); 917 spin_unlock_irqrestore(&priv->lock, flags);
918 ++dev->stats.tx_dropped; 918 ++dev->stats.tx_dropped;
919 dev_kfree_skb_any(skb); 919 dev_kfree_skb_any(skb);
920 return; 920 return NULL;
921 }
922
923 /* To avoid race condition, make sure that the
924 * neigh will be added only once.
925 */
926 if (unlikely(!list_empty(&neigh->list))) {
927 spin_unlock_irqrestore(&priv->lock, flags);
928 return neigh;
921 } 929 }
922 930
923 path = __path_find(dev, daddr + 4); 931 path = __path_find(dev, daddr + 4);
@@ -956,7 +964,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
956 path->ah->last_send = rn->send(dev, skb, path->ah->ah, 964 path->ah->last_send = rn->send(dev, skb, path->ah->ah,
957 IPOIB_QPN(daddr)); 965 IPOIB_QPN(daddr));
958 ipoib_neigh_put(neigh); 966 ipoib_neigh_put(neigh);
959 return; 967 return NULL;
960 } 968 }
961 } else { 969 } else {
962 neigh->ah = NULL; 970 neigh->ah = NULL;
@@ -973,7 +981,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
973 981
974 spin_unlock_irqrestore(&priv->lock, flags); 982 spin_unlock_irqrestore(&priv->lock, flags);
975 ipoib_neigh_put(neigh); 983 ipoib_neigh_put(neigh);
976 return; 984 return NULL;
977 985
978err_path: 986err_path:
979 ipoib_neigh_free(neigh); 987 ipoib_neigh_free(neigh);
@@ -983,6 +991,8 @@ err_drop:
983 991
984 spin_unlock_irqrestore(&priv->lock, flags); 992 spin_unlock_irqrestore(&priv->lock, flags);
985 ipoib_neigh_put(neigh); 993 ipoib_neigh_put(neigh);
994
995 return NULL;
986} 996}
987 997
988static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, 998static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -1091,8 +1101,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
1091 case htons(ETH_P_TIPC): 1101 case htons(ETH_P_TIPC):
1092 neigh = ipoib_neigh_get(dev, phdr->hwaddr); 1102 neigh = ipoib_neigh_get(dev, phdr->hwaddr);
1093 if (unlikely(!neigh)) { 1103 if (unlikely(!neigh)) {
1094 neigh_add_path(skb, phdr->hwaddr, dev); 1104 neigh = neigh_add_path(skb, phdr->hwaddr, dev);
1095 return NETDEV_TX_OK; 1105 if (likely(!neigh))
1106 return NETDEV_TX_OK;
1096 } 1107 }
1097 break; 1108 break;
1098 case htons(ETH_P_ARP): 1109 case htons(ETH_P_ARP):
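
Note: neigh_add_path() used to return void and link the freshly allocated neigh unconditionally, so two CPUs transmitting to the same destination could add the same entry twice. It now returns the neigh when it finds it already linked (non-empty list member), and ipoib_start_xmit() falls through to transmit on that existing entry; a NULL return still means the skb was queued, sent, or dropped inside the helper. The ipoib_multicast.c hunk that follows adds the matching list_empty() guard on the multicast path. Caller-side pattern:

        neigh = ipoib_neigh_get(dev, phdr->hwaddr);
        if (unlikely(!neigh)) {
                neigh = neigh_add_path(skb, phdr->hwaddr, dev);
                if (likely(!neigh))
                        return NETDEV_TX_OK;    /* skb consumed by helper */
        }
        /* fall through: send via the (possibly pre-existing) neigh */
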
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 93e149efc1f5..9b3f47ae2016 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -816,7 +816,10 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
816 spin_lock_irqsave(&priv->lock, flags); 816 spin_lock_irqsave(&priv->lock, flags);
817 if (!neigh) { 817 if (!neigh) {
818 neigh = ipoib_neigh_alloc(daddr, dev); 818 neigh = ipoib_neigh_alloc(daddr, dev);
819 if (neigh) { 819 /* Make sure that the neigh will be added only
820 * once to mcast list.
821 */
822 if (neigh && list_empty(&neigh->list)) {
820 kref_get(&mcast->ah->ref); 823 kref_get(&mcast->ah->ref);
821 neigh->ah = mcast->ah; 824 neigh->ah = mcast->ah;
822 list_add_tail(&neigh->list, &mcast->neigh_list); 825 list_add_tail(&neigh->list, &mcast->neigh_list);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 8a1bd354b1cc..bfa576aa9f03 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1013,8 +1013,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
1013 return -ENOMEM; 1013 return -ENOMEM;
1014 1014
1015 attr->qp_state = IB_QPS_INIT; 1015 attr->qp_state = IB_QPS_INIT;
1016 attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | 1016 attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
1017 IB_ACCESS_REMOTE_WRITE;
1018 attr->port_num = ch->sport->port; 1017 attr->port_num = ch->sport->port;
1019 attr->pkey_index = 0; 1018 attr->pkey_index = 0;
1020 1019
@@ -2078,7 +2077,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2078 goto destroy_ib; 2077 goto destroy_ib;
2079 } 2078 }
2080 2079
2081 guid = (__be16 *)&param->primary_path->sgid.global.interface_id; 2080 guid = (__be16 *)&param->primary_path->dgid.global.interface_id;
2082 snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x", 2081 snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
2083 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]), 2082 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
2084 be16_to_cpu(guid[2]), be16_to_cpu(guid[3])); 2083 be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
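
Note: two independent srpt fixes. The INIT-state QP attributes drop the remote read/write access flags, leaving only IB_ACCESS_LOCAL_WRITE, which is all the target side itself needs at that stage. And the initiator GUID string is now built from the path record's dgid rather than sgid, apparently because the CM hands the passive side a reversed path, so the initiator's port GID sits in dgid. The formatting is unchanged; for example, an interface_id whose bytes are 00 02 c9 03 00 a1 b2 c3 renders as:

        guid = (__be16 *)&param->primary_path->dgid.global.interface_id;
        snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
                 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
                 be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
        /* -> "0002:c903:00a1:b2c3" */
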
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 3d8ff09eba57..c868a878c84f 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -163,7 +163,7 @@ static unsigned int get_time_pit(void)
163#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) 163#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0)
164#define DELTA(x,y) ((y)-(x)) 164#define DELTA(x,y) ((y)-(x))
165#define TIME_NAME "TSC" 165#define TIME_NAME "TSC"
166#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) 166#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) || defined(CONFIG_TILE)
167#define GET_TIME(x) do { x = get_cycles(); } while (0) 167#define GET_TIME(x) do { x = get_cycles(); } while (0)
168#define DELTA(x,y) ((y)-(x)) 168#define DELTA(x,y) ((y)-(x))
169#define TIME_NAME "get_cycles" 169#define TIME_NAME "get_cycles"
diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c
index ae473123583b..3d51175c4d72 100644
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c
@@ -1651,7 +1651,7 @@ ims_pcu_get_cdc_union_desc(struct usb_interface *intf)
1651 return union_desc; 1651 return union_desc;
1652 1652
1653 dev_err(&intf->dev, 1653 dev_err(&intf->dev,
1654 "Union descriptor to short (%d vs %zd\n)", 1654 "Union descriptor too short (%d vs %zd)\n",
1655 union_desc->bLength, sizeof(*union_desc)); 1655 union_desc->bLength, sizeof(*union_desc));
1656 return NULL; 1656 return NULL;
1657 } 1657 }
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 6bf56bb5f8d9..d91f3b1c5375 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -326,8 +326,6 @@ static int xenkbd_probe(struct xenbus_device *dev,
326 0, width, 0, 0); 326 0, width, 0, 0);
327 input_set_abs_params(mtouch, ABS_MT_POSITION_Y, 327 input_set_abs_params(mtouch, ABS_MT_POSITION_Y,
328 0, height, 0, 0); 328 0, height, 0, 0);
329 input_set_abs_params(mtouch, ABS_MT_PRESSURE,
330 0, 255, 0, 0);
331 329
332 ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT); 330 ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT);
333 if (ret) { 331 if (ret) {
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index b84cd978fce2..a4aaa748e987 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1613,7 +1613,7 @@ static int elantech_set_properties(struct elantech_data *etd)
1613 case 5: 1613 case 5:
1614 etd->hw_version = 3; 1614 etd->hw_version = 3;
1615 break; 1615 break;
1616 case 6 ... 14: 1616 case 6 ... 15:
1617 etd->hw_version = 4; 1617 etd->hw_version = 4;
1618 break; 1618 break;
1619 default: 1619 default:
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index e102d7764bc2..a458e5ec9e41 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -27,6 +27,7 @@
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/input.h> 28#include <linux/input.h>
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
30#include <linux/irq.h>
30#include <linux/platform_device.h> 31#include <linux/platform_device.h>
31#include <linux/async.h> 32#include <linux/async.h>
32#include <linux/i2c.h> 33#include <linux/i2c.h>
@@ -1261,10 +1262,13 @@ static int elants_i2c_probe(struct i2c_client *client,
1261 } 1262 }
1262 1263
1263 /* 1264 /*
1264 * Systems using device tree should set up interrupt via DTS, 1265 * Platform code (ACPI, DTS) should normally set up interrupt
1265 * the rest will use the default falling edge interrupts. 1266 * for us, but in case it did not let's fall back to using falling
1267 * edge to be compatible with older Chromebooks.
1266 */ 1268 */
1267 irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING; 1269 irqflags = irq_get_trigger_type(client->irq);
1270 if (!irqflags)
1271 irqflags = IRQF_TRIGGER_FALLING;
1268 1272
1269 error = devm_request_threaded_irq(&client->dev, client->irq, 1273 error = devm_request_threaded_irq(&client->dev, client->irq,
1270 NULL, elants_i2c_irq, 1274 NULL, elants_i2c_irq,
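
Note: instead of keying the trigger off the presence of an of_node, the probe now asks the IRQ core for whatever trigger type the firmware (DT or ACPI) already configured and only falls back to falling edge for older boards that describe none. A self-contained sketch of the idiom; example_request_irq and the "example" name are illustrative, not from the driver:

        #include <linux/interrupt.h>
        #include <linux/irq.h>

        static int example_request_irq(struct device *dev, unsigned int irq,
                                       irq_handler_t thread_fn, void *data)
        {
                unsigned long irqflags = irq_get_trigger_type(irq);

                if (!irqflags)                  /* nothing described by firmware */
                        irqflags = IRQF_TRIGGER_FALLING;

                /* NULL hard handler, so the threaded IRQ must be ONESHOT */
                return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
                                                 irqflags | IRQF_ONESHOT,
                                                 "example", data);
        }
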
diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c
index fc080a7c2e1f..f1cd4dd9a4a3 100644
--- a/drivers/input/touchscreen/hideep.c
+++ b/drivers/input/touchscreen/hideep.c
@@ -10,8 +10,7 @@
10#include <linux/of.h> 10#include <linux/of.h>
11#include <linux/firmware.h> 11#include <linux/firmware.h>
12#include <linux/delay.h> 12#include <linux/delay.h>
13#include <linux/gpio.h> 13#include <linux/gpio/consumer.h>
14#include <linux/gpio/machine.h>
15#include <linux/i2c.h> 14#include <linux/i2c.h>
16#include <linux/acpi.h> 15#include <linux/acpi.h>
17#include <linux/interrupt.h> 16#include <linux/interrupt.h>
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 7d5eb004091d..97baf88d9505 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4184,7 +4184,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
4184 struct irq_cfg *cfg); 4184 struct irq_cfg *cfg);
4185 4185
4186static int irq_remapping_activate(struct irq_domain *domain, 4186static int irq_remapping_activate(struct irq_domain *domain,
4187 struct irq_data *irq_data, bool early) 4187 struct irq_data *irq_data, bool reserve)
4188{ 4188{
4189 struct amd_ir_data *data = irq_data->chip_data; 4189 struct amd_ir_data *data = irq_data->chip_data;
4190 struct irq_2_irte *irte_info = &data->irq_2_irte; 4190 struct irq_2_irte *irte_info = &data->irq_2_irte;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f122071688fd..744592d330ca 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1698,13 +1698,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1698 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; 1698 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1699 domain->geometry.aperture_end = (1UL << ias) - 1; 1699 domain->geometry.aperture_end = (1UL << ias) - 1;
1700 domain->geometry.force_aperture = true; 1700 domain->geometry.force_aperture = true;
1701 smmu_domain->pgtbl_ops = pgtbl_ops;
1702 1701
1703 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); 1702 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1704 if (ret < 0) 1703 if (ret < 0) {
1705 free_io_pgtable_ops(pgtbl_ops); 1704 free_io_pgtable_ops(pgtbl_ops);
1705 return ret;
1706 }
1706 1707
1707 return ret; 1708 smmu_domain->pgtbl_ops = pgtbl_ops;
1709 return 0;
1708} 1710}
1709 1711
1710static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) 1712static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
@@ -1731,7 +1733,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1731 1733
1732static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) 1734static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1733{ 1735{
1734 int i; 1736 int i, j;
1735 struct arm_smmu_master_data *master = fwspec->iommu_priv; 1737 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1736 struct arm_smmu_device *smmu = master->smmu; 1738 struct arm_smmu_device *smmu = master->smmu;
1737 1739
@@ -1739,6 +1741,13 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1739 u32 sid = fwspec->ids[i]; 1741 u32 sid = fwspec->ids[i];
1740 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); 1742 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1741 1743
1744 /* Bridged PCI devices may end up with duplicated IDs */
1745 for (j = 0; j < i; j++)
1746 if (fwspec->ids[j] == sid)
1747 break;
1748 if (j < i)
1749 continue;
1750
1742 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); 1751 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1743 } 1752 }
1744} 1753}
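
Note: two arm-smmu-v3 fixes. arm_smmu_domain_finalise() no longer publishes smmu_domain->pgtbl_ops until finalise_stage_fn() has succeeded; previously a failure freed the ops while the domain still pointed at them. And arm_smmu_install_ste_for_dev() skips stream IDs it has already programmed, since devices behind a PCI bridge can alias to the same SID. A sketch of the duplicate filter; install_ste() stands in for the strtab write:

        for (i = 0; i < fwspec->num_ids; i++) {
                u32 sid = fwspec->ids[i];
                int j;

                /* Bridged PCI devices may end up with duplicated IDs */
                for (j = 0; j < i; j++)
                        if (fwspec->ids[j] == sid)
                                break;
                if (j < i)
                        continue;       /* STE for this SID already written */

                install_ste(sid);
        }

The O(n^2) scan is fine for the small ID lists involved.
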
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 76a193c7fcfc..66f69af2c219 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain,
1397} 1397}
1398 1398
1399static int intel_irq_remapping_activate(struct irq_domain *domain, 1399static int intel_irq_remapping_activate(struct irq_domain *domain,
1400 struct irq_data *irq_data, bool early) 1400 struct irq_data *irq_data, bool reserve)
1401{ 1401{
1402 intel_ir_reconfigure_irte(irq_data, true); 1402 intel_ir_reconfigure_irte(irq_data, true);
1403 return 0; 1403 return 0;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 4039e64cd342..06f025fd5726 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
2303} 2303}
2304 2304
2305static int its_irq_domain_activate(struct irq_domain *domain, 2305static int its_irq_domain_activate(struct irq_domain *domain,
2306 struct irq_data *d, bool early) 2306 struct irq_data *d, bool reserve)
2307{ 2307{
2308 struct its_device *its_dev = irq_data_get_irq_chip_data(d); 2308 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
2309 u32 event = its_get_event_id(d); 2309 u32 event = its_get_event_id(d);
@@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
2818} 2818}
2819 2819
2820static int its_vpe_irq_domain_activate(struct irq_domain *domain, 2820static int its_vpe_irq_domain_activate(struct irq_domain *domain,
2821 struct irq_data *d, bool early) 2821 struct irq_data *d, bool reserve)
2822{ 2822{
2823 struct its_vpe *vpe = irq_data_get_irq_chip_data(d); 2823 struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
2824 struct its_node *its; 2824 struct its_node *its;
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 06f29cf5018a..cee59fe1321c 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id)
342 */ 342 */
343static struct lock_class_key intc_irqpin_irq_lock_class; 343static struct lock_class_key intc_irqpin_irq_lock_class;
344 344
345/* And this is for the request mutex */
346static struct lock_class_key intc_irqpin_irq_request_class;
347
345static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, 348static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
346 irq_hw_number_t hw) 349 irq_hw_number_t hw)
347{ 350{
@@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
352 355
353 intc_irqpin_dbg(&p->irq[hw], "map"); 356 intc_irqpin_dbg(&p->irq[hw], "map");
354 irq_set_chip_data(virq, h->host_data); 357 irq_set_chip_data(virq, h->host_data);
355 irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); 358 irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class,
359 &intc_irqpin_irq_request_class);
356 irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); 360 irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq);
357 return 0; 361 return 0;
358} 362}
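
Note: irq_set_lockdep_class() now takes two keys because the IRQ core gives the descriptor spinlock and the request mutex separate lockdep classes, so every nested-IRQ driver must supply a static key for each (the arizona hunk below makes the identical change). The mapping callback pattern, with my_chip standing in for the driver's irq_chip:

        static struct lock_class_key my_irq_lock_class;
        static struct lock_class_key my_irq_request_class;

        static int my_irq_domain_map(struct irq_domain *h, unsigned int virq,
                                     irq_hw_number_t hw)
        {
                irq_set_chip_data(virq, h->host_data);
                irq_set_lockdep_class(virq, &my_irq_lock_class,
                                      &my_irq_request_class);
                irq_set_chip_and_handler(virq, &my_chip, handle_level_irq);
                return 0;
        }
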
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index fd83c7f77a95..ede4fa0ac2cc 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -188,6 +188,7 @@ void led_blink_set(struct led_classdev *led_cdev,
188{ 188{
189 del_timer_sync(&led_cdev->blink_timer); 189 del_timer_sync(&led_cdev->blink_timer);
190 190
191 clear_bit(LED_BLINK_SW, &led_cdev->work_flags);
191 clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); 192 clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags);
192 clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); 193 clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags);
193 194
diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index 09cf3699e354..a307832d7e45 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = {
184}; 184};
185 185
186static struct lock_class_key arizona_irq_lock_class; 186static struct lock_class_key arizona_irq_lock_class;
187static struct lock_class_key arizona_irq_request_class;
187 188
188static int arizona_irq_map(struct irq_domain *h, unsigned int virq, 189static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
189 irq_hw_number_t hw) 190 irq_hw_number_t hw)
@@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
191 struct arizona *data = h->host_data; 192 struct arizona *data = h->host_data;
192 193
193 irq_set_chip_data(virq, data); 194 irq_set_chip_data(virq, data);
194 irq_set_lockdep_class(virq, &arizona_irq_lock_class); 195 irq_set_lockdep_class(virq, &arizona_irq_lock_class,
196 &arizona_irq_request_class);
195 irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq); 197 irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq);
196 irq_set_nested_thread(virq, 1); 198 irq_set_nested_thread(virq, 1);
197 irq_set_noprobe(virq); 199 irq_set_noprobe(virq);
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index c9714072e224..59c82cdcf48d 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
377 u8 *ptr; 377 u8 *ptr;
378 u8 *rx_buf; 378 u8 *rx_buf;
379 u8 sum; 379 u8 sum;
380 u8 rx_byte;
380 int ret = 0, final_ret; 381 int ret = 0, final_ret;
381 382
382 len = cros_ec_prepare_tx(ec_dev, ec_msg); 383 len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
421 if (!ret) { 422 if (!ret) {
422 /* Verify that EC can process command */ 423 /* Verify that EC can process command */
423 for (i = 0; i < len; i++) { 424 for (i = 0; i < len; i++) {
424 switch (rx_buf[i]) { 425 rx_byte = rx_buf[i];
425 case EC_SPI_PAST_END: 426 if (rx_byte == EC_SPI_PAST_END ||
426 case EC_SPI_RX_BAD_DATA: 427 rx_byte == EC_SPI_RX_BAD_DATA ||
427 case EC_SPI_NOT_READY: 428 rx_byte == EC_SPI_NOT_READY) {
428 ret = -EAGAIN; 429 ret = -EREMOTEIO;
429 ec_msg->result = EC_RES_IN_PROGRESS;
430 default:
431 break; 430 break;
432 } 431 }
433 if (ret)
434 break;
435 } 432 }
436 if (!ret)
437 ret = cros_ec_spi_receive_packet(ec_dev,
438 ec_msg->insize + sizeof(*response));
439 } else {
440 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
441 } 433 }
442 434
435 if (!ret)
436 ret = cros_ec_spi_receive_packet(ec_dev,
437 ec_msg->insize + sizeof(*response));
438 else
439 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
440
443 final_ret = terminate_request(ec_dev); 441 final_ret = terminate_request(ec_dev);
444 442
445 spi_bus_unlock(ec_spi->spi->master); 443 spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
508 int i, len; 506 int i, len;
509 u8 *ptr; 507 u8 *ptr;
510 u8 *rx_buf; 508 u8 *rx_buf;
509 u8 rx_byte;
511 int sum; 510 int sum;
512 int ret = 0, final_ret; 511 int ret = 0, final_ret;
513 512
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
544 if (!ret) { 543 if (!ret) {
545 /* Verify that EC can process command */ 544 /* Verify that EC can process command */
546 for (i = 0; i < len; i++) { 545 for (i = 0; i < len; i++) {
547 switch (rx_buf[i]) { 546 rx_byte = rx_buf[i];
548 case EC_SPI_PAST_END: 547 if (rx_byte == EC_SPI_PAST_END ||
549 case EC_SPI_RX_BAD_DATA: 548 rx_byte == EC_SPI_RX_BAD_DATA ||
550 case EC_SPI_NOT_READY: 549 rx_byte == EC_SPI_NOT_READY) {
551 ret = -EAGAIN; 550 ret = -EREMOTEIO;
552 ec_msg->result = EC_RES_IN_PROGRESS;
553 default:
554 break; 551 break;
555 } 552 }
556 if (ret)
557 break;
558 } 553 }
559 if (!ret)
560 ret = cros_ec_spi_receive_response(ec_dev,
561 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
562 } else {
563 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
564 } 554 }
565 555
556 if (!ret)
557 ret = cros_ec_spi_receive_response(ec_dev,
558 ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
559 else
560 dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
561
566 final_ret = terminate_request(ec_dev); 562 final_ret = terminate_request(ec_dev);
567 563
568 spi_bus_unlock(ec_spi->spi->master); 564 spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
667 sizeof(struct ec_response_get_protocol_info); 663 sizeof(struct ec_response_get_protocol_info);
668 ec_dev->dout_size = sizeof(struct ec_host_request); 664 ec_dev->dout_size = sizeof(struct ec_host_request);
669 665
666 ec_spi->last_transfer_ns = ktime_get_ns();
670 667
671 err = cros_ec_register(ec_dev); 668 err = cros_ec_register(ec_dev);
672 if (err) { 669 if (err) {
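
Note: three related cleanups in the EC SPI transport. The per-byte switch collapses into a plain comparison against the three flow-control markers; the error changes from -EAGAIN plus EC_RES_IN_PROGRESS to a hard -EREMOTEIO, presumably because -EAGAIN made callers retry a transfer the EC had already rejected; and probe initialises last_transfer_ns so the first transfer does not compute its inter-transfer delay from a zero timestamp. The scan in both transfer paths now reads:

        /* Verify that the EC could take the command at all */
        for (i = 0; i < len; i++) {
                rx_byte = rx_buf[i];
                if (rx_byte == EC_SPI_PAST_END ||
                    rx_byte == EC_SPI_RX_BAD_DATA ||
                    rx_byte == EC_SPI_NOT_READY) {
                        ret = -EREMOTEIO;       /* fail hard, do not retry */
                        break;
                }
        }
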
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 590fb9aad77d..c3ed885c155c 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -1543,6 +1543,9 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev)
1543 rtsx_pci_power_off(pcr, HOST_ENTER_S1); 1543 rtsx_pci_power_off(pcr, HOST_ENTER_S1);
1544 1544
1545 pci_disable_device(pcidev); 1545 pci_disable_device(pcidev);
1546 free_irq(pcr->irq, (void *)pcr);
1547 if (pcr->msi_en)
1548 pci_disable_msi(pcr->pci);
1546} 1549}
1547 1550
1548#else /* CONFIG_PM */ 1551#else /* CONFIG_PM */
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index da16bf45fab4..dc94ffc6321a 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
159EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk); 159EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
160 160
161static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata, 161static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
162 struct device_node *node) 162 struct device_node *parent)
163{ 163{
164 struct device_node *node;
165
164 if (pdata && pdata->codec) 166 if (pdata && pdata->codec)
165 return true; 167 return true;
166 168
167 if (of_find_node_by_name(node, "codec")) 169 node = of_get_child_by_name(parent, "codec");
170 if (node) {
171 of_node_put(node);
168 return true; 172 return true;
173 }
169 174
170 return false; 175 return false;
171} 176}
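
Note: of_find_node_by_name() walks the whole device tree onward from the given node, so it could match a "codec" node belonging to some other device, and it returns a reference the old code never dropped. of_get_child_by_name() restricts the lookup to direct children, and the new code balances the counted reference with of_node_put(). The twl6040 hunk below makes the identical conversion (and drops the CONFIG_OF guard, since of_get_child_by_name() has a !CONFIG_OF stub). As a reusable helper the pattern is roughly:

        #include <linux/of.h>

        /* Illustrative helper, not from the patch. */
        static bool has_child_node(struct device_node *parent, const char *name)
        {
                struct device_node *node = of_get_child_by_name(parent, name);

                if (node) {
                        of_node_put(node);      /* drop the counted reference */
                        return true;
                }
                return false;
        }
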
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index d66502d36ba0..dd19f17a1b63 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
97}; 97};
98 98
99 99
100static bool twl6040_has_vibra(struct device_node *node) 100static bool twl6040_has_vibra(struct device_node *parent)
101{ 101{
102#ifdef CONFIG_OF 102 struct device_node *node;
103 if (of_find_node_by_name(node, "vibra")) 103
104 node = of_get_child_by_name(parent, "vibra");
105 if (node) {
106 of_node_put(node);
104 return true; 107 return true;
105#endif 108 }
109
106 return false; 110 return false;
107} 111}
108 112
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index fcf7235d5742..157e1d9e7725 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -24,6 +24,7 @@
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/clk.h> 25#include <linux/clk.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/module.h>
27#include <linux/of_device.h> 28#include <linux/of_device.h>
28#include <linux/platform_device.h> 29#include <linux/platform_device.h>
29#include <linux/mmc/host.h> 30#include <linux/mmc/host.h>
@@ -667,3 +668,5 @@ int renesas_sdhi_remove(struct platform_device *pdev)
667 return 0; 668 return 0;
668} 669}
669EXPORT_SYMBOL_GPL(renesas_sdhi_remove); 670EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
671
672MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index f7f157a62a4a..555c7f133eb8 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -1424,7 +1424,9 @@ static const struct file_operations s3cmci_fops_state = {
1424struct s3cmci_reg { 1424struct s3cmci_reg {
1425 unsigned short addr; 1425 unsigned short addr;
1426 unsigned char *name; 1426 unsigned char *name;
1427} debug_regs[] = { 1427};
1428
1429static const struct s3cmci_reg debug_regs[] = {
1428 DBG_REG(CON), 1430 DBG_REG(CON),
1429 DBG_REG(PRE), 1431 DBG_REG(PRE),
1430 DBG_REG(CMDARG), 1432 DBG_REG(CMDARG),
@@ -1446,7 +1448,7 @@ struct s3cmci_reg {
1446static int s3cmci_regs_show(struct seq_file *seq, void *v) 1448static int s3cmci_regs_show(struct seq_file *seq, void *v)
1447{ 1449{
1448 struct s3cmci_host *host = seq->private; 1450 struct s3cmci_host *host = seq->private;
1449 struct s3cmci_reg *rptr = debug_regs; 1451 const struct s3cmci_reg *rptr = debug_regs;
1450 1452
1451 for (; rptr->name; rptr++) 1453 for (; rptr->name; rptr++)
1452 seq_printf(seq, "SDI%s\t=0x%08x\n", rptr->name, 1454 seq_printf(seq, "SDI%s\t=0x%08x\n", rptr->name,
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index f80e911b8843..73b605577447 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
1114 if (!ops->oobbuf) 1114 if (!ops->oobbuf)
1115 ops->ooblen = 0; 1115 ops->ooblen = 0;
1116 1116
1117 if (offs < 0 || offs + ops->len >= mtd->size) 1117 if (offs < 0 || offs + ops->len > mtd->size)
1118 return -EINVAL; 1118 return -EINVAL;
1119 1119
1120 if (ops->ooblen) { 1120 if (ops->ooblen) {
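
Note: a classic off-by-one in the OOB bounds check. A request whose last byte lands exactly on mtd->size is legal, so the rejection must use > rather than >=:

        /* offs + ops->len == mtd->size is the largest valid request */
        if (offs < 0 || offs + ops->len > mtd->size)
                return -EINVAL;

With the old >= test, a read of len bytes ending at the device boundary was spuriously rejected.
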
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index e0eb51d8c012..dd56a671ea42 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -1763,7 +1763,7 @@ try_dmaread:
1763 err = brcmstb_nand_verify_erased_page(mtd, chip, buf, 1763 err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
1764 addr); 1764 addr);
1765 /* erased page bitflips corrected */ 1765 /* erased page bitflips corrected */
1766 if (err > 0) 1766 if (err >= 0)
1767 return err; 1767 return err;
1768 } 1768 }
1769 1769
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 484f7fbc3f7d..a8bde6665c24 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
253 goto out_ce; 253 goto out_ce;
254 } 254 }
255 255
256 gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW); 256 gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
257 if (IS_ERR(gpiomtd->nwp)) { 257 if (IS_ERR(gpiomtd->ale)) {
258 ret = PTR_ERR(gpiomtd->nwp); 258 ret = PTR_ERR(gpiomtd->ale);
259 goto out_ce; 259 goto out_ce;
260 } 260 }
261 261
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 50f8d4a1b983..d4d824ef64e9 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
1067 return ret; 1067 return ret;
1068 } 1068 }
1069 1069
1070 /* handle the block mark swapping */
1071 block_mark_swapping(this, payload_virt, auxiliary_virt);
1072
1073 /* Loop over status bytes, accumulating ECC status. */ 1070 /* Loop over status bytes, accumulating ECC status. */
1074 status = auxiliary_virt + nfc_geo->auxiliary_status_offset; 1071 status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
1075 1072
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
1158 max_bitflips = max_t(unsigned int, max_bitflips, *status); 1155 max_bitflips = max_t(unsigned int, max_bitflips, *status);
1159 } 1156 }
1160 1157
1158 /* handle the block mark swapping */
1159 block_mark_swapping(this, buf, auxiliary_virt);
1160
1161 if (oob_required) { 1161 if (oob_required) {
1162 /* 1162 /*
1163 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob() 1163 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
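
Note: the block-mark swap moves from before the ECC status loop to after it, and now operates on buf (the buffer handed back to the caller) rather than payload_virt. The ordering ensures the ECC status bytes are interpreted against unswapped data, and swapping in buf puts the bad-block marker where the caller will actually look; both points are inferred from the hunk rather than stated in it. Condensed, the new read path is:

        /* Loop over status bytes, accumulating ECC status. */
        status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
        /* per-chunk correction and max_bitflips accounting happen here */

        /* handle the block mark swapping */
        block_mark_swapping(this, buf, auxiliary_virt);
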
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 90b9a9ccbe60..9285f60e5783 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -963,6 +963,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command)
963 963
964 switch (command) { 964 switch (command) {
965 case NAND_CMD_READ0: 965 case NAND_CMD_READ0:
966 case NAND_CMD_READOOB:
966 case NAND_CMD_PAGEPROG: 967 case NAND_CMD_PAGEPROG:
967 info->use_ecc = 1; 968 info->use_ecc = 1;
968 break; 969 break;
diff --git a/drivers/mux/core.c b/drivers/mux/core.c
index 2260063b0ea8..6e5cf9d9cd99 100644
--- a/drivers/mux/core.c
+++ b/drivers/mux/core.c
@@ -413,6 +413,7 @@ static int of_dev_node_match(struct device *dev, const void *data)
413 return dev->of_node == data; 413 return dev->of_node == data;
414} 414}
415 415
416/* Note this function returns a reference to the mux_chip dev. */
416static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np) 417static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
417{ 418{
418 struct device *dev; 419 struct device *dev;
@@ -466,6 +467,7 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
466 (!args.args_count && (mux_chip->controllers > 1))) { 467 (!args.args_count && (mux_chip->controllers > 1))) {
467 dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n", 468 dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n",
468 np, args.np); 469 np, args.np);
470 put_device(&mux_chip->dev);
469 return ERR_PTR(-EINVAL); 471 return ERR_PTR(-EINVAL);
470 } 472 }
471 473
@@ -476,10 +478,10 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
476 if (controller >= mux_chip->controllers) { 478 if (controller >= mux_chip->controllers) {
477 dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n", 479 dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n",
478 np, controller, args.np); 480 np, controller, args.np);
481 put_device(&mux_chip->dev);
479 return ERR_PTR(-EINVAL); 482 return ERR_PTR(-EINVAL);
480 } 483 }
481 484
482 get_device(&mux_chip->dev);
483 return &mux_chip->mux[controller]; 485 return &mux_chip->mux[controller];
484} 486}
485EXPORT_SYMBOL_GPL(mux_control_get); 487EXPORT_SYMBOL_GPL(mux_control_get);
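
Note: of_find_mux_chip_by_node() already returns with a reference held on the mux_chip device (it is built on class_find_device(), judging by of_dev_node_match() above, hence the new comment), so mux_control_get() must drop that reference on its error exits and simply keep it on success. The old code instead took a second reference at the end and returned from the error branches without dropping the first. Ownership sketch; bad_cells condenses the two validation branches, and the NULL branch with its error code is assumed rather than visible in the hunk:

        mux_chip = of_find_mux_chip_by_node(args.np);   /* holds a dev ref */
        if (!mux_chip)
                return ERR_PTR(-EPROBE_DEFER);

        if (bad_cells || controller >= mux_chip->controllers) {
                put_device(&mux_chip->dev);     /* balance the lookup ref */
                return ERR_PTR(-EINVAL);
        }

        return &mux_chip->mux[controller];      /* ref travels with the handle */
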
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 0626dcfd1f3d..760d2c07e3a2 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -526,7 +526,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
526 data = be32_to_cpup((__be32 *)&cf->data[0]); 526 data = be32_to_cpup((__be32 *)&cf->data[0]);
527 flexcan_write(data, &priv->tx_mb->data[0]); 527 flexcan_write(data, &priv->tx_mb->data[0]);
528 } 528 }
529 if (cf->can_dlc > 3) { 529 if (cf->can_dlc > 4) {
530 data = be32_to_cpup((__be32 *)&cf->data[4]); 530 data = be32_to_cpup((__be32 *)&cf->data[4]);
531 flexcan_write(data, &priv->tx_mb->data[1]); 531 flexcan_write(data, &priv->tx_mb->data[1]);
532 } 532 }
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index b00358297424..12ff0020ecd6 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -395,6 +395,7 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
395 395
396 if (dev->can.state == CAN_STATE_ERROR_WARNING || 396 if (dev->can.state == CAN_STATE_ERROR_WARNING ||
397 dev->can.state == CAN_STATE_ERROR_PASSIVE) { 397 dev->can.state == CAN_STATE_ERROR_PASSIVE) {
398 cf->can_id |= CAN_ERR_CRTL;
398 cf->data[1] = (txerr > rxerr) ? 399 cf->data[1] = (txerr > rxerr) ?
399 CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE; 400 CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE;
400 } 401 }
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 68ac3e88a8ce..8bf80ad9dc44 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
449 dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)", 449 dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
450 rc); 450 rc);
451 451
452 return rc; 452 return (rc > 0) ? 0 : rc;
453} 453}
454 454
455static void gs_usb_xmit_callback(struct urb *urb) 455static void gs_usb_xmit_callback(struct urb *urb)
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 8404e8852a0f..b4c4a2c76437 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
194 tbp = peer_tb; 194 tbp = peer_tb;
195 } 195 }
196 196
197 if (tbp[IFLA_IFNAME]) { 197 if (ifmp && tbp[IFLA_IFNAME]) {
198 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 198 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
199 name_assign_type = NET_NAME_USER; 199 name_assign_type = NET_NAME_USER;
200 } else { 200 } else {
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index f5a8dd96fd75..4498ab897d94 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1500,10 +1500,13 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds,
1500{ 1500{
1501 struct b53_device *dev = ds->priv; 1501 struct b53_device *dev = ds->priv;
1502 1502
1503 /* Older models support a different tag format that we do not 1503 /* Older models (5325, 5365) support a different tag format that we do
1504 * support in net/dsa/tag_brcm.c yet. 1504 * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed
1505 * mode to be turned on which means we need to specifically manage ARL
1506 * misses on multicast addresses (TBD).
1505 */ 1507 */
1506 if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port)) 1508 if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) ||
1509 !b53_can_enable_brcm_tags(ds, port))
1507 return DSA_TAG_PROTO_NONE; 1510 return DSA_TAG_PROTO_NONE;
1508 1511
1509 /* Broadcom BCM58xx chips have a flow accelerator on Port 8 1512 /* Broadcom BCM58xx chips have a flow accelerator on Port 8
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index f4e13a7014bd..36c8950dbd2d 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -602,7 +602,7 @@ struct vortex_private {
602 struct sk_buff* rx_skbuff[RX_RING_SIZE]; 602 struct sk_buff* rx_skbuff[RX_RING_SIZE];
603 struct sk_buff* tx_skbuff[TX_RING_SIZE]; 603 struct sk_buff* tx_skbuff[TX_RING_SIZE];
604 unsigned int cur_rx, cur_tx; /* The next free ring entry */ 604 unsigned int cur_rx, cur_tx; /* The next free ring entry */
605 unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ 605 unsigned int dirty_tx; /* The ring entries to be free()ed. */
606 struct vortex_extra_stats xstats; /* NIC-specific extra stats */ 606 struct vortex_extra_stats xstats; /* NIC-specific extra stats */
607 struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */ 607 struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */
608 dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */ 608 dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */
@@ -618,7 +618,6 @@ struct vortex_private {
618 618
619 /* The remainder are related to chip state, mostly media selection. */ 619 /* The remainder are related to chip state, mostly media selection. */
620 struct timer_list timer; /* Media selection timer. */ 620 struct timer_list timer; /* Media selection timer. */
621 struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */
622 int options; /* User-settable misc. driver options. */ 621 int options; /* User-settable misc. driver options. */
623 unsigned int media_override:4, /* Passed-in media type. */ 622 unsigned int media_override:4, /* Passed-in media type. */
624 default_media:4, /* Read from the EEPROM/Wn3_Config. */ 623 default_media:4, /* Read from the EEPROM/Wn3_Config. */
@@ -760,7 +759,6 @@ static void mdio_sync(struct vortex_private *vp, int bits);
760static int mdio_read(struct net_device *dev, int phy_id, int location); 759static int mdio_read(struct net_device *dev, int phy_id, int location);
761static void mdio_write(struct net_device *vp, int phy_id, int location, int value); 760static void mdio_write(struct net_device *vp, int phy_id, int location, int value);
762static void vortex_timer(struct timer_list *t); 761static void vortex_timer(struct timer_list *t);
763static void rx_oom_timer(struct timer_list *t);
764static netdev_tx_t vortex_start_xmit(struct sk_buff *skb, 762static netdev_tx_t vortex_start_xmit(struct sk_buff *skb,
765 struct net_device *dev); 763 struct net_device *dev);
766static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb, 764static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb,
@@ -1601,7 +1599,6 @@ vortex_up(struct net_device *dev)
1601 1599
1602 timer_setup(&vp->timer, vortex_timer, 0); 1600 timer_setup(&vp->timer, vortex_timer, 0);
1603 mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait)); 1601 mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait));
1604 timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0);
1605 1602
1606 if (vortex_debug > 1) 1603 if (vortex_debug > 1)
1607 pr_debug("%s: Initial media type %s.\n", 1604 pr_debug("%s: Initial media type %s.\n",
@@ -1676,7 +1673,7 @@ vortex_up(struct net_device *dev)
1676 window_write16(vp, 0x0040, 4, Wn4_NetDiag); 1673 window_write16(vp, 0x0040, 4, Wn4_NetDiag);
1677 1674
1678 if (vp->full_bus_master_rx) { /* Boomerang bus master. */ 1675 if (vp->full_bus_master_rx) { /* Boomerang bus master. */
1679 vp->cur_rx = vp->dirty_rx = 0; 1676 vp->cur_rx = 0;
1680 /* Initialize the RxEarly register as recommended. */ 1677 /* Initialize the RxEarly register as recommended. */
1681 iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD); 1678 iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD);
1682 iowrite32(0x0020, ioaddr + PktStatus); 1679 iowrite32(0x0020, ioaddr + PktStatus);
@@ -1729,6 +1726,7 @@ vortex_open(struct net_device *dev)
1729 struct vortex_private *vp = netdev_priv(dev); 1726 struct vortex_private *vp = netdev_priv(dev);
1730 int i; 1727 int i;
1731 int retval; 1728 int retval;
1729 dma_addr_t dma;
1732 1730
1733 /* Use the now-standard shared IRQ implementation. */ 1731 /* Use the now-standard shared IRQ implementation. */
1734 if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ? 1732 if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ?
@@ -1753,7 +1751,11 @@ vortex_open(struct net_device *dev)
1753 break; /* Bad news! */ 1751 break; /* Bad news! */
1754 1752
1755 skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */ 1753 skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */
1756 vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); 1754 dma = pci_map_single(VORTEX_PCI(vp), skb->data,
1755 PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
1756 if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma))
1757 break;
1758 vp->rx_ring[i].addr = cpu_to_le32(dma);
1757 } 1759 }
1758 if (i != RX_RING_SIZE) { 1760 if (i != RX_RING_SIZE) {
1759 pr_emerg("%s: no memory for rx ring\n", dev->name); 1761 pr_emerg("%s: no memory for rx ring\n", dev->name);
@@ -2067,6 +2069,12 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
2067 int len = (skb->len + 3) & ~3; 2069 int len = (skb->len + 3) & ~3;
2068 vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len, 2070 vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
2069 PCI_DMA_TODEVICE); 2071 PCI_DMA_TODEVICE);
2072 if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) {
2073 dev_kfree_skb_any(skb);
2074 dev->stats.tx_dropped++;
2075 return NETDEV_TX_OK;
2076 }
2077
2070 spin_lock_irq(&vp->window_lock); 2078 spin_lock_irq(&vp->window_lock);
2071 window_set(vp, 7); 2079 window_set(vp, 7);
2072 iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr); 2080 iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr);
@@ -2593,7 +2601,7 @@ boomerang_rx(struct net_device *dev)
2593 int entry = vp->cur_rx % RX_RING_SIZE; 2601 int entry = vp->cur_rx % RX_RING_SIZE;
2594 void __iomem *ioaddr = vp->ioaddr; 2602 void __iomem *ioaddr = vp->ioaddr;
2595 int rx_status; 2603 int rx_status;
2596 int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx; 2604 int rx_work_limit = RX_RING_SIZE;
2597 2605
2598 if (vortex_debug > 5) 2606 if (vortex_debug > 5)
2599 pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS)); 2607 pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS));
@@ -2614,7 +2622,8 @@ boomerang_rx(struct net_device *dev)
2614 } else { 2622 } else {
2615 /* The packet length: up to 4.5K!. */ 2623 /* The packet length: up to 4.5K!. */
2616 int pkt_len = rx_status & 0x1fff; 2624 int pkt_len = rx_status & 0x1fff;
2617 struct sk_buff *skb; 2625 struct sk_buff *skb, *newskb;
2626 dma_addr_t newdma;
2618 dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr); 2627 dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr);
2619 2628
2620 if (vortex_debug > 4) 2629 if (vortex_debug > 4)
@@ -2633,9 +2642,27 @@ boomerang_rx(struct net_device *dev)
2633 pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); 2642 pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
2634 vp->rx_copy++; 2643 vp->rx_copy++;
2635 } else { 2644 } else {
2645 /* Pre-allocate the replacement skb. If it or its
2646 * mapping fails then recycle the buffer that's already
2647 * in place.
2648 */
2649 newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
2650 if (!newskb) {
2651 dev->stats.rx_dropped++;
2652 goto clear_complete;
2653 }
2654 newdma = pci_map_single(VORTEX_PCI(vp), newskb->data,
2655 PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
2656 if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) {
2657 dev->stats.rx_dropped++;
2658 consume_skb(newskb);
2659 goto clear_complete;
2660 }
2661
2636 /* Pass up the skbuff already on the Rx ring. */ 2662 /* Pass up the skbuff already on the Rx ring. */
2637 skb = vp->rx_skbuff[entry]; 2663 skb = vp->rx_skbuff[entry];
2638 vp->rx_skbuff[entry] = NULL; 2664 vp->rx_skbuff[entry] = newskb;
2665 vp->rx_ring[entry].addr = cpu_to_le32(newdma);
2639 skb_put(skb, pkt_len); 2666 skb_put(skb, pkt_len);
2640 pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); 2667 pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
2641 vp->rx_nocopy++; 2668 vp->rx_nocopy++;
@@ -2653,55 +2680,15 @@ boomerang_rx(struct net_device *dev)
2653 netif_rx(skb); 2680 netif_rx(skb);
2654 dev->stats.rx_packets++; 2681 dev->stats.rx_packets++;
2655 } 2682 }
2656 entry = (++vp->cur_rx) % RX_RING_SIZE;
2657 }
2658 /* Refill the Rx ring buffers. */
2659 for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) {
2660 struct sk_buff *skb;
2661 entry = vp->dirty_rx % RX_RING_SIZE;
2662 if (vp->rx_skbuff[entry] == NULL) {
2663 skb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
2664 if (skb == NULL) {
2665 static unsigned long last_jif;
2666 if (time_after(jiffies, last_jif + 10 * HZ)) {
2667 pr_warn("%s: memory shortage\n",
2668 dev->name);
2669 last_jif = jiffies;
2670 }
2671 if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)
2672 mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1));
2673 break; /* Bad news! */
2674 }
2675 2683
2676 vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); 2684clear_complete:
2677 vp->rx_skbuff[entry] = skb;
2678 }
2679 vp->rx_ring[entry].status = 0; /* Clear complete bit. */ 2685 vp->rx_ring[entry].status = 0; /* Clear complete bit. */
2680 iowrite16(UpUnstall, ioaddr + EL3_CMD); 2686 iowrite16(UpUnstall, ioaddr + EL3_CMD);
2687 entry = (++vp->cur_rx) % RX_RING_SIZE;
2681 } 2688 }
2682 return 0; 2689 return 0;
2683} 2690}
2684 2691
2685/*
2686 * If we've hit a total OOM refilling the Rx ring we poll once a second
2687 * for some memory. Otherwise there is no way to restart the rx process.
2688 */
2689static void
2690rx_oom_timer(struct timer_list *t)
2691{
2692 struct vortex_private *vp = from_timer(vp, t, rx_oom_timer);
2693 struct net_device *dev = vp->mii.dev;
2694
2695 spin_lock_irq(&vp->lock);
2696 if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */
2697 boomerang_rx(dev);
2698 if (vortex_debug > 1) {
2699 pr_debug("%s: rx_oom_timer %s\n", dev->name,
2700 ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying");
2701 }
2702 spin_unlock_irq(&vp->lock);
2703}
2704
2705static void 2692static void
2706vortex_down(struct net_device *dev, int final_down) 2693vortex_down(struct net_device *dev, int final_down)
2707{ 2694{
@@ -2711,7 +2698,6 @@ vortex_down(struct net_device *dev, int final_down)
2711 netdev_reset_queue(dev); 2698 netdev_reset_queue(dev);
2712 netif_stop_queue(dev); 2699 netif_stop_queue(dev);
2713 2700
2714 del_timer_sync(&vp->rx_oom_timer);
2715 del_timer_sync(&vp->timer); 2701 del_timer_sync(&vp->timer);
2716 2702
2717 /* Turn off statistics ASAP. We update dev->stats below. */ 2703 /* Turn off statistics ASAP. We update dev->stats below. */
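
A note on the 3c59x changes as a whole: every pci_map_single() result is now checked with dma_mapping_error() before the address is handed to the NIC, and on failure the packet is dropped (TX) or the buffer already in place is recycled (RX), so the hardware never DMAs to a bogus address. A minimal sketch of the TX-side pattern, using the same 4.15-era DMA API as the patch (variable names are illustrative):

	/* Sketch: map a TX buffer and bail out cleanly on failure. */
	dma_addr_t dma = pci_map_single(pdev, skb->data, len, PCI_DMA_TODEVICE);
	if (dma_mapping_error(&pdev->dev, dma)) {
		dev_kfree_skb_any(skb);		/* drop the packet ... */
		dev->stats.tx_dropped++;	/* ... and account for it */
		return NETDEV_TX_OK;		/* consumed; do not requeue */
	}

The recycle-on-failure RX scheme is also why rx_oom_timer can be deleted: each ring slot is refilled in place, the ring can never drain empty on allocation failure, and there is no OOM state left to poll out of.
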
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 97c5a89a9cf7..fbe21a817bd8 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -75,6 +75,9 @@ static struct workqueue_struct *ena_wq;
75MODULE_DEVICE_TABLE(pci, ena_pci_tbl); 75MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
76 76
77static int ena_rss_init_default(struct ena_adapter *adapter); 77static int ena_rss_init_default(struct ena_adapter *adapter);
78static void check_for_admin_com_state(struct ena_adapter *adapter);
79static void ena_destroy_device(struct ena_adapter *adapter);
80static int ena_restore_device(struct ena_adapter *adapter);
78 81
79static void ena_tx_timeout(struct net_device *dev) 82static void ena_tx_timeout(struct net_device *dev)
80{ 83{
@@ -1565,7 +1568,7 @@ static int ena_rss_configure(struct ena_adapter *adapter)
1565 1568
1566static int ena_up_complete(struct ena_adapter *adapter) 1569static int ena_up_complete(struct ena_adapter *adapter)
1567{ 1570{
1568 int rc, i; 1571 int rc;
1569 1572
1570 rc = ena_rss_configure(adapter); 1573 rc = ena_rss_configure(adapter);
1571 if (rc) 1574 if (rc)
@@ -1584,17 +1587,6 @@ static int ena_up_complete(struct ena_adapter *adapter)
1584 1587
1585 ena_napi_enable_all(adapter); 1588 ena_napi_enable_all(adapter);
1586 1589
1587 /* Enable completion queues interrupt */
1588 for (i = 0; i < adapter->num_queues; i++)
1589 ena_unmask_interrupt(&adapter->tx_ring[i],
1590 &adapter->rx_ring[i]);
1591
1592 /* schedule napi in case we had pending packets
1593 * from the last time we disable napi
1594 */
1595 for (i = 0; i < adapter->num_queues; i++)
1596 napi_schedule(&adapter->ena_napi[i].napi);
1597
1598 return 0; 1590 return 0;
1599} 1591}
1600 1592
@@ -1731,7 +1723,7 @@ create_err:
1731 1723
1732static int ena_up(struct ena_adapter *adapter) 1724static int ena_up(struct ena_adapter *adapter)
1733{ 1725{
1734 int rc; 1726 int rc, i;
1735 1727
1736 netdev_dbg(adapter->netdev, "%s\n", __func__); 1728 netdev_dbg(adapter->netdev, "%s\n", __func__);
1737 1729
@@ -1774,6 +1766,17 @@ static int ena_up(struct ena_adapter *adapter)
1774 1766
1775 set_bit(ENA_FLAG_DEV_UP, &adapter->flags); 1767 set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
1776 1768
1769 /* Enable completion queues interrupt */
1770 for (i = 0; i < adapter->num_queues; i++)
1771 ena_unmask_interrupt(&adapter->tx_ring[i],
1772 &adapter->rx_ring[i]);
1773
1774 /* schedule napi in case we had pending packets
1775 * from the last time we disable napi
1776 */
1777 for (i = 0; i < adapter->num_queues; i++)
1778 napi_schedule(&adapter->ena_napi[i].napi);
1779
1777 return rc; 1780 return rc;
1778 1781
1779err_up: 1782err_up:
@@ -1884,6 +1887,17 @@ static int ena_close(struct net_device *netdev)
1884 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 1887 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
1885 ena_down(adapter); 1888 ena_down(adapter);
1886 1889
1890 /* Check for device status and issue reset if needed */
1891 check_for_admin_com_state(adapter);
1892 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
1893 netif_err(adapter, ifdown, adapter->netdev,
1894 "Destroy failure, restarting device\n");
1895 ena_dump_stats_to_dmesg(adapter);
1896 /* rtnl lock already obtained in dev_ioctl() layer */
1897 ena_destroy_device(adapter);
1898 ena_restore_device(adapter);
1899 }
1900
1887 return 0; 1901 return 0;
1888} 1902}
1889 1903
@@ -2544,11 +2558,12 @@ static void ena_destroy_device(struct ena_adapter *adapter)
2544 2558
2545 ena_com_set_admin_running_state(ena_dev, false); 2559 ena_com_set_admin_running_state(ena_dev, false);
2546 2560
2547 ena_close(netdev); 2561 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2562 ena_down(adapter);
2548 2563
2549 /* Before releasing the ENA resources, a device reset is required. 2564 /* Before releasing the ENA resources, a device reset is required.
2550 * (to prevent the device from accessing them). 2565 * (to prevent the device from accessing them).
2551 * In case the reset flag is set and the device is up, ena_close 2566 * In case the reset flag is set and the device is up, ena_down()
2552 * already performs the reset, so it can be skipped. 2567 * already performs the reset, so it can be skipped.
2553 */ 2568 */
2554 if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) 2569 if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
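
For context on the ena_close() hunk above: a failed teardown now funnels through the existing destroy/restore helpers, whose forward declarations were added at the top of the file. Roughly, the recovery sequence the patch establishes is (a sketch of the call ordering, not a verbatim excerpt):

	check_for_admin_com_state(adapter);	/* may set ENA_FLAG_TRIGGER_RESET */
	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
		ena_destroy_device(adapter);	/* tear down under rtnl */
		ena_restore_device(adapter);	/* re-initialize from scratch */
	}

ena_destroy_device() itself now calls ena_down() directly instead of ena_close(), which avoids recursing back into the very path that invoked it.
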
diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h
index 3c63b16d485f..d9efbc8d783b 100644
--- a/drivers/net/ethernet/arc/emac.h
+++ b/drivers/net/ethernet/arc/emac.h
@@ -159,6 +159,8 @@ struct arc_emac_priv {
159 unsigned int link; 159 unsigned int link;
160 unsigned int duplex; 160 unsigned int duplex;
161 unsigned int speed; 161 unsigned int speed;
162
163 unsigned int rx_missed_errors;
162}; 164};
163 165
164/** 166/**
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 3241af1ce718..bd277b0dc615 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -26,6 +26,8 @@
26 26
27#include "emac.h" 27#include "emac.h"
28 28
29static void arc_emac_restart(struct net_device *ndev);
30
29/** 31/**
30 * arc_emac_tx_avail - Return the number of available slots in the tx ring. 32 * arc_emac_tx_avail - Return the number of available slots in the tx ring.
31 * @priv: Pointer to ARC EMAC private data structure. 33 * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
210 continue; 212 continue;
211 } 213 }
212 214
213 pktlen = info & LEN_MASK; 215 /* Prepare the BD for the next cycle. Call netif_receive_skb()
214 stats->rx_packets++; 216 * only if a new skb was allocated and mapped, to avoid holes
215 stats->rx_bytes += pktlen; 217 * in the RX FIFO.
216 skb = rx_buff->skb; 218 */
217 skb_put(skb, pktlen); 219 skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
218 skb->dev = ndev; 220 if (unlikely(!skb)) {
219 skb->protocol = eth_type_trans(skb, ndev); 221 if (net_ratelimit())
220 222 netdev_err(ndev, "cannot allocate skb\n");
221 dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), 223 /* Return ownership to EMAC */
222 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); 224 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
223
224 /* Prepare the BD for next cycle */
225 rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
226 EMAC_BUFFER_SIZE);
227 if (unlikely(!rx_buff->skb)) {
228 stats->rx_errors++; 225 stats->rx_errors++;
229 /* Because receive_skb is below, increment rx_dropped */
230 stats->rx_dropped++; 226 stats->rx_dropped++;
231 continue; 227 continue;
232 } 228 }
233 229
234 /* receive_skb only if new skb was allocated to avoid holes */ 230 addr = dma_map_single(&ndev->dev, (void *)skb->data,
235 netif_receive_skb(skb);
236
237 addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
238 EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); 231 EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
239 if (dma_mapping_error(&ndev->dev, addr)) { 232 if (dma_mapping_error(&ndev->dev, addr)) {
240 if (net_ratelimit()) 233 if (net_ratelimit())
241 netdev_err(ndev, "cannot dma map\n"); 234 netdev_err(ndev, "cannot map dma buffer\n");
242 dev_kfree_skb(rx_buff->skb); 235 dev_kfree_skb(skb);
236 /* Return ownership to EMAC */
237 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
243 stats->rx_errors++; 238 stats->rx_errors++;
239 stats->rx_dropped++;
244 continue; 240 continue;
245 } 241 }
242
243 /* unmap the previously mapped skb */
244 dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
245 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
246
247 pktlen = info & LEN_MASK;
248 stats->rx_packets++;
249 stats->rx_bytes += pktlen;
250 skb_put(rx_buff->skb, pktlen);
251 rx_buff->skb->dev = ndev;
252 rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
253
254 netif_receive_skb(rx_buff->skb);
255
256 rx_buff->skb = skb;
246 dma_unmap_addr_set(rx_buff, addr, addr); 257 dma_unmap_addr_set(rx_buff, addr, addr);
247 dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE); 258 dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
248 259
@@ -259,6 +270,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
259} 270}
260 271
261/** 272/**
273 * arc_emac_rx_miss_handle - handle R_MISS register
274 * @ndev: Pointer to the net_device structure.
275 */
276static void arc_emac_rx_miss_handle(struct net_device *ndev)
277{
278 struct arc_emac_priv *priv = netdev_priv(ndev);
279 struct net_device_stats *stats = &ndev->stats;
280 unsigned int miss;
281
282 miss = arc_reg_get(priv, R_MISS);
283 if (miss) {
284 stats->rx_errors += miss;
285 stats->rx_missed_errors += miss;
286 priv->rx_missed_errors += miss;
287 }
288}
289
290/**
291 * arc_emac_rx_stall_check - check RX stall
292 * @ndev: Pointer to the net_device structure.
293 * @budget: How many BDs requested to process on 1 call.
294 * @work_done: How many BDs processed
295 *
296 * Under certain conditions the EMAC stops reception of incoming packets and
297 * continuously increments the R_MISS register instead of saving data into
298 * the provided buffer. This function detects that condition and restarts
299 * the EMAC.
300 */
301static void arc_emac_rx_stall_check(struct net_device *ndev,
302 int budget, unsigned int work_done)
303{
304 struct arc_emac_priv *priv = netdev_priv(ndev);
305 struct arc_emac_bd *rxbd;
306
307 if (work_done)
308 priv->rx_missed_errors = 0;
309
310 if (priv->rx_missed_errors && budget) {
311 rxbd = &priv->rxbd[priv->last_rx_bd];
312 if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
313 arc_emac_restart(ndev);
314 priv->rx_missed_errors = 0;
315 }
316 }
317}
318
319/**
262 * arc_emac_poll - NAPI poll handler. 320 * arc_emac_poll - NAPI poll handler.
263 * @napi: Pointer to napi_struct structure. 321 * @napi: Pointer to napi_struct structure.
264 * @budget: How many BDs to process on 1 call. 322 * @budget: How many BDs to process on 1 call.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
272 unsigned int work_done; 330 unsigned int work_done;
273 331
274 arc_emac_tx_clean(ndev); 332 arc_emac_tx_clean(ndev);
333 arc_emac_rx_miss_handle(ndev);
275 334
276 work_done = arc_emac_rx(ndev, budget); 335 work_done = arc_emac_rx(ndev, budget);
277 if (work_done < budget) { 336 if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
279 arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK); 338 arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
280 } 339 }
281 340
341 arc_emac_rx_stall_check(ndev, budget, work_done);
342
282 return work_done; 343 return work_done;
283} 344}
284 345
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
320 if (status & MSER_MASK) { 381 if (status & MSER_MASK) {
321 stats->rx_missed_errors += 0x100; 382 stats->rx_missed_errors += 0x100;
322 stats->rx_errors += 0x100; 383 stats->rx_errors += 0x100;
384 priv->rx_missed_errors += 0x100;
385 napi_schedule(&priv->napi);
323 } 386 }
324 387
325 if (status & RXCR_MASK) { 388 if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
732} 795}
733 796
734 797
798/**
799 * arc_emac_restart - Restart EMAC
800 * @ndev: Pointer to net_device structure.
801 *
802 * This function does a hardware reset of the EMAC in order to restore
803 * reception of network packets.
804 */
805static void arc_emac_restart(struct net_device *ndev)
806{
807 struct arc_emac_priv *priv = netdev_priv(ndev);
808 struct net_device_stats *stats = &ndev->stats;
809 int i;
810
811 if (net_ratelimit())
812 netdev_warn(ndev, "restarting stalled EMAC\n");
813
814 netif_stop_queue(ndev);
815
816 /* Disable interrupts */
817 arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
818
819 /* Disable EMAC */
820 arc_reg_clr(priv, R_CTRL, EN_MASK);
821
822 /* Return the sk_buff to system */
823 arc_free_tx_queue(ndev);
824
825 /* Clean Tx BD's */
826 priv->txbd_curr = 0;
827 priv->txbd_dirty = 0;
828 memset(priv->txbd, 0, TX_RING_SZ);
829
830 for (i = 0; i < RX_BD_NUM; i++) {
831 struct arc_emac_bd *rxbd = &priv->rxbd[i];
832 unsigned int info = le32_to_cpu(rxbd->info);
833
834 if (!(info & FOR_EMAC)) {
835 stats->rx_errors++;
836 stats->rx_dropped++;
837 }
838 /* Return ownership to EMAC */
839 rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
840 }
841 priv->last_rx_bd = 0;
842
843 /* Make sure info is visible to EMAC before enable */
844 wmb();
845
846 /* Enable interrupts */
847 arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
848
849 /* Enable EMAC */
850 arc_reg_or(priv, R_CTRL, EN_MASK);
851
852 netif_start_queue(ndev);
853}
854
735static const struct net_device_ops arc_emac_netdev_ops = { 855static const struct net_device_ops arc_emac_netdev_ops = {
736 .ndo_open = arc_emac_open, 856 .ndo_open = arc_emac_open,
737 .ndo_stop = arc_emac_stop, 857 .ndo_stop = arc_emac_stop,
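
The reworked arc_emac_rx() enforces an allocate-first discipline: a received buffer is only passed to the stack once its replacement has been allocated and DMA-mapped; on any failure the old buffer is handed back to the EMAC so the RX ring never develops a hole. A condensed sketch of that ordering (error accounting trimmed, names as in the patch, the recycle label is hypothetical):

	skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
	if (!skb)
		goto recycle;		/* keep the buffer already in place */
	addr = dma_map_single(&ndev->dev, skb->data,
			      EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
	if (dma_mapping_error(&ndev->dev, addr)) {
		dev_kfree_skb(skb);
		goto recycle;		/* ditto: return the BD to the EMAC */
	}
	netif_receive_skb(rx_buff->skb);	/* old buffer goes up the stack */
	rx_buff->skb = skb;			/* new buffer takes its slot */

where "recycle" would set rxbd->info back to FOR_EMAC | EMAC_BUFFER_SIZE, exactly as the two error branches in the hunk do.
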
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 4c739d5355d2..8ae269ec17a1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3030,7 +3030,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
3030 3030
3031 del_timer_sync(&bp->timer); 3031 del_timer_sync(&bp->timer);
3032 3032
3033 if (IS_PF(bp)) { 3033 if (IS_PF(bp) && !BP_NOMCP(bp)) {
3034 /* Set ALWAYS_ALIVE bit in shmem */ 3034 /* Set ALWAYS_ALIVE bit in shmem */
3035 bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; 3035 bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
3036 bnx2x_drv_pulse(bp); 3036 bnx2x_drv_pulse(bp);
@@ -3116,7 +3116,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
3116 bp->cnic_loaded = false; 3116 bp->cnic_loaded = false;
3117 3117
3118 /* Clear driver version indication in shmem */ 3118 /* Clear driver version indication in shmem */
3119 if (IS_PF(bp)) 3119 if (IS_PF(bp) && !BP_NOMCP(bp))
3120 bnx2x_update_mng_version(bp); 3120 bnx2x_update_mng_version(bp);
3121 3121
3122 /* Check if there are pending parity attentions. If there are - set 3122 /* Check if there are pending parity attentions. If there are - set
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 91e2a7560b48..ddd5d3ebd201 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9578,6 +9578,15 @@ static int bnx2x_init_shmem(struct bnx2x *bp)
9578 9578
9579 do { 9579 do {
9580 bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR); 9580 bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR);
9581
9582 /* If we read all 0xFFs, it means we are in PCI error state and
9583 * should bail out to avoid crashes on the adapter's FW reads.
9584 */
9585 if (bp->common.shmem_base == 0xFFFFFFFF) {
9586 bp->flags |= NO_MCP_FLAG;
9587 return -ENODEV;
9588 }
9589
9581 if (bp->common.shmem_base) { 9590 if (bp->common.shmem_base) {
9582 val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]); 9591 val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]);
9583 if (val & SHR_MEM_VALIDITY_MB) 9592 if (val & SHR_MEM_VALIDITY_MB)
@@ -14320,7 +14329,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
14320 BNX2X_ERR("IO slot reset --> driver unload\n"); 14329 BNX2X_ERR("IO slot reset --> driver unload\n");
14321 14330
14322 /* MCP should have been reset; Need to wait for validity */ 14331 /* MCP should have been reset; Need to wait for validity */
14323 bnx2x_init_shmem(bp); 14332 if (bnx2x_init_shmem(bp)) {
14333 rtnl_unlock();
14334 return PCI_ERS_RESULT_DISCONNECT;
14335 }
14324 14336
14325 if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { 14337 if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
14326 u32 v; 14338 u32 v;
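
The bnx2x_init_shmem() hunk relies on a standard PCI idiom: a memory-mapped read that returns all 0xFFs almost always means the device has dropped off the bus, since failed PCI reads complete as all-ones on most platforms. The probe reduces to:

	/* Sketch: detect a dead PCI device by an all-ones register read. */
	bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR);
	if (bp->common.shmem_base == 0xFFFFFFFF) {
		bp->flags |= NO_MCP_FLAG;	/* stop talking to the MCP/FW */
		return -ENODEV;
	}

and the bnx2x_io_slot_reset() hunk then propagates that -ENODEV as PCI_ERS_RESULT_DISCONNECT instead of ploughing on with firmware reads that would fault.
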
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 5ee18660bc33..c9617675f934 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -70,7 +70,7 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
70 netdev_err(bp->dev, "vf ndo called though sriov is disabled\n"); 70 netdev_err(bp->dev, "vf ndo called though sriov is disabled\n");
71 return -EINVAL; 71 return -EINVAL;
72 } 72 }
73 if (vf_id >= bp->pf.max_vfs) { 73 if (vf_id >= bp->pf.active_vfs) {
74 netdev_err(bp->dev, "Invalid VF id %d\n", vf_id); 74 netdev_err(bp->dev, "Invalid VF id %d\n", vf_id);
75 return -EINVAL; 75 return -EINVAL;
76 } 76 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 3d201d7324bd..d8fee26cd45e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -421,7 +421,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
421 } 421 }
422 422
423 /* If all IP and L4 fields are wildcarded then this is an L2 flow */ 423 /* If all IP and L4 fields are wildcarded then this is an L2 flow */
424 if (is_wildcard(&l3_mask, sizeof(l3_mask)) && 424 if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
425 is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) { 425 is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
426 flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2; 426 flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
427 } else { 427 } else {
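
The bnxt_tc.c one-liner fixes a classic C pitfall: l3_mask is itself a pointer, so is_wildcard(&l3_mask, sizeof(l3_mask)) scanned the bytes of the pointer variable rather than the mask it points to, and a fully wildcarded L3 mask was never detected. A self-contained demonstration of the bug class (standalone C, not driver code):

#include <stdio.h>
#include <string.h>

/* Returns 1 if the first len bytes at p are all zero. */
static int is_wildcard(const void *p, size_t len)
{
	const unsigned char *b = p;

	while (len--)
		if (*b++)
			return 0;
	return 1;
}

int main(void)
{
	unsigned char mask[16];
	unsigned char *l3_mask = mask;

	memset(mask, 0, sizeof(mask));	/* a true wildcard mask */

	/* Buggy: inspects the (non-zero) pointer variable, prints 0. */
	printf("buggy: %d\n", is_wildcard(&l3_mask, sizeof(l3_mask)));
	/* Fixed: inspects the mask bytes themselves, prints 1. */
	printf("fixed: %d\n", is_wildcard(l3_mask, sizeof(mask)));
	return 0;
}

In the driver, l3_mask points to a struct, so the fixed call uses sizeof(*l3_mask) for the struct size; sizeof(mask) plays that role here.
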
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index de51c2177d03..8995cfefbfcf 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -4,11 +4,13 @@
4 * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) 4 * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
5 * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com) 5 * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com)
6 * Copyright (C) 2004 Sun Microsystems Inc. 6 * Copyright (C) 2004 Sun Microsystems Inc.
7 * Copyright (C) 2005-2014 Broadcom Corporation. 7 * Copyright (C) 2005-2016 Broadcom Corporation.
8 * Copyright (C) 2016-2017 Broadcom Limited.
8 * 9 *
9 * Firmware is: 10 * Firmware is:
10 * Derived from proprietary unpublished source code, 11 * Derived from proprietary unpublished source code,
11 * Copyright (C) 2000-2003 Broadcom Corporation. 12 * Copyright (C) 2000-2016 Broadcom Corporation.
13 * Copyright (C) 2016-2017 Broadcom Ltd.
12 * 14 *
13 * Permission is hereby granted for the distribution of this firmware 15 * Permission is hereby granted for the distribution of this firmware
14 * data in hexadecimal or equivalent format, provided this copyright 16 * data in hexadecimal or equivalent format, provided this copyright
@@ -10052,6 +10054,16 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
10052 10054
10053 tw32(GRC_MODE, tp->grc_mode | val); 10055 tw32(GRC_MODE, tp->grc_mode | val);
10054 10056
10057 /* On one of the AMD platforms, MRRS is restricted to 4000 because of
10058 * a south bridge limitation. As a workaround, the driver sets MRRS
10059 * to 2048 instead of the default 4096.
10060 */
10061 if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
10062 tp->pdev->subsystem_device == TG3PCI_SUBDEVICE_ID_DELL_5762) {
10063 val = tr32(TG3PCI_DEV_STATUS_CTRL) & ~MAX_READ_REQ_MASK;
10064 tw32(TG3PCI_DEV_STATUS_CTRL, val | MAX_READ_REQ_SIZE_2048);
10065 }
10066
10055 /* Setup the timer prescalar register. Clock is always 66Mhz. */ 10067 /* Setup the timer prescalar register. Clock is always 66Mhz. */
10056 val = tr32(GRC_MISC_CFG); 10068 val = tr32(GRC_MISC_CFG);
10057 val &= ~0xff; 10069 val &= ~0xff;
@@ -14225,7 +14237,10 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
14225 /* Reset PHY, otherwise the read DMA engine will be in a mode that 14237 /* Reset PHY, otherwise the read DMA engine will be in a mode that
14226 * breaks all requests to 256 bytes. 14238 * breaks all requests to 256 bytes.
14227 */ 14239 */
14228 if (tg3_asic_rev(tp) == ASIC_REV_57766) 14240 if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
14241 tg3_asic_rev(tp) == ASIC_REV_5717 ||
14242 tg3_asic_rev(tp) == ASIC_REV_5719 ||
14243 tg3_asic_rev(tp) == ASIC_REV_5720)
14229 reset_phy = true; 14244 reset_phy = true;
14230 14245
14231 err = tg3_restart_hw(tp, reset_phy); 14246 err = tg3_restart_hw(tp, reset_phy);
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index c2d02d02d1e6..1f0271fa7c74 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -5,7 +5,8 @@
5 * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) 5 * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
6 * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com) 6 * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
7 * Copyright (C) 2004 Sun Microsystems Inc. 7 * Copyright (C) 2004 Sun Microsystems Inc.
8 * Copyright (C) 2007-2014 Broadcom Corporation. 8 * Copyright (C) 2007-2016 Broadcom Corporation.
9 * Copyright (C) 2016-2017 Broadcom Limited.
9 */ 10 */
10 11
11#ifndef _T3_H 12#ifndef _T3_H
@@ -96,6 +97,7 @@
96#define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR 0x0106 97#define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR 0x0106
97#define TG3PCI_SUBDEVICE_ID_DELL_MERLOT 0x0109 98#define TG3PCI_SUBDEVICE_ID_DELL_MERLOT 0x0109
98#define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT 0x010a 99#define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT 0x010a
100#define TG3PCI_SUBDEVICE_ID_DELL_5762 0x07f0
99#define TG3PCI_SUBVENDOR_ID_COMPAQ PCI_VENDOR_ID_COMPAQ 101#define TG3PCI_SUBVENDOR_ID_COMPAQ PCI_VENDOR_ID_COMPAQ
100#define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE 0x007c 102#define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE 0x007c
101#define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2 0x009a 103#define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2 0x009a
@@ -281,6 +283,9 @@
281#define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */ 283#define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */
282#define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */ 284#define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */
283/* 0xa8 --> 0xb8 unused */ 285/* 0xa8 --> 0xb8 unused */
286#define TG3PCI_DEV_STATUS_CTRL 0x000000b4
287#define MAX_READ_REQ_SIZE_2048 0x00004000
288#define MAX_READ_REQ_MASK 0x00007000
284#define TG3PCI_DUAL_MAC_CTRL 0x000000b8 289#define TG3PCI_DUAL_MAC_CTRL 0x000000b8
285#define DUAL_MAC_CTRL_CH_MASK 0x00000003 290#define DUAL_MAC_CTRL_CH_MASK 0x00000003
286#define DUAL_MAC_CTRL_ID 0x00000004 291#define DUAL_MAC_CTRL_ID 0x00000004
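
The new TG3PCI_DEV_STATUS_CTRL definitions back the MRRS workaround in the tg3_reset_hw() hunk earlier: a read-modify-write that clears the 3-bit max-read-request field and ORs in the 2048-byte encoding. The same arithmetic in standalone form (the initial register value is a made-up example):

#include <stdio.h>
#include <stdint.h>

#define MAX_READ_REQ_SIZE_2048	0x00004000
#define MAX_READ_REQ_MASK	0x00007000

int main(void)
{
	uint32_t reg = 0x00005123;	/* example: MRRS field currently 101b */

	/* Clear the MRRS field, then set the 2048-byte encoding. */
	reg = (reg & ~MAX_READ_REQ_MASK) | MAX_READ_REQ_SIZE_2048;

	printf("reg = 0x%08x\n", reg);	/* 0x00004123: only MRRS changed */
	return 0;
}
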
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 6f9fa6e3c42a..d8424ed16c33 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -344,7 +344,6 @@ struct adapter_params {
344 344
345 unsigned int sf_size; /* serial flash size in bytes */ 345 unsigned int sf_size; /* serial flash size in bytes */
346 unsigned int sf_nsec; /* # of flash sectors */ 346 unsigned int sf_nsec; /* # of flash sectors */
347 unsigned int sf_fw_start; /* start of FW image in flash */
348 347
349 unsigned int fw_vers; /* firmware version */ 348 unsigned int fw_vers; /* firmware version */
350 unsigned int bs_vers; /* bootstrap version */ 349 unsigned int bs_vers; /* bootstrap version */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index f63210f15579..375ef86a84da 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -2844,8 +2844,6 @@ enum {
2844 SF_RD_DATA_FAST = 0xb, /* read flash */ 2844 SF_RD_DATA_FAST = 0xb, /* read flash */
2845 SF_RD_ID = 0x9f, /* read ID */ 2845 SF_RD_ID = 0x9f, /* read ID */
2846 SF_ERASE_SECTOR = 0xd8, /* erase sector */ 2846 SF_ERASE_SECTOR = 0xd8, /* erase sector */
2847
2848 FW_MAX_SIZE = 16 * SF_SEC_SIZE,
2849}; 2847};
2850 2848
2851/** 2849/**
@@ -3558,8 +3556,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
3558 const __be32 *p = (const __be32 *)fw_data; 3556 const __be32 *p = (const __be32 *)fw_data;
3559 const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data; 3557 const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data;
3560 unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; 3558 unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
3561 unsigned int fw_img_start = adap->params.sf_fw_start; 3559 unsigned int fw_start_sec = FLASH_FW_START_SEC;
3562 unsigned int fw_start_sec = fw_img_start / sf_sec_size; 3560 unsigned int fw_size = FLASH_FW_MAX_SIZE;
3561 unsigned int fw_start = FLASH_FW_START;
3563 3562
3564 if (!size) { 3563 if (!size) {
3565 dev_err(adap->pdev_dev, "FW image has no data\n"); 3564 dev_err(adap->pdev_dev, "FW image has no data\n");
@@ -3575,9 +3574,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
3575 "FW image size differs from size in FW header\n"); 3574 "FW image size differs from size in FW header\n");
3576 return -EINVAL; 3575 return -EINVAL;
3577 } 3576 }
3578 if (size > FW_MAX_SIZE) { 3577 if (size > fw_size) {
3579 dev_err(adap->pdev_dev, "FW image too large, max is %u bytes\n", 3578 dev_err(adap->pdev_dev, "FW image too large, max is %u bytes\n",
3580 FW_MAX_SIZE); 3579 fw_size);
3581 return -EFBIG; 3580 return -EFBIG;
3582 } 3581 }
3583 if (!t4_fw_matches_chip(adap, hdr)) 3582 if (!t4_fw_matches_chip(adap, hdr))
@@ -3604,11 +3603,11 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
3604 */ 3603 */
3605 memcpy(first_page, fw_data, SF_PAGE_SIZE); 3604 memcpy(first_page, fw_data, SF_PAGE_SIZE);
3606 ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); 3605 ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
3607 ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page); 3606 ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
3608 if (ret) 3607 if (ret)
3609 goto out; 3608 goto out;
3610 3609
3611 addr = fw_img_start; 3610 addr = fw_start;
3612 for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { 3611 for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
3613 addr += SF_PAGE_SIZE; 3612 addr += SF_PAGE_SIZE;
3614 fw_data += SF_PAGE_SIZE; 3613 fw_data += SF_PAGE_SIZE;
@@ -3618,7 +3617,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
3618 } 3617 }
3619 3618
3620 ret = t4_write_flash(adap, 3619 ret = t4_write_flash(adap,
3621 fw_img_start + offsetof(struct fw_hdr, fw_ver), 3620 fw_start + offsetof(struct fw_hdr, fw_ver),
3622 sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); 3621 sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
3623out: 3622out:
3624 if (ret) 3623 if (ret)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 610573855213..a74300a4459c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev)
818 for (i = 0; i < txq->bd.ring_size; i++) { 818 for (i = 0; i < txq->bd.ring_size; i++) {
819 /* Initialize the BD for every fragment in the page. */ 819 /* Initialize the BD for every fragment in the page. */
820 bdp->cbd_sc = cpu_to_fec16(0); 820 bdp->cbd_sc = cpu_to_fec16(0);
821 if (bdp->cbd_bufaddr &&
822 !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
823 dma_unmap_single(&fep->pdev->dev,
824 fec32_to_cpu(bdp->cbd_bufaddr),
825 fec16_to_cpu(bdp->cbd_datlen),
826 DMA_TO_DEVICE);
821 if (txq->tx_skbuff[i]) { 827 if (txq->tx_skbuff[i]) {
822 dev_kfree_skb_any(txq->tx_skbuff[i]); 828 dev_kfree_skb_any(txq->tx_skbuff[i]);
823 txq->tx_skbuff[i] = NULL; 829 txq->tx_skbuff[i] = NULL;
@@ -3463,6 +3469,10 @@ fec_probe(struct platform_device *pdev)
3463 goto failed_regulator; 3469 goto failed_regulator;
3464 } 3470 }
3465 } else { 3471 } else {
3472 if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
3473 ret = -EPROBE_DEFER;
3474 goto failed_regulator;
3475 }
3466 fep->reg_phy = NULL; 3476 fep->reg_phy = NULL;
3467 } 3477 }
3468 3478
@@ -3546,8 +3556,9 @@ failed_clk_ipg:
3546failed_clk: 3556failed_clk:
3547 if (of_phy_is_fixed_link(np)) 3557 if (of_phy_is_fixed_link(np))
3548 of_phy_deregister_fixed_link(np); 3558 of_phy_deregister_fixed_link(np);
3549failed_phy:
3550 of_node_put(phy_node); 3559 of_node_put(phy_node);
3560failed_phy:
3561 dev_id--;
3551failed_ioremap: 3562failed_ioremap:
3552 free_netdev(ndev); 3563 free_netdev(ndev);
3553 3564
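
The fec_probe() hunk distinguishes "no PHY regulator described" from "regulator described but its driver not bound yet": -EPROBE_DEFER must be propagated so the driver core retries the probe later, rather than silently running without the supply. A hedged sketch of the pattern (assuming a devm_regulator_get_optional()-style call returning an ERR_PTR, as the surrounding code implies):

	fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy");
	if (IS_ERR(fep->reg_phy)) {
		if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
			ret = -EPROBE_DEFER;	/* probe will be retried */
			goto failed_regulator;
		}
		fep->reg_phy = NULL;		/* genuinely optional */
	}

The label reshuffle below it fixes the unwind order so the phy_node reference is dropped on every error path, and the new dev_id-- undoes the device-ID allocation.
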
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 544114281ea7..9f8d4f8e57e3 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -319,11 +319,10 @@ static int ptp_gianfar_adjtime(struct ptp_clock_info *ptp, s64 delta)
319 now = tmr_cnt_read(etsects); 319 now = tmr_cnt_read(etsects);
320 now += delta; 320 now += delta;
321 tmr_cnt_write(etsects, now); 321 tmr_cnt_write(etsects, now);
322 set_fipers(etsects);
322 323
323 spin_unlock_irqrestore(&etsects->lock, flags); 324 spin_unlock_irqrestore(&etsects->lock, flags);
324 325
325 set_fipers(etsects);
326
327 return 0; 326 return 0;
328} 327}
329 328
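
The gianfar_ptp change moves set_fipers() back inside the etsects->lock critical section, so the timer-counter write and the FIPER re-arm are atomic with respect to other clock operations; done outside the lock, a concurrent adjustment could slip in between and the periodic pulse registers would be programmed against a stale counter. The resulting locking shape:

	/* Sketch: counter update and FIPER reprogramming as one
	 * critical section.
	 */
	spin_lock_irqsave(&etsects->lock, flags);
	now = tmr_cnt_read(etsects);
	tmr_cnt_write(etsects, now + delta);
	set_fipers(etsects);	/* re-arm against the new counter value */
	spin_unlock_irqrestore(&etsects->lock, flags);
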
diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h
index d7bdea79e9fa..8fd2458060a0 100644
--- a/drivers/net/ethernet/intel/e1000/e1000.h
+++ b/drivers/net/ethernet/intel/e1000/e1000.h
@@ -331,7 +331,8 @@ struct e1000_adapter {
331enum e1000_state_t { 331enum e1000_state_t {
332 __E1000_TESTING, 332 __E1000_TESTING,
333 __E1000_RESETTING, 333 __E1000_RESETTING,
334 __E1000_DOWN 334 __E1000_DOWN,
335 __E1000_DISABLED
335}; 336};
336 337
337#undef pr_fmt 338#undef pr_fmt
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 1982f7917a8d..3dd4aeb2706d 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -945,7 +945,7 @@ static int e1000_init_hw_struct(struct e1000_adapter *adapter,
945static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 945static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
946{ 946{
947 struct net_device *netdev; 947 struct net_device *netdev;
948 struct e1000_adapter *adapter; 948 struct e1000_adapter *adapter = NULL;
949 struct e1000_hw *hw; 949 struct e1000_hw *hw;
950 950
951 static int cards_found; 951 static int cards_found;
@@ -955,6 +955,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
955 u16 tmp = 0; 955 u16 tmp = 0;
956 u16 eeprom_apme_mask = E1000_EEPROM_APME; 956 u16 eeprom_apme_mask = E1000_EEPROM_APME;
957 int bars, need_ioport; 957 int bars, need_ioport;
958 bool disable_dev = false;
958 959
959 /* do not allocate ioport bars when not needed */ 960 /* do not allocate ioport bars when not needed */
960 need_ioport = e1000_is_need_ioport(pdev); 961 need_ioport = e1000_is_need_ioport(pdev);
@@ -1259,11 +1260,13 @@ err_mdio_ioremap:
1259 iounmap(hw->ce4100_gbe_mdio_base_virt); 1260 iounmap(hw->ce4100_gbe_mdio_base_virt);
1260 iounmap(hw->hw_addr); 1261 iounmap(hw->hw_addr);
1261err_ioremap: 1262err_ioremap:
1263 disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
1262 free_netdev(netdev); 1264 free_netdev(netdev);
1263err_alloc_etherdev: 1265err_alloc_etherdev:
1264 pci_release_selected_regions(pdev, bars); 1266 pci_release_selected_regions(pdev, bars);
1265err_pci_reg: 1267err_pci_reg:
1266 pci_disable_device(pdev); 1268 if (!adapter || disable_dev)
1269 pci_disable_device(pdev);
1267 return err; 1270 return err;
1268} 1271}
1269 1272
@@ -1281,6 +1284,7 @@ static void e1000_remove(struct pci_dev *pdev)
1281 struct net_device *netdev = pci_get_drvdata(pdev); 1284 struct net_device *netdev = pci_get_drvdata(pdev);
1282 struct e1000_adapter *adapter = netdev_priv(netdev); 1285 struct e1000_adapter *adapter = netdev_priv(netdev);
1283 struct e1000_hw *hw = &adapter->hw; 1286 struct e1000_hw *hw = &adapter->hw;
1287 bool disable_dev;
1284 1288
1285 e1000_down_and_stop(adapter); 1289 e1000_down_and_stop(adapter);
1286 e1000_release_manageability(adapter); 1290 e1000_release_manageability(adapter);
@@ -1299,9 +1303,11 @@ static void e1000_remove(struct pci_dev *pdev)
1299 iounmap(hw->flash_address); 1303 iounmap(hw->flash_address);
1300 pci_release_selected_regions(pdev, adapter->bars); 1304 pci_release_selected_regions(pdev, adapter->bars);
1301 1305
1306 disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
1302 free_netdev(netdev); 1307 free_netdev(netdev);
1303 1308
1304 pci_disable_device(pdev); 1309 if (disable_dev)
1310 pci_disable_device(pdev);
1305} 1311}
1306 1312
1307/** 1313/**
@@ -5156,7 +5162,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake)
5156 if (netif_running(netdev)) 5162 if (netif_running(netdev))
5157 e1000_free_irq(adapter); 5163 e1000_free_irq(adapter);
5158 5164
5159 pci_disable_device(pdev); 5165 if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
5166 pci_disable_device(pdev);
5160 5167
5161 return 0; 5168 return 0;
5162} 5169}
@@ -5200,6 +5207,10 @@ static int e1000_resume(struct pci_dev *pdev)
5200 pr_err("Cannot enable PCI device from suspend\n"); 5207 pr_err("Cannot enable PCI device from suspend\n");
5201 return err; 5208 return err;
5202 } 5209 }
5210
5211 /* flush memory to make sure state is correct */
5212 smp_mb__before_atomic();
5213 clear_bit(__E1000_DISABLED, &adapter->flags);
5203 pci_set_master(pdev); 5214 pci_set_master(pdev);
5204 5215
5205 pci_enable_wake(pdev, PCI_D3hot, 0); 5216 pci_enable_wake(pdev, PCI_D3hot, 0);
@@ -5274,7 +5285,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
5274 5285
5275 if (netif_running(netdev)) 5286 if (netif_running(netdev))
5276 e1000_down(adapter); 5287 e1000_down(adapter);
5277 pci_disable_device(pdev); 5288
5289 if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
5290 pci_disable_device(pdev);
5278 5291
5279 /* Request a slot reset. */ 5292 /* Request a slot reset. */
5280 return PCI_ERS_RESULT_NEED_RESET; 5293 return PCI_ERS_RESULT_NEED_RESET;
@@ -5302,6 +5315,10 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
5302 pr_err("Cannot re-enable PCI device after reset.\n"); 5315 pr_err("Cannot re-enable PCI device after reset.\n");
5303 return PCI_ERS_RESULT_DISCONNECT; 5316 return PCI_ERS_RESULT_DISCONNECT;
5304 } 5317 }
5318
5319 /* flush memory to make sure state is correct */
5320 smp_mb__before_atomic();
5321 clear_bit(__E1000_DISABLED, &adapter->flags);
5305 pci_set_master(pdev); 5322 pci_set_master(pdev);
5306 5323
5307 pci_enable_wake(pdev, PCI_D3hot, 0); 5324 pci_enable_wake(pdev, PCI_D3hot, 0);
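
All the e1000 hunks protect one invariant: pci_disable_device() must run at most once per enable, and remove, shutdown, and the AER error handler can all race toward it. test_and_set_bit(__E1000_DISABLED, ...) makes the disable idempotent. The shape of the pattern, reduced to portable C11 atomics so it compiles and runs on its own (the kernel uses test_and_set_bit/clear_bit on adapter->flags instead):

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag disabled = ATOMIC_FLAG_INIT;

/* Stand-in for pci_disable_device(): must not run twice. */
static void disable_device(const char *who)
{
	if (!atomic_flag_test_and_set(&disabled))
		printf("%s: disabling device\n", who);
	else
		printf("%s: already disabled, skipping\n", who);
}

int main(void)
{
	disable_device("shutdown path");	/* performs the disable */
	disable_device("error handler");	/* sees the flag set, skips */
	return 0;
}

The resume paths clear the bit again (after an smp_mb__before_atomic() barrier) once pci_enable_device() has succeeded, re-arming the guard for the next cycle.
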
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index d6d4ed7acf03..31277d3bb7dc 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1367,6 +1367,9 @@ out:
1367 * Checks to see if the link status of the hardware has changed. If a 1367 * Checks to see if the link status of the hardware has changed. If a
1368 * change in link status has been detected, then we read the PHY registers 1368 * change in link status has been detected, then we read the PHY registers
1369 * to get the current speed/duplex if link exists. 1369 * to get the current speed/duplex if link exists.
1370 *
1371 * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
1372 * up).
1370 **/ 1373 **/
1371static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) 1374static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
1372{ 1375{
@@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
1382 * Change or Rx Sequence Error interrupt. 1385 * Change or Rx Sequence Error interrupt.
1383 */ 1386 */
1384 if (!mac->get_link_status) 1387 if (!mac->get_link_status)
1385 return 0; 1388 return 1;
1386 1389
1387 /* First we want to see if the MII Status Register reports 1390 /* First we want to see if the MII Status Register reports
1388 * link. If so, then we want to get the current speed/duplex 1391 * link. If so, then we want to get the current speed/duplex
@@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
1613 * different link partner. 1616 * different link partner.
1614 */ 1617 */
1615 ret_val = e1000e_config_fc_after_link_up(hw); 1618 ret_val = e1000e_config_fc_after_link_up(hw);
1616 if (ret_val) 1619 if (ret_val) {
1617 e_dbg("Error configuring flow control\n"); 1620 e_dbg("Error configuring flow control\n");
1621 return ret_val;
1622 }
1618 1623
1619 return ret_val; 1624 return 1;
1620} 1625}
1621 1626
1622static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) 1627static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
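
The ich8lan change gives the copper link check a three-way contract: negative for a hardware or flow-control error (now propagated instead of swallowed), 0 for link down, and 1 for link up. A caller would therefore look roughly like this (a sketch; the variable names are illustrative, not quoted from e1000e):

	s32 ret = e1000_check_for_copper_link_ich8lan(hw);
	if (ret < 0)
		return ret;	/* -E1000_ERR_*: bail out */
	link_up = (ret == 1);	/* 0 = down, 1 = up */

Previously the early "link state already known" path returned 0, which a caller treating only a positive return as link-up would misread as link down even while the link was fine.
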
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 321d8be80871..42dcaefc4c19 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1573,11 +1573,18 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
1573 else 1573 else
1574 netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); 1574 netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
1575 1575
1576 /* Copy the address first, so that we avoid a possible race with
1577 * .set_rx_mode(). If we copy after changing the address in the filter
1578 * list, we might open ourselves to a narrow race window where
1579 * .set_rx_mode could delete our dev_addr filter and prevent traffic
1580 * from passing.
1581 */
1582 ether_addr_copy(netdev->dev_addr, addr->sa_data);
1583
1576 spin_lock_bh(&vsi->mac_filter_hash_lock); 1584 spin_lock_bh(&vsi->mac_filter_hash_lock);
1577 i40e_del_mac_filter(vsi, netdev->dev_addr); 1585 i40e_del_mac_filter(vsi, netdev->dev_addr);
1578 i40e_add_mac_filter(vsi, addr->sa_data); 1586 i40e_add_mac_filter(vsi, addr->sa_data);
1579 spin_unlock_bh(&vsi->mac_filter_hash_lock); 1587 spin_unlock_bh(&vsi->mac_filter_hash_lock);
1580 ether_addr_copy(netdev->dev_addr, addr->sa_data);
1581 if (vsi->type == I40E_VSI_MAIN) { 1588 if (vsi->type == I40E_VSI_MAIN) {
1582 i40e_status ret; 1589 i40e_status ret;
1583 1590
@@ -1923,6 +1930,14 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
1923 struct i40e_netdev_priv *np = netdev_priv(netdev); 1930 struct i40e_netdev_priv *np = netdev_priv(netdev);
1924 struct i40e_vsi *vsi = np->vsi; 1931 struct i40e_vsi *vsi = np->vsi;
1925 1932
1933 /* Under some circumstances, we might receive a request to delete
1934 * our own device address from our uc list. Because we store the
1935 * device address in the VSI's MAC/VLAN filter list, we need to ignore
1936 * such requests and not delete our device address from this list.
1937 */
1938 if (ether_addr_equal(addr, netdev->dev_addr))
1939 return 0;
1940
1926 i40e_del_mac_filter(vsi, addr); 1941 i40e_del_mac_filter(vsi, addr);
1927 1942
1928 return 0; 1943 return 0;
@@ -6038,8 +6053,8 @@ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
6038 /* Set Bit 7 to be valid */ 6053 /* Set Bit 7 to be valid */
6039 mode = I40E_AQ_SET_SWITCH_BIT7_VALID; 6054 mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
6040 6055
6041 /* Set L4type to both TCP and UDP support */ 6056 /* Set L4type for TCP support */
6042 mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH; 6057 mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP;
6043 6058
6044 /* Set cloud filter mode */ 6059 /* Set cloud filter mode */
6045 mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; 6060 mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
@@ -6969,18 +6984,18 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
6969 is_valid_ether_addr(filter->src_mac)) || 6984 is_valid_ether_addr(filter->src_mac)) ||
6970 (is_multicast_ether_addr(filter->dst_mac) && 6985 (is_multicast_ether_addr(filter->dst_mac) &&
6971 is_multicast_ether_addr(filter->src_mac))) 6986 is_multicast_ether_addr(filter->src_mac)))
6972 return -EINVAL; 6987 return -EOPNOTSUPP;
6973 6988
6974 /* Make sure port is specified, otherwise bail out, for channel 6989 /* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP
6975 * specific cloud filter needs 'L4 port' to be non-zero 6990 * ports are not yet supported via big buffer.
6976 */ 6991 */
6977 if (!filter->dst_port) 6992 if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP)
6978 return -EINVAL; 6993 return -EOPNOTSUPP;
6979 6994
6980 /* adding filter using src_port/src_ip is not supported at this stage */ 6995 /* adding filter using src_port/src_ip is not supported at this stage */
6981 if (filter->src_port || filter->src_ipv4 || 6996 if (filter->src_port || filter->src_ipv4 ||
6982 !ipv6_addr_any(&filter->ip.v6.src_ip6)) 6997 !ipv6_addr_any(&filter->ip.v6.src_ip6))
6983 return -EINVAL; 6998 return -EOPNOTSUPP;
6984 6999
6985 /* copy element needed to add cloud filter from filter */ 7000 /* copy element needed to add cloud filter from filter */
6986 i40e_set_cld_element(filter, &cld_filter.element); 7001 i40e_set_cld_element(filter, &cld_filter.element);
@@ -6991,7 +7006,7 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
6991 is_multicast_ether_addr(filter->src_mac)) { 7006 is_multicast_ether_addr(filter->src_mac)) {
6992 /* MAC + IP : unsupported mode */ 7007 /* MAC + IP : unsupported mode */
6993 if (filter->dst_ipv4) 7008 if (filter->dst_ipv4)
6994 return -EINVAL; 7009 return -EOPNOTSUPP;
6995 7010
6996 /* since we validated that L4 port must be valid before 7011 /* since we validated that L4 port must be valid before
6997 * we get here, start with respective "flags" value 7012 * we get here, start with respective "flags" value
@@ -7356,7 +7371,7 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
7356 7371
7357 if (tc < 0) { 7372 if (tc < 0) {
7358 dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); 7373 dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
7359 return -EINVAL; 7374 return -EOPNOTSUPP;
7360 } 7375 }
7361 7376
7362 if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || 7377 if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
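
The two i40e_main.c address hunks cooperate: i40e_set_mac() publishes the new netdev->dev_addr before touching the filter list, so a concurrent .set_rx_mode() sync that lands in the window will call the patched i40e_addr_unsync(), recognize the device's own address, and refuse to drop its filter. Condensed from the hunks above:

	/* Never let address-list sync delete our own MAC filter. */
	static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
	{
		struct i40e_netdev_priv *np = netdev_priv(netdev);

		if (ether_addr_equal(addr, netdev->dev_addr))
			return 0;	/* keep the dev_addr filter */
		i40e_del_mac_filter(np->vsi, addr);
		return 0;
	}
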
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 4566d66ffc7c..5bc2748ac468 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3047,10 +3047,30 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
3047 /* Walk through fragments adding latest fragment, testing it, and 3047 /* Walk through fragments adding latest fragment, testing it, and
3048 * then removing stale fragments from the sum. 3048 * then removing stale fragments from the sum.
3049 */ 3049 */
3050 stale = &skb_shinfo(skb)->frags[0]; 3050 for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
3051 for (;;) { 3051 int stale_size = skb_frag_size(stale);
3052
3052 sum += skb_frag_size(frag++); 3053 sum += skb_frag_size(frag++);
3053 3054
3055 /* The stale fragment may present us with a smaller
3056 * descriptor than the actual fragment size. To account
3057 * for that we need to remove all the data on the front and
3058 * figure out what the remainder would be in the last
3059 * descriptor associated with the fragment.
3060 */
3061 if (stale_size > I40E_MAX_DATA_PER_TXD) {
3062 int align_pad = -(stale->page_offset) &
3063 (I40E_MAX_READ_REQ_SIZE - 1);
3064
3065 sum -= align_pad;
3066 stale_size -= align_pad;
3067
3068 do {
3069 sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
3070 stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
3071 } while (stale_size > I40E_MAX_DATA_PER_TXD);
3072 }
3073
3054 /* if sum is negative we failed to make sufficient progress */ 3074 /* if sum is negative we failed to make sufficient progress */
3055 if (sum < 0) 3075 if (sum < 0)
3056 return true; 3076 return true;
@@ -3058,7 +3078,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
3058 if (!nr_frags--) 3078 if (!nr_frags--)
3059 break; 3079 break;
3060 3080
3061 sum -= skb_frag_size(stale++); 3081 sum -= stale_size;
3062 } 3082 }
3063 3083
3064 return false; 3084 return false;
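
The __i40e_chk_linearize() fix accounts for the hardware splitting any fragment larger than I40E_MAX_DATA_PER_TXD across several descriptors; the expression -(offset) & (SIZE - 1) is the usual round-up trick, yielding the number of bytes from the fragment's start to the next SIZE-aligned boundary. A standalone check of that arithmetic (the alignment constant is a stand-in, not necessarily the driver's value):

#include <stdio.h>

#define ALIGN_SZ 4096U	/* stand-in for I40E_MAX_READ_REQ_SIZE */

int main(void)
{
	unsigned int offsets[] = { 0, 1, 100, 4095, 4096 };

	for (int i = 0; i < 5; i++) {
		unsigned int off = offsets[i];
		/* bytes from 'off' up to the next ALIGN_SZ boundary */
		unsigned int pad = -off & (ALIGN_SZ - 1);

		printf("offset %4u -> pad %4u\n", off, pad);
	}
	return 0;
}

Subtracting that pad first, then whole I40E_MAX_DATA_PER_TXD_ALIGNED chunks, leaves exactly the residue carried by the fragment's final descriptor, which is what the descriptor-count bookkeeping needs. The identical change is applied to the i40evf copy of the function directly below.
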
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 50864f99446d..1ba29bb85b67 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -2012,10 +2012,30 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
2012 /* Walk through fragments adding latest fragment, testing it, and 2012 /* Walk through fragments adding latest fragment, testing it, and
2013 * then removing stale fragments from the sum. 2013 * then removing stale fragments from the sum.
2014 */ 2014 */
2015 stale = &skb_shinfo(skb)->frags[0]; 2015 for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
2016 for (;;) { 2016 int stale_size = skb_frag_size(stale);
2017
2017 sum += skb_frag_size(frag++); 2018 sum += skb_frag_size(frag++);
2018 2019
2020 /* The stale fragment may present us with a smaller
2021 * descriptor than the actual fragment size. To account
2022 * for that we need to remove all the data on the front and
2023 * figure out what the remainder would be in the last
2024 * descriptor associated with the fragment.
2025 */
2026 if (stale_size > I40E_MAX_DATA_PER_TXD) {
2027 int align_pad = -(stale->page_offset) &
2028 (I40E_MAX_READ_REQ_SIZE - 1);
2029
2030 sum -= align_pad;
2031 stale_size -= align_pad;
2032
2033 do {
2034 sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
2035 stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
2036 } while (stale_size > I40E_MAX_DATA_PER_TXD);
2037 }
2038
2019 /* if sum is negative we failed to make sufficient progress */ 2039 /* if sum is negative we failed to make sufficient progress */
2020 if (sum < 0) 2040 if (sum < 0)
2021 return true; 2041 return true;
@@ -2023,7 +2043,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
2023 if (!nr_frags--) 2043 if (!nr_frags--)
2024 break; 2044 break;
2025 2045
2026 sum -= skb_frag_size(stale++); 2046 sum -= stale_size;
2027 } 2047 }
2028 2048
2029 return false; 2049 return false;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index bc93b69cfd1e..a539263cd79c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
1214 val &= ~MVNETA_GMAC0_PORT_ENABLE; 1214 val &= ~MVNETA_GMAC0_PORT_ENABLE;
1215 mvreg_write(pp, MVNETA_GMAC_CTRL_0, val); 1215 mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
1216 1216
1217 pp->link = 0;
1218 pp->duplex = -1;
1219 pp->speed = 0;
1220
1217 udelay(200); 1221 udelay(200);
1218} 1222}
1219 1223
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
1958 1962
1959 if (!mvneta_rxq_desc_is_first_last(rx_status) || 1963 if (!mvneta_rxq_desc_is_first_last(rx_status) ||
1960 (rx_status & MVNETA_RXD_ERR_SUMMARY)) { 1964 (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
1965 mvneta_rx_error(pp, rx_desc);
1961err_drop_frame: 1966err_drop_frame:
1962 dev->stats.rx_errors++; 1967 dev->stats.rx_errors++;
1963 mvneta_rx_error(pp, rx_desc);
1964 /* leave the descriptor untouched */ 1968 /* leave the descriptor untouched */
1965 continue; 1969 continue;
1966 } 1970 }
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
3011{ 3015{
3012 int queue; 3016 int queue;
3013 3017
3014 for (queue = 0; queue < txq_number; queue++) 3018 for (queue = 0; queue < rxq_number; queue++)
3015 mvneta_rxq_deinit(pp, &pp->rxqs[queue]); 3019 mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
3016} 3020}
3017 3021
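
The mvneta_cleanup_rxqs() one-liner is a copy-and-paste bug with real consequences: the RX cleanup loop was bounded by txq_number, so with asymmetric queue counts it either leaked RX queues or walked past the end of pp->rxqs. The fix is simply to bound each loop by its own counter:

	/* Each cleanup loop must use the matching queue count. */
	for (queue = 0; queue < rxq_number; queue++)
		mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
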
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 54adfd967858..fc67e35b253e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
1961 /* set GE2 TUNE */ 1961 /* set GE2 TUNE */
1962 regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0); 1962 regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
1963 1963
1964 /* GE1, Force 1000M/FD, FC ON */ 1964 /* Set linkdown as the default for each GMAC. Its own MCR would be set
1965 mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0)); 1965 * up with a more appropriate value when the mtk_phy_link_adjust
1966 1966 * callback is invoked.
1967 /* GE2, Force 1000M/FD, FC ON */ 1967 */
1968 mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1)); 1968 for (i = 0; i < MTK_MAC_COUNT; i++)
1969 mtk_w32(eth, 0, MTK_MAC_MCR(i));
1969 1970
1970 /* Indicates CDM to parse the MTK special tag from CPU 1971 /* Indicates CDM to parse the MTK special tag from CPU
1971 * which also is working out for untag packets. 1972 * which also is working out for untag packets.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1fffdebbc9e8..e9a1fbcc4adf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
362 case MLX5_CMD_OP_QUERY_VPORT_COUNTER: 362 case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
363 case MLX5_CMD_OP_ALLOC_Q_COUNTER: 363 case MLX5_CMD_OP_ALLOC_Q_COUNTER:
364 case MLX5_CMD_OP_QUERY_Q_COUNTER: 364 case MLX5_CMD_OP_QUERY_Q_COUNTER:
365 case MLX5_CMD_OP_SET_RATE_LIMIT: 365 case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
366 case MLX5_CMD_OP_QUERY_RATE_LIMIT: 366 case MLX5_CMD_OP_QUERY_RATE_LIMIT:
367 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: 367 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
368 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: 368 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
505 MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); 505 MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
506 MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); 506 MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
507 MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); 507 MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
508 MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); 508 MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
509 MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); 509 MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
510 MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); 510 MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
511 MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); 511 MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c0872b3284cb..543060c305a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -82,6 +82,9 @@
 	max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
 #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)	MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
 #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev)	MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
+	(cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
+	MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
 
 #define MLX5_MPWRQ_LOG_WQE_SZ			18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
 	struct mlx5_core_dev      *mdev;
 	struct hwtstamp_config    *tstamp;
 	int                        ix;
+	int                        cpu;
 };
 
 struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
 			      u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
 				 u8 cq_period_mode);
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-			      struct mlx5e_params *params, u8 rq_type);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+			       struct mlx5e_params *params,
+			       u8 rq_type);
 
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index c6d90b6dd80e..9bcf38f4123b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
 				    struct ieee_ets *ets)
 {
+	bool have_ets_tc = false;
 	int bw_sum = 0;
 	int i;
 
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
 	}
 
 	/* Validate Bandwidth Sum */
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+			have_ets_tc = true;
 			bw_sum += ets->tc_tx_bw[i];
+		}
+	}
 
-	if (bw_sum != 0 && bw_sum != 100) {
+	if (have_ets_tc && bw_sum != 100) {
 		netdev_err(netdev,
 			   "Failed to validate ETS: BW sum is illegal\n");
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 23425f028405..8f05efa5c829 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 	new_channels.params = priv->channels.params;
 	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 
-	mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
-				 new_channels.params.rq_wq_type);
+	new_channels.params.mpwqe_log_stride_sz =
+		MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
+	new_channels.params.mpwqe_log_num_strides =
+		MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 		return err;
 
 	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+	mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+		  MLX5E_GET_PFLAG(&priv->channels.params,
+				  MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d2b057a3e512..d9d8227f195f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
 	struct mlx5e_cq_param      icosq_cq;
 };
 
-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
-{
-	return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
-}
-
 static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
 	return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 	       MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-			      struct mlx5e_params *params, u8 rq_type)
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+			       struct mlx5e_params *params, u8 rq_type)
 {
 	params->rq_wq_type = rq_type;
 	params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
 		params->log_rq_size = is_kdump_kernel() ?
 			MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
 			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-		params->mpwqe_log_stride_sz =
-			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
-			MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
-			MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+		params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
+			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 		params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
 			params->mpwqe_log_stride_sz;
 		break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
 				 MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
+				struct mlx5e_params *params)
 {
 	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
 		    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
 		    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
 		    MLX5_WQ_TYPE_LINKED_LIST;
-	mlx5e_set_rq_type_params(mdev, params, rq_type);
+	mlx5e_init_rq_type_params(mdev, params, rq_type);
 }
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 	int mtt_sz = mlx5e_get_wqe_mtt_sz();
 	int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
-	int node = mlx5e_get_node(c->priv, c->ix);
 	int i;
 
 	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
-				      GFP_KERNEL, node);
+				      GFP_KERNEL, cpu_to_node(c->cpu));
 	if (!rq->mpwqe.info)
 		goto err_out;
 
 	/* We allocate more than mtt_sz as we will align the pointer */
-	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
-					      GFP_KERNEL, node);
+	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+					      cpu_to_node(c->cpu));
 	if (unlikely(!rq->mpwqe.mtt_no_align))
 		goto err_free_wqe_info;
 
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	int err;
 	int i;
 
-	rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+	rqp->wq.db_numa_node = cpu_to_node(c->cpu);
 
 	err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
 				&rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		rq->wqe.frag_info =
 			kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
-				     GFP_KERNEL,
-				     mlx5e_get_node(c->priv, c->ix));
+				     GFP_KERNEL, cpu_to_node(c->cpu));
 		if (!rq->wqe.frag_info) {
 			err = -ENOMEM;
 			goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 
-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
 	if (err)
 		return err;
 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-	err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+	err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
 	if (err)
 		goto err_sq_wq_destroy;
 
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 	sq->channel   = c;
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
 	if (err)
 		return err;
 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-	err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
+	err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
 	if (err)
 		goto err_sq_wq_destroy;
 
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
-	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
 	if (err)
 		return err;
 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-	err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+	err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
 	if (err)
 		goto err_sq_wq_destroy;
 
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
 	struct mlx5_core_dev *mdev = c->priv->mdev;
 	int err;
 
-	param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
-	param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
+	param->wq.buf_numa_node = cpu_to_node(c->cpu);
+	param->wq.db_numa_node  = cpu_to_node(c->cpu);
 	param->eq_ix   = c->ix;
 
 	err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
 	mlx5e_free_cq(cq);
 }
 
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+	return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
+}
+
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
 			     struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 {
 	struct mlx5e_cq_moder icocq_moder = {0, 0};
 	struct net_device *netdev = priv->netdev;
+	int cpu = mlx5e_get_cpu(priv, ix);
 	struct mlx5e_channel *c;
 	unsigned int irq;
 	int err;
 	int eqn;
 
-	c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
+	c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
 	if (!c)
 		return -ENOMEM;
 
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->mdev     = priv->mdev;
 	c->tstamp   = &priv->tstamp;
 	c->ix       = ix;
+	c->cpu      = cpu;
 	c->pdev     = &priv->mdev->pdev->dev;
 	c->netdev   = priv->netdev;
 	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_activate_txqsq(&c->sq[tc]);
 	mlx5e_activate_rq(&c->rq);
-	netif_set_xps_queue(c->netdev,
-			    mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
+	netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
 }
 
 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 					      struct sk_buff *skb,
 					      netdev_features_t features)
 {
+	unsigned int offset = 0;
 	struct udphdr *udph;
 	u8 proto;
 	u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 		proto = ip_hdr(skb)->protocol;
 		break;
 	case htons(ETH_P_IPV6):
-		proto = ipv6_hdr(skb)->nexthdr;
+		proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
 		break;
 	default:
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 60771865c99c..e7e7cef2bde4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			break;
 		case MLX5_EVENT_TYPE_CQ_ERROR:
 			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
+			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
 				       cqn, eqe->data.cq_err.syndrome);
 			mlx5_cq_event(dev, cqn, eqe->type);
 			break;
@@ -775,7 +775,7 @@ err1:
 	return err;
 }
 
-int mlx5_stop_eqs(struct mlx5_core_dev *dev)
+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->priv.eq_table;
 	int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, pg)) {
 		err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
 		if (err)
-			return err;
+			mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
+				      err);
 	}
 #endif
 
 	err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
 	if (err)
-		return err;
+		mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+			      err);
 
-	mlx5_destroy_unmap_eq(dev, &table->async_eq);
+	err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+	if (err)
+		mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+			      err);
 	mlx5_cmd_use_polling(dev);
 
 	err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
 	if (err)
-		mlx5_cmd_use_events(dev);
-
-	return err;
+		mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+			      err);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
index 3c11d6e2160a..14962969c5ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
 	u8 actual_size;
 	int err;
 
+	if (!size)
+		return -EINVAL;
+
 	if (!fdev->mdev)
 		return -ENOTCONN;
 
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
 	u8 actual_size;
 	int err;
 
+	if (!size)
+		return -EINVAL;
+
 	if (!fdev->mdev)
 		return -ENOTCONN;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c70fd663a633..dfaad9ecb2b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
 static void del_sw_flow_table(struct fs_node *node);
 static void del_sw_flow_group(struct fs_node *node);
 static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
 /* Delete rule (destination) is special case that
  * requires to lock the FTE for all the deletion process.
  */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
 	return NULL;
 }
 
+static void del_sw_ns(struct fs_node *node)
+{
+	kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+	kfree(node);
+}
+
 static void del_hw_flow_table(struct fs_node *node)
 {
 	struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
 		return ERR_PTR(-ENOMEM);
 
 	fs_prio->node.type = FS_TYPE_PRIO;
-	tree_init_node(&fs_prio->node, NULL, NULL);
+	tree_init_node(&fs_prio->node, NULL, del_sw_prio);
 	tree_add_node(&fs_prio->node, &ns->node);
 	fs_prio->num_levels = num_levels;
 	fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
 		return ERR_PTR(-ENOMEM);
 
 	fs_init_namespace(ns);
-	tree_init_node(&ns->node, NULL, NULL);
+	tree_init_node(&ns->node, NULL, del_sw_ns);
 	tree_add_node(&ns->node, &prio->node);
 	list_add_tail(&ns->node.list, &prio->node.children);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 1a0e797ad001..21d29f7936f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
 	u32 fw;
 	int i;
 
-	/* If the syndrom is 0, the device is OK and no need to print buffer */
+	/* If the syndrome is 0, the device is OK and no need to print buffer */
 	if (!ioread8(&h->synd))
 		return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index d2a66dc4adc6..8812d7208e8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 				       struct mlx5e_params *params)
 {
 	/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
-	mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
+	mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
 
 	/* RQ size in ipoib by default is 512 */
 	params->log_rq_size = is_kdump_kernel() ?
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index f26f97fe4666..582b2f18010a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
 
+static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+				       bool reset, void *out, int out_size)
+{
+	u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+	MLX5_SET(query_cong_statistics_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+	MLX5_SET(query_cong_statistics_in, in, clear, reset);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
 static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
 {
 	return dev->priv.lag;
@@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
 	/* If bonded, we do not add an IB device for PF1. */
 	return false;
 }
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+				 u64 *values,
+				 int num_counters,
+				 size_t *offsets)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+	struct mlx5_lag *ldev;
+	int num_ports;
+	int ret, i, j;
+	void *out;
+
+	out = kvzalloc(outlen, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	memset(values, 0, sizeof(*values) * num_counters);
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	if (ldev && mlx5_lag_is_bonded(ldev)) {
+		num_ports = MLX5_MAX_PORTS;
+		mdev[0] = ldev->pf[0].dev;
+		mdev[1] = ldev->pf[1].dev;
+	} else {
+		num_ports = 1;
+		mdev[0] = dev;
+	}
+
+	for (i = 0; i < num_ports; ++i) {
+		ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+		if (ret)
+			goto unlock;
+
+		for (j = 0; j < num_counters; ++j)
+			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+	}
+
+unlock:
+	mutex_unlock(&lag_mutex);
+	kvfree(out);
+	return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5f323442cc5a..8a89c7e8cd63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 {
 	struct mlx5_priv *priv = &dev->priv;
 	struct mlx5_eq_table *table = &priv->eq_table;
-	struct irq_affinity irqdesc = {
-		.pre_vectors = MLX5_EQ_VEC_COMP_BASE,
-	};
 	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
 	int nvec;
 
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 	if (!priv->irq_info)
 		goto err_free_msix;
 
-	nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
-			MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-			PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
-			&irqdesc);
+	nvec = pci_alloc_irq_vectors(dev->pdev,
+			MLX5_EQ_VEC_COMP_BASE + 1, nvec,
+			PCI_IRQ_MSIX);
 	if (nvec < 0)
 		return nvec;
 
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
 	return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+	struct mlx5_priv *priv  = &mdev->priv;
+	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+		return -ENOMEM;
+	}
+
+	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+			priv->irq_info[i].mask);
+
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+	return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+	struct mlx5_priv *priv  = &mdev->priv;
+	int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+	irq_set_affinity_hint(irq, NULL);
+	free_cpumask_var(priv->irq_info[i].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+		err = mlx5_irq_set_affinity_hint(mdev, i);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	for (i--; i >= 0; i--)
+		mlx5_irq_clear_affinity_hint(mdev, i);
+
+	return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+	int i;
+
+	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+		mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
 		    unsigned int *irqn)
 {
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_stop_eqs;
 	}
 
+	err = mlx5_irq_set_affinity_hints(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+		goto err_affinity_hints;
+	}
+
 	err = mlx5_init_fs(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ err_sriov:
 	mlx5_cleanup_fs(dev);
 
 err_fs:
+	mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
 	free_comp_eqs(dev);
 
 err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 
 	mlx5_sriov_detach(dev);
 	mlx5_cleanup_fs(dev);
+	mlx5_irq_clear_affinity_hints(dev);
 	free_comp_eqs(dev);
 	mlx5_stop_eqs(dev);
 	mlx5_put_uars_page(dev, priv->uar);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index db9e665ab104..889130edb715 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 err_cmd:
 	memset(din, 0, sizeof(din));
 	memset(dout, 0, sizeof(dout));
-	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
 	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index e651e4c02867..d3c33e9eea72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
 	return ret_entry;
 }
 
-static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
-				   u32 rate, u16 index)
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
+				      u32 rate, u16 index)
 {
-	u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
 
-	MLX5_SET(set_rate_limit_in, in, opcode,
-		 MLX5_CMD_OP_SET_RATE_LIMIT);
-	MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
-	MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
+	MLX5_SET(set_pp_rate_limit_in, in, opcode,
+		 MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+	MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+	MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
140 140
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
 		entry->refcount++;
 	} else {
 		/* new rate limit */
-		err = mlx5_set_rate_limit_cmd(dev, rate, entry->index);
+		err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
 		if (err) {
 			mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
 				      rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
 	entry->refcount--;
 	if (!entry->refcount) {
 		/* need to remove rate */
-		mlx5_set_rate_limit_cmd(dev, 0, entry->index);
+		mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
 		entry->rate = 0;
 	}
 
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
 	/* Clear all configured rates */
 	for (i = 0; i < table->max_size; i++)
 		if (table->rl_entry[i].rate)
-			mlx5_set_rate_limit_cmd(dev, 0,
-						table->rl_entry[i].index);
+			mlx5_set_pp_rate_limit_cmd(dev, 0,
+						   table->rl_entry[i].index);
 
 	kfree(dev->priv.rl_table.rl_entry);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 07a9ba6cfc70..2f74953e4561 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
 	struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
 	struct mlx5e_vxlan *vxlan;
 
-	spin_lock(&vxlan_db->lock);
+	spin_lock_bh(&vxlan_db->lock);
 	vxlan = radix_tree_lookup(&vxlan_db->tree, port);
-	spin_unlock(&vxlan_db->lock);
+	spin_unlock_bh(&vxlan_db->lock);
 
 	return vxlan;
 }
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
 	struct mlx5e_vxlan *vxlan;
 	int err;
 
-	if (mlx5e_vxlan_lookup_port(priv, port))
+	mutex_lock(&priv->state_lock);
+	vxlan = mlx5e_vxlan_lookup_port(priv, port);
+	if (vxlan) {
+		atomic_inc(&vxlan->refcount);
 		goto free_work;
+	}
 
 	if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
 		goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
 		goto err_delete_port;
 
 	vxlan->udp_port = port;
+	atomic_set(&vxlan->refcount, 1);
 
-	spin_lock_irq(&vxlan_db->lock);
+	spin_lock_bh(&vxlan_db->lock);
 	err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
-	spin_unlock_irq(&vxlan_db->lock);
+	spin_unlock_bh(&vxlan_db->lock);
 	if (err)
 		goto err_free;
 
@@ -113,35 +118,39 @@ err_free:
 err_delete_port:
 	mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
 free_work:
+	mutex_unlock(&priv->state_lock);
 	kfree(vxlan_work);
 }
 
-static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
+static void mlx5e_vxlan_del_port(struct work_struct *work)
 {
+	struct mlx5e_vxlan_work *vxlan_work =
+		container_of(work, struct mlx5e_vxlan_work, work);
+	struct mlx5e_priv *priv = vxlan_work->priv;
 	struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
+	u16 port = vxlan_work->port;
 	struct mlx5e_vxlan *vxlan;
+	bool remove = false;
 
-	spin_lock_irq(&vxlan_db->lock);
-	vxlan = radix_tree_delete(&vxlan_db->tree, port);
-	spin_unlock_irq(&vxlan_db->lock);
-
+	mutex_lock(&priv->state_lock);
+	spin_lock_bh(&vxlan_db->lock);
+	vxlan = radix_tree_lookup(&vxlan_db->tree, port);
 	if (!vxlan)
-		return;
-
-	mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
-
-	kfree(vxlan);
-}
+		goto out_unlock;
 
-static void mlx5e_vxlan_del_port(struct work_struct *work)
-{
-	struct mlx5e_vxlan_work *vxlan_work =
-		container_of(work, struct mlx5e_vxlan_work, work);
-	struct mlx5e_priv *priv = vxlan_work->priv;
-	u16 port = vxlan_work->port;
+	if (atomic_dec_and_test(&vxlan->refcount)) {
+		radix_tree_delete(&vxlan_db->tree, port);
+		remove = true;
+	}
 
-	__mlx5e_vxlan_core_del_port(priv, port);
+out_unlock:
+	spin_unlock_bh(&vxlan_db->lock);
 
+	if (remove) {
+		mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+		kfree(vxlan);
+	}
+	mutex_unlock(&priv->state_lock);
 	kfree(vxlan_work);
 }
147 156
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
 	struct mlx5e_vxlan *vxlan;
 	unsigned int port = 0;
 
-	spin_lock_irq(&vxlan_db->lock);
+	/* Lockless since we are the only radix-tree consumers, wq is disabled */
 	while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
 		port = vxlan->udp_port;
-		spin_unlock_irq(&vxlan_db->lock);
-		__mlx5e_vxlan_core_del_port(priv, (u16)port);
-		spin_lock_irq(&vxlan_db->lock);
+		radix_tree_delete(&vxlan_db->tree, port);
+		mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+		kfree(vxlan);
 	}
-	spin_unlock_irq(&vxlan_db->lock);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
index 5def12c048e3..5ef6ae7d568a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
@@ -36,6 +36,7 @@
 #include "en.h"
 
 struct mlx5e_vxlan {
+	atomic_t refcount;
 	u16 udp_port;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 23f7d828cf67..6ef20e5cc77d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1643,7 +1643,12 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
 		return 0;
 	}
 
-	wmb(); /* reset needs to be written before we read control register */
+	/* Reset needs to be written before we read control register, and
+	 * we must wait for the HW to become responsive once again
+	 */
+	wmb();
+	msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
+
 	end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
 	do {
 		u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index a6441208e9d9..fb082ad21b00 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -59,6 +59,7 @@
 #define MLXSW_PCI_SW_RESET			0xF0010
 #define MLXSW_PCI_SW_RESET_RST_BIT		BIT(0)
 #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS	5000
+#define MLXSW_PCI_SW_RESET_WAIT_MSECS		100
 #define MLXSW_PCI_FW_READY			0xA1844
 #define MLXSW_PCI_FW_READY_MASK			0xFFFF
 #define MLXSW_PCI_FW_READY_MAGIC		0x5E
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 9bd8d28de152..c3837ca7a705 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4376,7 +4376,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 		}
 		if (!info->linking)
 			break;
-		if (netdev_has_any_upper_dev(upper_dev)) {
+		if (netdev_has_any_upper_dev(upper_dev) &&
+		    (!netif_is_bridge_master(upper_dev) ||
+		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+							  upper_dev))) {
 			NL_SET_ERR_MSG(extack,
 				       "spectrum: Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
@@ -4504,6 +4507,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 					      u16 vid)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct netdev_notifier_changeupper_info *info = ptr;
 	struct netlink_ext_ack *extack;
 	struct net_device *upper_dev;
@@ -4520,7 +4524,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 		}
 		if (!info->linking)
 			break;
-		if (netdev_has_any_upper_dev(upper_dev)) {
+		if (netdev_has_any_upper_dev(upper_dev) &&
+		    (!netif_is_bridge_master(upper_dev) ||
+		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+							  upper_dev))) {
 			NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
 		}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 432ab9b12b7f..05ce1befd9b3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -365,6 +365,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
 				struct net_device *brport_dev,
 				struct net_device *br_dev);
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+					 const struct net_device *br_dev);
 
 /* spectrum.c */
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index c33beac5def0..b5397da94d7f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -46,7 +46,8 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
 				    int tclass_num, u32 min, u32 max,
 				    u32 probability, bool is_ecn)
 {
-	char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)];
+	char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
+	char cwtp_cmd[MLXSW_REG_CWTP_LEN];
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	int err;
 
@@ -60,10 +61,10 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (err)
 		return err;
 
-	mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num,
+	mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
 			     MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn);
 
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
 }
 
 static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 72ef4f8025f0..434b3922b34f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
 }
 
-static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
-				    const struct mlxsw_sp_rif *rif)
-{
-	char rauht_pl[MLXSW_REG_RAUHT_LEN];
-
-	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
-			     rif->rif_index, rif->addr);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
-}
-
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_rif *rif)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
-	mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
-				 rif_list_node)
+				 rif_list_node) {
+		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+	}
 }
 
 enum mlxsw_sp_nexthop_type {
@@ -3237,7 +3228,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
 {
 	if (!removing)
 		nh->should_offload = 1;
-	else if (nh->offloaded)
+	else
 		nh->should_offload = 0;
 	nh->update = 1;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 7b8548e25ae7..593ad31be749 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -152,6 +152,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge,
 	return NULL;
 }
 
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+					 const struct net_device *br_dev)
+{
+	return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+}
+
 static struct mlxsw_sp_bridge_device *
 mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
 			      struct net_device *br_dev)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index e379b78e86ef..13190aa09faf 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
 	return nfp_net_ebpf_capable(nn) ? "BPF" : "";
 }
 
+static int
+nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+{
+	int err;
+
+	nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
+	if (!nn->app_priv)
+		return -ENOMEM;
+
+	err = nfp_app_nic_vnic_alloc(app, nn, id);
+	if (err)
+		goto err_free_priv;
+
+	return 0;
+err_free_priv:
+	kfree(nn->app_priv);
+	return err;
+}
+
 static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 {
+	struct nfp_bpf_vnic *bv = nn->app_priv;
+
 	if (nn->dp.bpf_offload_xdp)
 		nfp_bpf_xdp_offload(app, nn, NULL);
+	WARN_ON(bv->tc_prog);
+	kfree(bv);
 }
 
 static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
@@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 {
 	struct tc_cls_bpf_offload *cls_bpf = type_data;
 	struct nfp_net *nn = cb_priv;
+	struct bpf_prog *oldprog;
+	struct nfp_bpf_vnic *bv;
+	int err;
 
 	if (type != TC_SETUP_CLSBPF ||
 	    !tc_can_offload(nn->dp.netdev) ||
@@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 	    cls_bpf->common.protocol != htons(ETH_P_ALL) ||
 	    cls_bpf->common.chain_index)
 		return -EOPNOTSUPP;
-	if (nn->dp.bpf_offload_xdp)
-		return -EBUSY;
 
 	/* Only support TC direct action */
 	if (!cls_bpf->exts_integrated ||
@@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 		return -EOPNOTSUPP;
 	}
 
-	switch (cls_bpf->command) {
-	case TC_CLSBPF_REPLACE:
-		return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
-	case TC_CLSBPF_ADD:
-		return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
-	case TC_CLSBPF_DESTROY:
-		return nfp_net_bpf_offload(nn, NULL, true);
-	default:
+	if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
 		return -EOPNOTSUPP;
+
+	bv = nn->app_priv;
+	oldprog = cls_bpf->oldprog;
+
+	/* Don't remove if oldprog doesn't match driver's state */
+	if (bv->tc_prog != oldprog) {
+		oldprog = NULL;
+		if (!cls_bpf->prog)
+			return 0;
 	}
+
+	err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
+	if (err)
+		return err;
+
+	bv->tc_prog = cls_bpf->prog;
+	return 0;
 }
 
 static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = {
 
 	.extra_cap	= nfp_bpf_extra_cap,
 
-	.vnic_alloc	= nfp_app_nic_vnic_alloc,
+	.vnic_alloc	= nfp_bpf_vnic_alloc,
 	.vnic_free	= nfp_bpf_vnic_free,
 
 	.setup_tc	= nfp_bpf_setup_tc,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 082a15f6dfb5..57b6043177a3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -172,6 +172,14 @@ struct nfp_prog {
 	struct list_head insns;
 };
 
+/**
+ * struct nfp_bpf_vnic - per-vNIC BPF priv structure
+ * @tc_prog:	currently loaded cls_bpf program
+ */
+struct nfp_bpf_vnic {
+	struct bpf_prog *tc_prog;
+};
+
 int nfp_bpf_jit(struct nfp_prog *prog);
 
 extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 1a603fdd9e80..99b0487b6d82 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -568,6 +568,7 @@ nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
 		return err;
 	}
 	nn_writeb(nn, ctrl_offset, entry->entry);
+	nfp_net_irq_unmask(nn, entry->entry);
 
 	return 0;
 }
@@ -582,6 +583,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
 				 unsigned int vector_idx)
 {
 	nn_writeb(nn, ctrl_offset, 0xff);
+	nn_pci_flush(nn);
 	free_irq(nn->irq_entries[vector_idx].vector, nn);
 }
 
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 70c92b649b29..38c924bdd32e 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
 		return ret;
 	}
 
-	ret = emac_mac_up(adpt);
+	ret = adpt->phy.open(adpt);
 	if (ret) {
 		emac_mac_rx_tx_rings_free_all(adpt);
 		free_irq(irq->irq, irq);
 		return ret;
 	}
 
-	ret = adpt->phy.open(adpt);
+	ret = emac_mac_up(adpt);
 	if (ret) {
-		emac_mac_down(adpt);
 		emac_mac_rx_tx_rings_free_all(adpt);
 		free_irq(irq->irq, irq);
+		adpt->phy.close(adpt);
 		return ret;
 	}
 
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 75323000c364..b9e2846589f8 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -147,7 +147,7 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
 	[FWNLCR0]	= 0x0090,
 	[FWALCR0]	= 0x0094,
 	[TXNLCR1]	= 0x00a0,
-	[TXALCR1]	= 0x00a0,
+	[TXALCR1]	= 0x00a4,
 	[RXNLCR1]	= 0x00a8,
 	[RXALCR1]	= 0x00ac,
 	[FWNLCR1]	= 0x00b0,
@@ -399,7 +399,7 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = {
 	[FWNLCR0]	= 0x0090,
 	[FWALCR0]	= 0x0094,
 	[TXNLCR1]	= 0x00a0,
-	[TXALCR1]	= 0x00a0,
+	[TXALCR1]	= 0x00a4,
 	[RXNLCR1]	= 0x00a8,
 	[RXALCR1]	= 0x00ac,
 	[FWNLCR1]	= 0x00b0,
@@ -3225,18 +3225,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
3225 /* ioremap the TSU registers */ 3225 /* ioremap the TSU registers */
3226 if (mdp->cd->tsu) { 3226 if (mdp->cd->tsu) {
3227 struct resource *rtsu; 3227 struct resource *rtsu;
3228
3228 rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); 3229 rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
3229 mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); 3230 if (!rtsu) {
3230 if (IS_ERR(mdp->tsu_addr)) { 3231 dev_err(&pdev->dev, "no TSU resource\n");
3231 ret = PTR_ERR(mdp->tsu_addr); 3232 ret = -ENODEV;
3233 goto out_release;
3234 }
3235 /* We can only request the TSU region for the first port
3236 * of the two sharing this TSU for the probe to succeed...
3237 */
3238 if (devno % 2 == 0 &&
3239 !devm_request_mem_region(&pdev->dev, rtsu->start,
3240 resource_size(rtsu),
3241 dev_name(&pdev->dev))) {
3242 dev_err(&pdev->dev, "can't request TSU resource.\n");
3243 ret = -EBUSY;
3244 goto out_release;
3245 }
3246 mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
3247 resource_size(rtsu));
3248 if (!mdp->tsu_addr) {
3249 dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
3250 ret = -ENOMEM;
3232 goto out_release; 3251 goto out_release;
3233 } 3252 }
3234 mdp->port = devno % 2; 3253 mdp->port = devno % 2;
3235 ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; 3254 ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
3236 } 3255 }
3237 3256
3238 /* initialize first or needed device */ 3257 /* Need to init only the first port of the two sharing a TSU */
3239 if (!devno || pd->needs_init) { 3258 if (devno % 2 == 0) {
3240 if (mdp->cd->chip_reset) 3259 if (mdp->cd->chip_reset)
3241 mdp->cd->chip_reset(ndev); 3260 mdp->cd->chip_reset(ndev);
3242 3261
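On these SH parts two Ethernet ports share a single TSU register block, so the exclusive mem-region reservation can only be taken once per pair; the hunk above therefore lets only the even-numbered port (devno % 2 == 0) request the region while both ports ioremap it. A toy standalone model of the pairing arithmetic (only the devno/port relationship is taken from the hunk, the rest is illustrative):

#include <stdio.h>

/* Toy model: ports 2k and 2k+1 share TSU block k. Only the even
 * port of each pair takes the exclusive reservation; both map it. */
int main(void)
{
	for (int devno = 0; devno < 4; devno++) {
		int port = devno % 2;            /* mdp->port in the driver */
		int owns_request = (devno % 2 == 0);

		printf("devno %d: TSU block %d, port %d, %s\n",
		       devno, devno / 2, port,
		       owns_request ? "requests + maps region"
		                    : "maps region only");
	}
	return 0;
}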
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e1e5ac053760..ce2ea2d491ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
409 /* get timestamp value */ 409 /* get timestamp value */
410 u64(*get_timestamp) (void *desc, u32 ats); 410 u64(*get_timestamp) (void *desc, u32 ats);
411 /* get rx timestamp status */ 411 /* get rx timestamp status */
412 int (*get_rx_timestamp_status) (void *desc, u32 ats); 412 int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
413 /* Display ring */ 413 /* Display ring */
414 void (*display_ring)(void *head, unsigned int size, bool rx); 414 void (*display_ring)(void *head, unsigned int size, bool rx);
415 /* set MSS via context descriptor */ 415 /* set MSS via context descriptor */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 4b286e27c4ca..7e089bf906b4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
258 return ret; 258 return ret;
259} 259}
260 260
261static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) 261static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
262 u32 ats)
262{ 263{
263 struct dma_desc *p = (struct dma_desc *)desc; 264 struct dma_desc *p = (struct dma_desc *)desc;
264 int ret = -EINVAL; 265 int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
270 271
271 /* Check if timestamp is OK from context descriptor */ 272 /* Check if timestamp is OK from context descriptor */
272 do { 273 do {
273 ret = dwmac4_rx_check_timestamp(desc); 274 ret = dwmac4_rx_check_timestamp(next_desc);
274 if (ret < 0) 275 if (ret < 0)
275 goto exit; 276 goto exit;
276 i++; 277 i++;
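For GMAC4 write-back descriptors the RX timestamp status lives in the context descriptor that follows the data descriptor, which is why the callback now receives both desc and next_desc and checks the latter. A hedged model of that relationship (the struct and field names below are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

struct toy_desc { bool ctx_ts_valid; };  /* stand-in for a DMA descriptor */

/* Mirrors the fixed behaviour: availability lives in the *next* descriptor. */
static bool rx_ts_available(struct toy_desc *p, struct toy_desc *np)
{
	(void)p;               /* data descriptor: no timestamp status here */
	return np->ctx_ts_valid;
}

int main(void)
{
	struct toy_desc data = { .ctx_ts_valid = false };
	struct toy_desc ctx  = { .ctx_ts_valid = true };

	printf("timestamp available: %d\n", rx_ts_available(&data, &ctx));
	return 0;
}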
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 7546b3664113..2a828a312814 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
400 return ns; 400 return ns;
401} 401}
402 402
403static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats) 403static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
404 u32 ats)
404{ 405{
405 if (ats) { 406 if (ats) {
406 struct dma_extended_desc *p = (struct dma_extended_desc *)desc; 407 struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index f817f8f36569..db4cee57bb24 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
265 return ns; 265 return ns;
266} 266}
267 267
268static int ndesc_get_rx_timestamp_status(void *desc, u32 ats) 268static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
269{ 269{
270 struct dma_desc *p = (struct dma_desc *)desc; 270 struct dma_desc *p = (struct dma_desc *)desc;
271 271
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 721b61655261..08c19ebd5306 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
34{ 34{
35 u32 value = readl(ioaddr + PTP_TCR); 35 u32 value = readl(ioaddr + PTP_TCR);
36 unsigned long data; 36 unsigned long data;
37 u32 reg_value;
37 38
38 /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second 39 /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
39 * formula = (1/ptp_clock) * 1000000000 40 * formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
50 51
51 data &= PTP_SSIR_SSINC_MASK; 52 data &= PTP_SSIR_SSINC_MASK;
52 53
54 reg_value = data;
53 if (gmac4) 55 if (gmac4)
54 data = data << GMAC4_PTP_SSIR_SSINC_SHIFT; 56 reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
55 57
56 writel(data, ioaddr + PTP_SSIR); 58 writel(reg_value, ioaddr + PTP_SSIR);
57 59
58 return data; 60 return data;
59} 61}
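The hunk above separates the value written to PTP_SSIR from the value returned to the caller: GMAC4 wants the sub-second increment shifted into its register field, but callers still need the raw nanosecond increment. A worked sketch, assuming a 50 MHz ptp_clock and treating the 16-bit shift as an assumption about GMAC4_PTP_SSIR_SSINC_SHIFT:

#include <stdint.h>
#include <stdio.h>

#define PTP_SSIR_SSINC_MASK        0xff
#define GMAC4_PTP_SSIR_SSINC_SHIFT 16   /* assumed field position */

int main(void)
{
	uint32_t ptp_clock = 50000000;           /* 50 MHz */
	uint32_t data = 1000000000u / ptp_clock; /* 20 ns per tick */
	uint32_t reg_value;

	data &= PTP_SSIR_SSINC_MASK;
	reg_value = data << GMAC4_PTP_SSIR_SSINC_SHIFT; /* gmac4 case */

	/* the pre-fix code shifted data itself and returned the shifted value */
	printf("write 0x%08x to PTP_SSIR, return %u ns to caller\n",
	       reg_value, data);
	return 0;
}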
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7250539d0bd..c0af0bc4e714 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
364bool stmmac_eee_init(struct stmmac_priv *priv) 364bool stmmac_eee_init(struct stmmac_priv *priv)
365{ 365{
366 struct net_device *ndev = priv->dev; 366 struct net_device *ndev = priv->dev;
367 int interface = priv->plat->interface;
367 unsigned long flags; 368 unsigned long flags;
368 bool ret = false; 369 bool ret = false;
369 370
371 if ((interface != PHY_INTERFACE_MODE_MII) &&
372 (interface != PHY_INTERFACE_MODE_GMII) &&
373 !phy_interface_mode_is_rgmii(interface))
374 goto out;
375
 370	/* Using PCS we cannot deal with the phy registers at this stage 376	/* Using PCS we cannot deal with the phy registers at this stage
 371	 * so we do not support extra features like EEE. 377	 * so we do not support extra features like EEE.
 372	 */ 378	 */
@@ -482,7 +488,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
482 desc = np; 488 desc = np;
483 489
484 /* Check if timestamp is available */ 490 /* Check if timestamp is available */
485 if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) { 491 if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
486 ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); 492 ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
487 netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); 493 netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
488 shhwtstamp = skb_hwtstamps(skb); 494 shhwtstamp = skb_hwtstamps(skb);
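stmmac_eee_init() now bails out early unless the MAC-PHY interface is MII, GMII, or one of the RGMII variants, since EEE is only meaningful on those links. A simplified predicate mirroring that check (the enum is a pared-down stand-in for the kernel's phy_interface_t):

#include <stdbool.h>
#include <stdio.h>

enum itf { MII, GMII, RGMII, RGMII_ID, RGMII_RXID, RGMII_TXID, SGMII, RMII };

static bool is_rgmii(enum itf i)
{
	return i >= RGMII && i <= RGMII_TXID;
}

/* Mirrors the new early exit: EEE setup only proceeds for these modes. */
static bool eee_possible(enum itf i)
{
	return i == MII || i == GMII || is_rgmii(i);
}

int main(void)
{
	printf("RGMII: %d, RMII: %d\n",
	       eee_possible(RGMII), eee_possible(RMII)); /* prints 1, 0 */
	return 0;
}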
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b718a02a6bb6..0a48b3073d3d 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -825,6 +825,13 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
825 if (IS_ERR(rt)) 825 if (IS_ERR(rt))
826 return PTR_ERR(rt); 826 return PTR_ERR(rt);
827 827
828 if (skb_dst(skb)) {
829 int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
830 GENEVE_BASE_HLEN - info->options_len - 14;
831
832 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
833 }
834
828 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 835 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
829 if (geneve->collect_md) { 836 if (geneve->collect_md) {
830 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); 837 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
@@ -864,6 +871,13 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
864 if (IS_ERR(dst)) 871 if (IS_ERR(dst))
865 return PTR_ERR(dst); 872 return PTR_ERR(dst);
866 873
874 if (skb_dst(skb)) {
875 int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
876 GENEVE_BASE_HLEN - info->options_len - 14;
877
878 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
879 }
880
867 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 881 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
868 if (geneve->collect_md) { 882 if (geneve->collect_md) {
869 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); 883 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
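Both new hunks feed the tunnel's path MTU back to the inner route: the usable inner MTU is the outer path MTU minus the outer IP header, the Geneve encapsulation (UDP plus Geneve base header plus options), and the 14-byte inner Ethernet header. A worked instance, assuming GENEVE_BASE_HLEN is the 8-byte UDP header plus the 8-byte Geneve base header:

#include <stdio.h>

#define GENEVE_BASE_HLEN (8 + 8)  /* assumed: UDP + Geneve base header */

int main(void)
{
	int dst_mtu = 1500;   /* outer path MTU */
	int iphdr = 20;       /* outer IPv4 header; 40 on the IPv6 path */
	int options_len = 0;  /* no Geneve options in this example */

	int inner_mtu = dst_mtu - iphdr - GENEVE_BASE_HLEN - options_len - 14;

	printf("inner MTU fed to update_pmtu: %d\n", inner_mtu); /* 1450 */
	return 0;
}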
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index a178c5efd33e..a0f2be81d52e 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1444,9 +1444,14 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
1444 return 0; 1444 return 0;
1445 1445
1446unregister_netdev: 1446unregister_netdev:
1447 /* macvlan_uninit would free the macvlan port */
1447 unregister_netdevice(dev); 1448 unregister_netdevice(dev);
1449 return err;
1448destroy_macvlan_port: 1450destroy_macvlan_port:
1449 if (create) 1451 /* the macvlan port may be freed by macvlan_uninit when fail to register.
1452 * so we destroy the macvlan port only when it's valid.
1453 */
1454 if (create && macvlan_port_get_rtnl(dev))
1450 macvlan_port_destroy(port->dev); 1455 macvlan_port_destroy(port->dev);
1451 return err; 1456 return err;
1452} 1457}
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index b5a8f750e433..82104edca393 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -879,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
879 879
880 /* SGMII-to-Copper mode initialization */ 880 /* SGMII-to-Copper mode initialization */
881 if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { 881 if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
882 u32 pause;
883
882 /* Select page 18 */ 884 /* Select page 18 */
883 err = marvell_set_page(phydev, 18); 885 err = marvell_set_page(phydev, 18);
884 if (err < 0) 886 if (err < 0)
@@ -902,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
902 err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); 904 err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
903 if (err < 0) 905 if (err < 0)
904 return err; 906 return err;
907
908 /* There appears to be a bug in the 88e1512 when used in
 909	 * SGMII to copper mode, where the AN advertisement register
 910	 * clears the pause bits each time a negotiation occurs.
 911	 * This means we can never be truly sure what was advertised,
912 * so disable Pause support.
913 */
914 pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
915 phydev->supported &= ~pause;
916 phydev->advertising &= ~pause;
905 } 917 }
906 918
907 return m88e1121_config_init(phydev); 919 return m88e1121_config_init(phydev);
@@ -2073,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = {
2073 .flags = PHY_HAS_INTERRUPT, 2085 .flags = PHY_HAS_INTERRUPT,
2074 .probe = marvell_probe, 2086 .probe = marvell_probe,
2075 .config_init = &m88e1145_config_init, 2087 .config_init = &m88e1145_config_init,
2076 .config_aneg = &marvell_config_aneg, 2088 .config_aneg = &m88e1101_config_aneg,
2077 .read_status = &genphy_read_status, 2089 .read_status = &genphy_read_status,
2078 .ack_interrupt = &marvell_ack_interrupt, 2090 .ack_interrupt = &marvell_ack_interrupt,
2079 .config_intr = &marvell_config_intr, 2091 .config_intr = &marvell_config_intr,
diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c
index 135296508a7e..6425ce04d3f9 100644
--- a/drivers/net/phy/mdio-sun4i.c
+++ b/drivers/net/phy/mdio-sun4i.c
@@ -118,8 +118,10 @@ static int sun4i_mdio_probe(struct platform_device *pdev)
118 118
119 data->regulator = devm_regulator_get(&pdev->dev, "phy"); 119 data->regulator = devm_regulator_get(&pdev->dev, "phy");
120 if (IS_ERR(data->regulator)) { 120 if (IS_ERR(data->regulator)) {
121 if (PTR_ERR(data->regulator) == -EPROBE_DEFER) 121 if (PTR_ERR(data->regulator) == -EPROBE_DEFER) {
122 return -EPROBE_DEFER; 122 ret = -EPROBE_DEFER;
123 goto err_out_free_mdiobus;
124 }
123 125
124 dev_info(&pdev->dev, "no regulator found\n"); 126 dev_info(&pdev->dev, "no regulator found\n");
125 data->regulator = NULL; 127 data->regulator = NULL;
diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c
index bfd3090fb055..07c6048200c6 100644
--- a/drivers/net/phy/mdio-xgene.c
+++ b/drivers/net/phy/mdio-xgene.c
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
194 } 194 }
195 195
196 ret = xgene_enet_ecc_init(pdata); 196 ret = xgene_enet_ecc_init(pdata);
197 if (ret) 197 if (ret) {
198 if (pdata->dev->of_node)
199 clk_disable_unprepare(pdata->clk);
198 return ret; 200 return ret;
201 }
199 xgene_gmac_reset(pdata); 202 xgene_gmac_reset(pdata);
200 203
201 return 0; 204 return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
388 return ret; 391 return ret;
389 392
390 mdio_bus = mdiobus_alloc(); 393 mdio_bus = mdiobus_alloc();
391 if (!mdio_bus) 394 if (!mdio_bus) {
392 return -ENOMEM; 395 ret = -ENOMEM;
396 goto out_clk;
397 }
393 398
394 mdio_bus->name = "APM X-Gene MDIO bus"; 399 mdio_bus->name = "APM X-Gene MDIO bus";
395 400
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
418 mdio_bus->phy_mask = ~0; 423 mdio_bus->phy_mask = ~0;
419 ret = mdiobus_register(mdio_bus); 424 ret = mdiobus_register(mdio_bus);
420 if (ret) 425 if (ret)
421 goto out; 426 goto out_mdiobus;
422 427
423 acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1, 428 acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
424 acpi_register_phy, NULL, mdio_bus, NULL); 429 acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
426 } 431 }
427 432
428 if (ret) 433 if (ret)
429 goto out; 434 goto out_mdiobus;
430 435
431 pdata->mdio_bus = mdio_bus; 436 pdata->mdio_bus = mdio_bus;
432 xgene_mdio_status = true; 437 xgene_mdio_status = true;
433 438
434 return 0; 439 return 0;
435 440
436out: 441out_mdiobus:
437 mdiobus_free(mdio_bus); 442 mdiobus_free(mdio_bus);
438 443
444out_clk:
445 if (dev->of_node)
446 clk_disable_unprepare(pdata->clk);
447
439 return ret; 448 return ret;
440} 449}
441 450
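The xgene-mdio fix is the standard acquire-in-order, release-in-reverse cleanup: the clock is enabled before the MDIO bus is allocated, so every failure after that point must fall through labels that undo the later step before the earlier one. A generic sketch of the pattern (resource names are illustrative):

#include <stdio.h>
#include <stdlib.h>

static int probe(void)
{
	int ret;
	void *clk = malloc(1);           /* step 1: enable clock */
	if (!clk)
		return -1;

	void *bus = malloc(1);           /* step 2: allocate bus */
	if (!bus) {
		ret = -1;
		goto out_clk;
	}

	if (/* registration fails in this example */ 1) {
		ret = -1;
		goto out_bus;            /* undo step 2, then step 1 */
	}
	return 0;

out_bus:
	free(bus);
out_clk:
	free(clk);
	return ret;
}

int main(void) { printf("probe: %d\n", probe()); return 0; }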
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index ab4614113403..422ff6333c52 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev)
624 phydev->link = 0; 624 phydev->link = 0;
625 if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) 625 if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
626 phydev->drv->config_intr(phydev); 626 phydev->drv->config_intr(phydev);
627 return genphy_config_aneg(phydev);
627 } 628 }
628 629
629 return 0; 630 return 0;
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 5dc9668dde34..249ce5cbea22 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -526,6 +526,7 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
526 pl->link_config.pause = MLO_PAUSE_AN; 526 pl->link_config.pause = MLO_PAUSE_AN;
527 pl->link_config.speed = SPEED_UNKNOWN; 527 pl->link_config.speed = SPEED_UNKNOWN;
528 pl->link_config.duplex = DUPLEX_UNKNOWN; 528 pl->link_config.duplex = DUPLEX_UNKNOWN;
529 pl->link_config.an_enabled = true;
529 pl->ops = ops; 530 pl->ops = ops;
530 __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); 531 __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
531 532
@@ -951,6 +952,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
951 mutex_lock(&pl->state_mutex); 952 mutex_lock(&pl->state_mutex);
952 /* Configure the MAC to match the new settings */ 953 /* Configure the MAC to match the new settings */
953 linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising); 954 linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
955 pl->link_config.interface = config.interface;
954 pl->link_config.speed = our_kset.base.speed; 956 pl->link_config.speed = our_kset.base.speed;
955 pl->link_config.duplex = our_kset.base.duplex; 957 pl->link_config.duplex = our_kset.base.duplex;
956 pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE; 958 pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
@@ -1294,6 +1296,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
1294 switch (cmd) { 1296 switch (cmd) {
1295 case SIOCGMIIPHY: 1297 case SIOCGMIIPHY:
1296 mii->phy_id = pl->phydev->mdio.addr; 1298 mii->phy_id = pl->phydev->mdio.addr;
1299 /* fall through */
1297 1300
1298 case SIOCGMIIREG: 1301 case SIOCGMIIREG:
1299 ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num); 1302 ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num);
@@ -1316,6 +1319,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
1316 switch (cmd) { 1319 switch (cmd) {
1317 case SIOCGMIIPHY: 1320 case SIOCGMIIPHY:
1318 mii->phy_id = 0; 1321 mii->phy_id = 0;
1322 /* fall through */
1319 1323
1320 case SIOCGMIIREG: 1324 case SIOCGMIIREG:
1321 ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num); 1325 ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num);
@@ -1427,9 +1431,8 @@ static void phylink_sfp_link_down(void *upstream)
1427 WARN_ON(!lockdep_rtnl_is_held()); 1431 WARN_ON(!lockdep_rtnl_is_held());
1428 1432
1429 set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state); 1433 set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
1434 queue_work(system_power_efficient_wq, &pl->resolve);
1430 flush_work(&pl->resolve); 1435 flush_work(&pl->resolve);
1431
1432 netif_carrier_off(pl->netdev);
1433} 1436}
1434 1437
1435static void phylink_sfp_link_up(void *upstream) 1438static void phylink_sfp_link_up(void *upstream)
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8a1b1f4c1b7c..ab64a142b832 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -356,7 +356,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream);
356void sfp_unregister_upstream(struct sfp_bus *bus) 356void sfp_unregister_upstream(struct sfp_bus *bus)
357{ 357{
358 rtnl_lock(); 358 rtnl_lock();
359 sfp_unregister_bus(bus); 359 if (bus->sfp)
360 sfp_unregister_bus(bus);
360 bus->upstream = NULL; 361 bus->upstream = NULL;
361 bus->netdev = NULL; 362 bus->netdev = NULL;
362 rtnl_unlock(); 363 rtnl_unlock();
@@ -459,7 +460,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket);
459void sfp_unregister_socket(struct sfp_bus *bus) 460void sfp_unregister_socket(struct sfp_bus *bus)
460{ 461{
461 rtnl_lock(); 462 rtnl_lock();
462 sfp_unregister_bus(bus); 463 if (bus->netdev)
464 sfp_unregister_bus(bus);
463 bus->sfp_dev = NULL; 465 bus->sfp_dev = NULL;
464 bus->sfp = NULL; 466 bus->sfp = NULL;
465 bus->socket_ops = NULL; 467 bus->socket_ops = NULL;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 3000ddd1c7e2..728819feab44 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1100,6 +1100,7 @@ static const struct usb_device_id products[] = {
1100 {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, 1100 {QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
1101 {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, 1101 {QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
1102 {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, 1102 {QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
1103 {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */
1103 {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, 1104 {QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
1104 {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ 1105 {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */
1105 {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ 1106 {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 19b9cc51079e..31f4b7911ef8 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2155 } 2155 }
2156 2156
2157 ndst = &rt->dst; 2157 ndst = &rt->dst;
2158 if (skb_dst(skb)) {
2159 int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
2160
2161 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
2162 skb, mtu);
2163 }
2164
2158 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 2165 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2159 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 2166 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
2160 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), 2167 err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2190 goto out_unlock; 2197 goto out_unlock;
2191 } 2198 }
2192 2199
2200 if (skb_dst(skb)) {
2201 int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
2202
2203 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
2204 skb, mtu);
2205 }
2206
2193 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 2207 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
2194 ttl = ttl ? : ip6_dst_hoplimit(ndst); 2208 ttl = ttl ? : ip6_dst_hoplimit(ndst);
2195 skb_scrub_packet(skb, xnet); 2209 skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
3103 3117
3104 max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : 3118 max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
3105 VXLAN_HEADROOM); 3119 VXLAN_HEADROOM);
3120 if (max_mtu < ETH_MIN_MTU)
3121 max_mtu = ETH_MIN_MTU;
3122
3123 if (!changelink && !conf->mtu)
3124 dev->mtu = max_mtu;
3106 } 3125 }
3107 3126
3108 if (dev->mtu > max_mtu) 3127 if (dev->mtu > max_mtu)
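vxlan_config_apply() derives the largest usable MTU from the lower device and then clamps it so it can never drop below the legal Ethernet minimum. A worked instance using the mainline headroom values (IPv4: 20 + 8 UDP + 8 VXLAN + 14 Ethernet; IPv6 adds 20 more), treated here as assumptions:

#include <stdio.h>

#define VXLAN_HEADROOM  (20 + 8 + 8 + 14)  /* assumed IPv4 encap overhead */
#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)  /* assumed IPv6 encap overhead */
#define ETH_MIN_MTU 68

int main(void)
{
	int lowerdev_mtu = 1500;
	int use_ipv6 = 1;

	int max_mtu = lowerdev_mtu -
		      (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
	if (max_mtu < ETH_MIN_MTU)  /* e.g. a lower device with a tiny MTU */
		max_mtu = ETH_MIN_MTU;

	printf("max VXLAN MTU over this lower device: %d\n", max_mtu); /* 1430 */
	return 0;
}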
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index f7d228b5ba93..987f1252a3cf 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -384,6 +384,18 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
384 } 384 }
385 } 385 }
386 386
387 if (changed & IEEE80211_CONF_CHANGE_PS) {
388 list_for_each_entry(tmp, &wcn->vif_list, list) {
389 vif = wcn36xx_priv_to_vif(tmp);
390 if (hw->conf.flags & IEEE80211_CONF_PS) {
391 if (vif->bss_conf.ps) /* ps allowed ? */
392 wcn36xx_pmc_enter_bmps_state(wcn, vif);
393 } else {
394 wcn36xx_pmc_exit_bmps_state(wcn, vif);
395 }
396 }
397 }
398
387 mutex_unlock(&wcn->conf_mutex); 399 mutex_unlock(&wcn->conf_mutex);
388 400
389 return 0; 401 return 0;
@@ -747,17 +759,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
747 vif_priv->dtim_period = bss_conf->dtim_period; 759 vif_priv->dtim_period = bss_conf->dtim_period;
748 } 760 }
749 761
750 if (changed & BSS_CHANGED_PS) {
751 wcn36xx_dbg(WCN36XX_DBG_MAC,
752 "mac bss PS set %d\n",
753 bss_conf->ps);
754 if (bss_conf->ps) {
755 wcn36xx_pmc_enter_bmps_state(wcn, vif);
756 } else {
757 wcn36xx_pmc_exit_bmps_state(wcn, vif);
758 }
759 }
760
761 if (changed & BSS_CHANGED_BSSID) { 762 if (changed & BSS_CHANGED_BSSID) {
762 wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n", 763 wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n",
763 bss_conf->bssid); 764 bss_conf->bssid);
diff --git a/drivers/net/wireless/ath/wcn36xx/pmc.c b/drivers/net/wireless/ath/wcn36xx/pmc.c
index 589fe5f70971..1976b80c235f 100644
--- a/drivers/net/wireless/ath/wcn36xx/pmc.c
+++ b/drivers/net/wireless/ath/wcn36xx/pmc.c
@@ -45,8 +45,10 @@ int wcn36xx_pmc_exit_bmps_state(struct wcn36xx *wcn,
45 struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif); 45 struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
46 46
47 if (WCN36XX_BMPS != vif_priv->pw_state) { 47 if (WCN36XX_BMPS != vif_priv->pw_state) {
48 wcn36xx_err("Not in BMPS mode, no need to exit from BMPS mode!\n"); 48 /* Unbalanced call or last BMPS enter failed */
49 return -EINVAL; 49 wcn36xx_dbg(WCN36XX_DBG_PMC,
50 "Not in BMPS mode, no need to exit\n");
51 return -EALREADY;
50 } 52 }
51 wcn36xx_smd_exit_bmps(wcn, vif); 53 wcn36xx_smd_exit_bmps(wcn, vif);
52 vif_priv->pw_state = WCN36XX_FULL_POWER; 54 vif_priv->pw_state = WCN36XX_FULL_POWER;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index d749abeca3ae..403e65c309d0 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -670,11 +670,15 @@ static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index)
670 return index & (q->n_window - 1); 670 return index & (q->n_window - 1);
671} 671}
672 672
673static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie, 673static inline void *iwl_pcie_get_tfd(struct iwl_trans *trans,
674 struct iwl_txq *txq, int idx) 674 struct iwl_txq *txq, int idx)
675{ 675{
676 return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq, 676 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
677 idx); 677
678 if (trans->cfg->use_tfh)
679 idx = iwl_pcie_get_cmd_index(txq, idx);
680
681 return txq->tfds + trans_pcie->tfd_size * idx;
678} 682}
679 683
680static inline void iwl_enable_rfkill_int(struct iwl_trans *trans) 684static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
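iwl_pcie_get_cmd_index() reduces a free-running write pointer to a slot inside a power-of-two ring with a mask, and the fixed iwl_pcie_get_tfd() only applies that reduction on use_tfh hardware. The masking idiom in isolation:

#include <stdio.h>

int main(void)
{
	unsigned n_window = 256;    /* ring size, must be a power of two */
	unsigned write_ptr = 1000;  /* free-running index */

	unsigned idx = write_ptr & (n_window - 1); /* 1000 % 256 == 232 */

	printf("slot %u of %u\n", idx, n_window);
	return 0;
}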
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 16b345f54ff0..6d0a907d5ba5 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -171,8 +171,6 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
171 171
172static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) 172static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
173{ 173{
174 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
175
176 /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and 174 /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
177 * idx is bounded by n_window 175 * idx is bounded by n_window
178 */ 176 */
@@ -181,7 +179,7 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
181 lockdep_assert_held(&txq->lock); 179 lockdep_assert_held(&txq->lock);
182 180
183 iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta, 181 iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
184 iwl_pcie_get_tfd(trans_pcie, txq, idx)); 182 iwl_pcie_get_tfd(trans, txq, idx));
185 183
186 /* free SKB */ 184 /* free SKB */
187 if (txq->entries) { 185 if (txq->entries) {
@@ -364,11 +362,9 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
364 struct sk_buff *skb, 362 struct sk_buff *skb,
365 struct iwl_cmd_meta *out_meta) 363 struct iwl_cmd_meta *out_meta)
366{ 364{
367 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
368 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 365 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
369 int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr); 366 int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
370 struct iwl_tfh_tfd *tfd = 367 struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
371 iwl_pcie_get_tfd(trans_pcie, txq, idx);
372 dma_addr_t tb_phys; 368 dma_addr_t tb_phys;
373 bool amsdu; 369 bool amsdu;
374 int i, len, tb1_len, tb2_len, hdr_len; 370 int i, len, tb1_len, tb2_len, hdr_len;
@@ -565,8 +561,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
565 u8 group_id = iwl_cmd_groupid(cmd->id); 561 u8 group_id = iwl_cmd_groupid(cmd->id);
566 const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD]; 562 const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
567 u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD]; 563 u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
568 struct iwl_tfh_tfd *tfd = 564 struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
569 iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
570 565
571 memset(tfd, 0, sizeof(*tfd)); 566 memset(tfd, 0, sizeof(*tfd));
572 567
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index fed6d842a5e1..3f85713c41dc 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -373,7 +373,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
373{ 373{
374 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); 374 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
375 int i, num_tbs; 375 int i, num_tbs;
376 void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index); 376 void *tfd = iwl_pcie_get_tfd(trans, txq, index);
377 377
378 /* Sanity check on number of chunks */ 378 /* Sanity check on number of chunks */
379 num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); 379 num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
@@ -2018,7 +2018,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
2018 } 2018 }
2019 2019
2020 trace_iwlwifi_dev_tx(trans->dev, skb, 2020 trace_iwlwifi_dev_tx(trans->dev, skb,
2021 iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), 2021 iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
2022 trans_pcie->tfd_size, 2022 trans_pcie->tfd_size,
2023 &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 2023 &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
2024 hdr_len); 2024 hdr_len);
@@ -2092,7 +2092,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
2092 IEEE80211_CCMP_HDR_LEN : 0; 2092 IEEE80211_CCMP_HDR_LEN : 0;
2093 2093
2094 trace_iwlwifi_dev_tx(trans->dev, skb, 2094 trace_iwlwifi_dev_tx(trans->dev, skb,
2095 iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), 2095 iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
2096 trans_pcie->tfd_size, 2096 trans_pcie->tfd_size,
2097 &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0); 2097 &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
2098 2098
@@ -2425,7 +2425,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
2425 memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr, 2425 memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
2426 IWL_FIRST_TB_SIZE); 2426 IWL_FIRST_TB_SIZE);
2427 2427
2428 tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr); 2428 tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
2429 /* Set up entry for this TFD in Tx byte-count array */ 2429 /* Set up entry for this TFD in Tx byte-count array */
2430 iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len), 2430 iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
2431 iwl_pcie_tfd_get_num_tbs(trans, tfd)); 2431 iwl_pcie_tfd_get_num_tbs(trans, tfd));
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 10b075a46b26..e8189c07b41f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
684 hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN); 684 hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
685 hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | 685 hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
686 IEEE80211_STYPE_NULLFUNC | 686 IEEE80211_STYPE_NULLFUNC |
687 IEEE80211_FCTL_TODS |
687 (ps ? IEEE80211_FCTL_PM : 0)); 688 (ps ? IEEE80211_FCTL_PM : 0));
688 hdr->duration_id = cpu_to_le16(0); 689 hdr->duration_id = cpu_to_le16(0);
689 memcpy(hdr->addr1, vp->bssid, ETH_ALEN); 690 memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
3215 if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info))) 3216 if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
3216 continue; 3217 continue;
3217 3218
3218 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3219 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
3219 if (!skb) { 3220 if (!skb) {
3220 res = -ENOMEM; 3221 res = -ENOMEM;
3221 goto out_err; 3222 goto out_err;
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index c5a34671abda..9bd7ddeeb6a5 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1326,6 +1326,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1326 1326
1327 netif_carrier_off(netdev); 1327 netif_carrier_off(netdev);
1328 1328
1329 xenbus_switch_state(dev, XenbusStateInitialising);
1329 return netdev; 1330 return netdev;
1330 1331
1331 exit: 1332 exit:
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index e949e3302af4..c586bcdb5190 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
211 return ret; 211 return ret;
212} 212}
213 213
214static int btt_log_read_pair(struct arena_info *arena, u32 lane, 214static int btt_log_group_read(struct arena_info *arena, u32 lane,
215 struct log_entry *ent) 215 struct log_group *log)
216{ 216{
217 return arena_read_bytes(arena, 217 return arena_read_bytes(arena,
218 arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, 218 arena->logoff + (lane * LOG_GRP_SIZE), log,
219 2 * LOG_ENT_SIZE, 0); 219 LOG_GRP_SIZE, 0);
220} 220}
221 221
222static struct dentry *debugfs_root; 222static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
256 debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); 256 debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
257 debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); 257 debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
258 debugfs_create_x32("flags", S_IRUGO, d, &a->flags); 258 debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
259 debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
260 debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
259} 261}
260 262
261static void btt_debugfs_init(struct btt *btt) 263static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
274 } 276 }
275} 277}
276 278
279static u32 log_seq(struct log_group *log, int log_idx)
280{
281 return le32_to_cpu(log->ent[log_idx].seq);
282}
283
277/* 284/*
278 * This function accepts two log entries, and uses the 285 * This function accepts two log entries, and uses the
279 * sequence number to find the 'older' entry. 286 * sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
283 * 290 *
284 * TODO The logic feels a bit kludge-y. make it better.. 291 * TODO The logic feels a bit kludge-y. make it better..
285 */ 292 */
286static int btt_log_get_old(struct log_entry *ent) 293static int btt_log_get_old(struct arena_info *a, struct log_group *log)
287{ 294{
295 int idx0 = a->log_index[0];
296 int idx1 = a->log_index[1];
288 int old; 297 int old;
289 298
290 /* 299 /*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
292 * the next time, the following logic works out to put this 301 * the next time, the following logic works out to put this
293 * (next) entry into [1] 302 * (next) entry into [1]
294 */ 303 */
295 if (ent[0].seq == 0) { 304 if (log_seq(log, idx0) == 0) {
296 ent[0].seq = cpu_to_le32(1); 305 log->ent[idx0].seq = cpu_to_le32(1);
297 return 0; 306 return 0;
298 } 307 }
299 308
300 if (ent[0].seq == ent[1].seq) 309 if (log_seq(log, idx0) == log_seq(log, idx1))
301 return -EINVAL; 310 return -EINVAL;
302 if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) 311 if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
303 return -EINVAL; 312 return -EINVAL;
304 313
305 if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { 314 if (log_seq(log, idx0) < log_seq(log, idx1)) {
306 if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) 315 if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
307 old = 0; 316 old = 0;
308 else 317 else
309 old = 1; 318 old = 1;
310 } else { 319 } else {
311 if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) 320 if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
312 old = 1; 321 old = 1;
313 else 322 else
314 old = 0; 323 old = 0;
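BTT sequence numbers cycle 1 -> 2 -> 3 -> 1 (zero means never written), so two valid entries always differ and sum to at most 5; within a pair, a difference of exactly 1 marks the larger value as newer, while a wrap (3 followed by 1) makes 1 the newer one. A standalone mirror of this logic with a few worked pairs:

#include <stdio.h>

/* Mirrors btt_log_get_old(): returns which slot (0/1) holds the OLDER
 * entry, or -1 for an invalid pair. Sequence numbers cycle 1 -> 2 -> 3. */
static int older_slot(unsigned s0, unsigned s1)
{
	if (s0 == 0)
		return 0;             /* unwritten lane: treat slot 0 as old */
	if (s0 == s1 || s0 + s1 > 5)
		return -1;            /* corrupt pair */
	if (s0 < s1)
		return (s1 - s0 == 1) ? 0 : 1;
	return (s0 - s1 == 1) ? 1 : 0;
}

int main(void)
{
	printf("(1,2)->%d (3,1)->%d (1,3)->%d (2,2)->%d\n",
	       older_slot(1, 2),    /* 0: 2 is newer          */
	       older_slot(3, 1),    /* 0: wrapped, 1 is newer */
	       older_slot(1, 3),    /* 1: wrapped, 1 is newer */
	       older_slot(2, 2));   /* -1: invalid            */
	return 0;
}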
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
328{ 337{
329 int ret; 338 int ret;
330 int old_ent, ret_ent; 339 int old_ent, ret_ent;
331 struct log_entry log[2]; 340 struct log_group log;
332 341
333 ret = btt_log_read_pair(arena, lane, log); 342 ret = btt_log_group_read(arena, lane, &log);
334 if (ret) 343 if (ret)
335 return -EIO; 344 return -EIO;
336 345
337 old_ent = btt_log_get_old(log); 346 old_ent = btt_log_get_old(arena, &log);
338 if (old_ent < 0 || old_ent > 1) { 347 if (old_ent < 0 || old_ent > 1) {
339 dev_err(to_dev(arena), 348 dev_err(to_dev(arena),
340 "log corruption (%d): lane %d seq [%d, %d]\n", 349 "log corruption (%d): lane %d seq [%d, %d]\n",
341 old_ent, lane, log[0].seq, log[1].seq); 350 old_ent, lane, log.ent[arena->log_index[0]].seq,
351 log.ent[arena->log_index[1]].seq);
342 /* TODO set error state? */ 352 /* TODO set error state? */
343 return -EIO; 353 return -EIO;
344 } 354 }
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
346 ret_ent = (old_flag ? old_ent : (1 - old_ent)); 356 ret_ent = (old_flag ? old_ent : (1 - old_ent));
347 357
348 if (ent != NULL) 358 if (ent != NULL)
349 memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); 359 memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
350 360
351 return ret_ent; 361 return ret_ent;
352} 362}
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
360 u32 sub, struct log_entry *ent, unsigned long flags) 370 u32 sub, struct log_entry *ent, unsigned long flags)
361{ 371{
362 int ret; 372 int ret;
363 /* 373 u32 group_slot = arena->log_index[sub];
364 * Ignore the padding in log_entry for calculating log_half. 374 unsigned int log_half = LOG_ENT_SIZE / 2;
365 * The entry is 'committed' when we write the sequence number,
366 * and we want to ensure that that is the last thing written.
367 * We don't bother writing the padding as that would be extra
368 * media wear and write amplification
369 */
370 unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
371 u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
372 void *src = ent; 375 void *src = ent;
376 u64 ns_off;
373 377
378 ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
379 (group_slot * LOG_ENT_SIZE);
374 /* split the 16B write into atomic, durable halves */ 380 /* split the 16B write into atomic, durable halves */
375 ret = arena_write_bytes(arena, ns_off, src, log_half, flags); 381 ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
376 if (ret) 382 if (ret)
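With the group layout, a slot's media offset is logoff + lane * LOG_GRP_SIZE + slot * LOG_ENT_SIZE, and the 16-byte entry is still written as two 8-byte halves so that the sequence number (in the second half) lands last and "commits" the entry. A sketch of the arithmetic and the split, using the 16- and 64-byte sizes from btt.h:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define LOG_ENT_SIZE 16
#define LOG_GRP_SIZE 64   /* four 16-byte slots per lane */

struct log_entry { uint32_t lba, old_map, new_map, seq; };

int main(void)
{
	uint64_t logoff = 0x100000;   /* illustrative arena->logoff */
	uint32_t lane = 5, slot = 1;  /* slot = arena->log_index[sub] */

	uint64_t ns_off = logoff + lane * LOG_GRP_SIZE + slot * LOG_ENT_SIZE;

	struct log_entry ent = { 42, 100, 200, 2 };
	unsigned char media[LOG_ENT_SIZE];

	memcpy(media, &ent, LOG_ENT_SIZE / 2);    /* lba + old_map first */
	memcpy(media + 8, (char *)&ent + 8, 8);   /* new_map + seq last  */

	printf("slot %u of lane %u lives at 0x%llx\n",
	       slot, lane, (unsigned long long)ns_off);
	return 0;
}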
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
453{ 459{
454 size_t logsize = arena->info2off - arena->logoff; 460 size_t logsize = arena->info2off - arena->logoff;
455 size_t chunk_size = SZ_4K, offset = 0; 461 size_t chunk_size = SZ_4K, offset = 0;
456 struct log_entry log; 462 struct log_entry ent;
457 void *zerobuf; 463 void *zerobuf;
458 int ret; 464 int ret;
459 u32 i; 465 u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
485 } 491 }
486 492
487 for (i = 0; i < arena->nfree; i++) { 493 for (i = 0; i < arena->nfree; i++) {
488 log.lba = cpu_to_le32(i); 494 ent.lba = cpu_to_le32(i);
489 log.old_map = cpu_to_le32(arena->external_nlba + i); 495 ent.old_map = cpu_to_le32(arena->external_nlba + i);
490 log.new_map = cpu_to_le32(arena->external_nlba + i); 496 ent.new_map = cpu_to_le32(arena->external_nlba + i);
491 log.seq = cpu_to_le32(LOG_SEQ_INIT); 497 ent.seq = cpu_to_le32(LOG_SEQ_INIT);
492 ret = __btt_log_write(arena, i, 0, &log, 0); 498 ret = __btt_log_write(arena, i, 0, &ent, 0);
493 if (ret) 499 if (ret)
494 goto free; 500 goto free;
495 } 501 }
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
594 return 0; 600 return 0;
595} 601}
596 602
603static bool ent_is_padding(struct log_entry *ent)
604{
605 return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
606 && (ent->seq == 0);
607}
608
609/*
610 * Detecting valid log indices: We read a log group (see the comments in btt.h
611 * for a description of a 'log_group' and its 'slots'), and iterate over its
612 * four slots. We expect that a padding slot will be all-zeroes, and use this
613 * to detect a padding slot vs. an actual entry.
614 *
615 * If a log_group is in the initial state, i.e. hasn't been used since the
616 * creation of this BTT layout, it will have three of the four slots with
617 * zeroes. We skip over these log_groups for the detection of log_index. If
618 * all log_groups are in the initial state (i.e. the BTT has never been
619 * written to), it is safe to assume the 'new format' of log entries in slots
620 * (0, 1).
621 */
622static int log_set_indices(struct arena_info *arena)
623{
624 bool idx_set = false, initial_state = true;
625 int ret, log_index[2] = {-1, -1};
626 u32 i, j, next_idx = 0;
627 struct log_group log;
628 u32 pad_count = 0;
629
630 for (i = 0; i < arena->nfree; i++) {
631 ret = btt_log_group_read(arena, i, &log);
632 if (ret < 0)
633 return ret;
634
635 for (j = 0; j < 4; j++) {
636 if (!idx_set) {
637 if (ent_is_padding(&log.ent[j])) {
638 pad_count++;
639 continue;
640 } else {
641 /* Skip if index has been recorded */
642 if ((next_idx == 1) &&
643 (j == log_index[0]))
644 continue;
645 /* valid entry, record index */
646 log_index[next_idx] = j;
647 next_idx++;
648 }
649 if (next_idx == 2) {
650 /* two valid entries found */
651 idx_set = true;
652 } else if (next_idx > 2) {
653 /* too many valid indices */
654 return -ENXIO;
655 }
656 } else {
657 /*
658 * once the indices have been set, just verify
659 * that all subsequent log groups are either in
660 * their initial state or follow the same
661 * indices.
662 */
663 if (j == log_index[0]) {
664 /* entry must be 'valid' */
665 if (ent_is_padding(&log.ent[j]))
666 return -ENXIO;
667 } else if (j == log_index[1]) {
668 ;
669 /*
670 * log_index[1] can be padding if the
671 * lane never got used and it is still
672 * in the initial state (three 'padding'
673 * entries)
674 */
675 } else {
676 /* entry must be invalid (padding) */
677 if (!ent_is_padding(&log.ent[j]))
678 return -ENXIO;
679 }
680 }
681 }
682 /*
683 * If any of the log_groups have more than one valid,
 684		 * non-padding entry, then we are no longer in the
685 * initial_state
686 */
687 if (pad_count < 3)
688 initial_state = false;
689 pad_count = 0;
690 }
691
692 if (!initial_state && !idx_set)
693 return -ENXIO;
694
695 /*
696 * If all the entries in the log were in the initial state,
697 * assume new padding scheme
698 */
699 if (initial_state)
700 log_index[1] = 1;
701
702 /*
703 * Only allow the known permutations of log/padding indices,
704 * i.e. (0, 1), and (0, 2)
705 */
706 if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
707 ; /* known index possibilities */
708 else {
709 dev_err(to_dev(arena), "Found an unknown padding scheme\n");
710 return -ENXIO;
711 }
712
713 arena->log_index[0] = log_index[0];
714 arena->log_index[1] = log_index[1];
715 dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
716 dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
717 return 0;
718}
719
597static int btt_rtt_init(struct arena_info *arena) 720static int btt_rtt_init(struct arena_info *arena)
598{ 721{
599 arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); 722 arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
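The detection loop above only has to distinguish two legal layouts: entries at slots (0, 1) with padding at (2, 3) (the new format), or entries at (0, 2) with padding at (1, 3) (pre-4.15). A compact standalone demo classifying one group of each shape by where the all-zero slots fall; unlike the kernel code, it ignores the initial-state case where a lane has only one non-zero slot:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct ent { unsigned lba, old_map, new_map, seq; };
struct group { struct ent ent[4]; };

static bool is_padding(const struct ent *e)
{
	static const struct ent zero;
	return memcmp(e, &zero, sizeof(zero)) == 0;
}

static const char *layout(const struct group *g)
{
	if (!is_padding(&g->ent[0]) && !is_padding(&g->ent[1]))
		return "(0, 1)";            /* new format */
	if (!is_padding(&g->ent[0]) && !is_padding(&g->ent[2]))
		return "(0, 2)";            /* pre-4.15 format */
	return "unknown";
}

int main(void)
{
	struct group newg = { .ent = { {1,2,3,1}, {4,5,6,2}, {0}, {0} } };
	struct group oldg = { .ent = { {1,2,3,1}, {0}, {4,5,6,2}, {0} } };

	printf("new: %s, old: %s\n", layout(&newg), layout(&oldg));
	return 0;
}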
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
650 available -= 2 * BTT_PG_SIZE; 773 available -= 2 * BTT_PG_SIZE;
651 774
652 /* The log takes a fixed amount of space based on nfree */ 775 /* The log takes a fixed amount of space based on nfree */
653 logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), 776 logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
654 BTT_PG_SIZE);
655 available -= logsize; 777 available -= logsize;
656 778
657 /* Calculate optimal split between map and data area */ 779 /* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
668 arena->mapoff = arena->dataoff + datasize; 790 arena->mapoff = arena->dataoff + datasize;
669 arena->logoff = arena->mapoff + mapsize; 791 arena->logoff = arena->mapoff + mapsize;
670 arena->info2off = arena->logoff + logsize; 792 arena->info2off = arena->logoff + logsize;
793
794 /* Default log indices are (0,1) */
795 arena->log_index[0] = 0;
796 arena->log_index[1] = 1;
671 return arena; 797 return arena;
672} 798}
673 799
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
758 arena->external_lba_start = cur_nlba; 884 arena->external_lba_start = cur_nlba;
759 parse_arena_meta(arena, super, cur_off); 885 parse_arena_meta(arena, super, cur_off);
760 886
887 ret = log_set_indices(arena);
888 if (ret) {
889 dev_err(to_dev(arena),
890 "Unable to deduce log/padding indices\n");
891 goto out;
892 }
893
761 mutex_init(&arena->err_lock); 894 mutex_init(&arena->err_lock);
762 ret = btt_freelist_init(arena); 895 ret = btt_freelist_init(arena);
763 if (ret) 896 if (ret)
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 578c2057524d..db3cb6d4d0d4 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -27,6 +27,7 @@
27#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) 27#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
28#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) 28#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
29#define MAP_ENT_NORMAL 0xC0000000 29#define MAP_ENT_NORMAL 0xC0000000
30#define LOG_GRP_SIZE sizeof(struct log_group)
30#define LOG_ENT_SIZE sizeof(struct log_entry) 31#define LOG_ENT_SIZE sizeof(struct log_entry)
31#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ 32#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
32#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ 33#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
50 INIT_READY 51 INIT_READY
51}; 52};
52 53
54/*
55 * A log group represents one log 'lane', and consists of four log entries.
56 * Two of the four entries are valid entries, and the remaining two are
57 * padding. Due to an old bug in the padding location, we need to perform a
58 * test to determine the padding scheme being used, and use that scheme
59 * thereafter.
60 *
61 * In kernels prior to 4.15, 'log group' would have actual log entries at
 62 * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
63 * format has log entries at indices (0, 1) and padding at indices (2, 3).
64 *
65 * Old (pre 4.15) format:
66 * +-----------------+-----------------+
67 * | ent[0] | ent[1] |
68 * | 16B | 16B |
69 * | lba/old/new/seq | pad |
70 * +-----------------------------------+
71 * | ent[2] | ent[3] |
72 * | 16B | 16B |
73 * | lba/old/new/seq | pad |
74 * +-----------------+-----------------+
75 *
76 * New format:
77 * +-----------------+-----------------+
78 * | ent[0] | ent[1] |
79 * | 16B | 16B |
80 * | lba/old/new/seq | lba/old/new/seq |
81 * +-----------------------------------+
82 * | ent[2] | ent[3] |
83 * | 16B | 16B |
84 * | pad | pad |
85 * +-----------------+-----------------+
86 *
87 * We detect during start-up which format is in use, and set
88 * arena->log_index[(0, 1)] with the detected format.
89 */
90
53struct log_entry { 91struct log_entry {
54 __le32 lba; 92 __le32 lba;
55 __le32 old_map; 93 __le32 old_map;
56 __le32 new_map; 94 __le32 new_map;
57 __le32 seq; 95 __le32 seq;
58 __le64 padding[2]; 96};
97
98struct log_group {
99 struct log_entry ent[4];
59}; 100};
60 101
61struct btt_sb { 102struct btt_sb {
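Dropping the padding[2] member keeps struct log_entry at 16 bytes and makes struct log_group exactly one 64-byte lane, which is what LOG_ENT_SIZE and LOG_GRP_SIZE measure. A quick compile-time check, using plain uint32_t in place of __le32:

#include <stdint.h>

struct log_entry {          /* __le32 fields in the kernel */
	uint32_t lba;
	uint32_t old_map;
	uint32_t new_map;
	uint32_t seq;
};

struct log_group {
	struct log_entry ent[4];
};

_Static_assert(sizeof(struct log_entry) == 16, "LOG_ENT_SIZE");
_Static_assert(sizeof(struct log_group) == 64, "LOG_GRP_SIZE");

int main(void) { return 0; }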
@@ -125,6 +166,8 @@ struct aligned_lock {
125 * @list: List head for list of arenas 166 * @list: List head for list of arenas
126 * @debugfs_dir: Debugfs dentry 167 * @debugfs_dir: Debugfs dentry
127 * @flags: Arena flags - may signify error states. 168 * @flags: Arena flags - may signify error states.
169 * @err_lock: Mutex for synchronizing error clearing.
170 * @log_index: Indices of the valid log entries in a log_group
128 * 171 *
129 * arena_info is a per-arena handle. Once an arena is narrowed down for an 172 * arena_info is a per-arena handle. Once an arena is narrowed down for an
130 * IO, this struct is passed around for the duration of the IO. 173 * IO, this struct is passed around for the duration of the IO.
@@ -157,6 +200,7 @@ struct arena_info {
157 /* Arena flags */ 200 /* Arena flags */
158 u32 flags; 201 u32 flags;
159 struct mutex err_lock; 202 struct mutex err_lock;
203 int log_index[2];
160}; 204};
161 205
162/** 206/**
@@ -176,6 +220,7 @@ struct arena_info {
176 * @init_lock: Mutex used for the BTT initialization 220 * @init_lock: Mutex used for the BTT initialization
177 * @init_state: Flag describing the initialization state for the BTT 221 * @init_state: Flag describing the initialization state for the BTT
178 * @num_arenas: Number of arenas in the BTT instance 222 * @num_arenas: Number of arenas in the BTT instance
223 * @phys_bb: Pointer to the namespace's badblocks structure
179 */ 224 */
180struct btt { 225struct btt {
181 struct gendisk *btt_disk; 226 struct gendisk *btt_disk;
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 65cc171c721d..2adada1a5855 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
364int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) 364int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
365{ 365{
366 u64 checksum, offset; 366 u64 checksum, offset;
367 unsigned long align;
368 enum nd_pfn_mode mode; 367 enum nd_pfn_mode mode;
369 struct nd_namespace_io *nsio; 368 struct nd_namespace_io *nsio;
369 unsigned long align, start_pad;
370 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 370 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
371 struct nd_namespace_common *ndns = nd_pfn->ndns; 371 struct nd_namespace_common *ndns = nd_pfn->ndns;
372 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); 372 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
410 410
411 align = le32_to_cpu(pfn_sb->align); 411 align = le32_to_cpu(pfn_sb->align);
412 offset = le64_to_cpu(pfn_sb->dataoff); 412 offset = le64_to_cpu(pfn_sb->dataoff);
413 start_pad = le32_to_cpu(pfn_sb->start_pad);
413 if (align == 0) 414 if (align == 0)
414 align = 1UL << ilog2(offset); 415 align = 1UL << ilog2(offset);
415 mode = le32_to_cpu(pfn_sb->mode); 416 mode = le32_to_cpu(pfn_sb->mode);
@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
468 return -EBUSY; 469 return -EBUSY;
469 } 470 }
470 471
471 if ((align && !IS_ALIGNED(offset, align)) 472 if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
472 || !IS_ALIGNED(offset, PAGE_SIZE)) { 473 || !IS_ALIGNED(offset, PAGE_SIZE)) {
473 dev_err(&nd_pfn->dev, 474 dev_err(&nd_pfn->dev,
474 "bad offset: %#llx dax disabled align: %#lx\n", 475 "bad offset: %#llx dax disabled align: %#lx\n",
@@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
582 return altmap; 583 return altmap;
583} 584}
584 585
586static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
587{
588 return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
589 ALIGN_DOWN(phys, nd_pfn->align));
590}
591
585static int nd_pfn_init(struct nd_pfn *nd_pfn) 592static int nd_pfn_init(struct nd_pfn *nd_pfn)
586{ 593{
587 u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; 594 u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
@@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
637 start = nsio->res.start; 644 start = nsio->res.start;
638 size = PHYS_SECTION_ALIGN_UP(start + size) - start; 645 size = PHYS_SECTION_ALIGN_UP(start + size) - start;
639 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, 646 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
640 IORES_DESC_NONE) == REGION_MIXED) { 647 IORES_DESC_NONE) == REGION_MIXED
648 || !IS_ALIGNED(start + resource_size(&nsio->res),
649 nd_pfn->align)) {
641 size = resource_size(&nsio->res); 650 size = resource_size(&nsio->res);
642 end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); 651 end_trunc = start + size - phys_pmem_align_down(nd_pfn,
652 start + size);
643 } 653 }
644 654
645 if (start_pad + end_trunc) 655 if (start_pad + end_trunc)
646 dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", 656 dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
647 dev_name(&ndns->dev), start_pad + end_trunc); 657 dev_name(&ndns->dev), start_pad + end_trunc);
648 658
649 /* 659 /*
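phys_pmem_align_down() truncates to whichever boundary is lower, the memory-hotplug section boundary or the namespace's own alignment, so the truncated end satisfies both constraints. A worked instance, treating the 128 MB section size as an x86_64 assumption:

#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t section = 128ULL << 20;  /* assumed x86_64 section size */
	uint64_t align = 2ULL << 20;      /* e.g. a 2 MB namespace alignment */
	uint64_t phys = 0x108200000ULL;

	uint64_t a = ALIGN_DOWN(phys, section); /* 0x108000000 */
	uint64_t b = ALIGN_DOWN(phys, align);   /* 0x108200000 */
	uint64_t end = a < b ? a : b;           /* min_t() in the kernel */

	printf("truncate end to 0x%llx\n", (unsigned long long)end);
	return 0;
}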
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd4..839650e0926a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
1287 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < 1287 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
1288 NVME_DSM_MAX_RANGES); 1288 NVME_DSM_MAX_RANGES);
1289 1289
1290 queue->limits.discard_alignment = size; 1290 queue->limits.discard_alignment = 0;
1291 queue->limits.discard_granularity = size; 1291 queue->limits.discard_granularity = size;
1292 1292
1293 blk_queue_max_discard_sectors(queue, UINT_MAX); 1293 blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1335,6 +1335,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
1335 struct nvme_ns *ns, struct nvme_id_ns *id) 1335 struct nvme_ns *ns, struct nvme_id_ns *id)
1336{ 1336{
1337 sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); 1337 sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
1338 unsigned short bs = 1 << ns->lba_shift;
 	unsigned stream_alignment = 0;
 
 	if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
@@ -1343,7 +1344,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_mq_freeze_queue(disk->queue);
 	blk_integrity_unregister(disk);
 
-	blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift);
+	blk_queue_logical_block_size(disk->queue, bs);
+	blk_queue_physical_block_size(disk->queue, bs);
+	blk_queue_io_min(disk->queue, bs);
+
 	if (ns->ms && !ns->ext &&
 	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
 		nvme_init_integrity(disk, ns->ms, ns->pi_type);
@@ -1705,7 +1709,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
 		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
 	}
-	if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+	    is_power_of_2(ctrl->max_hw_sectors))
 		blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
 	blk_queue_virt_boundary(q, ctrl->page_size - 1);
 	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2874,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	nvme_set_queue_limits(ctrl, ns->queue);
-	nvme_setup_streams_ns(ctrl, ns);
 
 	id = nvme_identify_ns(ctrl, nsid);
 	if (!id)
@@ -2880,6 +2884,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	if (nvme_init_ns_head(ns, nsid, id, &new))
 		goto out_free_id;
+	nvme_setup_streams_ns(ctrl, ns);
 
 #ifdef CONFIG_NVME_MULTIPATH
 	/*
@@ -2965,8 +2970,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		return;
 
 	if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-		if (blk_get_integrity(ns->disk))
-			blk_integrity_unregister(ns->disk);
 		nvme_mpath_remove_disk_links(ns);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 				&nvme_ns_id_attr_group);
@@ -2974,6 +2977,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 			nvme_nvm_unregister_sysfs(ns);
 		del_gendisk(ns->disk);
 		blk_cleanup_queue(ns->queue);
+		if (blk_get_integrity(ns->disk))
+			blk_integrity_unregister(ns->disk);
 	}
 
 	mutex_lock(&ns->ctrl->subsys->lock);
@@ -2986,6 +2991,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	mutex_unlock(&ns->ctrl->namespaces_mutex);
 
 	synchronize_srcu(&ns->head->srcu);
+	nvme_mpath_check_last_path(ns);
 	nvme_put_ns(ns);
 }
 
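The NVME_QUIRK_STRIPE_SIZE hunk above only calls blk_queue_chunk_sectors() when max_hw_sectors is a power of two, since the block layer splits requests on chunk boundaries with mask arithmetic. A minimal userspace sketch of that check, using the same is_power_of_2() definition as include/linux/log2.h (the sample values are invented for illustration):

	#include <stdbool.h>
	#include <stdio.h>

	/* same definition as the kernel's: zero is not a power of two */
	static bool is_power_of_2(unsigned long n)
	{
		return n != 0 && (n & (n - 1)) == 0;
	}

	int main(void)
	{
		/* hypothetical max_hw_sectors values, not from real hardware */
		unsigned long samples[] = { 0, 1, 1024, 1280, 65536 };

		for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			printf("%8lu -> %s\n", samples[i],
			       is_power_of_2(samples[i]) ? "chunked" : "skipped");
		return 0;
	}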
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 76b4fe6816a0..894c2ccb3891 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -74,6 +74,7 @@ static struct nvmf_host *nvmf_host_default(void)
 		return NULL;
 
 	kref_init(&host->ref);
+	uuid_gen(&host->id);
 	snprintf(host->nqn, NVMF_NQN_SIZE,
 		"nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
 
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0a8af4daef89..794e66e4aa20 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	/* initiate nvme ctrl ref counting teardown */
 	nvme_uninit_ctrl(&ctrl->ctrl);
-	nvme_put_ctrl(&ctrl->ctrl);
 
 	/* Remove core ctrl ref. */
 	nvme_put_ctrl(&ctrl->ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ea1aa5283e8e..a00eabd06427 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -417,6 +417,15 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 	rcu_assign_pointer(head->current_path, NULL);
 }
 struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+	struct nvme_ns_head *head = ns->head;
+
+	if (head->disk && list_empty(&head->list))
+		kblockd_schedule_work(&head->requeue_work);
+}
+
 #else
 static inline void nvme_failover_req(struct request *req)
 {
@@ -448,6 +457,9 @@ static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 }
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 #ifdef CONFIG_NVM
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f5800c3c9082..d53550e612bc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -448,12 +448,31 @@ static void **nvme_pci_iod_list(struct request *req)
 	return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
+static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	unsigned int avg_seg_size;
+
+	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
+			blk_rq_nr_phys_segments(req));
+
+	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
+		return false;
+	if (!iod->nvmeq->qid)
+		return false;
+	if (!sgl_threshold || avg_seg_size < sgl_threshold)
+		return false;
+	return true;
+}
+
 static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
 	int nseg = blk_rq_nr_phys_segments(rq);
 	unsigned int size = blk_rq_payload_bytes(rq);
 
+	iod->use_sgl = nvme_pci_use_sgls(dev, rq);
+
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
 		size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
 				iod->use_sgl);
@@ -604,8 +623,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
 	dma_addr_t prp_dma;
 	int nprps, i;
 
-	iod->use_sgl = false;
-
 	length -= (page_size - offset);
 	if (length <= 0) {
 		iod->first_dma = 0;
@@ -715,8 +732,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 	int entries = iod->nents, i = 0;
 	dma_addr_t sgl_dma;
 
-	iod->use_sgl = true;
-
 	/* setting the transfer type as SGL */
 	cmd->flags = NVME_CMD_SGL_METABUF;
 
@@ -770,23 +785,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 	return BLK_STS_OK;
 }
 
-static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
-{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	unsigned int avg_seg_size;
-
-	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
-			blk_rq_nr_phys_segments(req));
-
-	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
-		return false;
-	if (!iod->nvmeq->qid)
-		return false;
-	if (!sgl_threshold || avg_seg_size < sgl_threshold)
-		return false;
-	return true;
-}
-
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
@@ -806,7 +804,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 			DMA_ATTR_NO_WARN))
 		goto out;
 
-	if (nvme_pci_use_sgls(dev, req))
+	if (iod->use_sgl)
 		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
 	else
 		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
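The PCIe hunks move the SGL-versus-PRP decision into nvme_init_iod() so it is made once per request, before either mapping path runs. The heart of it is the average-segment-size test; a standalone sketch of the same arithmetic (the request sizes and the 32 KiB threshold below are this sketch's own picks, not values from the driver):

	#include <stdbool.h>
	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	/* stands in for the sgl_threshold module parameter */
	static unsigned int sgl_threshold = 32 * 1024;

	static bool use_sgls(unsigned int payload_bytes, unsigned int nr_segments)
	{
		/* large average segments favour SGLs, small ones favour PRPs */
		unsigned int avg_seg_size = DIV_ROUND_UP(payload_bytes, nr_segments);

		return sgl_threshold && avg_seg_size >= sgl_threshold;
	}

	int main(void)
	{
		printf("128K in 2 segs:  %s\n", use_sgls(128 * 1024, 2) ? "SGL" : "PRP");
		printf("128K in 32 segs: %s\n", use_sgls(128 * 1024, 32) ? "SGL" : "PRP");
		return 0;
	}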
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 37af56596be6..2a0bba7f50cf 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -974,12 +974,18 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_start_queues(&ctrl->ctrl);
 
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+		/* state change failure should never happen */
+		WARN_ON_ONCE(1);
+		return;
+	}
+
 	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
 {
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
 		return;
 
 	queue_work(nvme_wq, &ctrl->err_work);
@@ -1753,6 +1759,12 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
 
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+		/* state change failure should never happen */
+		WARN_ON_ONCE(1);
+		return;
+	}
+
 	ret = nvme_rdma_configure_admin_queue(ctrl, false);
 	if (ret)
 		goto out_fail;
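The RDMA hunks make error recovery enter RESETTING first and then require an explicit transition to RECONNECTING, treating a refused transition as a bug. A toy state machine showing the same validate-before-acting pattern (the states and transition table here are deliberately simplified, not the driver's full set):

	#include <stdbool.h>
	#include <stdio.h>

	enum ctrl_state { LIVE, RESETTING, RECONNECTING };

	/* simplified table; the real controller allows more transitions */
	static bool change_state(enum ctrl_state *cur, enum ctrl_state next)
	{
		bool ok = (*cur == LIVE && next == RESETTING) ||
			  (*cur == RESETTING && next == RECONNECTING);

		if (ok)
			*cur = next;
		return ok;
	}

	int main(void)
	{
		enum ctrl_state st = LIVE;

		if (!change_state(&st, RESETTING))
			return 1;
		if (!change_state(&st, RECONNECTING)) {
			/* mirrors the patch's WARN_ON_ONCE(1) */
			fprintf(stderr, "state change failure should never happen\n");
			return 1;
		}
		printf("reconnecting\n");
		return 0;
	}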
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 7b75d9de55ab..6a018a0bd6ce 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -1085,7 +1085,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
 		const char *buf, size_t count)
 {
 	struct fcloop_nport *nport = NULL, *tmpport;
-	struct fcloop_tport *tport;
+	struct fcloop_tport *tport = NULL;
 	u64 nodename, portname;
 	unsigned long flags;
 	int ret;
diff --git a/drivers/nvmem/meson-mx-efuse.c b/drivers/nvmem/meson-mx-efuse.c
index a346b4923550..41d3a3c1104e 100644
--- a/drivers/nvmem/meson-mx-efuse.c
+++ b/drivers/nvmem/meson-mx-efuse.c
@@ -156,8 +156,8 @@ static int meson_mx_efuse_read(void *context, unsigned int offset,
 			       MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE,
 			       MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE);
 
-	for (i = offset; i < offset + bytes; i += efuse->config.word_size) {
-		addr = i / efuse->config.word_size;
+	for (i = 0; i < bytes; i += efuse->config.word_size) {
+		addr = (offset + i) / efuse->config.word_size;
 
 		err = meson_mx_efuse_read_addr(efuse, addr, &tmp);
 		if (err)
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 3481e69738b5..a327be1d264b 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -231,7 +231,12 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 			rc = of_mdiobus_register_phy(mdio, child, addr);
 		else
 			rc = of_mdiobus_register_device(mdio, child, addr);
-		if (rc)
+
+		if (rc == -ENODEV)
+			dev_err(&mdio->dev,
+				"MDIO device at address %d is missing.\n",
+				addr);
+		else if (rc)
 			goto unregister;
 	}
 
@@ -255,7 +260,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 
 		if (of_mdiobus_child_is_phy(child)) {
 			rc = of_mdiobus_register_phy(mdio, child, addr);
-			if (rc)
+			if (rc && rc != -ENODEV)
 				goto unregister;
 		}
 	}
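The of_mdio change downgrades -ENODEV from fatal to a logged skip, so one absent device no longer aborts the whole bus scan. The control flow reduced to a sketch (the addresses and the probe stub are made up for the demo):

	#include <errno.h>
	#include <stdio.h>

	/* stand-in for of_mdiobus_register_phy(); address 3 is "missing" here */
	static int register_one(int addr)
	{
		return addr == 3 ? -ENODEV : 0;
	}

	int main(void)
	{
		for (int addr = 0; addr < 5; addr++) {
			int rc = register_one(addr);

			if (rc == -ENODEV)
				fprintf(stderr,
					"MDIO device at address %d is missing.\n",
					addr);
			else if (rc)
				return rc;	/* any other error stays fatal */
		}
		return 0;
	}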
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index 0b3fb99d9b89..7390fb8ca9d1 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -303,7 +303,7 @@ static void dino_mask_irq(struct irq_data *d)
 	struct dino_device *dino_dev = irq_data_get_irq_chip_data(d);
 	int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 
-	DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq);
+	DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq);
 
 	/* Clear the matching bit in the IMR register */
 	dino_dev->imr &= ~(DINO_MASK_IRQ(local_irq));
@@ -316,7 +316,7 @@ static void dino_unmask_irq(struct irq_data *d)
 	int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 	u32 tmp;
 
-	DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq);
+	DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq);
 
 	/*
 	** clear pending IRQ bits
@@ -396,7 +396,7 @@ ilr_again:
 	if (mask) {
 		if (--ilr_loop > 0)
 			goto ilr_again;
-		printk(KERN_ERR "Dino 0x%p: stuck interrupt %d\n",
+		printk(KERN_ERR "Dino 0x%px: stuck interrupt %d\n",
 			dino_dev->hba.base_addr, mask);
 		return IRQ_NONE;
 	}
@@ -553,7 +553,7 @@ dino_fixup_bus(struct pci_bus *bus)
 	struct pci_dev *dev;
 	struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge));
 
-	DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n",
+	DBG(KERN_WARNING "%s(0x%px) bus %d platform_data 0x%px\n",
 		__func__, bus, bus->busn_res.start,
 		bus->bridge->platform_data);
 
@@ -854,7 +854,7 @@ static int __init dino_common_init(struct parisc_device *dev,
 	res->flags = IORESOURCE_IO; /* do not mark it busy ! */
 	if (request_resource(&ioport_resource, res) < 0) {
 		printk(KERN_ERR "%s: request I/O Port region failed "
-			"0x%lx/%lx (hpa 0x%p)\n",
+			"0x%lx/%lx (hpa 0x%px)\n",
 			name, (unsigned long)res->start, (unsigned long)res->end,
 			dino_dev->hba.base_addr);
 		return 1;
diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c
index 4dd9b1308128..99a80da6fd2e 100644
--- a/drivers/parisc/eisa_eeprom.c
+++ b/drivers/parisc/eisa_eeprom.c
@@ -106,7 +106,7 @@ static int __init eisa_eeprom_init(void)
 		return retval;
 	}
 
-	printk(KERN_INFO "EISA EEPROM at 0x%p\n", eisa_eeprom_addr);
+	printk(KERN_INFO "EISA EEPROM at 0x%px\n", eisa_eeprom_addr);
 	return 0;
 }
 
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index a25fed52f7e9..41b740aed3a3 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
 	iounmap(base_addr);
 }
 
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed, so that many built-in components simply don't work.
+ * The following quirks disable the serial AUX port and the built-in ATI RV100
+ * Radeon 7000 graphics card which both don't have any external connectors and
+ * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
+ * such makes those machines the only PARISC machines on which we can't use
+ * ttyS0 as boot console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+	if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+	    dev->subsystem_device != 0x1292)
+		return;
+
+	dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+	dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+	quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+	if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+	    dev->subsystem_device != 0x1291)
+		return;
+
+	dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+	dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+	quirk_diva_aux_disable);
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 04dac6a42c9f..6b8d060d07de 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -985,9 +985,7 @@ static u32 hv_compose_msi_req_v1(
 	int_pkt->wslot.slot = slot;
 	int_pkt->int_desc.vector = vector;
 	int_pkt->int_desc.vector_count = 1;
-	int_pkt->int_desc.delivery_mode =
-		(apic->irq_delivery_mode == dest_LowestPrio) ?
-		dest_LowestPrio : dest_Fixed;
+	int_pkt->int_desc.delivery_mode = dest_Fixed;
 
 	/*
 	 * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
@@ -1008,9 +1006,7 @@ static u32 hv_compose_msi_req_v2(
 	int_pkt->wslot.slot = slot;
 	int_pkt->int_desc.vector = vector;
 	int_pkt->int_desc.vector_count = 1;
-	int_pkt->int_desc.delivery_mode =
-		(apic->irq_delivery_mode == dest_LowestPrio) ?
-		dest_LowestPrio : dest_Fixed;
+	int_pkt->int_desc.delivery_mode = dest_Fixed;
 
 	/*
 	 * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 945099d49f8f..14fd865a5120 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
 	if (pci_has_legacy_pm_support(pci_dev))
 		return pci_legacy_resume_early(dev);
 
-	pci_update_current_state(pci_dev, PCI_D0);
+	/*
+	 * pci_restore_state() requires the device to be in D0 (because of MSI
+	 * restoration among other things), so force it into D0 in case the
+	 * driver's "freeze" callbacks put it into a low-power state directly.
+	 */
+	pci_set_power_state(pci_dev, PCI_D0);
 	pci_restore_state(pci_dev);
 
 	if (drv && drv->pm && drv->pm->thaw_noirq)
diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c
index accaaaccb662..6601ad0dfb3a 100644
--- a/drivers/phy/motorola/phy-cpcap-usb.c
+++ b/drivers/phy/motorola/phy-cpcap-usb.c
@@ -310,7 +310,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev,
 	int irq, error;
 
 	irq = platform_get_irq_byname(pdev, name);
-	if (!irq)
+	if (irq < 0)
 		return -ENODEV;
 
 	error = devm_request_threaded_irq(ddata->dev, irq, NULL,
diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig
index cb09245e9b4c..c845facacb06 100644
--- a/drivers/phy/renesas/Kconfig
+++ b/drivers/phy/renesas/Kconfig
@@ -12,7 +12,9 @@ config PHY_RCAR_GEN3_USB2
 	tristate "Renesas R-Car generation 3 USB 2.0 PHY driver"
 	depends on ARCH_RENESAS
 	depends on EXTCON
+	depends on USB_SUPPORT
 	select GENERIC_PHY
+	select USB_COMMON
 	help
 	  Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs.
 
diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c
index ee85fa0ca4b0..7492c8978217 100644
--- a/drivers/phy/rockchip/phy-rockchip-typec.c
+++ b/drivers/phy/rockchip/phy-rockchip-typec.c
@@ -1137,6 +1137,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
 		if (IS_ERR(phy)) {
 			dev_err(dev, "failed to create phy: %s\n",
 				child_np->name);
+			pm_runtime_disable(dev);
 			return PTR_ERR(phy);
 		}
 
@@ -1146,6 +1147,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 	if (IS_ERR(phy_provider)) {
 		dev_err(dev, "Failed to register phy provider\n");
+		pm_runtime_disable(dev);
 		return PTR_ERR(phy_provider);
 	}
 
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 4307bf0013e1..63e916d4d069 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match);
 static struct device_node *
 tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
 {
-	/*
-	 * of_find_node_by_name() drops a reference, so make sure to grab one.
-	 */
-	struct device_node *np = of_node_get(padctl->dev->of_node);
+	struct device_node *pads, *np;
+
+	pads = of_get_child_by_name(padctl->dev->of_node, "pads");
+	if (!pads)
+		return NULL;
 
-	np = of_find_node_by_name(np, "pads");
-	if (np)
-		np = of_find_node_by_name(np, name);
+	np = of_get_child_by_name(pads, name);
+	of_node_put(pads);
 
 	return np;
 }
@@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
 static struct device_node *
 tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index)
 {
-	/*
-	 * of_find_node_by_name() drops a reference, so make sure to grab one.
-	 */
-	struct device_node *np = of_node_get(pad->dev.of_node);
+	struct device_node *np, *lanes;
 
-	np = of_find_node_by_name(np, "lanes");
-	if (!np)
+	lanes = of_get_child_by_name(pad->dev.of_node, "lanes");
+	if (!lanes)
 		return NULL;
 
-	return of_find_node_by_name(np, pad->soc->lanes[index].name);
+	np = of_get_child_by_name(lanes, pad->soc->lanes[index].name);
+	of_node_put(lanes);
+
+	return np;
 }
 
 static int
@@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad,
 	unsigned int i;
 	int err;
 
-	children = of_find_node_by_name(pad->dev.of_node, "lanes");
+	children = of_get_child_by_name(pad->dev.of_node, "lanes");
 	if (!children)
 		return -ENODEV;
 
@@ -444,21 +444,21 @@ static struct device_node *
 tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type,
 			  unsigned int index)
 {
-	/*
-	 * of_find_node_by_name() drops a reference, so make sure to grab one.
-	 */
-	struct device_node *np = of_node_get(padctl->dev->of_node);
+	struct device_node *ports, *np;
+	char *name;
 
-	np = of_find_node_by_name(np, "ports");
-	if (np) {
-		char *name;
+	ports = of_get_child_by_name(padctl->dev->of_node, "ports");
+	if (!ports)
+		return NULL;
 
 	name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
-	if (!name)
-		return ERR_PTR(-ENOMEM);
-		np = of_find_node_by_name(np, name);
-		kfree(name);
+	if (!name) {
+		of_node_put(ports);
+		return ERR_PTR(-ENOMEM);
 	}
+	np = of_get_child_by_name(ports, name);
+	kfree(name);
+	of_node_put(ports);
 
 	return np;
 }
@@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl)
 
 static int tegra_xusb_padctl_probe(struct platform_device *pdev)
 {
-	struct device_node *np = of_node_get(pdev->dev.of_node);
+	struct device_node *np = pdev->dev.of_node;
 	const struct tegra_xusb_padctl_soc *soc;
 	struct tegra_xusb_padctl *padctl;
 	const struct of_device_id *match;
@@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev)
 	int err;
 
 	/* for backwards compatibility with old device trees */
-	np = of_find_node_by_name(np, "pads");
+	np = of_get_child_by_name(np, "pads");
 	if (!np) {
 		dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n");
 		return tegra_xusb_padctl_legacy_probe(pdev);
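All the Tegra hunks swap of_find_node_by_name(), which consumes a reference on its starting node, for of_get_child_by_name(), which leaves the parent alone and hands back a new reference the caller must drop. A miniature refcount model of the fixed pattern (the node structure and names are invented purely for the demo; this is not the kernel's struct device_node):

	#include <stdio.h>

	struct node {
		const char *name;
		int refcount;
	};

	static struct node *node_get(struct node *n)
	{
		if (n)
			n->refcount++;
		return n;
	}

	static void node_put(struct node *n)
	{
		if (n)
			n->refcount--;
	}

	int main(void)
	{
		struct node parent = { "padctl", 1 };
		struct node pads = { "pads", 1 };

		/* of_get_child_by_name(): reference taken on the child only */
		struct node *child = node_get(&pads);

		/* ... look up grandchildren here ... */

		node_put(child);	/* of_node_put() once we are done */
		printf("parent=%d pads=%d\n", parent.refcount, pads.refcount);
		return 0;
	}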
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index bdedb6325c72..4471fd94e1fe 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 				clear_bit(i, chip->irq.valid_mask);
 	}
 
+	/*
+	 * The same set of machines in chv_no_valid_mask[] have incorrectly
+	 * configured GPIOs that generate spurious interrupts so we use
+	 * this same list to apply another quirk for them.
+	 *
+	 * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
+	 */
+	if (!need_valid_mask) {
+		/*
+		 * Mask all interrupts the community is able to generate
+		 * but leave the ones that can only generate GPEs unmasked.
+		 */
+		chv_writel(GENMASK(31, pctrl->community->nirqs),
+			   pctrl->regs + CHV_INTMASK);
+	}
+
 	/* Clear all interrupts */
 	chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
 
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index e6cd8de793e2..3501491e5bfc 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = {
  */
 static struct lock_class_key pcs_lock_class;
 
+/* Class for the IRQ request mutex */
+static struct lock_class_key pcs_request_class;
+
 /*
  * REVISIT: Reads and writes could eventually use regmap or something
  * generic. But at least on omaps, some mux registers are performance
@@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq,
 	irq_set_chip_data(irq, pcs_soc);
 	irq_set_chip_and_handler(irq, &pcs->chip,
 				 handle_level_irq);
-	irq_set_lockdep_class(irq, &pcs_lock_class);
+	irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class);
 	irq_set_noprobe(irq);
 
 	return 0;
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index a276c61be217..e62ab087bfd8 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d,
 }
 
 static int stm32_gpio_domain_activate(struct irq_domain *d,
-				      struct irq_data *irq_data, bool early)
+				      struct irq_data *irq_data, bool reserve)
 {
 	struct stm32_gpio_bank *bank = d->host_data;
 	struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 791449a2370f..daa68acbc900 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -1458,5 +1458,5 @@ static void __exit acpi_wmi_exit(void)
 	class_unregister(&wmi_bus_class);
 }
 
-subsys_initcall(acpi_wmi_init);
+subsys_initcall_sync(acpi_wmi_init);
 module_exit(acpi_wmi_exit);
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index c94b606e0df8..ee14d8e45c97 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -2803,6 +2803,16 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr)
 		erp = dasd_3990_erp_handle_match_erp(cqr, erp);
 	}
 
+
+	/*
+	 * For path verification work we need to stick with the path that was
+	 * originally chosen so that the per path configuration data is
+	 * assigned correctly.
+	 */
+	if (test_bit(DASD_CQR_VERIFY_PATH, &erp->flags) && cqr->lpm) {
+		erp->lpm = cqr->lpm;
+	}
+
 	if (device->features & DASD_FEATURE_ERPLOG) {
 		/* print current erp_chain */
 		dev_err(&device->cdev->dev,
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 05ac6ba15a53..614b44e70a28 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -17,6 +17,8 @@ CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH)
 CFLAGS_sclp_early_core.o += -march=z900
 endif
 
+CFLAGS_sclp_early_core.o += -D__NO_FORTIFY
+
 obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \
 	 sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \
 	 sclp_early.o sclp_early_core.o
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 6c815207f4f5..3614df68830f 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5386,6 +5386,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(qeth_poll);
 
+static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
+{
+	if (!cmd->hdr.return_code)
+		cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
+	return cmd->hdr.return_code;
+}
+
 int qeth_setassparms_cb(struct qeth_card *card,
 			struct qeth_reply *reply, unsigned long data)
 {
@@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
 		(struct qeth_checksum_cmd *)reply->param;
 
 	QETH_CARD_TEXT(card, 4, "chkdoccb");
-	if (cmd->hdr.return_code)
+	if (qeth_setassparms_inspect_rc(cmd))
 		return 0;
 
 	memset(chksum_cb, 0, sizeof(*chksum_cb));
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 6e3d81969a77..d52265416da2 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1725,6 +1725,7 @@ struct aac_dev
 #define FIB_CONTEXT_FLAG_NATIVE_HBA	(0x00000010)
 #define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF	(0x00000020)
 #define FIB_CONTEXT_FLAG_SCSI_CMD	(0x00000040)
+#define FIB_CONTEXT_FLAG_EH_RESET	(0x00000080)
 
 /*
  * Define the command values
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index bdf127aaab41..d55332de08f9 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
 		info = &aac->hba_map[bus][cid];
 		if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
 		    info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
-			fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
+			fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
 			cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
 		}
 	}
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index a4f28b7e4c65..e18877177f1b 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
 		return req;
 
 	for_each_bio(bio) {
-		ret = blk_rq_append_bio(req, bio);
+		struct bio *bounce_bio = bio;
+
+		ret = blk_rq_append_bio(req, &bounce_bio);
 		if (ret)
 			return ERR_PTR(ret);
 	}
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 449ef5adbb2b..dfb8da83fa50 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -374,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
 			model, compatible);
 
 	if (strflags)
-		devinfo->flags = simple_strtoul(strflags, NULL, 0);
-	else
-		devinfo->flags = flags;
-
+		flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
+	devinfo->flags = flags;
 	devinfo->compatible = compatible;
 
 	if (compatible)
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index be5e919db0e8..0880d975eed3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
  * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
-		int *bflags, int async)
+		blist_flags_t *bflags, int async)
 {
 	int ret;
 
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
  * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_probe_and_add_lun(struct scsi_target *starget,
-				  u64 lun, int *bflagsp,
+				  u64 lun, blist_flags_t *bflagsp,
 				  struct scsi_device **sdevp,
 				  enum scsi_scan_mode rescan,
 				  void *hostdata)
 {
 	struct scsi_device *sdev;
 	unsigned char *result;
-	int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
+	blist_flags_t bflags;
+	int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 
 	/*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
  * Modifies sdevscan->lun.
  **/
 static void scsi_sequential_lun_scan(struct scsi_target *starget,
-				     int bflags, int scsi_level,
+				     blist_flags_t bflags, int scsi_level,
 				     enum scsi_scan_mode rescan)
 {
 	uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
 *  0: scan completed (or no memory, so further scanning is futile)
 *  1: could not scan with REPORT LUN
 **/
-static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
+static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
 				enum scsi_scan_mode rescan)
 {
 	unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
 			       unsigned int id, u64 lun, enum scsi_scan_mode rescan)
 {
 	struct Scsi_Host *shost = dev_to_shost(parent);
-	int bflags = 0;
+	blist_flags_t bflags = 0;
 	int res;
 	struct scsi_target *starget;
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 50e7d7e4a861..26ce17178401 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
 
-#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name
+#define BLIST_FLAG_NAME(name)					\
+	[ilog2((__force unsigned int)BLIST_##name)] = #name
 static const char *const sdev_bflags_name[] = {
 #include "scsi_devinfo_tbl.c"
 };
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
 	for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
 		const char *name = NULL;
 
-		if (!(sdev->sdev_bflags & BIT(i)))
+		if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
 			continue;
 		if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
 			name = sdev_bflags_name[i];
@@ -1414,7 +1415,10 @@ static void __scsi_remove_target(struct scsi_target *starget)
 		 * check.
 		 */
 		if (sdev->channel != starget->channel ||
-		    sdev->id != starget->id ||
+		    sdev->id != starget->id)
+			continue;
+		if (sdev->sdev_state == SDEV_DEL ||
+		    sdev->sdev_state == SDEV_CANCEL ||
 		    !get_device(&sdev->sdev_gendev))
 			continue;
 		spin_unlock_irqrestore(shost->host_lock, flags);
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index d0219e36080c..10ebb213ddb3 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -50,14 +50,14 @@
 
 /* Our blacklist flags */
 enum {
-	SPI_BLIST_NOIUS = 0x1,
+	SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
 };
 
 /* blacklist table, modelled on scsi_devinfo.c */
 static struct {
 	char *vendor;
 	char *model;
-	unsigned flags;
+	blist_flags_t flags;
 } spi_static_device_list[] __initdata = {
 	{"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
 	{"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
 	struct scsi_target *starget = sdev->sdev_target;
-	unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
-						      &sdev->inquiry[16],
-						      SCSI_DEVINFO_SPI);
+	blist_flags_t bflags;
+
+	bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
+					     &sdev->inquiry[16],
+					     SCSI_DEVINFO_SPI);
 
 	/* Populate the target capability fields with the values
 	 * gleaned from the device inquiry */
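The SCSI hunks above all serve one change: blacklist flags get their own bitwise type so sparse can flag accidental mixes with plain integers, while the __force casts mark the few deliberate conversions. A compilable sketch of the same idiom (the __bitwise/__force macros reduce to no-ops outside sparse, as in the kernel headers; the flag names here are shortened stand-ins):

	#include <stdio.h>

	#ifdef __CHECKER__
	#define __bitwise __attribute__((bitwise))
	#define __force __attribute__((force))
	#else
	#define __bitwise
	#define __force
	#endif

	typedef unsigned int __bitwise blist_flags_t;

	#define BLIST_NOLUN	((__force blist_flags_t)(1 << 0))
	#define BLIST_FORCELUN	((__force blist_flags_t)(1 << 1))

	int main(void)
	{
		blist_flags_t flags = BLIST_NOLUN | BLIST_FORCELUN;

		/* printing needs an explicit, deliberate cast back to int */
		printf("flags=%#x\n", (__force unsigned int)flags);
		return 0;
	}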
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 1b06cf0375dc..3b3d1d050cac 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -953,10 +953,11 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
 		case TEST_UNIT_READY:
 			break;
 		default:
-			set_host_byte(scmnd, DID_TARGET_FAILURE);
+			set_host_byte(scmnd, DID_ERROR);
 		}
 		break;
 	case SRB_STATUS_INVALID_LUN:
+		set_host_byte(scmnd, DID_NO_CONNECT);
 		do_work = true;
 		process_err_fn = storvsc_remove_lun;
 		break;
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index 77fe55ce790c..d65345312527 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -79,6 +79,7 @@
 #define A3700_SPI_BYTE_LEN		BIT(5)
 #define A3700_SPI_CLK_PRESCALE		BIT(0)
 #define A3700_SPI_CLK_PRESCALE_MASK	(0x1f)
+#define A3700_SPI_CLK_EVEN_OFFS		(0x10)
 
 #define A3700_SPI_WFIFO_THRS_BIT	28
 #define A3700_SPI_RFIFO_THRS_BIT	24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
 
 	prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
 
+	/* For prescaler values over 15, we can only set it by steps of 2.
+	 * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to
+	 * 30. We only use this range from 16 to 30.
+	 */
+	if (prescale > 15)
+		prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
+
 	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
 	val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
 
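The Armada 3700 hunk encodes prescaler values above 15 in a second, even-step range that starts at A3700_SPI_CLK_EVEN_OFFS. A quick worked example of the register encoding (the 200 MHz source clock and target rates below are illustrative only):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define A3700_SPI_CLK_EVEN_OFFS	0x10

	static unsigned int encode_prescale(unsigned long clk_rate,
					    unsigned long speed_hz)
	{
		unsigned int prescale = DIV_ROUND_UP(clk_rate, speed_hz);

		/* above 15 the divider only counts in steps of two */
		if (prescale > 15)
			prescale = A3700_SPI_CLK_EVEN_OFFS +
				   DIV_ROUND_UP(prescale, 2);
		return prescale;
	}

	int main(void)
	{
		/* 20 MHz needs divider 10 -> plain value 0xa */
		printf("20 MHz -> %#x\n", encode_prescale(200000000, 20000000));
		/* 10 MHz needs divider 20 -> even range, 0x10 + 10 = 0x1a */
		printf("10 MHz -> %#x\n", encode_prescale(200000000, 10000000));
		return 0;
	}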
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index f95da364c283..669470971023 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
 	pm_runtime_get_sync(&pdev->dev);
 
 	/* reset the hardware and block queue progress */
-	spin_lock_irq(&as->lock);
 	if (as->use_dma) {
 		atmel_spi_stop_dma(master);
 		atmel_spi_release_dma(master);
 	}
 
+	spin_lock_irq(&as->lock);
 	spi_writel(as, CR, SPI_BIT(SWRST));
 	spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
 	spi_readl(as, SR);
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 2ce875764ca6..0835a8d88fb8 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
 	/* Sets SPCMD */
 	rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
-	/* Enables SPI function in master mode */
-	rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR);
+	/* Sets RSPI mode */
+	rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index c5cd635c28f3..41410031f8e9 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -525,7 +525,7 @@ err_free_master:
 
 static int sun4i_spi_remove(struct platform_device *pdev)
 {
-	pm_runtime_disable(&pdev->dev);
+	pm_runtime_force_suspend(&pdev->dev);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index bc7100b93dfc..e0b9fe1d0e37 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 	while (remaining_words) {
 		int n_words, tx_words, rx_words;
 		u32 sr;
+		int stalled;
 
 		n_words = min(remaining_words, xspi->buffer_size);
 
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 
 		/* Read out all the data from the Rx FIFO */
 		rx_words = n_words;
+		stalled = 10;
 		while (rx_words) {
+			if (rx_words == n_words && !(stalled--) &&
+			    !(sr & XSPI_SR_TX_EMPTY_MASK) &&
+			    (sr & XSPI_SR_RX_EMPTY_MASK)) {
+				dev_err(&spi->dev,
+					"Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
+				xspi_init_hw(xspi);
+				return -EIO;
+			}
+
 			if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
 				xilinx_spi_rx(xspi);
 				rx_words--;
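The Xilinx hunk adds a small countdown so a FIFO that never produces data is reported and the core reset instead of spinning forever. The bounded-poll pattern in isolation (the fifo_ready stub stands in for the status-register read and is rigged to always fail so the stall path runs):

	#include <stdbool.h>
	#include <stdio.h>

	/* stand-in for polling XSPI_SR; always empty here to force a stall */
	static bool fifo_ready(void)
	{
		return false;
	}

	int main(void)
	{
		int stalled = 10;	/* same retry budget the patch uses */

		while (!fifo_ready()) {
			if (!(stalled--)) {
				fprintf(stderr, "Detected stall.\n");
				return 1;	/* the patch returns -EIO and resets */
			}
		}
		return 0;
	}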
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 0f695df14c9d..372ce9913e6d 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -765,10 +765,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		break;
 	case ASHMEM_SET_SIZE:
 		ret = -EINVAL;
+		mutex_lock(&ashmem_mutex);
 		if (!asma->file) {
 			ret = 0;
 			asma->size = (size_t)arg;
 		}
+		mutex_unlock(&ashmem_mutex);
 		break;
 	case ASHMEM_GET_SIZE:
 		ret = asma->size;
diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig
index a517b2d29f1b..8f6494158d3d 100644
--- a/drivers/staging/android/ion/Kconfig
+++ b/drivers/staging/android/ion/Kconfig
@@ -37,7 +37,7 @@ config ION_CHUNK_HEAP
 
 config ION_CMA_HEAP
 	bool "Ion CMA heap support"
-	depends on ION && CMA
+	depends on ION && DMA_CMA
 	help
 	  Choose this option to enable CMA heaps with Ion. This heap is backed
 	  by the Contiguous Memory Allocator (CMA). If your system has these
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index a7d9b0e98572..f480885e346b 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -346,7 +346,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
346 mutex_lock(&buffer->lock); 346 mutex_lock(&buffer->lock);
347 list_for_each_entry(a, &buffer->attachments, list) { 347 list_for_each_entry(a, &buffer->attachments, list) {
348 dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents, 348 dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
349 DMA_BIDIRECTIONAL); 349 direction);
350 } 350 }
351 mutex_unlock(&buffer->lock); 351 mutex_unlock(&buffer->lock);
352 352
@@ -368,7 +368,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
368 mutex_lock(&buffer->lock); 368 mutex_lock(&buffer->lock);
369 list_for_each_entry(a, &buffer->attachments, list) { 369 list_for_each_entry(a, &buffer->attachments, list) {
370 dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents, 370 dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
371 DMA_BIDIRECTIONAL); 371 direction);
372 } 372 }
373 mutex_unlock(&buffer->lock); 373 mutex_unlock(&buffer->lock);
374 374
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index dd5545d9990a..86196ffd2faf 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -39,9 +39,15 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 	struct ion_cma_heap *cma_heap = to_cma_heap(heap);
 	struct sg_table *table;
 	struct page *pages;
+	unsigned long size = PAGE_ALIGN(len);
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long align = get_order(size);
 	int ret;
 
-	pages = cma_alloc(cma_heap->cma, len, 0, GFP_KERNEL);
+	if (align > CONFIG_CMA_ALIGNMENT)
+		align = CONFIG_CMA_ALIGNMENT;
+
+	pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL);
 	if (!pages)
 		return -ENOMEM;
 
@@ -53,7 +59,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 	if (ret)
 		goto free_mem;
 
-	sg_set_page(table->sgl, pages, len, 0);
+	sg_set_page(table->sgl, pages, size, 0);
 
 	buffer->priv_virt = pages;
 	buffer->sg_table = table;
@@ -62,7 +68,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 free_mem:
 	kfree(table);
 err:
-	cma_release(cma_heap->cma, pages, buffer->size);
+	cma_release(cma_heap->cma, pages, nr_pages);
 	return -ENOMEM;
 }
 
@@ -70,9 +76,10 @@ static void ion_cma_free(struct ion_buffer *buffer)
 {
 	struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
 	struct page *pages = buffer->priv_virt;
+	unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;
 
 	/* release memory */
-	cma_release(cma_heap->cma, pages, buffer->size);
+	cma_release(cma_heap->cma, pages, nr_pages);
 	/* release sg table */
 	sg_free_table(buffer->sg_table);
 	kfree(buffer->sg_table);
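The Ion CMA hunks fix the units: cma_alloc()/cma_release() deal in page counts and an order-based alignment, not bytes. The conversion, standalone (the 4 KiB PAGE_SIZE and the sample length are illustrative; CONFIG_CMA_ALIGNMENT defaults to 8 upstream):

	#include <stdio.h>

	#define PAGE_SHIFT		12
	#define PAGE_SIZE		(1UL << PAGE_SHIFT)
	#define PAGE_ALIGN(x)		(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
	#define CONFIG_CMA_ALIGNMENT	8

	/* get_order(): smallest order whose page run covers size */
	static unsigned long get_order(unsigned long size)
	{
		unsigned long order = 0;

		while ((PAGE_SIZE << order) < size)
			order++;
		return order;
	}

	int main(void)
	{
		unsigned long len = 3 * 1024 * 1024 + 123;	/* arbitrary request */
		unsigned long size = PAGE_ALIGN(len);
		unsigned long nr_pages = size >> PAGE_SHIFT;
		unsigned long align = get_order(size);

		if (align > CONFIG_CMA_ALIGNMENT)
			align = CONFIG_CMA_ALIGNMENT;

		printf("len=%lu -> nr_pages=%lu align_order=%lu\n",
		       len, nr_pages, align);
		return 0;
	}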
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index 986c2a40d978..8267119ccc8e 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -487,21 +487,18 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
 			      ksocknal_nid2peerlist(id.nid));
 	}
 
-	route2 = NULL;
 	list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
-		if (route2->ksnr_ipaddr == ipaddr)
-			break;
-
-		route2 = NULL;
-	}
-	if (!route2) {
-		ksocknal_add_route_locked(peer, route);
-		route->ksnr_share_count++;
-	} else {
-		ksocknal_route_decref(route);
-		route2->ksnr_share_count++;
+		if (route2->ksnr_ipaddr == ipaddr) {
+			/* Route already exists, use the old one */
+			ksocknal_route_decref(route);
+			route2->ksnr_share_count++;
+			goto out;
+		}
 	}
-
+	/* Route doesn't already exist, add the new one */
+	ksocknal_add_route_locked(peer, route);
+	route->ksnr_share_count++;
+out:
 	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
 	return 0;
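The socklnd rewrite drops the "NULL the cursor to signal not-found" dance, which list_for_each_entry does not support (the cursor is never NULL after the loop), in favour of a goto out on a hit. The same search shape in plain C (the route array is a stand-in for the kernel list):

	#include <stdio.h>

	struct route {
		unsigned int ipaddr;
		int share_count;
	};

	int main(void)
	{
		struct route routes[] = { { 0x0a000001, 1 }, { 0x0a000002, 1 } };
		unsigned int wanted = 0x0a000002;

		for (unsigned i = 0; i < sizeof(routes) / sizeof(routes[0]); i++) {
			if (routes[i].ipaddr == wanted) {
				/* route already exists, reuse it */
				routes[i].share_count++;
				goto out;
			}
		}
		/* not found: this is where the new route would be added */
		printf("adding new route\n");
	out:
		printf("share_count=%d\n", routes[1].share_count);
		return 0;
	}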
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c69b4a9694d..0d99b242e82e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
920 " %d i: %d bio: %p, allocating another" 920 " %d i: %d bio: %p, allocating another"
921 " bio\n", bio->bi_vcnt, i, bio); 921 " bio\n", bio->bi_vcnt, i, bio);
922 922
923 rc = blk_rq_append_bio(req, bio); 923 rc = blk_rq_append_bio(req, &bio);
924 if (rc) { 924 if (rc) {
925 pr_err("pSCSI: failed to append bio\n"); 925 pr_err("pSCSI: failed to append bio\n");
926 goto fail; 926 goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
938 } 938 }
939 939
940 if (bio) { 940 if (bio) {
941 rc = blk_rq_append_bio(req, bio); 941 rc = blk_rq_append_bio(req, &bio);
942 if (rc) { 942 if (rc) {
943 pr_err("pSCSI: failed to append bio\n"); 943 pr_err("pSCSI: failed to append bio\n");
944 goto fail; 944 goto fail;
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 419a7a90bce0..f45bcbc63738 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -339,7 +339,7 @@ static void __ring_interrupt(struct tb_ring *ring)
 		return;
 
 	if (ring->start_poll) {
-		__ring_interrupt_mask(ring, false);
+		__ring_interrupt_mask(ring, true);
 		ring->start_poll(ring->poll_data);
 	} else {
 		schedule_work(&ring->work);
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 427e0d5d8f13..539b49adb6af 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1762,7 +1762,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 {
 	struct n_tty_data *ldata = tty->disc_data;
 
-	if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) {
+	if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) {
 		bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
 		ldata->line_start = ldata->read_tail;
 		if (!L_ICANON(tty) || !read_cnt(ldata)) {
@@ -2425,7 +2425,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 		return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
 	case TIOCINQ:
 		down_write(&tty->termios_rwsem);
-		if (L_ICANON(tty))
+		if (L_ICANON(tty) && !L_EXTPROC(tty))
 			retval = inq_canon(ldata);
 		else
 			retval = read_cnt(ldata);
diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c b/drivers/usb/chipidea/ci_hdrc_msm.c
index 3593ce0ec641..880009987460 100644
--- a/drivers/usb/chipidea/ci_hdrc_msm.c
+++ b/drivers/usb/chipidea/ci_hdrc_msm.c
@@ -247,7 +247,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_mux;
 
-	ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi");
+	ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi");
 	if (ulpi_node) {
 		phy_node = of_get_next_available_child(ulpi_node, NULL);
 		ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy");
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 78e92d29f8d9..c821b4b9647e 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -1007,7 +1007,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 		case USB_SSP_CAP_TYPE:
 			ssp_cap = (struct usb_ssp_cap_descriptor *)buffer;
 			ssac = (le32_to_cpu(ssp_cap->bmAttributes) &
-				USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1;
+				USB_SSP_SUBLINK_SPEED_ATTRIBS);
 			if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac))
 				dev->bos->ssp_cap = ssp_cap;
 			break;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index a10b346b9777..4024926c1d68 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -52,10 +52,11 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Microsoft LifeCam-VX700 v2.0 */
 	{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
 
-	/* Logitech HD Pro Webcams C920, C920-C and C930e */
+	/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
 	{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
 	{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
 	{ USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
+	{ USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT },
 
 	/* Logitech ConferenceCam CC3000e */
 	{ USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT },
@@ -149,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
 	{ USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
 
+	/* ELSA MicroLink 56K */
+	{ USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	/* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
 	{ USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },
 
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 93eff7dec2f5..1b3efb14aec7 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1147,11 +1147,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
 	udc = kzalloc(sizeof(*udc), GFP_KERNEL);
 	if (!udc)
-		goto err1;
-
-	ret = device_add(&gadget->dev);
-	if (ret)
-		goto err2;
+		goto err_put_gadget;
 
 	device_initialize(&udc->dev);
 	udc->dev.release = usb_udc_release;
@@ -1160,7 +1156,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 	udc->dev.parent = parent;
 	ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj));
 	if (ret)
-		goto err3;
+		goto err_put_udc;
+
+	ret = device_add(&gadget->dev);
+	if (ret)
+		goto err_put_udc;
 
 	udc->gadget = gadget;
 	gadget->udc = udc;
@@ -1170,7 +1170,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
 	ret = device_add(&udc->dev);
 	if (ret)
-		goto err4;
+		goto err_unlist_udc;
 
 	usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED);
 	udc->vbus = true;
@@ -1178,27 +1178,25 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 	/* pick up one of pending gadget drivers */
 	ret = check_pending_gadget_drivers(udc);
 	if (ret)
-		goto err5;
+		goto err_del_udc;
 
 	mutex_unlock(&udc_lock);
 
 	return 0;
 
-err5:
+ err_del_udc:
 	device_del(&udc->dev);
 
-err4:
+ err_unlist_udc:
 	list_del(&udc->list);
 	mutex_unlock(&udc_lock);
 
-err3:
-	put_device(&udc->dev);
 	device_del(&gadget->dev);
 
-err2:
-	kfree(udc);
+ err_put_udc:
+	put_device(&udc->dev);
 
-err1:
+ err_put_gadget:
 	put_device(&gadget->dev);
 	return ret;
 }
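The udc/core.c hunks above are an error-unwind reordering: device_add(&gadget->dev) now happens only after the udc allocation and naming succeed, the numbered err1..err5 labels become descriptive err_put_*/err_del_*/err_unlist_* labels, and kfree(udc) is replaced by put_device(&udc->dev) so the device release callback does the freeing. A minimal userspace sketch of the same unwind discipline (all names hypothetical, not from the patch):

#include <stdio.h>
#include <stdlib.h>

struct res { char *a, *b, *c; };

/* Acquire in order, unwind in reverse, one label per completed step,
 * so every failure path releases exactly what was taken. */
static struct res *setup(void)
{
        struct res *r = malloc(sizeof(*r));
        if (!r)
                goto err_out;

        r->a = malloc(16);
        if (!r->a)
                goto err_free_r;

        r->b = malloc(16);
        if (!r->b)
                goto err_free_a;

        r->c = malloc(16);
        if (!r->c)
                goto err_free_b;

        return r;

err_free_b:
        free(r->b);
err_free_a:
        free(r->a);
err_free_r:
        free(r);
err_out:
        return NULL;
}

int main(void)
{
        struct res *r = setup();
        printf("setup %s\n", r ? "succeeded" : "failed");
        if (r) {
                free(r->c);
                free(r->b);
                free(r->a);
                free(r);
        }
        return 0;
}

Renaming the labels after what they undo, as the patch does, makes it mechanical to verify that each goto target matches the set of steps that completed before the failure.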
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
index 4f7895dbcf88..e26e685d8a57 100644
--- a/drivers/usb/host/xhci-debugfs.c
+++ b/drivers/usb/host/xhci-debugfs.c
@@ -162,7 +162,7 @@ static void xhci_debugfs_extcap_regset(struct xhci_hcd *xhci, int cap_id,
 static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
 {
 	dma_addr_t dma;
-	struct xhci_ring *ring = s->private;
+	struct xhci_ring *ring = *(struct xhci_ring **)s->private;
 
 	dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue);
 	seq_printf(s, "%pad\n", &dma);
@@ -173,7 +173,7 @@ static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
 static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
 {
 	dma_addr_t dma;
-	struct xhci_ring *ring = s->private;
+	struct xhci_ring *ring = *(struct xhci_ring **)s->private;
 
 	dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue);
 	seq_printf(s, "%pad\n", &dma);
@@ -183,7 +183,7 @@ static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
 
 static int xhci_ring_cycle_show(struct seq_file *s, void *unused)
 {
-	struct xhci_ring *ring = s->private;
+	struct xhci_ring *ring = *(struct xhci_ring **)s->private;
 
 	seq_printf(s, "%d\n", ring->cycle_state);
 
@@ -346,7 +346,7 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci,
 }
 
 static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci,
-						   struct xhci_ring *ring,
+						   struct xhci_ring **ring,
 						   const char *name,
 						   struct dentry *parent)
 {
@@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci,
 
 	snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index);
 	epriv->root = xhci_debugfs_create_ring_dir(xhci,
-						   dev->eps[ep_index].new_ring,
+						   &dev->eps[ep_index].new_ring,
 						   epriv->name,
 						   spriv->root);
 	spriv->eps[ep_index] = epriv;
@@ -423,7 +423,7 @@ void xhci_debugfs_create_slot(struct xhci_hcd *xhci, int slot_id)
 	priv->dev = dev;
 	dev->debugfs_private = priv;
 
-	xhci_debugfs_create_ring_dir(xhci, dev->eps[0].ring,
+	xhci_debugfs_create_ring_dir(xhci, &dev->eps[0].ring,
 				     "ep00", priv->root);
 
 	xhci_debugfs_create_context_files(xhci, priv->root, slot_id);
@@ -488,11 +488,11 @@ void xhci_debugfs_init(struct xhci_hcd *xhci)
 					ARRAY_SIZE(xhci_extcap_dbc),
 					"reg-ext-dbc");
 
-	xhci_debugfs_create_ring_dir(xhci, xhci->cmd_ring,
+	xhci_debugfs_create_ring_dir(xhci, &xhci->cmd_ring,
 				     "command-ring",
 				     xhci->debugfs_root);
 
-	xhci_debugfs_create_ring_dir(xhci, xhci->event_ring,
+	xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring,
 				     "event-ring",
 				     xhci->debugfs_root);
 
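The xhci-debugfs hunks above switch every ring file from storing a struct xhci_ring * to storing a struct xhci_ring **, so the show functions dereference the current ring at read time rather than a pointer captured when the file was created; a ring that is later reallocated is then picked up automatically. A small standalone sketch of the double-pointer idiom (names hypothetical):

#include <stdio.h>
#include <stdlib.h>

/* The reader keeps a pointer to the *slot*, so it always sees the
 * current object even after the slot is repointed at a replacement. */
struct ring { int cycle_state; };

static void show_cycle(struct ring **slot)
{
        printf("cycle_state = %d\n", (*slot)->cycle_state);
}

int main(void)
{
        struct ring *r = malloc(sizeof(*r));
        r->cycle_state = 1;

        struct ring **slot = &r;
        show_cycle(slot);        /* sees the first ring */

        struct ring *r2 = malloc(sizeof(*r2));
        r2->cycle_state = 0;
        free(r);
        r = r2;                  /* the slot now points at the new ring */
        show_cycle(slot);        /* still valid: reads through the slot */

        free(r2);
        return 0;
}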
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 7ef1274ef7f7..1aad89b8aba0 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -178,6 +178,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		xhci->quirks |= XHCI_BROKEN_STREAMS;
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
+			pdev->device == 0x0014)
+		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
 			pdev->device == 0x0015)
 		xhci->quirks |= XHCI_RESET_ON_RESUME;
 	if (pdev->vendor == PCI_VENDOR_ID_VIA)
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2424d3020ca3..da6dbe3ebd8b 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3525,8 +3525,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
 	struct xhci_slot_ctx *slot_ctx;
 	int i, ret;
 
-	xhci_debugfs_remove_slot(xhci, udev->slot_id);
-
 #ifndef CONFIG_USB_DEFAULT_PERSIST
 	/*
 	 * We called pm_runtime_get_noresume when the device was attached.
@@ -3555,8 +3553,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
 	}
 
 	ret = xhci_disable_slot(xhci, udev->slot_id);
-	if (ret)
+	if (ret) {
+		xhci_debugfs_remove_slot(xhci, udev->slot_id);
 		xhci_free_virt_device(xhci, udev->slot_id);
+	}
 }
 
 int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
index 465dbf68b463..f723f7b8c9ac 100644
--- a/drivers/usb/misc/usb3503.c
+++ b/drivers/usb/misc/usb3503.c
@@ -279,6 +279,8 @@ static int usb3503_probe(struct usb3503 *hub)
 	if (gpio_is_valid(hub->gpio_reset)) {
 		err = devm_gpio_request_one(dev, hub->gpio_reset,
 				GPIOF_OUT_INIT_LOW, "usb3503 reset");
+		/* Datasheet defines a hardware reset to be at least 100us */
+		usleep_range(100, 10000);
 		if (err) {
 			dev_err(dev,
 				"unable to request GPIO %d as reset pin (%d)\n",
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index f6ae753ab99b..f932f40302df 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1004,7 +1004,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		break;
 
 	case MON_IOCQ_RING_SIZE:
+		mutex_lock(&rp->fetch_lock);
 		ret = rp->b_size;
+		mutex_unlock(&rp->fetch_lock);
 		break;
 
 	case MON_IOCT_RING_SIZE:
@@ -1231,12 +1233,16 @@ static int mon_bin_vma_fault(struct vm_fault *vmf)
 	unsigned long offset, chunk_idx;
 	struct page *pageptr;
 
+	mutex_lock(&rp->fetch_lock);
 	offset = vmf->pgoff << PAGE_SHIFT;
-	if (offset >= rp->b_size)
+	if (offset >= rp->b_size) {
+		mutex_unlock(&rp->fetch_lock);
 		return VM_FAULT_SIGBUS;
+	}
 	chunk_idx = offset / CHUNK_SIZE;
 	pageptr = rp->b_vec[chunk_idx].pg;
 	get_page(pageptr);
+	mutex_unlock(&rp->fetch_lock);
 	vmf->page = pageptr;
 	return 0;
 }
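Both mon_bin hunks above serialize readers of b_size/b_vec against a concurrent ring resize by taking rp->fetch_lock around the ioctl read and the whole fault path. A userspace sketch of why the size check and the access must share one critical section (structure and names invented for illustration):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t fetch_lock = PTHREAD_MUTEX_INITIALIZER;
static size_t b_size = 64;
static char *b_vec;

/* Size check and buffer access under the same lock the resizer holds;
 * checking outside the lock would allow a shrink between the two. */
static int read_at(size_t offset, char *out)
{
        int ret = -1;

        pthread_mutex_lock(&fetch_lock);
        if (offset < b_size) {
                *out = b_vec[offset];
                ret = 0;
        }
        pthread_mutex_unlock(&fetch_lock);
        return ret;
}

static void resize(size_t new_size)
{
        pthread_mutex_lock(&fetch_lock);
        b_vec = realloc(b_vec, new_size);
        b_size = new_size;
        pthread_mutex_unlock(&fetch_lock);
}

int main(void)
{
        char c;

        b_vec = calloc(b_size, 1);
        resize(16);
        printf("read_at(32) -> %d (rejected after shrink)\n",
               read_at(32, &c));
        free(b_vec);
        return 0;
}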
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 7c6273bf5beb..06d502b3e913 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
 	{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
 	{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+	{ USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
 	{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
 	{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
 	{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -174,6 +175,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
 	{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+	{ USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
 	{ USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
 	{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
 	{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 1aba9105b369..fc68952c994a 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1013,6 +1013,7 @@ static const struct usb_device_id id_table_combined[] = {
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) },
 	{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) },
+	{ USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) },
 	{ }	/* Terminating entry */
 };
 
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 4faa09fe308c..8b4ecd2bd297 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -915,6 +915,12 @@
 #define ICPDAS_I7563U_PID		0x0105
 
 /*
+ * Airbus Defence and Space
+ */
+#define AIRBUS_DS_VID			0x1e8e  /* Vendor ID */
+#define AIRBUS_DS_P8GR			0x6001  /* Tetra P8GR */
+
+/*
  * RT Systems programming cables for various ham radios
  */
 #define RTSYSTEMS_VID		0x2100	/* Vendor ID */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 3b3513874cfd..b6320e3be429 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb);
 /* These Quectel products use Qualcomm's vendor ID */
 #define QUECTEL_PRODUCT_UC20		0x9003
 #define QUECTEL_PRODUCT_UC15		0x9090
+/* These Yuga products use Qualcomm's vendor ID */
+#define YUGA_PRODUCT_CLM920_NC5		0x9625
 
 #define QUECTEL_VENDOR_ID		0x2c7c
 /* These Quectel products use Quectel's vendor ID */
@@ -280,6 +282,7 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_LE922_USBCFG3	0x1043
 #define TELIT_PRODUCT_LE922_USBCFG5	0x1045
 #define TELIT_PRODUCT_ME910		0x1100
+#define TELIT_PRODUCT_ME910_DUAL_MODEM	0x1101
 #define TELIT_PRODUCT_LE920		0x1200
 #define TELIT_PRODUCT_LE910		0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4	0x1206
@@ -645,6 +648,11 @@ static const struct option_blacklist_info telit_me910_blacklist = {
 	.reserved = BIT(1) | BIT(3),
 };
 
+static const struct option_blacklist_info telit_me910_dual_modem_blacklist = {
+	.sendsetup = BIT(0),
+	.reserved = BIT(3),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
 	.sendsetup = BIT(0),
 	.reserved = BIT(1) | BIT(2),
@@ -674,6 +682,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = {
 	.reserved = BIT(4) | BIT(5),
 };
 
+static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
+	.reserved = BIT(1) | BIT(4),
+};
+
 static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1178,6 +1190,9 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+	/* Yuga products use Qualcomm vendor ID */
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
+	  .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist },
 	/* Quectel products using Quectel vendor ID */
 	{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
@@ -1244,6 +1259,8 @@ static const struct usb_device_id option_ids[] = {
 	  .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
 	  .driver_info = (kernel_ulong_t)&telit_me910_blacklist },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
+	  .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
 	  .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index e3892541a489..613f91add03d 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = {
 	{DEVICE_SWI(0x1199, 0x9079)},	/* Sierra Wireless EM74xx */
 	{DEVICE_SWI(0x1199, 0x907a)},	/* Sierra Wireless EM74xx QDL */
 	{DEVICE_SWI(0x1199, 0x907b)},	/* Sierra Wireless EM74xx */
+	{DEVICE_SWI(0x1199, 0x9090)},	/* Sierra Wireless EM7565 QDL */
+	{DEVICE_SWI(0x1199, 0x9091)},	/* Sierra Wireless EM7565 */
 	{DEVICE_SWI(0x413c, 0x81a2)},	/* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a3)},	/* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a4)},	/* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
@@ -342,6 +344,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 		break;
 	case 2:
 		dev_dbg(dev, "NMEA GPS interface found\n");
+		sendsetup = true;
 		break;
 	case 3:
 		dev_dbg(dev, "Modem port found\n");
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index e6127fb21c12..a7d08ae0adad 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -143,6 +143,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NO_ATA_1X),
 
+/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+		"Norelsys",
+		"NS1068X",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_IGNORE_UAS),
+
 /* Reported-by: Takeo Nakayama <javhera@gmx.com> */
 UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
 		"JMicron",
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index a3df8ee82faf..e31a6f204397 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -149,8 +149,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
 	 * step 1?
 	 */
 	if (ud->tcp_socket) {
-		dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n",
-			ud->tcp_socket);
+		dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd);
 		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
 	}
 
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c
index 4f48b306713f..c31c8402a0c5 100644
--- a/drivers/usb/usbip/stub_main.c
+++ b/drivers/usb/usbip/stub_main.c
@@ -237,11 +237,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev)
 	struct stub_priv *priv;
 	struct urb *urb;
 
-	dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev);
+	dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n");
 
 	while ((priv = stub_priv_pop(sdev))) {
 		urb = priv->urb;
-		dev_dbg(&sdev->udev->dev, "free urb %p\n", urb);
+		dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n",
+			priv->seqnum);
 		usb_kill_urb(urb);
 
 		kmem_cache_free(stub_priv_cache, priv);
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
index 493ac2928391..6c5a59313999 100644
--- a/drivers/usb/usbip/stub_rx.c
+++ b/drivers/usb/usbip/stub_rx.c
@@ -211,9 +211,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
 		if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
 			continue;
 
-		dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
-			 priv->urb);
-
 		/*
 		 * This matched urb is not completed yet (i.e., be in
 		 * flight in usb hcd hardware/driver). Now we are
@@ -252,8 +249,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
 		ret = usb_unlink_urb(priv->urb);
 		if (ret != -EINPROGRESS)
 			dev_err(&priv->urb->dev->dev,
-				"failed to unlink a urb %p, ret %d\n",
-				priv->urb, ret);
+				"failed to unlink a urb # %lu, ret %d\n",
+				priv->seqnum, ret);
 
 		return 0;
 	}
@@ -342,14 +339,6 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
 
 	epd = &ep->desc;
 
-	/* validate transfer_buffer_length */
-	if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) {
-		dev_err(&sdev->udev->dev,
-			"CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n",
-			pdu->u.cmd_submit.transfer_buffer_length);
-		return -1;
-	}
-
 	if (usb_endpoint_xfer_control(epd)) {
 		if (dir == USBIP_DIR_OUT)
 			return usb_sndctrlpipe(udev, epnum);
@@ -482,8 +471,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev,
 	}
 
 	/* allocate urb transfer buffer, if needed */
-	if (pdu->u.cmd_submit.transfer_buffer_length > 0 &&
-	    pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) {
+	if (pdu->u.cmd_submit.transfer_buffer_length > 0) {
 		priv->urb->transfer_buffer =
 			kzalloc(pdu->u.cmd_submit.transfer_buffer_length,
 				GFP_KERNEL);
diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c
index 53172b1f6257..f0ec41a50cbc 100644
--- a/drivers/usb/usbip/stub_tx.c
+++ b/drivers/usb/usbip/stub_tx.c
@@ -88,7 +88,7 @@ void stub_complete(struct urb *urb)
 	/* link a urb to the queue of tx. */
 	spin_lock_irqsave(&sdev->priv_lock, flags);
 	if (sdev->ud.tcp_socket == NULL) {
-		usbip_dbg_stub_tx("ignore urb for closed connection %p", urb);
+		usbip_dbg_stub_tx("ignore urb for closed connection\n");
 		/* It will be freed in stub_device_cleanup_urbs(). */
 	} else if (priv->unlinking) {
 		stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status);
@@ -190,8 +190,8 @@ static int stub_send_ret_submit(struct stub_device *sdev)
 
 		/* 1. setup usbip_header */
 		setup_ret_submit_pdu(&pdu_header, urb);
-		usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-				  pdu_header.base.seqnum, urb);
+		usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+				  pdu_header.base.seqnum);
 		usbip_header_correct_endian(&pdu_header, 1);
 
 		iov[iovnum].iov_base = &pdu_header;
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index f7978933b402..ee2bbce24584 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -91,7 +91,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
 	dev_dbg(dev, "       devnum(%d) devpath(%s) usb speed(%s)",
 		udev->devnum, udev->devpath, usb_speed_string(udev->speed));
 
-	pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+	pr_debug("tt hub ttport %d\n", udev->ttport);
 
 	dev_dbg(dev, "                    ");
 	for (i = 0; i < 16; i++)
@@ -124,12 +124,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
 	}
 	pr_debug("\n");
 
-	dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
-	dev_dbg(dev,
-		"descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
-		&udev->descriptor, udev->config,
-		udev->actconfig, udev->rawdescriptors);
+	dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
+		udev->bus->bus_name);
 
 	dev_dbg(dev, "have_langid %d, string_langid %d\n",
 		udev->have_langid, udev->string_langid);
@@ -237,9 +233,6 @@ void usbip_dump_urb(struct urb *urb)
 
 	dev = &urb->dev->dev;
 
-	dev_dbg(dev, "   urb                   :%p\n", urb);
-	dev_dbg(dev, "   dev                   :%p\n", urb->dev);
-
 	usbip_dump_usb_device(urb->dev);
 
 	dev_dbg(dev, "   pipe                  :%08x ", urb->pipe);
@@ -248,11 +241,9 @@ void usbip_dump_urb(struct urb *urb)
 
 	dev_dbg(dev, "   status                :%d\n", urb->status);
 	dev_dbg(dev, "   transfer_flags        :%08X\n", urb->transfer_flags);
-	dev_dbg(dev, "   transfer_buffer       :%p\n", urb->transfer_buffer);
 	dev_dbg(dev, "   transfer_buffer_length:%d\n",
 		urb->transfer_buffer_length);
 	dev_dbg(dev, "   actual_length         :%d\n", urb->actual_length);
-	dev_dbg(dev, "   setup_packet          :%p\n", urb->setup_packet);
 
 	if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
 		usbip_dump_usb_ctrlrequest(
@@ -262,8 +253,6 @@ void usbip_dump_urb(struct urb *urb)
 	dev_dbg(dev, "   number_of_packets     :%d\n", urb->number_of_packets);
 	dev_dbg(dev, "   interval              :%d\n", urb->interval);
 	dev_dbg(dev, "   error_count           :%d\n", urb->error_count);
-	dev_dbg(dev, "   context               :%p\n", urb->context);
-	dev_dbg(dev, "   complete              :%p\n", urb->complete);
 }
 EXPORT_SYMBOL_GPL(usbip_dump_urb);
 
@@ -317,26 +306,20 @@ int usbip_recv(struct socket *sock, void *buf, int size)
 	struct msghdr msg = {.msg_flags = MSG_NOSIGNAL};
 	int total = 0;
 
+	if (!sock || !buf || !size)
+		return -EINVAL;
+
 	iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
 
 	usbip_dbg_xmit("enter\n");
 
-	if (!sock || !buf || !size) {
-		pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
-		       size);
-		return -EINVAL;
-	}
-
 	do {
-		int sz = msg_data_left(&msg);
+		msg_data_left(&msg);
 		sock->sk->sk_allocation = GFP_NOIO;
 
 		result = sock_recvmsg(sock, &msg, MSG_WAITALL);
-		if (result <= 0) {
-			pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
-				 sock, buf + total, sz, result, total);
+		if (result <= 0)
 			goto err;
-		}
 
 		total += result;
 	} while (msg_data_left(&msg));
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 6b3278c4b72a..c3e1008aa491 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -656,9 +656,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 	struct vhci_device *vdev;
 	unsigned long flags;
 
-	usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
-			  hcd, urb, mem_flags);
-
 	if (portnum > VHCI_HC_PORTS) {
 		pr_err("invalid port number %d\n", portnum);
 		return -ENODEV;
@@ -822,8 +819,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 	struct vhci_device *vdev;
 	unsigned long flags;
 
-	pr_info("dequeue a urb %p\n", urb);
-
 	spin_lock_irqsave(&vhci->lock, flags);
 
 	priv = urb->hcpriv;
@@ -851,7 +846,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 		/* tcp connection is closed */
 		spin_lock(&vdev->priv_lock);
 
-		pr_info("device %p seems to be disconnected\n", vdev);
 		list_del(&priv->list);
 		kfree(priv);
 		urb->hcpriv = NULL;
@@ -863,8 +857,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 		 * vhci_rx will receive RET_UNLINK and give back the URB.
 		 * Otherwise, we give back it here.
 		 */
-		pr_info("gives back urb %p\n", urb);
-
 		usb_hcd_unlink_urb_from_ep(hcd, urb);
 
 		spin_unlock_irqrestore(&vhci->lock, flags);
@@ -892,8 +884,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 
 		unlink->unlink_seqnum = priv->seqnum;
 
-		pr_info("device %p seems to be still connected\n", vdev);
-
 		/* send cmd_unlink and try to cancel the pending URB in the
 		 * peer */
 		list_add_tail(&unlink->list, &vdev->unlink_tx);
@@ -975,7 +965,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
 
 	/* need this? see stub_dev.c */
 	if (ud->tcp_socket) {
-		pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket);
+		pr_debug("shutdown tcp_socket %d\n", ud->sockfd);
 		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
 	}
 
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
index 90577e8b2282..112ebb90d8c9 100644
--- a/drivers/usb/usbip/vhci_rx.c
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -23,24 +23,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum)
 		urb = priv->urb;
 		status = urb->status;
 
-		usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n",
-				urb, priv, seqnum);
+		usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum);
 
 		switch (status) {
 		case -ENOENT:
 			/* fall through */
 		case -ECONNRESET:
-			dev_info(&urb->dev->dev,
-				 "urb %p was unlinked %ssynchronuously.\n", urb,
-				 status == -ENOENT ? "" : "a");
+			dev_dbg(&urb->dev->dev,
+				"urb seq# %u was unlinked %ssynchronuously\n",
+				seqnum, status == -ENOENT ? "" : "a");
 			break;
 		case -EINPROGRESS:
 			/* no info output */
 			break;
 		default:
-			dev_info(&urb->dev->dev,
-				 "urb %p may be in a error, status %d\n", urb,
-				 status);
+			dev_dbg(&urb->dev->dev,
+				"urb seq# %u may be in a error, status %d\n",
+				seqnum, status);
 		}
 
 		list_del(&priv->list);
@@ -67,8 +66,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
 	spin_unlock_irqrestore(&vdev->priv_lock, flags);
 
 	if (!urb) {
-		pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
-		pr_info("max seqnum %d\n",
+		pr_err("cannot find a urb of seqnum %u max seqnum %d\n",
+			pdu->base.seqnum,
 			atomic_read(&vhci_hcd->seqnum));
 		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
 		return;
@@ -91,7 +90,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
 	if (usbip_dbg_flag_vhci_rx)
 		usbip_dump_urb(urb);
 
-	usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+	usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum);
 
 	spin_lock_irqsave(&vhci->lock, flags);
 	usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb);
@@ -158,7 +157,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
 		pr_info("the urb (seqnum %d) was already given back\n",
 			pdu->base.seqnum);
 	} else {
-		usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+		usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum);
 
 		/* If unlink is successful, status is -ECONNRESET */
 		urb->status = pdu->u.ret_unlink.status;
diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c
index d625a2ff4b71..9aed15a358b7 100644
--- a/drivers/usb/usbip/vhci_tx.c
+++ b/drivers/usb/usbip/vhci_tx.c
@@ -69,7 +69,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
 		memset(&msg, 0, sizeof(msg));
 		memset(&iov, 0, sizeof(iov));
 
-		usbip_dbg_vhci_tx("setup txdata urb %p\n", urb);
+		usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n",
+				  priv->seqnum);
 
 		/* 1. setup usbip_header */
 		setup_cmd_submit_pdu(&pdu_header, urb);
diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
index df1e30989148..1e8a23d92cb4 100644
--- a/drivers/usb/usbip/vudc_rx.c
+++ b/drivers/usb/usbip/vudc_rx.c
@@ -120,6 +120,25 @@ static int v_recv_cmd_submit(struct vudc *udc,
 	urb_p->new = 1;
 	urb_p->seqnum = pdu->base.seqnum;
 
+	if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
+		/* validate packet size and number of packets */
+		unsigned int maxp, packets, bytes;
+
+		maxp = usb_endpoint_maxp(urb_p->ep->desc);
+		maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
+		bytes = pdu->u.cmd_submit.transfer_buffer_length;
+		packets = DIV_ROUND_UP(bytes, maxp);
+
+		if (pdu->u.cmd_submit.number_of_packets < 0 ||
+		    pdu->u.cmd_submit.number_of_packets > packets) {
+			dev_err(&udc->gadget.dev,
+				"CMD_SUBMIT: isoc invalid num packets %d\n",
+				pdu->u.cmd_submit.number_of_packets);
+			ret = -EMSGSIZE;
+			goto free_urbp;
+		}
+	}
+
 	ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
 	if (ret) {
 		usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
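The vudc_rx hunk above bounds a peer-supplied isochronous packet count by what the transfer length and the endpoint's (maxp x mult) can justify, rejecting oversized counts with -EMSGSIZE before any URB is allocated. A standalone sketch of the same bound (the DIV_ROUND_UP macro mirrors the kernel's; the validator name is ours):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Reject a packet count larger than the buffer can account for. */
static int validate_isoc(int number_of_packets,
                         unsigned int transfer_len,
                         unsigned int maxp, unsigned int mult)
{
        unsigned int packets = DIV_ROUND_UP(transfer_len, maxp * mult);

        if (number_of_packets < 0 ||
            (unsigned int)number_of_packets > packets)
                return -1;      /* reject, like -EMSGSIZE in the patch */
        return 0;
}

int main(void)
{
        /* 3000 bytes at 1024 * 1 per packet justifies at most 3 packets */
        printf("3 packets:  %d\n", validate_isoc(3, 3000, 1024, 1));
        printf("64 packets: %d\n", validate_isoc(64, 3000, 1024, 1));
        return 0;
}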
diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c
index 1440ae0919ec..3ccb17c3e840 100644
--- a/drivers/usb/usbip/vudc_tx.c
+++ b/drivers/usb/usbip/vudc_tx.c
@@ -85,6 +85,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
 	memset(&pdu_header, 0, sizeof(pdu_header));
 	memset(&msg, 0, sizeof(msg));
 
+	if (urb->actual_length > 0 && !urb->transfer_buffer) {
+		dev_err(&udc->gadget.dev,
+			"urb: actual_length %d transfer_buffer null\n",
+			urb->actual_length);
+		return -1;
+	}
+
 	if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
 		iovnum = 2 + urb->number_of_packets;
 	else
@@ -100,8 +107,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
 
 	/* 1. setup usbip_header */
 	setup_ret_submit_pdu(&pdu_header, urb_p);
-	usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-			  pdu_header.base.seqnum, urb);
+	usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+			  pdu_header.base.seqnum);
 	usbip_header_correct_endian(&pdu_header, 1);
 
 	iov[iovnum].iov_base = &pdu_header;
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f77e499afddd..065f0b607373 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource)
 	kfree(resource);
 }
 
+/*
+ * Host memory not allocated to dom0. We can use this range for hotplug-based
+ * ballooning.
+ *
+ * It's a type-less resource. Setting IORESOURCE_MEM will make resource
+ * management algorithms (arch_remove_reservations()) look into guest e820,
+ * which we don't want.
+ */
+static struct resource hostmem_resource = {
+	.name = "Host RAM",
+};
+
+void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
+{}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
-	struct resource *res;
-	int ret;
+	struct resource *res, *res_hostmem;
+	int ret = -ENOMEM;
 
 	res = kzalloc(sizeof(*res), GFP_KERNEL);
 	if (!res)
@@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 	res->name = "System RAM";
 	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-	ret = allocate_resource(&iomem_resource, res,
-				size, 0, -1,
-				PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-	if (ret < 0) {
-		pr_err("Cannot allocate new System RAM resource\n");
-		kfree(res);
-		return NULL;
+	res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
+	if (res_hostmem) {
+		/* Try to grab a range from hostmem */
+		res_hostmem->name = "Host memory";
+		ret = allocate_resource(&hostmem_resource, res_hostmem,
+					size, 0, -1,
+					PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+	}
+
+	if (!ret) {
+		/*
+		 * Insert this resource into iomem. Because hostmem_resource
+		 * tracks portion of guest e820 marked as UNUSABLE noone else
+		 * should try to use it.
+		 */
+		res->start = res_hostmem->start;
+		res->end = res_hostmem->end;
+		ret = insert_resource(&iomem_resource, res);
+		if (ret < 0) {
+			pr_err("Can't insert iomem_resource [%llx - %llx]\n",
+				res->start, res->end);
+			release_memory_resource(res_hostmem);
+			res_hostmem = NULL;
+			res->start = res->end = 0;
+		}
+	}
+
+	if (ret) {
+		ret = allocate_resource(&iomem_resource, res,
+					size, 0, -1,
+					PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+		if (ret < 0) {
+			pr_err("Cannot allocate new System RAM resource\n");
+			kfree(res);
+			return NULL;
+		}
 	}
 
 #ifdef CONFIG_SPARSEMEM
@@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 			pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
 			       pfn, limit);
 			release_memory_resource(res);
+			release_memory_resource(res_hostmem);
 			return NULL;
 		}
 	}
@@ -765,6 +810,8 @@ static int __init balloon_init(void)
 	set_online_page_callback(&xen_online_page);
 	register_memory_notifier(&xen_memory_nb);
 	register_sysctl_table(xen_root);
+
+	arch_xen_balloon_init(&hostmem_resource);
 #endif
 
 #ifdef CONFIG_XEN_PV
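The balloon.c change introduces a second resource tree, hostmem_resource, and makes additional_memory_resource() try it first, falling back to a plain iomem_resource allocation when the host pool cannot satisfy the request. A toy sketch of that try-preferred-then-fallback shape (pool structure and names invented for illustration, not the kernel's resource API):

#include <stdio.h>
#include <stdlib.h>

struct pool { const char *name; size_t avail; };

/* Serve from a pool if it has room, recording which pool was used. */
static void *pool_alloc(struct pool *p, size_t size, const char **src)
{
        if (p->avail < size)
                return NULL;
        p->avail -= size;
        *src = p->name;
        return malloc(size);
}

int main(void)
{
        struct pool hostmem = { "Host RAM", 128 };
        struct pool iomem = { "System RAM", 4096 };
        const char *src = NULL;

        void *range = pool_alloc(&hostmem, 512, &src);
        if (!range)
                range = pool_alloc(&iomem, 512, &src); /* fallback */

        printf("allocated from %s\n", range ? src : "nowhere");
        free(range);
        return 0;
}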
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 57efbd3b053b..bd56653b9bbc 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -380,10 +380,8 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 		range = 0;
 		while (range < pages) {
-			if (map->unmap_ops[offset+range].handle == -1) {
-				range--;
+			if (map->unmap_ops[offset+range].handle == -1)
 				break;
-			}
 			range++;
 		}
 		err = __unmap_grant_pages(map, offset, range);
@@ -1073,8 +1071,10 @@ unlock_out:
 out_unlock_put:
 	mutex_unlock(&priv->lock);
 out_put_map:
-	if (use_ptemod)
+	if (use_ptemod) {
 		map->vma = NULL;
+		unmap_grant_pages(map, 0, map->count);
+	}
 	gntdev_put_map(priv, map);
 	return err;
 }
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index d1e1d8d2b9d5..4c789e61554b 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -805,7 +805,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
 		pvcalls_exit();
 		return ret;
 	}
-	map2 = kzalloc(sizeof(*map2), GFP_KERNEL);
+	map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
 	if (map2 == NULL) {
 		clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
 			  (void *)&map->passive.flags);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ff8d5bf4354f..23c7f395d718 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -895,20 +895,38 @@ error:
  * However, if we didn't have a callback promise outstanding, or it was
  * outstanding on a different server, then it won't break it either...
  */
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key)
+static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+			       unsigned long d_version_before,
+			       unsigned long d_version_after)
 {
+	bool dir_valid;
 	int ret = 0;
 
+	/* There were no intervening changes on the server if the version
+	 * number we got back was incremented by exactly 1.
+	 */
+	dir_valid = (d_version_after == d_version_before + 1);
+
 	if (d_really_is_positive(dentry)) {
 		struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 
-		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-			kdebug("AFS_VNODE_DELETED");
-		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
-
-		ret = afs_validate(vnode, key);
-		if (ret == -ESTALE)
-			ret = 0;
+		if (dir_valid) {
+			drop_nlink(&vnode->vfs_inode);
+			if (vnode->vfs_inode.i_nlink == 0) {
+				set_bit(AFS_VNODE_DELETED, &vnode->flags);
+				clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+			}
+			ret = 0;
+		} else {
+			clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+
+			if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+				kdebug("AFS_VNODE_DELETED");
+
+			ret = afs_validate(vnode, key);
+			if (ret == -ESTALE)
+				ret = 0;
+		}
 		_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
 	}
 
@@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	struct afs_fs_cursor fc;
 	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
 	struct key *key;
+	unsigned long d_version = (unsigned long)dentry->d_fsdata;
 	int ret;
 
 	_enter("{%x:%u},{%pd}",
@@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret == 0)
-			ret = afs_dir_remove_link(dentry, key);
+			ret = afs_dir_remove_link(
+				dentry, key, d_version,
+				(unsigned long)dvnode->status.data_version);
 	}
 
 error_key:
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 3415eb7484f6..1e81864ef0b2 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	}
 
 	read_sequnlock_excl(&vnode->cb_lock);
+
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+		clear_nlink(&vnode->vfs_inode);
+
 	if (valid)
 		goto valid;
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ea1460b9b71a..e1126659f043 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 {
 	struct afs_net *net = call->net;
 	enum afs_call_state state;
-	u32 remote_abort;
+	u32 remote_abort = 0;
 	int ret;
 
 	_enter("{%s,%zu},,%zu,%d",
diff --git a/fs/afs/write.c b/fs/afs/write.c
index cb5f8a3df577..9370e2feb999 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 			ret = afs_fill_page(vnode, key, pos + copied,
 					    len - copied, page);
 			if (ret < 0)
-				return ret;
+				goto out;
 		}
 		SetPageUptodate(page);
 	}
@@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 	set_page_dirty(page);
 	if (PageDirty(page))
 		_debug("dirtied");
+	ret = copied;
+
+out:
 	unlock_page(page);
 	put_page(page);
-
-	return copied;
+	return ret;
 }
 
 /*
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5d73f79ded8b..056276101c63 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 
 	spin_lock(&root->inode_lock);
 	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+
 	if (node) {
 		if (btrfs_inode->delayed_node) {
 			refcount_inc(&node->refs);	/* can be accessed */
@@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 			spin_unlock(&root->inode_lock);
 			return node;
 		}
-		btrfs_inode->delayed_node = node;
-		/* can be accessed and cached in the inode */
-		refcount_add(2, &node->refs);
+
+		/*
+		 * It's possible that we're racing into the middle of removing
+		 * this node from the radix tree.  In this case, the refcount
+		 * was zero and it should never go back to one.  Just return
+		 * NULL like it was never in the radix at all; our release
+		 * function is in the process of removing it.
+		 *
+		 * Some implementations of refcount_inc refuse to bump the
+		 * refcount once it has hit zero.  If we don't do this dance
+		 * here, refcount_inc() may decide to just WARN_ONCE() instead
+		 * of actually bumping the refcount.
+		 *
+		 * If this node is properly in the radix, we want to bump the
+		 * refcount twice, once for the inode and once for this get
+		 * operation.
+		 */
+		if (refcount_inc_not_zero(&node->refs)) {
+			refcount_inc(&node->refs);
+			btrfs_inode->delayed_node = node;
+		} else {
+			node = NULL;
+		}
+
 		spin_unlock(&root->inode_lock);
 		return node;
 	}
@@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node(
254 mutex_unlock(&delayed_node->mutex); 276 mutex_unlock(&delayed_node->mutex);
255 277
256 if (refcount_dec_and_test(&delayed_node->refs)) { 278 if (refcount_dec_and_test(&delayed_node->refs)) {
257 bool free = false;
258 struct btrfs_root *root = delayed_node->root; 279 struct btrfs_root *root = delayed_node->root;
280
259 spin_lock(&root->inode_lock); 281 spin_lock(&root->inode_lock);
260 if (refcount_read(&delayed_node->refs) == 0) { 282 /*
261 radix_tree_delete(&root->delayed_nodes_tree, 283 * Once our refcount goes to zero, nobody is allowed to bump it
262 delayed_node->inode_id); 284 * back up. We can delete it now.
263 free = true; 285 */
264 } 286 ASSERT(refcount_read(&delayed_node->refs) == 0);
287 radix_tree_delete(&root->delayed_nodes_tree,
288 delayed_node->inode_id);
265 spin_unlock(&root->inode_lock); 289 spin_unlock(&root->inode_lock);
266 if (free) 290 kmem_cache_free(delayed_node_cache, delayed_node);
267 kmem_cache_free(delayed_node_cache, delayed_node);
268 } 291 }
269} 292}
270 293
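The btrfs_get_delayed_node() change depends on refcount_inc_not_zero() semantics: a lookup that races with the final put must see the node as already gone rather than resurrect it, exactly as the new comment block explains. A simplified C11-atomics analogue of that primitive (the real kernel version additionally saturates and warns):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Bump the count only if it has not already reached zero; a zero
     * count means teardown has begun and the object must not be
     * resurrected, so the caller pretends the lookup found nothing. */
    static bool refcount_inc_not_zero(atomic_int *refs)
    {
            int old = atomic_load(refs);

            while (old != 0) {
                    /* on failure, old is reloaded and the loop retries */
                    if (atomic_compare_exchange_weak(refs, &old, old + 1))
                            return true;
            }
            return false;
    }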
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 49810b70afd3..a25684287501 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -237,7 +237,6 @@ static struct btrfs_device *__alloc_device(void)
237 kfree(dev); 237 kfree(dev);
238 return ERR_PTR(-ENOMEM); 238 return ERR_PTR(-ENOMEM);
239 } 239 }
240 bio_get(dev->flush_bio);
241 240
242 INIT_LIST_HEAD(&dev->dev_list); 241 INIT_LIST_HEAD(&dev->dev_list);
243 INIT_LIST_HEAD(&dev->dev_alloc_list); 242 INIT_LIST_HEAD(&dev->dev_alloc_list);
diff --git a/fs/exec.c b/fs/exec.c
index 5688b5e1b937..7eb8d21bcab9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1349,9 +1349,14 @@ void setup_new_exec(struct linux_binprm * bprm)
1349 1349
1350 current->sas_ss_sp = current->sas_ss_size = 0; 1350 current->sas_ss_sp = current->sas_ss_size = 0;
1351 1351
1352 /* Figure out dumpability. */ 1352 /*
1353 * Figure out dumpability. Note that this checking only of current
1354 * is wrong, but userspace depends on it. This should be testing
1355 * bprm->secureexec instead.
1356 */
1353 if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || 1357 if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
1354 bprm->secureexec) 1358 !(uid_eq(current_euid(), current_uid()) &&
1359 gid_eq(current_egid(), current_gid())))
1355 set_dumpable(current->mm, suid_dumpable); 1360 set_dumpable(current->mm, suid_dumpable);
1356 else 1361 else
1357 set_dumpable(current->mm, SUID_DUMP_USER); 1362 set_dumpable(current->mm, SUID_DUMP_USER);
diff --git a/fs/super.c b/fs/super.c
index 7ff1349609e4..06bd25d90ba5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -517,7 +517,11 @@ retry:
517 hlist_add_head(&s->s_instances, &type->fs_supers); 517 hlist_add_head(&s->s_instances, &type->fs_supers);
518 spin_unlock(&sb_lock); 518 spin_unlock(&sb_lock);
519 get_filesystem(type); 519 get_filesystem(type);
520 register_shrinker(&s->s_shrink); 520 err = register_shrinker(&s->s_shrink);
521 if (err) {
522 deactivate_locked_super(s);
523 s = ERR_PTR(err);
524 }
521 return s; 525 return s;
522} 526}
523 527
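The fs/super.c hunk starts honoring register_shrinker() failure by tearing the superblock back down and returning the error through the pointer. A userspace sketch of the ERR_PTR convention that makes this possible (simplified; the kernel's MAX_ERRNO handling reduced to the 4095 cutoff, function names illustrative):

    #include <errno.h>
    #include <stdint.h>

    static inline void *ERR_PTR(long err) { return (void *)err; }
    static inline long PTR_ERR(const void *p) { return (long)p; }
    static inline int IS_ERR(const void *p)
    {
            return (uintptr_t)p >= (uintptr_t)-4095;        /* MAX_ERRNO */
    }

    /* On a late registration failure, unwind and hand the errno back in
     * the pointer itself so callers learn why setup failed. */
    void *grab_super(int shrinker_ok)
    {
            static int sb;                  /* stand-in superblock */

            if (!shrinker_ok)               /* register_shrinker() failed */
                    return ERR_PTR(-ENOMEM);
            return &sb;
    }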
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ac9a4e65ca49..41a75f9f23fd 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -570,11 +570,14 @@ out:
570static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, 570static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
571 struct userfaultfd_wait_queue *ewq) 571 struct userfaultfd_wait_queue *ewq)
572{ 572{
573 struct userfaultfd_ctx *release_new_ctx;
574
573 if (WARN_ON_ONCE(current->flags & PF_EXITING)) 575 if (WARN_ON_ONCE(current->flags & PF_EXITING))
574 goto out; 576 goto out;
575 577
576 ewq->ctx = ctx; 578 ewq->ctx = ctx;
577 init_waitqueue_entry(&ewq->wq, current); 579 init_waitqueue_entry(&ewq->wq, current);
580 release_new_ctx = NULL;
578 581
579 spin_lock(&ctx->event_wqh.lock); 582 spin_lock(&ctx->event_wqh.lock);
580 /* 583 /*
@@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
601 new = (struct userfaultfd_ctx *) 604 new = (struct userfaultfd_ctx *)
602 (unsigned long) 605 (unsigned long)
603 ewq->msg.arg.reserved.reserved1; 606 ewq->msg.arg.reserved.reserved1;
604 607 release_new_ctx = new;
605 userfaultfd_ctx_put(new);
606 } 608 }
607 break; 609 break;
608 } 610 }
@@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
617 __set_current_state(TASK_RUNNING); 619 __set_current_state(TASK_RUNNING);
618 spin_unlock(&ctx->event_wqh.lock); 620 spin_unlock(&ctx->event_wqh.lock);
619 621
622 if (release_new_ctx) {
623 struct vm_area_struct *vma;
624 struct mm_struct *mm = release_new_ctx->mm;
625
626 /* the various vma->vm_userfaultfd_ctx still points to it */
627 down_write(&mm->mmap_sem);
628 for (vma = mm->mmap; vma; vma = vma->vm_next)
629 if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
630 vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
631 up_write(&mm->mmap_sem);
632
633 userfaultfd_ctx_put(release_new_ctx);
634 }
635
620 /* 636 /*
621 * ctx may go away after this if the userfault pseudo fd is 637 * ctx may go away after this if the userfault pseudo fd is
622 * already released. 638 * already released.
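The userfaultfd fix records the doomed context in release_new_ctx while event_wqh.lock is held and performs the actual put only after unlocking, since the teardown path takes mmap_sem. The defer-past-the-lock shape, sketched with pthreads and illustrative types:

    #include <pthread.h>
    #include <stddef.h>

    struct uctx { int refs; };              /* illustrative context */

    static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;

    static void ctx_put(struct uctx *c)     /* heavy: may take other locks */
    {
            c->refs--;
    }

    void wait_completion(struct uctx *doomed)
    {
            struct uctx *release = NULL;

            pthread_mutex_lock(&wq_lock);
            /* ... process the event; decide the new ctx must die ... */
            release = doomed;               /* only remember it here */
            pthread_mutex_unlock(&wq_lock);

            if (release)
                    ctx_put(release);       /* teardown outside the lock */
    }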
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0da80019a917..83ed7715f856 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
702 ASSERT(args->agbno % args->alignment == 0); 702 ASSERT(args->agbno % args->alignment == 0);
703 703
704 /* if not file data, insert new block into the reverse map btree */ 704 /* if not file data, insert new block into the reverse map btree */
705 if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { 705 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
707 args->agbno, args->len, &args->oinfo); 707 args->agbno, args->len, &args->oinfo);
708 if (error) 708 if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
1682 bno_cur = cnt_cur = NULL; 1682 bno_cur = cnt_cur = NULL;
1683 mp = tp->t_mountp; 1683 mp = tp->t_mountp;
1684 1684
1685 if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { 1685 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1687 if (error) 1687 if (error)
1688 goto error0; 1688 goto error0;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6249c92671de..a76914db72ef 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@ xfs_attr_set(
212 int flags) 212 int flags)
213{ 213{
214 struct xfs_mount *mp = dp->i_mount; 214 struct xfs_mount *mp = dp->i_mount;
215 struct xfs_buf *leaf_bp = NULL;
215 struct xfs_da_args args; 216 struct xfs_da_args args;
216 struct xfs_defer_ops dfops; 217 struct xfs_defer_ops dfops;
217 struct xfs_trans_res tres; 218 struct xfs_trans_res tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
327 * GROT: another possible req'mt for a double-split btree op. 328 * GROT: another possible req'mt for a double-split btree op.
328 */ 329 */
329 xfs_defer_init(args.dfops, args.firstblock); 330 xfs_defer_init(args.dfops, args.firstblock);
330 error = xfs_attr_shortform_to_leaf(&args); 331 error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
331 if (error) 332 if (error)
332 goto out_defer_cancel; 333 goto out_defer_cancel;
334 /*
335 * Prevent the leaf buffer from being unlocked so that a
336 * concurrent AIL push cannot grab the half-baked leaf
337 * buffer and run into problems with the write verifier.
338 */
339 xfs_trans_bhold(args.trans, leaf_bp);
340 xfs_defer_bjoin(args.dfops, leaf_bp);
333 xfs_defer_ijoin(args.dfops, dp); 341 xfs_defer_ijoin(args.dfops, dp);
334 error = xfs_defer_finish(&args.trans, args.dfops); 342 error = xfs_defer_finish(&args.trans, args.dfops);
335 if (error) 343 if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
337 345
338 /* 346 /*
339 * Commit the leaf transformation. We'll need another (linked) 347 * Commit the leaf transformation. We'll need another (linked)
340 * transaction to add the new attribute to the leaf. 348 * transaction to add the new attribute to the leaf, which
349 * means that we have to hold & join the leaf buffer here too.
341 */ 350 */
342
343 error = xfs_trans_roll_inode(&args.trans, dp); 351 error = xfs_trans_roll_inode(&args.trans, dp);
344 if (error) 352 if (error)
345 goto out; 353 goto out;
346 354 xfs_trans_bjoin(args.trans, leaf_bp);
355 leaf_bp = NULL;
347 } 356 }
348 357
349 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) 358 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
374 383
375out_defer_cancel: 384out_defer_cancel:
376 xfs_defer_cancel(&dfops); 385 xfs_defer_cancel(&dfops);
377 args.trans = NULL;
378out: 386out:
387 if (leaf_bp)
388 xfs_trans_brelse(args.trans, leaf_bp);
379 if (args.trans) 389 if (args.trans)
380 xfs_trans_cancel(args.trans); 390 xfs_trans_cancel(args.trans);
381 xfs_iunlock(dp, XFS_ILOCK_EXCL); 391 xfs_iunlock(dp, XFS_ILOCK_EXCL);
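In the xfs_attr_set() hunk, the freshly built leaf buffer is bheld and bjoined so it stays locked while the deferred ops roll the transaction, then explicitly rejoined to the follow-up transaction. Abstractly, the ownership handoff looks like this (names illustrative, not the XFS API):

    struct buf { int locked; };
    struct txn { struct buf *joined; };

    /* bhold: the committing transaction gives up ownership but leaves
     * the buffer locked; bjoin: the next transaction adopts it. The
     * buffer is never unlocked in between, so nothing can observe the
     * half-built leaf state. */
    void hand_off(struct txn *committing, struct txn *next, struct buf *bp)
    {
            committing->joined = NULL;      /* detach, bp->locked stays set */
            /* ... commit the old transaction, start the next one ... */
            next->joined = bp;              /* rejoin to the new owner */
    }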
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 53cc8b986eac..601eaa36f1ad 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
735} 735}
736 736
737/* 737/*
738 * Convert from using the shortform to the leaf. 738 * Convert from using the shortform to the leaf. On success, return the
739 * buffer so that we can keep it locked until we're totally done with it.
739 */ 740 */
740int 741int
741xfs_attr_shortform_to_leaf(xfs_da_args_t *args) 742xfs_attr_shortform_to_leaf(
743 struct xfs_da_args *args,
744 struct xfs_buf **leaf_bp)
742{ 745{
743 xfs_inode_t *dp; 746 xfs_inode_t *dp;
744 xfs_attr_shortform_t *sf; 747 xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
818 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 821 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
819 } 822 }
820 error = 0; 823 error = 0;
821 824 *leaf_bp = bp;
822out: 825out:
823 kmem_free(tmpbuffer); 826 kmem_free(tmpbuffer);
824 return error; 827 return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c237b0..894124efb421 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
48void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); 48void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
49int xfs_attr_shortform_lookup(struct xfs_da_args *args); 49int xfs_attr_shortform_lookup(struct xfs_da_args *args);
50int xfs_attr_shortform_getvalue(struct xfs_da_args *args); 50int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
51int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); 51int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
52 struct xfs_buf **leaf_bp);
52int xfs_attr_shortform_remove(struct xfs_da_args *args); 53int xfs_attr_shortform_remove(struct xfs_da_args *args);
53int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 54int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
54int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); 55int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1210f684d3c2..1bddbba6b80c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
5136 * blowing out the transaction with a mix of EFIs and reflink 5136 * blowing out the transaction with a mix of EFIs and reflink
5137 * adjustments. 5137 * adjustments.
5138 */ 5138 */
5139 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) 5139 if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5140 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); 5140 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5141 else 5141 else
5142 max_len = len; 5142 max_len = len;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 072ebfe1d6ae..087fea02c389 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
249 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) 249 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
250 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); 250 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
251 251
252 /* Hold the (previously bjoin'd) buffer locked across the roll. */
253 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
254 xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
255
252 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); 256 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
253 257
254 /* Roll the transaction. */ 258 /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
264 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) 268 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
265 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); 269 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
266 270
271 /* Rejoin the buffers and dirty them so the log moves forward. */
272 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
273 xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
274 xfs_trans_bhold(*tp, dop->dop_bufs[i]);
275 }
276
267 return error; 277 return error;
268} 278}
269 279
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
295 } 305 }
296 } 306 }
297 307
308 ASSERT(0);
309 return -EFSCORRUPTED;
310}
311
312/*
313 * Add this buffer to the deferred op. Each joined buffer is relogged
314 * each time we roll the transaction.
315 */
316int
317xfs_defer_bjoin(
318 struct xfs_defer_ops *dop,
319 struct xfs_buf *bp)
320{
321 int i;
322
323 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
324 if (dop->dop_bufs[i] == bp)
325 return 0;
326 else if (dop->dop_bufs[i] == NULL) {
327 dop->dop_bufs[i] = bp;
328 return 0;
329 }
330 }
331
332 ASSERT(0);
298 return -EFSCORRUPTED; 333 return -EFSCORRUPTED;
299} 334}
300 335
@@ -493,9 +528,7 @@ xfs_defer_init(
493 struct xfs_defer_ops *dop, 528 struct xfs_defer_ops *dop,
494 xfs_fsblock_t *fbp) 529 xfs_fsblock_t *fbp)
495{ 530{
496 dop->dop_committed = false; 531 memset(dop, 0, sizeof(struct xfs_defer_ops));
497 dop->dop_low = false;
498 memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
499 *fbp = NULLFSBLOCK; 532 *fbp = NULLFSBLOCK;
500 INIT_LIST_HEAD(&dop->dop_intake); 533 INIT_LIST_HEAD(&dop->dop_intake);
501 INIT_LIST_HEAD(&dop->dop_pending); 534 INIT_LIST_HEAD(&dop->dop_pending);
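The new xfs_defer_bjoin() is an idempotent add into a tiny fixed slot table; overflowing the table is treated as corruption. The same shape in standalone C:

    #include <stddef.h>

    #define NR_BUFS 2

    struct defer_ops { void *bufs[NR_BUFS]; };

    /* Idempotent add into a fixed slot table: re-adding is a no-op, the
     * first free slot is claimed, and a full table is a hard error
     * (the kernel returns -EFSCORRUPTED there). */
    int defer_bjoin(struct defer_ops *dop, void *bp)
    {
            for (size_t i = 0; i < NR_BUFS; i++) {
                    if (dop->bufs[i] == bp)
                            return 0;
                    if (dop->bufs[i] == NULL) {
                            dop->bufs[i] = bp;
                            return 0;
                    }
            }
            return -117;                    /* EUCLEAN aka EFSCORRUPTED */
    }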
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d4f046dd44bd..045beacdd37d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
59}; 59};
60 60
61#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ 61#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
62#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
62 63
63struct xfs_defer_ops { 64struct xfs_defer_ops {
64 bool dop_committed; /* did any trans commit? */ 65 bool dop_committed; /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
66 struct list_head dop_intake; /* unlogged pending work */ 67 struct list_head dop_intake; /* unlogged pending work */
67 struct list_head dop_pending; /* logged pending work */ 68 struct list_head dop_pending; /* logged pending work */
68 69
69 /* relog these inodes with each roll */ 70 /* relog these with each roll */
70 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; 71 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES];
72 struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
71}; 73};
72 74
73void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, 75void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
77void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); 79void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
78bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); 80bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
79int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); 81int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
82int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
80 83
81/* Description of a deferred type. */ 84/* Description of a deferred type. */
82struct xfs_defer_op_type { 85struct xfs_defer_op_type {
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 89bf16b4d937..b0f31791c7e6 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -632,8 +632,6 @@ xfs_iext_insert(
632 struct xfs_iext_leaf *new = NULL; 632 struct xfs_iext_leaf *new = NULL;
633 int nr_entries, i; 633 int nr_entries, i;
634 634
635 trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
636
637 if (ifp->if_height == 0) 635 if (ifp->if_height == 0)
638 xfs_iext_alloc_root(ifp, cur); 636 xfs_iext_alloc_root(ifp, cur);
639 else if (ifp->if_height == 1) 637 else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
661 xfs_iext_set(cur_rec(cur), irec); 659 xfs_iext_set(cur_rec(cur), irec);
662 ifp->if_bytes += sizeof(struct xfs_iext_rec); 660 ifp->if_bytes += sizeof(struct xfs_iext_rec);
663 661
662 trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
663
664 if (new) 664 if (new)
665 xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); 665 xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
666} 666}
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 585b35d34142..c40d26763075 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
1488 xfs_extlen_t aglen, 1488 xfs_extlen_t aglen,
1489 struct xfs_defer_ops *dfops) 1489 struct xfs_defer_ops *dfops)
1490{ 1490{
1491 int error;
1492
1493 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, 1491 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
1494 agbno, aglen); 1492 agbno, aglen);
1495 1493
1496 /* Add refcount btree reservation */ 1494 /* Add refcount btree reservation */
1497 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1495 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1498 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); 1496 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
1499 if (error)
1500 return error;
1501
1502 /* Add rmap entry */
1503 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
1504 error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
1505 rcur->bc_private.a.agno,
1506 agbno, aglen, XFS_RMAP_OWN_COW);
1507 if (error)
1508 return error;
1509 }
1510
1511 return error;
1512} 1497}
1513 1498
1514/* 1499/*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
1521 xfs_extlen_t aglen, 1506 xfs_extlen_t aglen,
1522 struct xfs_defer_ops *dfops) 1507 struct xfs_defer_ops *dfops)
1523{ 1508{
1524 int error;
1525
1526 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, 1509 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
1527 agbno, aglen); 1510 agbno, aglen);
1528 1511
1529 /* Remove refcount btree reservation */ 1512 /* Remove refcount btree reservation */
1530 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1513 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1531 XFS_REFCOUNT_ADJUST_COW_FREE, dfops); 1514 XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
1532 if (error)
1533 return error;
1534
1535 /* Remove rmap entry */
1536 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
1537 error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
1538 rcur->bc_private.a.agno,
1539 agbno, aglen, XFS_RMAP_OWN_COW);
1540 if (error)
1541 return error;
1542 }
1543
1544 return error;
1545} 1515}
1546 1516
1547/* Record a CoW staging extent in the refcount btree. */ 1517/* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
1552 xfs_fsblock_t fsb, 1522 xfs_fsblock_t fsb,
1553 xfs_extlen_t len) 1523 xfs_extlen_t len)
1554{ 1524{
1525 int error;
1526
1555 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1527 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1556 return 0; 1528 return 0;
1557 1529
1558 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, 1530 error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
1559 fsb, len); 1531 fsb, len);
1532 if (error)
1533 return error;
1534
1535 /* Add rmap entry */
1536 return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1537 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1560} 1538}
1561 1539
1562/* Forget a CoW staging event in the refcount btree. */ 1540/* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
1567 xfs_fsblock_t fsb, 1545 xfs_fsblock_t fsb,
1568 xfs_extlen_t len) 1546 xfs_extlen_t len)
1569{ 1547{
1548 int error;
1549
1570 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1550 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1571 return 0; 1551 return 0;
1572 1552
1553 /* Remove rmap entry */
1554 error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1555 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1556 if (error)
1557 return error;
1558
1573 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, 1559 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
1574 fsb, len); 1560 fsb, len);
1575} 1561}
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index dd019cee1b3b..50db920ceeeb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range(
368} 368}
369 369
370/* 370/*
371 * Perform all the relevant owner checks for a removal op. If we're doing an
372 * unknown-owner removal then we have no owner information to check.
373 */
374static int
375xfs_rmap_free_check_owner(
376 struct xfs_mount *mp,
377 uint64_t ltoff,
378 struct xfs_rmap_irec *rec,
379 xfs_fsblock_t bno,
380 xfs_filblks_t len,
381 uint64_t owner,
382 uint64_t offset,
383 unsigned int flags)
384{
385 int error = 0;
386
387 if (owner == XFS_RMAP_OWN_UNKNOWN)
388 return 0;
389
390 /* Make sure the unwritten flag matches. */
391 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
392 (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
393
394 /* Make sure the owner matches what we expect to find in the tree. */
395 XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
396
397 /* Check the offset, if necessary. */
398 if (XFS_RMAP_NON_INODE_OWNER(owner))
399 goto out;
400
401 if (flags & XFS_RMAP_BMBT_BLOCK) {
402 XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
403 out);
404 } else {
405 XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
406 XFS_WANT_CORRUPTED_GOTO(mp,
407 ltoff + rec->rm_blockcount >= offset + len,
408 out);
409 }
410
411out:
412 return error;
413}
414
415/*
371 * Find the extent in the rmap btree and remove it. 416 * Find the extent in the rmap btree and remove it.
372 * 417 *
373 * The record we find should always be an exact match for the extent that we're 418 * The record we find should always be an exact match for the extent that we're
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
444 goto out_done; 489 goto out_done;
445 } 490 }
446 491
447 /* Make sure the unwritten flag matches. */ 492 /*
448 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == 493 * If we're doing an unknown-owner removal for EFI recovery, we expect
449 (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); 494 * to find the full range in the rmapbt or nothing at all. If we
495 * don't find any rmaps overlapping either end of the range, we're
496 * done. Hopefully this means that the EFI creator already queued
497 * (and finished) a RUI to remove the rmap.
498 */
499 if (owner == XFS_RMAP_OWN_UNKNOWN &&
500 ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
501 struct xfs_rmap_irec rtrec;
502
503 error = xfs_btree_increment(cur, 0, &i);
504 if (error)
505 goto out_error;
506 if (i == 0)
507 goto out_done;
508 error = xfs_rmap_get_rec(cur, &rtrec, &i);
509 if (error)
510 goto out_error;
511 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
512 if (rtrec.rm_startblock >= bno + len)
513 goto out_done;
514 }
450 515
451 /* Make sure the extent we found covers the entire freeing range. */ 516 /* Make sure the extent we found covers the entire freeing range. */
452 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && 517 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
453 ltrec.rm_startblock + ltrec.rm_blockcount >= 518 ltrec.rm_startblock + ltrec.rm_blockcount >=
454 bno + len, out_error); 519 bno + len, out_error);
455 520
456 /* Make sure the owner matches what we expect to find in the tree. */ 521 /* Check owner information. */
457 XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || 522 error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
458 XFS_RMAP_NON_INODE_OWNER(owner), out_error); 523 offset, flags);
459 524 if (error)
460 /* Check the offset, if necessary. */ 525 goto out_error;
461 if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
462 if (flags & XFS_RMAP_BMBT_BLOCK) {
463 XFS_WANT_CORRUPTED_GOTO(mp,
464 ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
465 out_error);
466 } else {
467 XFS_WANT_CORRUPTED_GOTO(mp,
468 ltrec.rm_offset <= offset, out_error);
469 XFS_WANT_CORRUPTED_GOTO(mp,
470 ltoff + ltrec.rm_blockcount >= offset + len,
471 out_error);
472 }
473 }
474 526
475 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { 527 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
476 /* exact match, simply remove the record from rmap tree */ 528 /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
664 flags |= XFS_RMAP_UNWRITTEN; 716 flags |= XFS_RMAP_UNWRITTEN;
665 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, 717 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
666 unwritten, oinfo); 718 unwritten, oinfo);
719 ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
667 720
668 /* 721 /*
669 * For the initial lookup, look for an exact match or the left-adjacent 722 * For the initial lookup, look for an exact match or the left-adjacent
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 466ede637080..0fcd5b1ba729 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -61,7 +61,21 @@ static inline void
61xfs_rmap_skip_owner_update( 61xfs_rmap_skip_owner_update(
62 struct xfs_owner_info *oi) 62 struct xfs_owner_info *oi)
63{ 63{
64 oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; 64 xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
65}
66
67static inline bool
68xfs_rmap_should_skip_owner_update(
69 struct xfs_owner_info *oi)
70{
71 return oi->oi_owner == XFS_RMAP_OWN_NULL;
72}
73
74static inline void
75xfs_rmap_any_owner_update(
76 struct xfs_owner_info *oi)
77{
78 xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
65} 79}
66 80
67/* Reverse mapping functions. */ 81/* Reverse mapping functions. */
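The xfs_rmap.h hunk splits two sentinels that used to be conflated: OWN_NULL now means "perform no rmap update at all", while OWN_UNKNOWN means "do update, matching any existing owner", which is what EFI recovery needs. A sketch of the distinction with illustrative values:

    enum rmap_owner { OWN_NULL = 1, OWN_UNKNOWN = 2 };      /* illustrative */

    struct owner_info { enum rmap_owner owner; };

    /* OWN_NULL: make no rmap updates for this extent at all. */
    static inline int should_skip_owner_update(const struct owner_info *oi)
    {
            return oi->owner == OWN_NULL;
    }

    /* OWN_UNKNOWN: do update, but accept whatever owner is recorded
     * (EFI recovery cannot know who owned the blocks being freed). */
    static inline int matches_any_owner(const struct owner_info *oi)
    {
            return oi->owner == OWN_UNKNOWN;
    }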
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 21e2d70884e1..4fc526a27a94 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -399,7 +399,7 @@ xfs_map_blocks(
399 (ip->i_df.if_flags & XFS_IFEXTENTS)); 399 (ip->i_df.if_flags & XFS_IFEXTENTS));
400 ASSERT(offset <= mp->m_super->s_maxbytes); 400 ASSERT(offset <= mp->m_super->s_maxbytes);
401 401
402 if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) 402 if (offset > mp->m_super->s_maxbytes - count)
403 count = mp->m_super->s_maxbytes - offset; 403 count = mp->m_super->s_maxbytes - offset;
404 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 404 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
405 offset_fsb = XFS_B_TO_FSBT(mp, offset); 405 offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -1312,7 +1312,7 @@ xfs_get_blocks(
1312 lockmode = xfs_ilock_data_map_shared(ip); 1312 lockmode = xfs_ilock_data_map_shared(ip);
1313 1313
1314 ASSERT(offset <= mp->m_super->s_maxbytes); 1314 ASSERT(offset <= mp->m_super->s_maxbytes);
1315 if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) 1315 if (offset > mp->m_super->s_maxbytes - size)
1316 size = mp->m_super->s_maxbytes - offset; 1316 size = mp->m_super->s_maxbytes - offset;
1317 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); 1317 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1318 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1318 offset_fsb = XFS_B_TO_FSBT(mp, offset);
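Both xfs_map_blocks() and xfs_get_blocks() here (and xfs_file_iomap_begin() further down) switch from "offset + count > max" to "offset > max - count": the former can wrap around for huge offsets, the latter cannot once the neighbouring ASSERTs hold. A self-contained demonstration of the overflow-safe clamp:

    #include <assert.h>
    #include <stdint.h>

    /* Clamp [offset, offset + count) against max without wraparound:
     * "offset + count > max" can overflow a u64, while
     * "offset > max - count" cannot, given offset <= max and
     * count <= max (asserted, mirroring the kernel hunks above). */
    static uint64_t clamp_count(uint64_t offset, uint64_t count, uint64_t max)
    {
            assert(offset <= max);
            assert(count <= max);
            if (offset > max - count)       /* overflow-safe comparison */
                    count = max - offset;
            return count;
    }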
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 44f8c5451210..64da90655e95 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -538,7 +538,7 @@ xfs_efi_recover(
538 return error; 538 return error;
539 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 539 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
540 540
541 xfs_rmap_skip_owner_update(&oinfo); 541 xfs_rmap_any_owner_update(&oinfo);
542 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 542 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
543 extp = &efip->efi_format.efi_extents[i]; 543 extp = &efip->efi_format.efi_extents[i];
544 error = xfs_trans_free_extent(tp, efdp, extp->ext_start, 544 error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8f22fc579dbb..60a2e128cb6a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
571 * this doesn't actually exist in the rmap btree. 571 * this doesn't actually exist in the rmap btree.
572 */ 572 */
573 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); 573 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
574 error = xfs_rmap_free(tp, bp, agno,
575 be32_to_cpu(agf->agf_length) - new,
576 new, &oinfo);
577 if (error)
578 goto error0;
574 error = xfs_free_extent(tp, 579 error = xfs_free_extent(tp,
575 XFS_AGB_TO_FSB(mp, agno, 580 XFS_AGB_TO_FSB(mp, agno,
576 be32_to_cpu(agf->agf_length) - new), 581 be32_to_cpu(agf->agf_length) - new),
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 43005fbe8b1e..3861d61fb265 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
870 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). 870 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
871 * (We'll just piggyback on the post-EOF prealloc space workqueue.) 871 * (We'll just piggyback on the post-EOF prealloc space workqueue.)
872 */ 872 */
873STATIC void 873void
874xfs_queue_cowblocks( 874xfs_queue_cowblocks(
875 struct xfs_mount *mp) 875 struct xfs_mount *mp)
876{ 876{
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
1536 return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); 1536 return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
1537} 1537}
1538 1538
1539static inline unsigned long
1540xfs_iflag_for_tag(
1541 int tag)
1542{
1543 switch (tag) {
1544 case XFS_ICI_EOFBLOCKS_TAG:
1545 return XFS_IEOFBLOCKS;
1546 case XFS_ICI_COWBLOCKS_TAG:
1547 return XFS_ICOWBLOCKS;
1548 default:
1549 ASSERT(0);
1550 return 0;
1551 }
1552}
1553
1539static void 1554static void
1540__xfs_inode_set_eofblocks_tag( 1555__xfs_inode_set_blocks_tag(
1541 xfs_inode_t *ip, 1556 xfs_inode_t *ip,
1542 void (*execute)(struct xfs_mount *mp), 1557 void (*execute)(struct xfs_mount *mp),
1543 void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, 1558 void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
1552 * Don't bother locking the AG and looking up in the radix trees 1567 * Don't bother locking the AG and looking up in the radix trees
1553 * if we already know that we have the tag set. 1568 * if we already know that we have the tag set.
1554 */ 1569 */
1555 if (ip->i_flags & XFS_IEOFBLOCKS) 1570 if (ip->i_flags & xfs_iflag_for_tag(tag))
1556 return; 1571 return;
1557 spin_lock(&ip->i_flags_lock); 1572 spin_lock(&ip->i_flags_lock);
1558 ip->i_flags |= XFS_IEOFBLOCKS; 1573 ip->i_flags |= xfs_iflag_for_tag(tag);
1559 spin_unlock(&ip->i_flags_lock); 1574 spin_unlock(&ip->i_flags_lock);
1560 1575
1561 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1576 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
1587 xfs_inode_t *ip) 1602 xfs_inode_t *ip)
1588{ 1603{
1589 trace_xfs_inode_set_eofblocks_tag(ip); 1604 trace_xfs_inode_set_eofblocks_tag(ip);
1590 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, 1605 return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
1591 trace_xfs_perag_set_eofblocks, 1606 trace_xfs_perag_set_eofblocks,
1592 XFS_ICI_EOFBLOCKS_TAG); 1607 XFS_ICI_EOFBLOCKS_TAG);
1593} 1608}
1594 1609
1595static void 1610static void
1596__xfs_inode_clear_eofblocks_tag( 1611__xfs_inode_clear_blocks_tag(
1597 xfs_inode_t *ip, 1612 xfs_inode_t *ip,
1598 void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, 1613 void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
1599 int error, unsigned long caller_ip), 1614 int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
1603 struct xfs_perag *pag; 1618 struct xfs_perag *pag;
1604 1619
1605 spin_lock(&ip->i_flags_lock); 1620 spin_lock(&ip->i_flags_lock);
1606 ip->i_flags &= ~XFS_IEOFBLOCKS; 1621 ip->i_flags &= ~xfs_iflag_for_tag(tag);
1607 spin_unlock(&ip->i_flags_lock); 1622 spin_unlock(&ip->i_flags_lock);
1608 1623
1609 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1624 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
1630 xfs_inode_t *ip) 1645 xfs_inode_t *ip)
1631{ 1646{
1632 trace_xfs_inode_clear_eofblocks_tag(ip); 1647 trace_xfs_inode_clear_eofblocks_tag(ip);
1633 return __xfs_inode_clear_eofblocks_tag(ip, 1648 return __xfs_inode_clear_blocks_tag(ip,
1634 trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); 1649 trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
1635} 1650}
1636 1651
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
1724 xfs_inode_t *ip) 1739 xfs_inode_t *ip)
1725{ 1740{
1726 trace_xfs_inode_set_cowblocks_tag(ip); 1741 trace_xfs_inode_set_cowblocks_tag(ip);
1727 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, 1742 return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
1728 trace_xfs_perag_set_cowblocks, 1743 trace_xfs_perag_set_cowblocks,
1729 XFS_ICI_COWBLOCKS_TAG); 1744 XFS_ICI_COWBLOCKS_TAG);
1730} 1745}
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
1734 xfs_inode_t *ip) 1749 xfs_inode_t *ip)
1735{ 1750{
1736 trace_xfs_inode_clear_cowblocks_tag(ip); 1751 trace_xfs_inode_clear_cowblocks_tag(ip);
1737 return __xfs_inode_clear_eofblocks_tag(ip, 1752 return __xfs_inode_clear_blocks_tag(ip,
1738 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); 1753 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
1739} 1754}
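Generalizing __xfs_inode_set_eofblocks_tag() to serve both radix-tree tags hinges on the new xfs_iflag_for_tag() switch that maps each tag to its inode flag bit. The mapping idiom in isolation (tag names and flag bit values here are placeholders, not the kernel's):

    enum ici_tag { ICI_EOFBLOCKS_TAG, ICI_COWBLOCKS_TAG };  /* illustrative */

    static unsigned long iflag_for_tag(enum ici_tag tag)
    {
            switch (tag) {
            case ICI_EOFBLOCKS_TAG:
                    return 1UL << 9;        /* placeholder bit values */
            case ICI_COWBLOCKS_TAG:
                    return 1UL << 12;
            default:
                    return 0;               /* unreachable for valid tags */
            }
    }

    /* set path:   ip_flags |=  iflag_for_tag(tag);
     * clear path: ip_flags &= ~iflag_for_tag(tag); */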
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index bff4d85e5498..d4a77588eca1 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
81int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); 81int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
82int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); 82int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
83void xfs_cowblocks_worker(struct work_struct *); 83void xfs_cowblocks_worker(struct work_struct *);
84void xfs_queue_cowblocks(struct xfs_mount *);
84 85
85int xfs_inode_ag_iterator(struct xfs_mount *mp, 86int xfs_inode_ag_iterator(struct xfs_mount *mp,
86 int (*execute)(struct xfs_inode *ip, int flags, void *args), 87 int (*execute)(struct xfs_inode *ip, int flags, void *args),
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b41952a4ddd8..6f95bdb408ce 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1487,6 +1487,24 @@ xfs_link(
1487 return error; 1487 return error;
1488} 1488}
1489 1489
1490/* Clear the reflink flag and the cowblocks tag if possible. */
1491static void
1492xfs_itruncate_clear_reflink_flags(
1493 struct xfs_inode *ip)
1494{
1495 struct xfs_ifork *dfork;
1496 struct xfs_ifork *cfork;
1497
1498 if (!xfs_is_reflink_inode(ip))
1499 return;
1500 dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1501 cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
1502 if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
1503 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1504 if (cfork->if_bytes == 0)
1505 xfs_inode_clear_cowblocks_tag(ip);
1506}
1507
1490/* 1508/*
1491 * Free up the underlying blocks past new_size. The new size must be smaller 1509 * Free up the underlying blocks past new_size. The new size must be smaller
1492 * than the current size. This routine can be used both for the attribute and 1510 * than the current size. This routine can be used both for the attribute and
@@ -1583,15 +1601,7 @@ xfs_itruncate_extents(
1583 if (error) 1601 if (error)
1584 goto out; 1602 goto out;
1585 1603
1586 /* 1604 xfs_itruncate_clear_reflink_flags(ip);
1587 * Clear the reflink flag if there are no data fork blocks and
1588 * there are no extents staged in the cow fork.
1589 */
1590 if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
1591 if (ip->i_d.di_nblocks == 0)
1592 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1593 xfs_inode_clear_cowblocks_tag(ip);
1594 }
1595 1605
1596 /* 1606 /*
1597 * Always re-log the inode so that our permanent transaction can keep 1607 * Always re-log the inode so that our permanent transaction can keep
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b2136af9289f..d383e392ec9d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
232 * log recovery to replay a bmap operation on the inode. 232 * log recovery to replay a bmap operation on the inode.
233 */ 233 */
234#define XFS_IRECOVERY (1 << 11) 234#define XFS_IRECOVERY (1 << 11)
235#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */
235 236
236/* 237/*
237 * Per-lifetime flags need to be reset when re-using a reclaimable inode during 238 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7ab52a8bc0a9..66e1edbfb2b2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1006,7 +1006,7 @@ xfs_file_iomap_begin(
1006 } 1006 }
1007 1007
1008 ASSERT(offset <= mp->m_super->s_maxbytes); 1008 ASSERT(offset <= mp->m_super->s_maxbytes);
1009 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) 1009 if (offset > mp->m_super->s_maxbytes - length)
1010 length = mp->m_super->s_maxbytes - offset; 1010 length = mp->m_super->s_maxbytes - offset;
1011 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1011 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1012 end_fsb = XFS_B_TO_FSB(mp, offset + length); 1012 end_fsb = XFS_B_TO_FSB(mp, offset + length);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index ec952dfad359..b897b11afb2c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -48,7 +48,7 @@
48STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 48STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
49STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 49STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
50 50
51 51STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);
52STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); 52STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp);
53/* 53/*
54 * We use the batch lookup interface to iterate over the dquots as it 54 * We use the batch lookup interface to iterate over the dquots as it
@@ -695,9 +695,17 @@ xfs_qm_init_quotainfo(
695 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; 695 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
696 qinf->qi_shrinker.seeks = DEFAULT_SEEKS; 696 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
697 qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; 697 qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
698 register_shrinker(&qinf->qi_shrinker); 698
699 error = register_shrinker(&qinf->qi_shrinker);
700 if (error)
701 goto out_free_inos;
702
699 return 0; 703 return 0;
700 704
705out_free_inos:
706 mutex_destroy(&qinf->qi_quotaofflock);
707 mutex_destroy(&qinf->qi_tree_lock);
708 xfs_qm_destroy_quotainos(qinf);
701out_free_lru: 709out_free_lru:
702 list_lru_destroy(&qinf->qi_lru); 710 list_lru_destroy(&qinf->qi_lru);
703out_free_qinf: 711out_free_qinf:
@@ -706,7 +714,6 @@ out_free_qinf:
706 return error; 714 return error;
707} 715}
708 716
709
710/* 717/*
711 * Gets called when unmounting a filesystem or when all quotas get 718 * Gets called when unmounting a filesystem or when all quotas get
712 * turned off. 719 * turned off.
@@ -723,19 +730,8 @@ xfs_qm_destroy_quotainfo(
723 730
724 unregister_shrinker(&qi->qi_shrinker); 731 unregister_shrinker(&qi->qi_shrinker);
725 list_lru_destroy(&qi->qi_lru); 732 list_lru_destroy(&qi->qi_lru);
726 733 xfs_qm_destroy_quotainos(qi);
727 if (qi->qi_uquotaip) { 734 mutex_destroy(&qi->qi_tree_lock);
728 IRELE(qi->qi_uquotaip);
729 qi->qi_uquotaip = NULL; /* paranoia */
730 }
731 if (qi->qi_gquotaip) {
732 IRELE(qi->qi_gquotaip);
733 qi->qi_gquotaip = NULL;
734 }
735 if (qi->qi_pquotaip) {
736 IRELE(qi->qi_pquotaip);
737 qi->qi_pquotaip = NULL;
738 }
739 mutex_destroy(&qi->qi_quotaofflock); 735 mutex_destroy(&qi->qi_quotaofflock);
740 kmem_free(qi); 736 kmem_free(qi);
741 mp->m_quotainfo = NULL; 737 mp->m_quotainfo = NULL;
@@ -1600,6 +1596,24 @@ error_rele:
1600} 1596}
1601 1597
1602STATIC void 1598STATIC void
1599xfs_qm_destroy_quotainos(
1600 xfs_quotainfo_t *qi)
1601{
1602 if (qi->qi_uquotaip) {
1603 IRELE(qi->qi_uquotaip);
1604 qi->qi_uquotaip = NULL; /* paranoia */
1605 }
1606 if (qi->qi_gquotaip) {
1607 IRELE(qi->qi_gquotaip);
1608 qi->qi_gquotaip = NULL;
1609 }
1610 if (qi->qi_pquotaip) {
1611 IRELE(qi->qi_pquotaip);
1612 qi->qi_pquotaip = NULL;
1613 }
1614}
1615
1616STATIC void
1603xfs_qm_dqfree_one( 1617xfs_qm_dqfree_one(
1604 struct xfs_dquot *dqp) 1618 struct xfs_dquot *dqp)
1605{ 1619{
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cf7c8f81bebb..47aea2e82c26 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -454,6 +454,8 @@ retry:
454 if (error) 454 if (error)
455 goto out_bmap_cancel; 455 goto out_bmap_cancel;
456 456
457 xfs_inode_set_cowblocks_tag(ip);
458
457 /* Finish up. */ 459 /* Finish up. */
458 error = xfs_defer_finish(&tp, &dfops); 460 error = xfs_defer_finish(&tp, &dfops);
459 if (error) 461 if (error)
@@ -490,8 +492,9 @@ xfs_reflink_find_cow_mapping(
490 struct xfs_iext_cursor icur; 492 struct xfs_iext_cursor icur;
491 493
492 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); 494 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
493 ASSERT(xfs_is_reflink_inode(ip));
494 495
496 if (!xfs_is_reflink_inode(ip))
497 return false;
495 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 498 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
496 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) 499 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
497 return false; 500 return false;
@@ -610,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
610 613
611 /* Remove the mapping from the CoW fork. */ 614 /* Remove the mapping from the CoW fork. */
612 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 615 xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
616 } else {
617 /* Didn't do anything, push cursor back. */
618 xfs_iext_prev(ifp, &icur);
613 } 619 }
614next_extent: 620next_extent:
615 if (!xfs_iext_get_extent(ifp, &icur, &got)) 621 if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -725,7 +731,7 @@ xfs_reflink_end_cow(
725 (unsigned int)(end_fsb - offset_fsb), 731 (unsigned int)(end_fsb - offset_fsb),
726 XFS_DATA_FORK); 732 XFS_DATA_FORK);
727 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 733 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
728 resblks, 0, 0, &tp); 734 resblks, 0, XFS_TRANS_RESERVE, &tp);
729 if (error) 735 if (error)
730 goto out; 736 goto out;
731 737
@@ -1291,6 +1297,17 @@ xfs_reflink_remap_range(
1291 1297
1292 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); 1298 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1293 1299
1300 /*
1301 * Clear out post-eof preallocations because we don't have page cache
1302 * backing the delayed allocations and they'll never get freed on
1303 * their own.
1304 */
1305 if (xfs_can_free_eofblocks(dest, true)) {
1306 ret = xfs_free_eofblocks(dest);
1307 if (ret)
1308 goto out_unlock;
1309 }
1310
1294 /* Set flags and remap blocks. */ 1311 /* Set flags and remap blocks. */
1295 ret = xfs_reflink_set_inode_flag(src, dest); 1312 ret = xfs_reflink_set_inode_flag(src, dest);
1296 if (ret) 1313 if (ret)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5122d3021117..1dacccc367f8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
1360 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1360 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1361 return error; 1361 return error;
1362 } 1362 }
1363 xfs_queue_cowblocks(mp);
1363 1364
1364 /* Create the per-AG metadata reservation pool .*/ 1365 /* Create the per-AG metadata reservation pool .*/
1365 error = xfs_fs_reserve_ag_blocks(mp); 1366 error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
1369 1370
1370 /* rw -> ro */ 1371 /* rw -> ro */
1371 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { 1372 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
1373 /* Get rid of any leftover CoW reservations... */
1374 cancel_delayed_work_sync(&mp->m_cowblocks_work);
1375 error = xfs_icache_free_cowblocks(mp, NULL);
1376 if (error) {
1377 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1378 return error;
1379 }
1380
1372 /* Free the per-AG metadata reservation pool. */ 1381 /* Free the per-AG metadata reservation pool. */
1373 error = xfs_fs_unreserve_ag_blocks(mp); 1382 error = xfs_fs_unreserve_ag_blocks(mp);
1374 if (error) { 1383 if (error) {
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
index ea189d88a3cc..8ac4e68a12f0 100644
--- a/include/asm-generic/mm_hooks.h
+++ b/include/asm-generic/mm_hooks.h
@@ -7,9 +7,10 @@
7#ifndef _ASM_GENERIC_MM_HOOKS_H 7#ifndef _ASM_GENERIC_MM_HOOKS_H
8#define _ASM_GENERIC_MM_HOOKS_H 8#define _ASM_GENERIC_MM_HOOKS_H
9 9
10static inline void arch_dup_mmap(struct mm_struct *oldmm, 10static inline int arch_dup_mmap(struct mm_struct *oldmm,
11 struct mm_struct *mm) 11 struct mm_struct *mm)
12{ 12{
13 return 0;
13} 14}
14 15
15static inline void arch_exit_mmap(struct mm_struct *mm) 16static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b234d54f2cb6..868e68561f91 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
1025struct file; 1025struct file;
1026int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, 1026int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
1027 unsigned long size, pgprot_t *vma_prot); 1027 unsigned long size, pgprot_t *vma_prot);
1028
1029#ifndef CONFIG_X86_ESPFIX64
1030static inline void init_espfix_bsp(void) { }
1031#endif
1032
1028#endif /* !__ASSEMBLY__ */ 1033#endif /* !__ASSEMBLY__ */
1029 1034
1030#ifndef io_remap_pfn_range 1035#ifndef io_remap_pfn_range
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 38d9c5861ed8..f38227a78eae 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -18,6 +18,7 @@
18#include <linux/if_alg.h> 18#include <linux/if_alg.h>
19#include <linux/scatterlist.h> 19#include <linux/scatterlist.h>
20#include <linux/types.h> 20#include <linux/types.h>
21#include <linux/atomic.h>
21#include <net/sock.h> 22#include <net/sock.h>
22 23
23#include <crypto/aead.h> 24#include <crypto/aead.h>
@@ -150,7 +151,7 @@ struct af_alg_ctx {
150 struct crypto_wait wait; 151 struct crypto_wait wait;
151 152
152 size_t used; 153 size_t used;
153 size_t rcvused; 154 atomic_t rcvused;
154 155
155 bool more; 156 bool more;
156 bool merge; 157 bool merge;
@@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk)
215 struct af_alg_ctx *ctx = ask->private; 216 struct af_alg_ctx *ctx = ask->private;
216 217
217 return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) - 218 return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
218 ctx->rcvused, 0); 219 atomic_read(&ctx->rcvused), 0);
219} 220}
220 221
221/** 222/**
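The if_alg.h change makes rcvused an atomic_t because it is updated outside the socket lock; every access now goes through atomic ops instead of plain loads and stores. A userspace C11 sketch of the converted accounting:

    #include <stdatomic.h>

    static atomic_int rcvused;              /* bytes queued for receive */

    static void charge(int n)   { atomic_fetch_add(&rcvused, n); }
    static void uncharge(int n) { atomic_fetch_sub(&rcvused, n); }

    static int rcvbuf_left(int rcvbuf)
    {
            int used = atomic_load(&rcvused);       /* cf. atomic_read() */

            return rcvbuf > used ? rcvbuf - used : 0;
    }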
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
index cceafa01f907..b67404fc4b34 100644
--- a/include/crypto/mcryptd.h
+++ b/include/crypto/mcryptd.h
@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
27 27
28struct mcryptd_cpu_queue { 28struct mcryptd_cpu_queue {
29 struct crypto_queue queue; 29 struct crypto_queue queue;
30 spinlock_t q_lock;
30 struct work_struct work; 31 struct work_struct work;
31}; 32};
32 33
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6e45608b2399..9da6ce22803f 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
62 bool enabled; 62 bool enabled;
63}; 63};
64 64
65int kvm_timer_hyp_init(void); 65int kvm_timer_hyp_init(bool);
66int kvm_timer_enable(struct kvm_vcpu *vcpu); 66int kvm_timer_enable(struct kvm_vcpu *vcpu);
67int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); 67int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
68void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 68void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 82f0c8fd7be8..23d29b39f71e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
492 492
493#define bio_set_dev(bio, bdev) \ 493#define bio_set_dev(bio, bdev) \
494do { \ 494do { \
495 if ((bio)->bi_disk != (bdev)->bd_disk) \
496 bio_clear_flag(bio, BIO_THROTTLED);\
495 (bio)->bi_disk = (bdev)->bd_disk; \ 497 (bio)->bi_disk = (bdev)->bd_disk; \
496 (bio)->bi_partno = (bdev)->bd_partno; \ 498 (bio)->bi_partno = (bdev)->bd_partno; \
497} while (0) 499} while (0)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a1e628e032da..9e7d8bd776d2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -50,8 +50,6 @@ struct blk_issue_stat {
50struct bio { 50struct bio {
51 struct bio *bi_next; /* request queue link */ 51 struct bio *bi_next; /* request queue link */
52 struct gendisk *bi_disk; 52 struct gendisk *bi_disk;
53 u8 bi_partno;
54 blk_status_t bi_status;
55 unsigned int bi_opf; /* bottom bits req flags, 53 unsigned int bi_opf; /* bottom bits req flags,
56 * top bits REQ_OP. Use 54 * top bits REQ_OP. Use
57 * accessors. 55 * accessors.
@@ -59,8 +57,8 @@ struct bio {
59 unsigned short bi_flags; /* status, etc and bvec pool number */ 57 unsigned short bi_flags; /* status, etc and bvec pool number */
60 unsigned short bi_ioprio; 58 unsigned short bi_ioprio;
61 unsigned short bi_write_hint; 59 unsigned short bi_write_hint;
62 60 blk_status_t bi_status;
63 struct bvec_iter bi_iter; 61 u8 bi_partno;
64 62
65 /* Number of segments in this BIO after 63 /* Number of segments in this BIO after
66 * physical address coalescing is performed. 64 * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
74 unsigned int bi_seg_front_size; 72 unsigned int bi_seg_front_size;
75 unsigned int bi_seg_back_size; 73 unsigned int bi_seg_back_size;
76 74
77 atomic_t __bi_remaining; 75 struct bvec_iter bi_iter;
78 76
77 atomic_t __bi_remaining;
79 bio_end_io_t *bi_end_io; 78 bio_end_io_t *bi_end_io;
80 79
81 void *bi_private; 80 void *bi_private;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8089ca17db9a..0ce8a372d506 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
135struct request { 135struct request {
136 struct list_head queuelist; 136 struct list_head queuelist;
137 union { 137 union {
138 call_single_data_t csd; 138 struct __call_single_data csd;
139 u64 fifo_time; 139 u64 fifo_time;
140 }; 140 };
141 141
@@ -241,14 +241,24 @@ struct request {
241 struct request *next_rq; 241 struct request *next_rq;
242}; 242};
243 243
244static inline bool blk_op_is_scsi(unsigned int op)
245{
246 return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
247}
248
249static inline bool blk_op_is_private(unsigned int op)
250{
251 return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
252}
253
244static inline bool blk_rq_is_scsi(struct request *rq) 254static inline bool blk_rq_is_scsi(struct request *rq)
245{ 255{
246 return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; 256 return blk_op_is_scsi(req_op(rq));
247} 257}
248 258
249static inline bool blk_rq_is_private(struct request *rq) 259static inline bool blk_rq_is_private(struct request *rq)
250{ 260{
251 return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; 261 return blk_op_is_private(req_op(rq));
252} 262}
253 263
254static inline bool blk_rq_is_passthrough(struct request *rq) 264static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
256 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); 266 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
257} 267}
258 268
269static inline bool bio_is_passthrough(struct bio *bio)
270{
271 unsigned op = bio_op(bio);
272
273 return blk_op_is_scsi(op) || blk_op_is_private(op);
274}
275
259static inline unsigned short req_get_ioprio(struct request *req) 276static inline unsigned short req_get_ioprio(struct request *req)
260{ 277{
261 return req->ioprio; 278 return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
948extern void blk_rq_unprep_clone(struct request *rq); 965extern void blk_rq_unprep_clone(struct request *rq);
949extern blk_status_t blk_insert_cloned_request(struct request_queue *q, 966extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
950 struct request *rq); 967 struct request *rq);
951extern int blk_rq_append_bio(struct request *rq, struct bio *bio); 968extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
952extern void blk_delay_queue(struct request_queue *, unsigned long); 969extern void blk_delay_queue(struct request_queue *, unsigned long);
953extern void blk_queue_split(struct request_queue *, struct bio **); 970extern void blk_queue_split(struct request_queue *, struct bio **);
954extern void blk_recount_segments(struct request_queue *, struct bio *); 971extern void blk_recount_segments(struct request_queue *, struct bio *);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e55e4255a210..0b25cf87b6d6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -43,7 +43,14 @@ struct bpf_map_ops {
43}; 43};
44 44
45struct bpf_map { 45struct bpf_map {
46 atomic_t refcnt; 46 /* 1st cacheline with read-mostly members of which some
47 * are also accessed in fast-path (e.g. ops, max_entries).
48 */
49 const struct bpf_map_ops *ops ____cacheline_aligned;
50 struct bpf_map *inner_map_meta;
51#ifdef CONFIG_SECURITY
52 void *security;
53#endif
47 enum bpf_map_type map_type; 54 enum bpf_map_type map_type;
48 u32 key_size; 55 u32 key_size;
49 u32 value_size; 56 u32 value_size;
@@ -52,15 +59,17 @@ struct bpf_map {
52 u32 pages; 59 u32 pages;
53 u32 id; 60 u32 id;
54 int numa_node; 61 int numa_node;
55 struct user_struct *user; 62 bool unpriv_array;
56 const struct bpf_map_ops *ops; 63 /* 7 bytes hole */
57 struct work_struct work; 64
65 /* 2nd cacheline with misc members to avoid false sharing
66 * particularly with refcounting.
67 */
68 struct user_struct *user ____cacheline_aligned;
69 atomic_t refcnt;
58 atomic_t usercnt; 70 atomic_t usercnt;
59 struct bpf_map *inner_map_meta; 71 struct work_struct work;
60 char name[BPF_OBJ_NAME_LEN]; 72 char name[BPF_OBJ_NAME_LEN];
61#ifdef CONFIG_SECURITY
62 void *security;
63#endif
64}; 73};
65 74
66/* function argument constraints */ 75/* function argument constraints */
@@ -221,6 +230,7 @@ struct bpf_prog_aux {
221struct bpf_array { 230struct bpf_array {
222 struct bpf_map map; 231 struct bpf_map map;
223 u32 elem_size; 232 u32 elem_size;
233 u32 index_mask;
224 /* 'ownership' of prog_array is claimed by the first program that 234 /* 'ownership' of prog_array is claimed by the first program that
225 * is going to use this map or by the first program which FD is stored 235 * is going to use this map or by the first program which FD is stored
226 * in the map to make sure that all callers and callees have the same 236 * in the map to make sure that all callers and callees have the same
@@ -419,6 +429,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
419 attr->numa_node : NUMA_NO_NODE; 429 attr->numa_node : NUMA_NO_NODE;
420} 430}
421 431
432struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
433
422#else /* !CONFIG_BPF_SYSCALL */ 434#else /* !CONFIG_BPF_SYSCALL */
423static inline struct bpf_prog *bpf_prog_get(u32 ufd) 435static inline struct bpf_prog *bpf_prog_get(u32 ufd)
424{ 436{
@@ -506,6 +518,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
506{ 518{
507 return 0; 519 return 0;
508} 520}
521
522static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
523 enum bpf_prog_type type)
524{
525 return ERR_PTR(-EOPNOTSUPP);
526}
509#endif /* CONFIG_BPF_SYSCALL */ 527#endif /* CONFIG_BPF_SYSCALL */
510 528
511static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, 529static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@ -514,6 +532,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
514 return bpf_prog_get_type_dev(ufd, type, false); 532 return bpf_prog_get_type_dev(ufd, type, false);
515} 533}
516 534
535bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
536
517int bpf_prog_offload_compile(struct bpf_prog *prog); 537int bpf_prog_offload_compile(struct bpf_prog *prog);
518void bpf_prog_offload_destroy(struct bpf_prog *prog); 538void bpf_prog_offload_destroy(struct bpf_prog *prog);
519 539
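
Annotation: the struct bpf_map reshuffle above separates read-mostly fields from the frequently written reference counts so that refcount traffic does not invalidate the cache line the lookup fast path reads. A standalone, userspace-only sketch of the same layout idea, assuming 64-byte cache lines:

#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

struct map_like {
        alignas(64) const void *ops;    /* read-mostly: 1st cacheline */
        unsigned int max_entries;
        alignas(64) int refcnt;         /* hot writes: 2nd cacheline */
};

int main(void)
{
        printf("ops@%zu refcnt@%zu sizeof=%zu\n",
               offsetof(struct map_like, ops),
               offsetof(struct map_like, refcnt),
               sizeof(struct map_like));
        return 0;       /* expect 0, 64, 128 */
}
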
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c561b986bab0..1632bb13ad8a 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -15,11 +15,11 @@
15 * In practice this is far bigger than any realistic pointer offset; this limit 15 * In practice this is far bigger than any realistic pointer offset; this limit
16 * ensures that umax_value + (int)off + (int)size cannot overflow a u64. 16 * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
17 */ 17 */
18#define BPF_MAX_VAR_OFF (1ULL << 31) 18#define BPF_MAX_VAR_OFF (1 << 29)
19/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures 19/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures
20 * that converting umax_value to int cannot overflow. 20 * that converting umax_value to int cannot overflow.
21 */ 21 */
22#define BPF_MAX_VAR_SIZ INT_MAX 22#define BPF_MAX_VAR_SIZ (1 << 29)
23 23
24/* Liveness marks, used for registers and spilled-regs (in stack slots). 24/* Liveness marks, used for registers and spilled-regs (in stack slots).
25 * Read marks propagate upwards until they find a write mark; they record that 25 * Read marks propagate upwards until they find a write mark; they record that
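
Annotation: the tightened bounds both fit in a signed 32-bit int with room to spare, which is the point of the change: an offset plus a size can no longer overflow when converted to int. A quick standalone check of that arithmetic:

#include <assert.h>
#include <limits.h>

#define MAX_VAR_OFF (1 << 29)
#define MAX_VAR_SIZ (1 << 29)

/* Even off + size at their maxima stays inside a signed 32-bit int. */
static_assert((long long)MAX_VAR_OFF + MAX_VAR_SIZ <= INT_MAX,
              "verifier bound arithmetic must not overflow int");

int main(void) { return 0; }
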
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 94a59ba7d422..519e94915d18 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -32,7 +32,6 @@ struct completion {
32#define init_completion(x) __init_completion(x) 32#define init_completion(x) __init_completion(x)
33static inline void complete_acquire(struct completion *x) {} 33static inline void complete_acquire(struct completion *x) {}
34static inline void complete_release(struct completion *x) {} 34static inline void complete_release(struct completion *x) {}
35static inline void complete_release_commit(struct completion *x) {}
36 35
37#define COMPLETION_INITIALIZER(work) \ 36#define COMPLETION_INITIALIZER(work) \
38 { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } 37 { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a04ef7c15c6a..7b01bc11c692 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -47,6 +47,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
47extern int cpu_add_dev_attr_group(struct attribute_group *attrs); 47extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
48extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); 48extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
49 49
50extern ssize_t cpu_show_meltdown(struct device *dev,
51 struct device_attribute *attr, char *buf);
52extern ssize_t cpu_show_spectre_v1(struct device *dev,
53 struct device_attribute *attr, char *buf);
54extern ssize_t cpu_show_spectre_v2(struct device *dev,
55 struct device_attribute *attr, char *buf);
56
50extern __printf(4, 5) 57extern __printf(4, 5)
51struct device *cpu_device_create(struct device *parent, void *drvdata, 58struct device *cpu_device_create(struct device *parent, void *drvdata,
52 const struct attribute_group **groups, 59 const struct attribute_group **groups,
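
Annotation: these declarations are backed by weak generic defaults that each architecture can override with its real mitigation status; a sketch of the pattern (the string is illustrative):

#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/kernel.h>

/* Generic fallback; an architecture that knows better overrides it. */
ssize_t __weak cpu_show_meltdown(struct device *dev,
                                 struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "Not affected\n");
}
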
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 201ab7267986..1a32e558eb11 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -86,7 +86,7 @@ enum cpuhp_state {
86 CPUHP_MM_ZSWP_POOL_PREPARE, 86 CPUHP_MM_ZSWP_POOL_PREPARE,
87 CPUHP_KVM_PPC_BOOK3S_PREPARE, 87 CPUHP_KVM_PPC_BOOK3S_PREPARE,
88 CPUHP_ZCOMP_PREPARE, 88 CPUHP_ZCOMP_PREPARE,
89 CPUHP_TIMERS_DEAD, 89 CPUHP_TIMERS_PREPARE,
90 CPUHP_MIPS_SOC_PREPARE, 90 CPUHP_MIPS_SOC_PREPARE,
91 CPUHP_BP_PREPARE_DYN, 91 CPUHP_BP_PREPARE_DYN,
92 CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, 92 CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 06097ef30449..b511f6d24b42 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
42 vmcoreinfo_append_str("PAGESIZE=%ld\n", value) 42 vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
43#define VMCOREINFO_SYMBOL(name) \ 43#define VMCOREINFO_SYMBOL(name) \
44 vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) 44 vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
45#define VMCOREINFO_SYMBOL_ARRAY(name) \
46 vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
45#define VMCOREINFO_SIZE(name) \ 47#define VMCOREINFO_SIZE(name) \
46 vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ 48 vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
47 (unsigned long)sizeof(name)) 49 (unsigned long)sizeof(name))
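
Annotation: the new _ARRAY variant drops the address-of operator, which matters when the exported symbol is itself a pointer. A standalone illustration of the distinction (names hypothetical):

#include <stdio.h>

int table[4];
int *ptable = table;

int main(void)
{
        /* For a real array, &name and name agree numerically ... */
        printf("&table=%p  table=%p\n", (void *)&table, (void *)table);
        /* ... but for a pointer symbol they differ: the plain form
         * records where the data lives, which is what dump tools want. */
        printf("&ptable=%p ptable=%p\n", (void *)&ptable, (void *)ptable);
        return 0;
}
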
diff --git a/include/linux/efi.h b/include/linux/efi.h
index d813f7b04da7..29fdf8029cf6 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -140,11 +140,13 @@ struct efi_boot_memmap {
140 140
141struct capsule_info { 141struct capsule_info {
142 efi_capsule_header_t header; 142 efi_capsule_header_t header;
143 efi_capsule_header_t *capsule;
143 int reset_type; 144 int reset_type;
144 long index; 145 long index;
145 size_t count; 146 size_t count;
146 size_t total_size; 147 size_t total_size;
147 phys_addr_t *pages; 148 struct page **pages;
149 phys_addr_t *phys;
148 size_t page_bytes_remain; 150 size_t page_bytes_remain;
149}; 151};
150 152
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index f4ff47d4a893..fe0c349684fa 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie,
755{ 755{
756 if (fscache_cookie_valid(cookie) && PageFsCache(page)) 756 if (fscache_cookie_valid(cookie) && PageFsCache(page))
757 return __fscache_maybe_release_page(cookie, page, gfp); 757 return __fscache_maybe_release_page(cookie, page, gfp);
758 return false; 758 return true;
759} 759}
760 760
761/** 761/**
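
Annotation: the old "return false" wrongly vetoed page release exactly when the cookie was invalid, that is, when fscache has no interest in the page at all. A sketch of the typical ->releasepage() call site this unblocks (names hypothetical):

#include <linux/fscache.h>

static int demo_releasepage(struct fscache_cookie *cookie,
                            struct page *page, gfp_t gfp)
{
        /* With the fix, an invalid cookie no longer pins the page. */
        if (!fscache_maybe_release_page(cookie, page, gfp))
                return 0;               /* fscache still needs it */
        /* ... filesystem-specific teardown ... */
        return 1;
}
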
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 55e672592fa9..7258cd676df4 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -66,9 +66,10 @@ struct gpio_irq_chip {
66 /** 66 /**
67 * @lock_key: 67 * @lock_key:
68 * 68 *
69 * Per GPIO IRQ chip lockdep class. 69 * Per GPIO IRQ chip lockdep classes.
70 */ 70 */
71 struct lock_class_key *lock_key; 71 struct lock_class_key *lock_key;
72 struct lock_class_key *request_key;
72 73
73 /** 74 /**
74 * @parent_handler: 75 * @parent_handler:
@@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip,
323 324
324/* add/remove chips */ 325/* add/remove chips */
325extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, 326extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
326 struct lock_class_key *lock_key); 327 struct lock_class_key *lock_key,
328 struct lock_class_key *request_key);
327 329
328/** 330/**
329 * gpiochip_add_data() - register a gpio_chip 331 * gpiochip_add_data() - register a gpio_chip
@@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
350 */ 352 */
351#ifdef CONFIG_LOCKDEP 353#ifdef CONFIG_LOCKDEP
352#define gpiochip_add_data(chip, data) ({ \ 354#define gpiochip_add_data(chip, data) ({ \
353 static struct lock_class_key key; \ 355 static struct lock_class_key lock_key; \
354 gpiochip_add_data_with_key(chip, data, &key); \ 356 static struct lock_class_key request_key; \
357 gpiochip_add_data_with_key(chip, data, &lock_key, \
358 &request_key); \
355 }) 359 })
356#else 360#else
357#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) 361#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL)
358#endif 362#endif
359 363
360static inline int gpiochip_add(struct gpio_chip *chip) 364static inline int gpiochip_add(struct gpio_chip *chip)
@@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
429 irq_flow_handler_t handler, 433 irq_flow_handler_t handler,
430 unsigned int type, 434 unsigned int type,
431 bool threaded, 435 bool threaded,
432 struct lock_class_key *lock_key); 436 struct lock_class_key *lock_key,
437 struct lock_class_key *request_key);
433 438
434#ifdef CONFIG_LOCKDEP 439#ifdef CONFIG_LOCKDEP
435 440
@@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
445 irq_flow_handler_t handler, 450 irq_flow_handler_t handler,
446 unsigned int type) 451 unsigned int type)
447{ 452{
448 static struct lock_class_key key; 453 static struct lock_class_key lock_key;
454 static struct lock_class_key request_key;
449 455
450 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, 456 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
451 handler, type, false, &key); 457 handler, type, false,
458 &lock_key, &request_key);
452} 459}
453 460
454static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, 461static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
458 unsigned int type) 465 unsigned int type)
459{ 466{
460 467
461 static struct lock_class_key key; 468 static struct lock_class_key lock_key;
469 static struct lock_class_key request_key;
462 470
463 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, 471 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
464 handler, type, true, &key); 472 handler, type, true,
473 &lock_key, &request_key);
465} 474}
466#else 475#else
467static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, 476static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
@@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
471 unsigned int type) 480 unsigned int type)
472{ 481{
473 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, 482 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
474 handler, type, false, NULL); 483 handler, type, false, NULL, NULL);
475} 484}
476 485
477static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, 486static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
481 unsigned int type) 490 unsigned int type)
482{ 491{
483 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, 492 return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
484 handler, type, true, NULL); 493 handler, type, true, NULL, NULL);
485} 494}
486#endif /* CONFIG_LOCKDEP */ 495#endif /* CONFIG_LOCKDEP */
487 496
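
Annotation: with lockdep enabled, gpiochip_add_data() now mints two static classes per call site; open-coded users of the _with_key variant must mint and pass both themselves. A sketch with hypothetical key names:

#include <linux/gpio/driver.h>

static struct lock_class_key demo_gpio_lock_key;
static struct lock_class_key demo_gpio_request_key;

static int demo_register(struct gpio_chip *chip, void *data)
{
        return gpiochip_add_data_with_key(chip, data,
                                          &demo_gpio_lock_key,
                                          &demo_gpio_request_key);
}
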
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb18c6290ca8..8415bf1a9776 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
273 * 100: prefer care-of address 273 * 100: prefer care-of address
274 */ 274 */
275 dontfrag:1, 275 dontfrag:1,
276 autoflowlabel:1; 276 autoflowlabel:1,
277 autoflowlabel_set:1;
277 __u8 min_hopcount; 278 __u8 min_hopcount;
278 __u8 tclass; 279 __u8 tclass;
279 __be32 rcv_flowinfo; 280 __be32 rcv_flowinfo;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index e140f69163b6..a0231e96a578 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -212,6 +212,7 @@ struct irq_data {
212 * mask. Applies only to affinity managed irqs. 212 * mask. Applies only to affinity managed irqs.
213 * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target 213 * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target
214 * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set 214 * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set
215 * IRQD_CAN_RESERVE - Can use reservation mode
215 */ 216 */
216enum { 217enum {
217 IRQD_TRIGGER_MASK = 0xf, 218 IRQD_TRIGGER_MASK = 0xf,
@@ -233,6 +234,7 @@ enum {
233 IRQD_MANAGED_SHUTDOWN = (1 << 23), 234 IRQD_MANAGED_SHUTDOWN = (1 << 23),
234 IRQD_SINGLE_TARGET = (1 << 24), 235 IRQD_SINGLE_TARGET = (1 << 24),
235 IRQD_DEFAULT_TRIGGER_SET = (1 << 25), 236 IRQD_DEFAULT_TRIGGER_SET = (1 << 25),
237 IRQD_CAN_RESERVE = (1 << 26),
236}; 238};
237 239
238#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) 240#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d)
377 return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; 379 return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
378} 380}
379 381
382static inline void irqd_set_can_reserve(struct irq_data *d)
383{
384 __irqd_to_state(d) |= IRQD_CAN_RESERVE;
385}
386
387static inline void irqd_clr_can_reserve(struct irq_data *d)
388{
389 __irqd_to_state(d) &= ~IRQD_CAN_RESERVE;
390}
391
392static inline bool irqd_can_reserve(struct irq_data *d)
393{
394 return __irqd_to_state(d) & IRQD_CAN_RESERVE;
395}
396
380#undef __irqd_to_state 397#undef __irqd_to_state
381 398
382static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) 399static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 39fb3700f7a9..25b33b664537 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq)
255} 255}
256 256
257static inline void 257static inline void
258irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) 258irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
259 struct lock_class_key *request_class)
259{ 260{
260 struct irq_desc *desc = irq_to_desc(irq); 261 struct irq_desc *desc = irq_to_desc(irq);
261 262
262 if (desc) 263 if (desc) {
263 lockdep_set_class(&desc->lock, class); 264 lockdep_set_class(&desc->lock, lock_class);
265 lockdep_set_class(&desc->request_mutex, request_class);
266 }
264} 267}
265 268
266#ifdef CONFIG_IRQ_PREFLOW_FASTEOI 269#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
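
Annotation: callers of irq_set_lockdep_class() follow the same two-key pattern, supplying one class for the descriptor lock and a second for the new request mutex (sketch, names hypothetical):

#include <linux/irqdesc.h>

static struct lock_class_key demo_irq_lock_class;
static struct lock_class_key demo_irq_request_class;

static void demo_setup(unsigned int irq)
{
        irq_set_lockdep_class(irq, &demo_irq_lock_class,
                              &demo_irq_request_class);
}
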
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a34355d19546..48c7e86bb556 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -113,7 +113,7 @@ struct irq_domain_ops {
113 unsigned int nr_irqs, void *arg); 113 unsigned int nr_irqs, void *arg);
114 void (*free)(struct irq_domain *d, unsigned int virq, 114 void (*free)(struct irq_domain *d, unsigned int virq,
115 unsigned int nr_irqs); 115 unsigned int nr_irqs);
116 int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); 116 int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
117 void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); 117 void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
118 int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, 118 int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
119 unsigned long *out_hwirq, unsigned int *out_type); 119 unsigned long *out_hwirq, unsigned int *out_type);
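
Annotation: the renamed "reserve" argument pairs with the new IRQD_CAN_RESERVE state in irq.h above: an ->activate() callback can defer real resource assignment when activation is only a reservation. A hypothetical callback honouring it:

#include <linux/irq.h>
#include <linux/irqdomain.h>

static int demo_domain_activate(struct irq_domain *d,
                                struct irq_data *irqd, bool reserve)
{
        if (reserve && irqd_can_reserve(irqd))
                return 0;       /* reservation only: defer programming */
        /* ... assign and program the real resource ... */
        return 0;
}
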
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 46cb57d5eb13..1b3996ff3f16 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -27,22 +27,18 @@
27# define trace_hardirq_enter() \ 27# define trace_hardirq_enter() \
28do { \ 28do { \
29 current->hardirq_context++; \ 29 current->hardirq_context++; \
30 crossrelease_hist_start(XHLOCK_HARD); \
31} while (0) 30} while (0)
32# define trace_hardirq_exit() \ 31# define trace_hardirq_exit() \
33do { \ 32do { \
34 current->hardirq_context--; \ 33 current->hardirq_context--; \
35 crossrelease_hist_end(XHLOCK_HARD); \
36} while (0) 34} while (0)
37# define lockdep_softirq_enter() \ 35# define lockdep_softirq_enter() \
38do { \ 36do { \
39 current->softirq_context++; \ 37 current->softirq_context++; \
40 crossrelease_hist_start(XHLOCK_SOFT); \
41} while (0) 38} while (0)
42# define lockdep_softirq_exit() \ 39# define lockdep_softirq_exit() \
43do { \ 40do { \
44 current->softirq_context--; \ 41 current->softirq_context--; \
45 crossrelease_hist_end(XHLOCK_SOFT); \
46} while (0) 42} while (0)
47# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, 43# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
48#else 44#else
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 2e75dc34bff5..3251d9c0d313 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -475,8 +475,6 @@ enum xhlock_context_t {
475#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ 475#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
476 { .name = (_name), .key = (void *)(_key), } 476 { .name = (_name), .key = (void *)(_key), }
477 477
478static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
479static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
480static inline void lockdep_invariant_state(bool force) {} 478static inline void lockdep_invariant_state(bool force) {}
481static inline void lockdep_init_task(struct task_struct *task) {} 479static inline void lockdep_init_task(struct task_struct *task) {}
482static inline void lockdep_free_task(struct task_struct *task) {} 480static inline void lockdep_free_task(struct task_struct *task) {}
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index a2a1318a3d0c..c3d3f04d8cc6 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -915,10 +915,10 @@ enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN};
915#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) 915#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6)
916 916
917enum dev_aspm_mode { 917enum dev_aspm_mode {
918 DEV_ASPM_DISABLE = 0,
919 DEV_ASPM_DYNAMIC, 918 DEV_ASPM_DYNAMIC,
920 DEV_ASPM_BACKDOOR, 919 DEV_ASPM_BACKDOOR,
921 DEV_ASPM_STATIC, 920 DEV_ASPM_STATIC,
921 DEV_ASPM_DISABLE,
922}; 922};
923 923
924/* 924/*
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a886b51511ab..1f509d072026 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
556}; 556};
557 557
558struct mlx5_irq_info { 558struct mlx5_irq_info {
559 cpumask_var_t mask;
559 char name[MLX5_MAX_IRQ_NAME]; 560 char name[MLX5_MAX_IRQ_NAME];
560}; 561};
561 562
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
1048 enum mlx5_eq_type type); 1049 enum mlx5_eq_type type);
1049int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); 1050int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
1050int mlx5_start_eqs(struct mlx5_core_dev *dev); 1051int mlx5_start_eqs(struct mlx5_core_dev *dev);
1051int mlx5_stop_eqs(struct mlx5_core_dev *dev); 1052void mlx5_stop_eqs(struct mlx5_core_dev *dev);
1052int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, 1053int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
1053 unsigned int *irqn); 1054 unsigned int *irqn);
1054int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); 1055int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1164,6 +1165,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
1164int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); 1165int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
1165bool mlx5_lag_is_active(struct mlx5_core_dev *dev); 1166bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
1166struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); 1167struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
1168int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1169 u64 *values,
1170 int num_counters,
1171 size_t *offsets);
1167struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); 1172struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
1168void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); 1173void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
1169 1174
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 38a7577a9ce7..d44ec5f41d4a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -147,7 +147,7 @@ enum {
147 MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, 147 MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771,
148 MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, 148 MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772,
149 MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, 149 MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773,
150 MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, 150 MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780,
151 MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, 151 MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781,
152 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, 152 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782,
153 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, 153 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
7239 u8 vxlan_udp_port[0x10]; 7239 u8 vxlan_udp_port[0x10];
7240}; 7240};
7241 7241
7242struct mlx5_ifc_set_rate_limit_out_bits { 7242struct mlx5_ifc_set_pp_rate_limit_out_bits {
7243 u8 status[0x8]; 7243 u8 status[0x8];
7244 u8 reserved_at_8[0x18]; 7244 u8 reserved_at_8[0x18];
7245 7245
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
7248 u8 reserved_at_40[0x40]; 7248 u8 reserved_at_40[0x40];
7249}; 7249};
7250 7250
7251struct mlx5_ifc_set_rate_limit_in_bits { 7251struct mlx5_ifc_set_pp_rate_limit_in_bits {
7252 u8 opcode[0x10]; 7252 u8 opcode[0x10];
7253 u8 reserved_at_10[0x10]; 7253 u8 reserved_at_10[0x10];
7254 7254
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
7261 u8 reserved_at_60[0x20]; 7261 u8 reserved_at_60[0x20];
7262 7262
7263 u8 rate_limit[0x20]; 7263 u8 rate_limit[0x20];
7264
7265 u8 reserved_at_a0[0x160];
7264}; 7266};
7265 7267
7266struct mlx5_ifc_access_register_out_bits { 7268struct mlx5_ifc_access_register_out_bits {
diff --git a/include/linux/pti.h b/include/linux/pti.h
new file mode 100644
index 000000000000..0174883a935a
--- /dev/null
+++ b/include/linux/pti.h
@@ -0,0 +1,11 @@
1// SPDX-License-Identifier: GPL-2.0
2#ifndef _INCLUDE_PTI_H
3#define _INCLUDE_PTI_H
4
5#ifdef CONFIG_PAGE_TABLE_ISOLATION
6#include <asm/pti.h>
7#else
8static inline void pti_init(void) { }
9#endif
10
11#endif
diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
index ff3642d267f7..94081e9a5010 100644
--- a/include/linux/sh_eth.h
+++ b/include/linux/sh_eth.h
@@ -17,7 +17,6 @@ struct sh_eth_plat_data {
17 unsigned char mac_addr[ETH_ALEN]; 17 unsigned char mac_addr[ETH_ALEN];
18 unsigned no_ether_link:1; 18 unsigned no_ether_link:1;
19 unsigned ether_link_active_low:1; 19 unsigned ether_link_active_low:1;
20 unsigned needs_init:1;
21}; 20};
22 21
23#endif 22#endif
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7b2170bfd6e7..bc6bb325d1bf 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
126 * for that name. This appears in the sysfs "modalias" attribute 126 * for that name. This appears in the sysfs "modalias" attribute
127 * for driver coldplugging, and in uevents used for hotplugging 127 * for driver coldplugging, and in uevents used for hotplugging
128 * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when 128 * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
129 * when not using a GPIO line) 129 * not using a GPIO line)
130 * 130 *
131 * @statistics: statistics for the spi_device 131 * @statistics: statistics for the spi_device
132 * 132 *
diff --git a/include/linux/tick.h b/include/linux/tick.h
index f442d1a42025..7cc35921218e 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void);
119extern void tick_nohz_irq_exit(void); 119extern void tick_nohz_irq_exit(void);
120extern ktime_t tick_nohz_get_sleep_length(void); 120extern ktime_t tick_nohz_get_sleep_length(void);
121extern unsigned long tick_nohz_get_idle_calls(void); 121extern unsigned long tick_nohz_get_idle_calls(void);
122extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
122extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); 123extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
123extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); 124extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
124#else /* !CONFIG_NO_HZ_COMMON */ 125#else /* !CONFIG_NO_HZ_COMMON */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 04af640ea95b..2448f9cc48a3 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j);
207unsigned long round_jiffies_up_relative(unsigned long j); 207unsigned long round_jiffies_up_relative(unsigned long j);
208 208
209#ifdef CONFIG_HOTPLUG_CPU 209#ifdef CONFIG_HOTPLUG_CPU
210int timers_prepare_cpu(unsigned int cpu);
210int timers_dead_cpu(unsigned int cpu); 211int timers_dead_cpu(unsigned int cpu);
211#else 212#else
212#define timers_dead_cpu NULL 213#define timers_prepare_cpu NULL
214#define timers_dead_cpu NULL
213#endif 215#endif
214 216
215#endif 217#endif
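
Annotation: the new prepare callback pairs with the existing dead callback at the renamed CPUHP_TIMERS_PREPARE state (see the cpuhotplug.h hunk earlier). A sketch of the hotplug table entry in kernel/cpu.c; the .name string here is an assumption of this sketch:

        [CPUHP_TIMERS_PREPARE] = {
                .name            = "timers:prepare",
                .startup.single  = timers_prepare_cpu,
                .teardown.single = timers_dead_cpu,
        },
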
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8b8118a7fadb..cb4d92b79cd9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
3226 * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. 3226 * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
3227 * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing 3227 * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
3228 * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. 3228 * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
3229 * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
3230 * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the 3229 * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
3231 * firmware. 3230 * firmware.
3232 * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP. 3231 * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0105445cab83..8e08b6da72f3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
694}; 694};
695 695
696enum tc_clsbpf_command { 696enum tc_clsbpf_command {
697 TC_CLSBPF_ADD, 697 TC_CLSBPF_OFFLOAD,
698 TC_CLSBPF_REPLACE,
699 TC_CLSBPF_DESTROY,
700 TC_CLSBPF_STATS, 698 TC_CLSBPF_STATS,
701}; 699};
702 700
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
705 enum tc_clsbpf_command command; 703 enum tc_clsbpf_command command;
706 struct tcf_exts *exts; 704 struct tcf_exts *exts;
707 struct bpf_prog *prog; 705 struct bpf_prog *prog;
706 struct bpf_prog *oldprog;
708 const char *name; 707 const char *name;
709 bool exts_integrated; 708 bool exts_integrated;
710 u32 gen_flags; 709 u32 gen_flags;
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 2f8f93da5dc2..9a5ccf03a59b 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -966,7 +966,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
966void sctp_transport_burst_reset(struct sctp_transport *); 966void sctp_transport_burst_reset(struct sctp_transport *);
967unsigned long sctp_transport_timeout(struct sctp_transport *); 967unsigned long sctp_transport_timeout(struct sctp_transport *);
968void sctp_transport_reset(struct sctp_transport *t); 968void sctp_transport_reset(struct sctp_transport *t);
969void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); 969bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
970void sctp_transport_immediate_rtx(struct sctp_transport *); 970void sctp_transport_immediate_rtx(struct sctp_transport *);
971void sctp_transport_dst_release(struct sctp_transport *t); 971void sctp_transport_dst_release(struct sctp_transport *t);
972void sctp_transport_dst_confirm(struct sctp_transport *t); 972void sctp_transport_dst_confirm(struct sctp_transport *t);
diff --git a/include/net/sock.h b/include/net/sock.h
index 9155da422692..7a7b14e9628a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1514,6 +1514,11 @@ static inline bool sock_owned_by_user(const struct sock *sk)
1514 return sk->sk_lock.owned; 1514 return sk->sk_lock.owned;
1515} 1515}
1516 1516
1517static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
1518{
1519 return sk->sk_lock.owned;
1520}
1521
1517/* no reclassification while locks are held */ 1522/* no reclassification while locks are held */
1518static inline bool sock_allow_reclassification(const struct sock *csk) 1523static inline bool sock_allow_reclassification(const struct sock *csk)
1519{ 1524{
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 13223396dc64..f96391e84a8a 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -146,7 +146,7 @@ struct vxlanhdr_gpe {
146 np_applied:1, 146 np_applied:1,
147 instance_applied:1, 147 instance_applied:1,
148 version:2, 148 version:2,
149reserved_flags2:2; 149 reserved_flags2:2;
150#elif defined(__BIG_ENDIAN_BITFIELD) 150#elif defined(__BIG_ENDIAN_BITFIELD)
151 u8 reserved_flags2:2, 151 u8 reserved_flags2:2,
152 version:2, 152 version:2,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dc28a98ce97c..ae35991b5877 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1570,6 +1570,9 @@ int xfrm_init_state(struct xfrm_state *x);
1570int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); 1570int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
1571int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); 1571int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
1572int xfrm_input_resume(struct sk_buff *skb, int nexthdr); 1572int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
1573int xfrm_trans_queue(struct sk_buff *skb,
1574 int (*finish)(struct net *, struct sock *,
1575 struct sk_buff *));
1573int xfrm_output_resume(struct sk_buff *skb, int err); 1576int xfrm_output_resume(struct sk_buff *skb, int err);
1574int xfrm_output(struct sock *sk, struct sk_buff *skb); 1577int xfrm_output(struct sock *sk, struct sk_buff *skb);
1575int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); 1578int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h
index 758607226bfd..2cd449328aee 100644
--- a/include/trace/events/clk.h
+++ b/include/trace/events/clk.h
@@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent,
134 134
135 TP_STRUCT__entry( 135 TP_STRUCT__entry(
136 __string( name, core->name ) 136 __string( name, core->name )
137 __string( pname, parent->name ) 137 __string( pname, parent ? parent->name : "none" )
138 ), 138 ),
139 139
140 TP_fast_assign( 140 TP_fast_assign(
141 __assign_str(name, core->name); 141 __assign_str(name, core->name);
142 __assign_str(pname, parent->name); 142 __assign_str(pname, parent ? parent->name : "none");
143 ), 143 ),
144 144
145 TP_printk("%s %s", __get_str(name), __get_str(pname)) 145 TP_printk("%s %s", __get_str(name), __get_str(pname))
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index e4b0b8e09932..2c735a3e6613 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
211 { KVM_TRACE_MMIO_WRITE, "write" } 211 { KVM_TRACE_MMIO_WRITE, "write" }
212 212
213TRACE_EVENT(kvm_mmio, 213TRACE_EVENT(kvm_mmio,
214 TP_PROTO(int type, int len, u64 gpa, u64 val), 214 TP_PROTO(int type, int len, u64 gpa, void *val),
215 TP_ARGS(type, len, gpa, val), 215 TP_ARGS(type, len, gpa, val),
216 216
217 TP_STRUCT__entry( 217 TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
225 __entry->type = type; 225 __entry->type = type;
226 __entry->len = len; 226 __entry->len = len;
227 __entry->gpa = gpa; 227 __entry->gpa = gpa;
228 __entry->val = val; 228 __entry->val = 0;
229 if (val)
230 memcpy(&__entry->val, val,
231 min_t(u32, sizeof(__entry->val), len));
229 ), 232 ),
230 233
231 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", 234 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca6cbf1..ab34c561f26b 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -25,6 +25,35 @@
25 tcp_state_name(TCP_CLOSING), \ 25 tcp_state_name(TCP_CLOSING), \
26 tcp_state_name(TCP_NEW_SYN_RECV)) 26 tcp_state_name(TCP_NEW_SYN_RECV))
27 27
28#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \
29 do { \
30 struct in6_addr *pin6; \
31 \
32 pin6 = (struct in6_addr *)__entry->saddr_v6; \
33 ipv6_addr_set_v4mapped(saddr, pin6); \
34 pin6 = (struct in6_addr *)__entry->daddr_v6; \
35 ipv6_addr_set_v4mapped(daddr, pin6); \
36 } while (0)
37
38#if IS_ENABLED(CONFIG_IPV6)
39#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \
40 do { \
41 if (sk->sk_family == AF_INET6) { \
42 struct in6_addr *pin6; \
43 \
44 pin6 = (struct in6_addr *)__entry->saddr_v6; \
45 *pin6 = saddr6; \
46 pin6 = (struct in6_addr *)__entry->daddr_v6; \
47 *pin6 = daddr6; \
48 } else { \
49 TP_STORE_V4MAPPED(__entry, saddr, daddr); \
50 } \
51 } while (0)
52#else
53#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \
54 TP_STORE_V4MAPPED(__entry, saddr, daddr)
55#endif
56
28/* 57/*
29 * tcp event with arguments sk and skb 58 * tcp event with arguments sk and skb
30 * 59 *
@@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
50 79
51 TP_fast_assign( 80 TP_fast_assign(
52 struct inet_sock *inet = inet_sk(sk); 81 struct inet_sock *inet = inet_sk(sk);
53 struct in6_addr *pin6;
54 __be32 *p32; 82 __be32 *p32;
55 83
56 __entry->skbaddr = skb; 84 __entry->skbaddr = skb;
@@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
65 p32 = (__be32 *) __entry->daddr; 93 p32 = (__be32 *) __entry->daddr;
66 *p32 = inet->inet_daddr; 94 *p32 = inet->inet_daddr;
67 95
68#if IS_ENABLED(CONFIG_IPV6) 96 TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
69 if (sk->sk_family == AF_INET6) { 97 sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
70 pin6 = (struct in6_addr *)__entry->saddr_v6;
71 *pin6 = sk->sk_v6_rcv_saddr;
72 pin6 = (struct in6_addr *)__entry->daddr_v6;
73 *pin6 = sk->sk_v6_daddr;
74 } else
75#endif
76 {
77 pin6 = (struct in6_addr *)__entry->saddr_v6;
78 ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
79 pin6 = (struct in6_addr *)__entry->daddr_v6;
80 ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
81 }
82 ), 98 ),
83 99
84 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", 100 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
127 143
128 TP_fast_assign( 144 TP_fast_assign(
129 struct inet_sock *inet = inet_sk(sk); 145 struct inet_sock *inet = inet_sk(sk);
130 struct in6_addr *pin6;
131 __be32 *p32; 146 __be32 *p32;
132 147
133 __entry->skaddr = sk; 148 __entry->skaddr = sk;
@@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
141 p32 = (__be32 *) __entry->daddr; 156 p32 = (__be32 *) __entry->daddr;
142 *p32 = inet->inet_daddr; 157 *p32 = inet->inet_daddr;
143 158
144#if IS_ENABLED(CONFIG_IPV6) 159 TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
145 if (sk->sk_family == AF_INET6) { 160 sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
146 pin6 = (struct in6_addr *)__entry->saddr_v6;
147 *pin6 = sk->sk_v6_rcv_saddr;
148 pin6 = (struct in6_addr *)__entry->daddr_v6;
149 *pin6 = sk->sk_v6_daddr;
150 } else
151#endif
152 {
153 pin6 = (struct in6_addr *)__entry->saddr_v6;
154 ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
155 pin6 = (struct in6_addr *)__entry->daddr_v6;
156 ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
157 }
158 ), 161 ),
159 162
160 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", 163 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state,
197 200
198 TP_fast_assign( 201 TP_fast_assign(
199 struct inet_sock *inet = inet_sk(sk); 202 struct inet_sock *inet = inet_sk(sk);
200 struct in6_addr *pin6;
201 __be32 *p32; 203 __be32 *p32;
202 204
203 __entry->skaddr = sk; 205 __entry->skaddr = sk;
@@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state,
213 p32 = (__be32 *) __entry->daddr; 215 p32 = (__be32 *) __entry->daddr;
214 *p32 = inet->inet_daddr; 216 *p32 = inet->inet_daddr;
215 217
216#if IS_ENABLED(CONFIG_IPV6) 218 TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
217 if (sk->sk_family == AF_INET6) { 219 sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
218 pin6 = (struct in6_addr *)__entry->saddr_v6;
219 *pin6 = sk->sk_v6_rcv_saddr;
220 pin6 = (struct in6_addr *)__entry->daddr_v6;
221 *pin6 = sk->sk_v6_daddr;
222 } else
223#endif
224 {
225 pin6 = (struct in6_addr *)__entry->saddr_v6;
226 ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
227 pin6 = (struct in6_addr *)__entry->daddr_v6;
228 ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
229 }
230 ), 220 ),
231 221
232 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", 222 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
@@ -256,7 +246,6 @@ TRACE_EVENT(tcp_retransmit_synack,
256 246
257 TP_fast_assign( 247 TP_fast_assign(
258 struct inet_request_sock *ireq = inet_rsk(req); 248 struct inet_request_sock *ireq = inet_rsk(req);
259 struct in6_addr *pin6;
260 __be32 *p32; 249 __be32 *p32;
261 250
262 __entry->skaddr = sk; 251 __entry->skaddr = sk;
@@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack,
271 p32 = (__be32 *) __entry->daddr; 260 p32 = (__be32 *) __entry->daddr;
272 *p32 = ireq->ir_rmt_addr; 261 *p32 = ireq->ir_rmt_addr;
273 262
274#if IS_ENABLED(CONFIG_IPV6) 263 TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr,
275 if (sk->sk_family == AF_INET6) { 264 ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr);
276 pin6 = (struct in6_addr *)__entry->saddr_v6;
277 *pin6 = ireq->ir_v6_loc_addr;
278 pin6 = (struct in6_addr *)__entry->daddr_v6;
279 *pin6 = ireq->ir_v6_rmt_addr;
280 } else
281#endif
282 {
283 pin6 = (struct in6_addr *)__entry->saddr_v6;
284 ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6);
285 pin6 = (struct in6_addr *)__entry->daddr_v6;
286 ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6);
287 }
288 ), 265 ),
289 266
290 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", 267 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 3ee3bf7c8526..144de4d2f385 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -23,6 +23,7 @@
23#define _UAPI_LINUX_IF_ETHER_H 23#define _UAPI_LINUX_IF_ETHER_H
24 24
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/libc-compat.h>
26 27
27/* 28/*
28 * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble 29 * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
@@ -149,11 +150,13 @@
149 * This is an Ethernet frame header. 150 * This is an Ethernet frame header.
150 */ 151 */
151 152
153#if __UAPI_DEF_ETHHDR
152struct ethhdr { 154struct ethhdr {
153 unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ 155 unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
154 unsigned char h_source[ETH_ALEN]; /* source ether addr */ 156 unsigned char h_source[ETH_ALEN]; /* source ether addr */
155 __be16 h_proto; /* packet type ID field */ 157 __be16 h_proto; /* packet type ID field */
156} __attribute__((packed)); 158} __attribute__((packed));
159#endif
157 160
158 161
159#endif /* _UAPI_LINUX_IF_ETHER_H */ 162#endif /* _UAPI_LINUX_IF_ETHER_H */
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index 282875cf8056..fc29efaa918c 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -168,47 +168,106 @@
168 168
169/* If we did not see any headers from any supported C libraries, 169/* If we did not see any headers from any supported C libraries,
170 * or we are being included in the kernel, then define everything 170 * or we are being included in the kernel, then define everything
171 * that we need. */ 171 * that we need. Check for previous __UAPI_* definitions to give
172 * unsupported C libraries a way to opt out of any kernel definition. */
172#else /* !defined(__GLIBC__) */ 173#else /* !defined(__GLIBC__) */
173 174
174/* Definitions for if.h */ 175/* Definitions for if.h */
176#ifndef __UAPI_DEF_IF_IFCONF
175#define __UAPI_DEF_IF_IFCONF 1 177#define __UAPI_DEF_IF_IFCONF 1
178#endif
179#ifndef __UAPI_DEF_IF_IFMAP
176#define __UAPI_DEF_IF_IFMAP 1 180#define __UAPI_DEF_IF_IFMAP 1
181#endif
182#ifndef __UAPI_DEF_IF_IFNAMSIZ
177#define __UAPI_DEF_IF_IFNAMSIZ 1 183#define __UAPI_DEF_IF_IFNAMSIZ 1
184#endif
185#ifndef __UAPI_DEF_IF_IFREQ
178#define __UAPI_DEF_IF_IFREQ 1 186#define __UAPI_DEF_IF_IFREQ 1
187#endif
179/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ 188/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
189#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS
180#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 190#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
191#endif
181/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ 192/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
193#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
182#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 194#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
195#endif
183 196
184/* Definitions for in.h */ 197/* Definitions for in.h */
198#ifndef __UAPI_DEF_IN_ADDR
185#define __UAPI_DEF_IN_ADDR 1 199#define __UAPI_DEF_IN_ADDR 1
200#endif
201#ifndef __UAPI_DEF_IN_IPPROTO
186#define __UAPI_DEF_IN_IPPROTO 1 202#define __UAPI_DEF_IN_IPPROTO 1
203#endif
204#ifndef __UAPI_DEF_IN_PKTINFO
187#define __UAPI_DEF_IN_PKTINFO 1 205#define __UAPI_DEF_IN_PKTINFO 1
206#endif
207#ifndef __UAPI_DEF_IP_MREQ
188#define __UAPI_DEF_IP_MREQ 1 208#define __UAPI_DEF_IP_MREQ 1
209#endif
210#ifndef __UAPI_DEF_SOCKADDR_IN
189#define __UAPI_DEF_SOCKADDR_IN 1 211#define __UAPI_DEF_SOCKADDR_IN 1
212#endif
213#ifndef __UAPI_DEF_IN_CLASS
190#define __UAPI_DEF_IN_CLASS 1 214#define __UAPI_DEF_IN_CLASS 1
215#endif
191 216
192/* Definitions for in6.h */ 217/* Definitions for in6.h */
218#ifndef __UAPI_DEF_IN6_ADDR
193#define __UAPI_DEF_IN6_ADDR 1 219#define __UAPI_DEF_IN6_ADDR 1
220#endif
221#ifndef __UAPI_DEF_IN6_ADDR_ALT
194#define __UAPI_DEF_IN6_ADDR_ALT 1 222#define __UAPI_DEF_IN6_ADDR_ALT 1
223#endif
224#ifndef __UAPI_DEF_SOCKADDR_IN6
195#define __UAPI_DEF_SOCKADDR_IN6 1 225#define __UAPI_DEF_SOCKADDR_IN6 1
226#endif
227#ifndef __UAPI_DEF_IPV6_MREQ
196#define __UAPI_DEF_IPV6_MREQ 1 228#define __UAPI_DEF_IPV6_MREQ 1
229#endif
230#ifndef __UAPI_DEF_IPPROTO_V6
197#define __UAPI_DEF_IPPROTO_V6 1 231#define __UAPI_DEF_IPPROTO_V6 1
232#endif
233#ifndef __UAPI_DEF_IPV6_OPTIONS
198#define __UAPI_DEF_IPV6_OPTIONS 1 234#define __UAPI_DEF_IPV6_OPTIONS 1
235#endif
236#ifndef __UAPI_DEF_IN6_PKTINFO
199#define __UAPI_DEF_IN6_PKTINFO 1 237#define __UAPI_DEF_IN6_PKTINFO 1
238#endif
239#ifndef __UAPI_DEF_IP6_MTUINFO
200#define __UAPI_DEF_IP6_MTUINFO 1 240#define __UAPI_DEF_IP6_MTUINFO 1
241#endif
201 242
202/* Definitions for ipx.h */ 243/* Definitions for ipx.h */
244#ifndef __UAPI_DEF_SOCKADDR_IPX
203#define __UAPI_DEF_SOCKADDR_IPX 1 245#define __UAPI_DEF_SOCKADDR_IPX 1
246#endif
247#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION
204#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 248#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1
249#endif
250#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION
205#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 251#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1
252#endif
253#ifndef __UAPI_DEF_IPX_CONFIG_DATA
206#define __UAPI_DEF_IPX_CONFIG_DATA 1 254#define __UAPI_DEF_IPX_CONFIG_DATA 1
255#endif
256#ifndef __UAPI_DEF_IPX_ROUTE_DEF
207#define __UAPI_DEF_IPX_ROUTE_DEF 1 257#define __UAPI_DEF_IPX_ROUTE_DEF 1
258#endif
208 259
209/* Definitions for xattr.h */ 260/* Definitions for xattr.h */
261#ifndef __UAPI_DEF_XATTR
210#define __UAPI_DEF_XATTR 1 262#define __UAPI_DEF_XATTR 1
263#endif
211 264
212#endif /* __GLIBC__ */ 265#endif /* __GLIBC__ */
213 266
267/* Definitions for if_ether.h */
268/* allow libcs like musl to deactivate this, glibc does not implement this. */
269#ifndef __UAPI_DEF_ETHHDR
270#define __UAPI_DEF_ETHHDR 1
271#endif
272
214#endif /* _UAPI_LIBC_COMPAT_H */ 273#endif /* _UAPI_LIBC_COMPAT_H */
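
Annotation: the new guard exists so a libc that ships its own struct ethhdr (musl, for example) can suppress the kernel definition before including the UAPI header; a two-line sketch:

#define __UAPI_DEF_ETHHDR 0     /* libc supplies struct ethhdr itself */
#include <linux/if_ether.h>     /* constants only, no struct ethhdr */
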
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 3fea7709a441..57ccfb32e87f 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -36,7 +36,7 @@ enum ip_conntrack_info {
36 36
37#define NF_CT_STATE_INVALID_BIT (1 << 0) 37#define NF_CT_STATE_INVALID_BIT (1 << 0)
38#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) 38#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
39#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) 39#define NF_CT_STATE_UNTRACKED_BIT (1 << 6)
40 40
41/* Bitset representing status of connection. */ 41/* Bitset representing status of connection. */
42enum ip_conntrack_status { 42enum ip_conntrack_status {
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 4914b93a23f2..61f410fd74e4 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -44,3 +44,8 @@ static inline void xen_balloon_init(void)
44{ 44{
45} 45}
46#endif 46#endif
47
48#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
49struct resource;
50void arch_xen_balloon_init(struct resource *hostmem_resource);
51#endif
diff --git a/init/Kconfig b/init/Kconfig
index 2934249fba46..a9a2e2c86671 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -461,10 +461,15 @@ endmenu # "CPU/Task time and stats accounting"
461 461
462config CPU_ISOLATION 462config CPU_ISOLATION
463 bool "CPU isolation" 463 bool "CPU isolation"
464 depends on SMP || COMPILE_TEST
465 default y
464 help 466 help
465 Make sure that CPUs running critical tasks are not disturbed by 467 Make sure that CPUs running critical tasks are not disturbed by
466 any source of "noise" such as unbound workqueues, timers, kthreads... 468 any source of "noise" such as unbound workqueues, timers, kthreads...
467 Unbound jobs get offloaded to housekeeping CPUs. 469 Unbound jobs get offloaded to housekeeping CPUs. This is driven by
470 the "isolcpus=" boot parameter.
471
472 Say Y if unsure.
468 473
469source "kernel/rcu/Kconfig" 474source "kernel/rcu/Kconfig"
470 475
@@ -1392,6 +1397,13 @@ config BPF_SYSCALL
1392 Enable the bpf() system call that allows to manipulate eBPF 1397 Enable the bpf() system call that allows to manipulate eBPF
1393 programs and maps via file descriptors. 1398 programs and maps via file descriptors.
1394 1399
1400config BPF_JIT_ALWAYS_ON
1401 bool "Permanently enable BPF JIT and remove BPF interpreter"
1402 depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
1403 help
1404 Enables BPF JIT and removes BPF interpreter to avoid
1405 speculative execution of BPF instructions by the interpreter
1406
1395config USERFAULTFD 1407config USERFAULTFD
1396 bool "Enable userfaultfd() system call" 1408 bool "Enable userfaultfd() system call"
1397 select ANON_INODES 1409 select ANON_INODES
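
Annotation: with this option the interpreter is compiled out, so eBPF programs must JIT or be rejected, and the JIT can no longer be switched off at runtime. A quick standalone probe; that the sysctl reads back as 1 on such kernels is an assumption of this sketch:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/net/core/bpf_jit_enable", "r");
        int v = -1;

        if (f) {
                if (fscanf(f, "%d", &v) != 1)
                        v = -1;
                fclose(f);
        }
        printf("bpf_jit_enable = %d\n", v);     /* expect 1 */
        return 0;
}
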
diff --git a/init/main.c b/init/main.c
index e96e3a14533c..a8100b954839 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,6 +75,7 @@
75#include <linux/slab.h> 75#include <linux/slab.h>
76#include <linux/perf_event.h> 76#include <linux/perf_event.h>
77#include <linux/ptrace.h> 77#include <linux/ptrace.h>
78#include <linux/pti.h>
78#include <linux/blkdev.h> 79#include <linux/blkdev.h>
79#include <linux/elevator.h> 80#include <linux/elevator.h>
80#include <linux/sched_clock.h> 81#include <linux/sched_clock.h>
@@ -504,6 +505,10 @@ static void __init mm_init(void)
504 pgtable_init(); 505 pgtable_init();
505 vmalloc_init(); 506 vmalloc_init();
506 ioremap_huge_init(); 507 ioremap_huge_init();
508 /* Should be run before the first non-init thread is created */
509 init_espfix_bsp();
510 /* Should be run after espfix64 is set up. */
511 pti_init();
507} 512}
508 513
509asmlinkage __visible void __init start_kernel(void) 514asmlinkage __visible void __init start_kernel(void)
@@ -679,10 +684,6 @@ asmlinkage __visible void __init start_kernel(void)
679 if (efi_enabled(EFI_RUNTIME_SERVICES)) 684 if (efi_enabled(EFI_RUNTIME_SERVICES))
680 efi_enter_virtual_mode(); 685 efi_enter_virtual_mode();
681#endif 686#endif
682#ifdef CONFIG_X86_ESPFIX64
683 /* Should be run before the first non-init thread is created */
684 init_espfix_bsp();
685#endif
686 thread_stack_cache_init(); 687 thread_stack_cache_init();
687 cred_init(); 688 cred_init();
688 fork_init(); 689 fork_init();
diff --git a/kernel/acct.c b/kernel/acct.c
index d15c0ee4d955..addf7732fb56 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -102,7 +102,7 @@ static int check_free_space(struct bsd_acct_struct *acct)
102{ 102{
103 struct kstatfs sbuf; 103 struct kstatfs sbuf;
104 104
105 if (time_is_before_jiffies(acct->needcheck)) 105 if (time_is_after_jiffies(acct->needcheck))
106 goto out; 106 goto out;
107 107
108 /* May block */ 108 /* May block */
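
Annotation: the one-word fix above is easy to misread; the jiffies helpers read left to right, so the corrected test means "the deadline is still in the future, skip the recheck", where the old form skipped it once the deadline had passed. A reminder of the semantics (kernel-context sketch):

#include <linux/jiffies.h>

/* Reads left to right:
 *   time_is_after_jiffies(t)   - t is still ahead of now (not due yet)
 *   time_is_before_jiffies(t)  - t already lies behind now (expired)
 */
static bool demo_too_early(unsigned long deadline)
{
        return time_is_after_jiffies(deadline);
}
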
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 7c25426d3cf5..aaa319848e7d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
53{ 53{
54 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; 54 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
55 int numa_node = bpf_map_attr_numa_node(attr); 55 int numa_node = bpf_map_attr_numa_node(attr);
56 u32 elem_size, index_mask, max_entries;
57 bool unpriv = !capable(CAP_SYS_ADMIN);
56 struct bpf_array *array; 58 struct bpf_array *array;
57 u64 array_size; 59 u64 array_size;
58 u32 elem_size;
59 60
60 /* check sanity of attributes */ 61 /* check sanity of attributes */
61 if (attr->max_entries == 0 || attr->key_size != 4 || 62 if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
72 73
73 elem_size = round_up(attr->value_size, 8); 74 elem_size = round_up(attr->value_size, 8);
74 75
76 max_entries = attr->max_entries;
77 index_mask = roundup_pow_of_two(max_entries) - 1;
78
79 if (unpriv)
80 /* round up array size to nearest power of 2,
81 * since cpu will speculate within index_mask limits
82 */
83 max_entries = index_mask + 1;
84
75 array_size = sizeof(*array); 85 array_size = sizeof(*array);
76 if (percpu) 86 if (percpu)
77 array_size += (u64) attr->max_entries * sizeof(void *); 87 array_size += (u64) max_entries * sizeof(void *);
78 else 88 else
79 array_size += (u64) attr->max_entries * elem_size; 89 array_size += (u64) max_entries * elem_size;
80 90
81 /* make sure there is no u32 overflow later in round_up() */ 91 /* make sure there is no u32 overflow later in round_up() */
82 if (array_size >= U32_MAX - PAGE_SIZE) 92 if (array_size >= U32_MAX - PAGE_SIZE)
@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
86 array = bpf_map_area_alloc(array_size, numa_node); 96 array = bpf_map_area_alloc(array_size, numa_node);
87 if (!array) 97 if (!array)
88 return ERR_PTR(-ENOMEM); 98 return ERR_PTR(-ENOMEM);
99 array->index_mask = index_mask;
100 array->map.unpriv_array = unpriv;
89 101
90 /* copy mandatory map attributes */ 102 /* copy mandatory map attributes */
91 array->map.map_type = attr->map_type; 103 array->map.map_type = attr->map_type;
@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
121 if (unlikely(index >= array->map.max_entries)) 133 if (unlikely(index >= array->map.max_entries))
122 return NULL; 134 return NULL;
123 135
124 return array->value + array->elem_size * index; 136 return array->value + array->elem_size * (index & array->index_mask);
125} 137}
126 138
127/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ 139/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
128static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 140static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
129{ 141{
142 struct bpf_array *array = container_of(map, struct bpf_array, map);
130 struct bpf_insn *insn = insn_buf; 143 struct bpf_insn *insn = insn_buf;
131 u32 elem_size = round_up(map->value_size, 8); 144 u32 elem_size = round_up(map->value_size, 8);
132 const int ret = BPF_REG_0; 145 const int ret = BPF_REG_0;
@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
135 148
136 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); 149 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
137 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); 150 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
138 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); 151 if (map->unpriv_array) {
152 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
153 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
154 } else {
155 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
156 }
139 157
140 if (is_power_of_2(elem_size)) { 158 if (is_power_of_2(elem_size)) {
141 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); 159 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
157 if (unlikely(index >= array->map.max_entries)) 175 if (unlikely(index >= array->map.max_entries))
158 return NULL; 176 return NULL;
159 177
160 return this_cpu_ptr(array->pptrs[index]); 178 return this_cpu_ptr(array->pptrs[index & array->index_mask]);
161} 179}
162 180
163int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) 181int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
177 */ 195 */
178 size = round_up(map->value_size, 8); 196 size = round_up(map->value_size, 8);
179 rcu_read_lock(); 197 rcu_read_lock();
180 pptr = array->pptrs[index]; 198 pptr = array->pptrs[index & array->index_mask];
181 for_each_possible_cpu(cpu) { 199 for_each_possible_cpu(cpu) {
182 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); 200 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
183 off += size; 201 off += size;
@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
225 return -EEXIST; 243 return -EEXIST;
226 244
227 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 245 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
228 memcpy(this_cpu_ptr(array->pptrs[index]), 246 memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
229 value, map->value_size); 247 value, map->value_size);
230 else 248 else
231 memcpy(array->value + array->elem_size * index, 249 memcpy(array->value +
250 array->elem_size * (index & array->index_mask),
232 value, map->value_size); 251 value, map->value_size);
233 return 0; 252 return 0;
234} 253}
@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
262 */ 281 */
263 size = round_up(map->value_size, 8); 282 size = round_up(map->value_size, 8);
264 rcu_read_lock(); 283 rcu_read_lock();
265 pptr = array->pptrs[index]; 284 pptr = array->pptrs[index & array->index_mask];
266 for_each_possible_cpu(cpu) { 285 for_each_possible_cpu(cpu) {
267 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); 286 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
268 off += size; 287 off += size;
@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
613static u32 array_of_map_gen_lookup(struct bpf_map *map, 632static u32 array_of_map_gen_lookup(struct bpf_map *map,
614 struct bpf_insn *insn_buf) 633 struct bpf_insn *insn_buf)
615{ 634{
635 struct bpf_array *array = container_of(map, struct bpf_array, map);
616 u32 elem_size = round_up(map->value_size, 8); 636 u32 elem_size = round_up(map->value_size, 8);
617 struct bpf_insn *insn = insn_buf; 637 struct bpf_insn *insn = insn_buf;
618 const int ret = BPF_REG_0; 638 const int ret = BPF_REG_0;
@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
621 641
622 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); 642 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
623 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); 643 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
624 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); 644 if (map->unpriv_array) {
645 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
646 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
647 } else {
648 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
649 }
625 if (is_power_of_2(elem_size)) 650 if (is_power_of_2(elem_size))
626 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); 651 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
627 else 652 else
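
The arraymap.c hunks above add up to a branchless clamp: at create time the map rounds max_entries up to a power of two and stores index_mask = rounded_size - 1, and every lookup path (interpreted, JITed, per-cpu, update) ANDs the index with that mask. Even if the bounds check is speculated past, the load cannot leave the allocation. A minimal userspace sketch of the idea; round_up_pow2 and the sizes are illustrative stand-ins, not kernel helpers:

#include <stdint.h>
#include <stdio.h>

/* Round up to the next power of two, as the unprivileged array map does. */
static uint32_t round_up_pow2(uint32_t n)
{
        uint32_t p = 1;

        while (p < n)
                p <<= 1;
        return p;
}

int main(void)
{
        uint32_t max_entries = 100;                            /* requested size */
        uint32_t index_mask = round_up_pow2(max_entries) - 1;  /* 127 */
        uint32_t idx;

        for (idx = 0; idx < 300; idx += 77) {
                if (idx >= max_entries)
                        continue;       /* architectural bounds check */
                /* Even if the branch above is mispredicted, the AND below
                 * keeps a speculative access inside the allocation. */
                printf("access slot %u\n", idx & index_mask);
        }
        return 0;
}
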
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 86b50aa26ee8..51ec2dda7f08 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
767} 767}
768EXPORT_SYMBOL_GPL(__bpf_call_base); 768EXPORT_SYMBOL_GPL(__bpf_call_base);
769 769
770#ifndef CONFIG_BPF_JIT_ALWAYS_ON
770/** 771/**
771 * __bpf_prog_run - run eBPF program on a given context 772 * __bpf_prog_run - run eBPF program on a given context
772 * @ctx: is the data we are operating on 773 * @ctx: is the data we are operating on
@@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
1317EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) 1318EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
1318}; 1319};
1319 1320
1321#else
1322static unsigned int __bpf_prog_ret0(const void *ctx,
1323 const struct bpf_insn *insn)
1324{
1325 return 0;
1326}
1327#endif
1328
1320bool bpf_prog_array_compatible(struct bpf_array *array, 1329bool bpf_prog_array_compatible(struct bpf_array *array,
1321 const struct bpf_prog *fp) 1330 const struct bpf_prog *fp)
1322{ 1331{
@@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
1364 */ 1373 */
1365struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) 1374struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
1366{ 1375{
1376#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1367 u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); 1377 u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
1368 1378
1369 fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; 1379 fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
1380#else
1381 fp->bpf_func = __bpf_prog_ret0;
1382#endif
1370 1383
1371 /* eBPF JITs can rewrite the program in case constant 1384 /* eBPF JITs can rewrite the program in case constant
1372 * blinding is active. However, in case of error during 1385 * blinding is active. However, in case of error during
@@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
1376 */ 1389 */
1377 if (!bpf_prog_is_dev_bound(fp->aux)) { 1390 if (!bpf_prog_is_dev_bound(fp->aux)) {
1378 fp = bpf_int_jit_compile(fp); 1391 fp = bpf_int_jit_compile(fp);
1392#ifdef CONFIG_BPF_JIT_ALWAYS_ON
1393 if (!fp->jited) {
1394 *err = -ENOTSUPP;
1395 return fp;
1396 }
1397#endif
1379 } else { 1398 } else {
1380 *err = bpf_prog_offload_compile(fp); 1399 *err = bpf_prog_offload_compile(fp);
1381 if (*err) 1400 if (*err)
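
The core.c changes make CONFIG_BPF_JIT_ALWAYS_ON compile the interpreter out entirely: bpf_func is pointed at a stub that returns 0, and a program the JIT could not translate is rejected at load time with -ENOTSUPP instead of silently falling back. A toy model of that selection, with toy_prog and TOY_ENOTSUPP as made-up stand-ins (ENOTSUPP is kernel-internal, value 524):

#include <stdio.h>

#define TOY_ENOTSUPP 524        /* kernel-internal errno returned above */

struct toy_prog {
        int jited;
        unsigned int (*bpf_func)(const void *ctx);
};

static unsigned int toy_ret0(const void *ctx)
{
        (void)ctx;
        return 0;
}

static int toy_select_runtime(struct toy_prog *fp, int jit_always_on,
                              int jit_succeeds)
{
        fp->bpf_func = toy_ret0;        /* interpreter compiled out: stub only */
        fp->jited = jit_succeeds;       /* outcome of bpf_int_jit_compile() */
        if (jit_always_on && !fp->jited)
                return -TOY_ENOTSUPP;   /* refuse to fall back to the stub */
        return 0;
}

int main(void)
{
        struct toy_prog p;

        printf("jit ok:     %d\n", toy_select_runtime(&p, 1, 1));
        printf("jit failed: %d\n", toy_select_runtime(&p, 1, 0));
        return 0;
}
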
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 01aaef1a77c5..5bb5e49ef4c3 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -368,7 +368,45 @@ out:
368 putname(pname); 368 putname(pname);
369 return ret; 369 return ret;
370} 370}
371EXPORT_SYMBOL_GPL(bpf_obj_get_user); 371
372static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
373{
374 struct bpf_prog *prog;
375 int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
376 if (ret)
377 return ERR_PTR(ret);
378
379 if (inode->i_op == &bpf_map_iops)
380 return ERR_PTR(-EINVAL);
381 if (inode->i_op != &bpf_prog_iops)
382 return ERR_PTR(-EACCES);
383
384 prog = inode->i_private;
385
386 ret = security_bpf_prog(prog);
387 if (ret < 0)
388 return ERR_PTR(ret);
389
390 if (!bpf_prog_get_ok(prog, &type, false))
391 return ERR_PTR(-EINVAL);
392
393 return bpf_prog_inc(prog);
394}
395
396struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
397{
398 struct bpf_prog *prog;
399 struct path path;
400 int ret = kern_path(name, LOOKUP_FOLLOW, &path);
401 if (ret)
402 return ERR_PTR(ret);
403 prog = __get_prog_inode(d_backing_inode(path.dentry), type);
404 if (!IS_ERR(prog))
405 touch_atime(&path);
406 path_put(&path);
407 return prog;
408}
409EXPORT_SYMBOL(bpf_prog_get_type_path);
372 410
373static void bpf_evict_inode(struct inode *inode) 411static void bpf_evict_inode(struct inode *inode)
374{ 412{
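
The new bpf_prog_get_type_path() gives kernel code the same bpffs resolution userspace already had via the BPF_OBJ_GET command: walk a path under /sys/fs/bpf, check permissions and object type, and take a reference. For comparison, a hedged userspace sketch of the equivalent lookup; the pin path is hypothetical and the call only succeeds if something is actually pinned there:

#include <linux/bpf.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        union bpf_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.pathname = (uint64_t)(unsigned long)"/sys/fs/bpf/my_prog";

        fd = syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
        if (fd < 0)
                perror("BPF_OBJ_GET");
        else
                printf("pinned object fd: %d\n", fd);
        return 0;
}
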
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 5ee2e41893d9..1712d319c2d8 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)
591 591
592 write_lock_bh(&sock->sk_callback_lock); 592 write_lock_bh(&sock->sk_callback_lock);
593 psock = smap_psock_sk(sock); 593 psock = smap_psock_sk(sock);
594 smap_list_remove(psock, &stab->sock_map[i]); 594 /* This check handles a racing sock event that can get the
595 smap_release_sock(psock, sock); 595 * sk_callback_lock before this case but after the xchg happens,
596 * causing the refcnt to hit zero and the sock user data (psock)
597 * to be null and queued for garbage collection.
598 */
599 if (likely(psock)) {
600 smap_list_remove(psock, &stab->sock_map[i]);
601 smap_release_sock(psock, sock);
602 }
596 write_unlock_bh(&sock->sk_callback_lock); 603 write_unlock_bh(&sock->sk_callback_lock);
597 } 604 }
598 rcu_read_unlock(); 605 rcu_read_unlock();
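
The sock_map_free() change is the re-validate-under-lock pattern: the teardown path can lose the race for sk_callback_lock to an event that already detached and freed the psock, so the pointer must be checked again once the lock is held. A compressed userspace model, with sock_stub and map_free_slot as illustrative stand-ins:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct sock_stub {
        pthread_mutex_t lock;
        void *user_data;        /* psock; may be cleared by a racing path */
};

static void map_free_slot(struct sock_stub *s)
{
        pthread_mutex_lock(&s->lock);
        if (s->user_data) {     /* the added likely(psock) re-check */
                free(s->user_data);
                s->user_data = NULL;
        }
        pthread_mutex_unlock(&s->lock);
}

int main(void)
{
        struct sock_stub s = { PTHREAD_MUTEX_INITIALIZER, malloc(16) };

        map_free_slot(&s);
        map_free_slot(&s);      /* a second (racing) call is now harmless */
        puts("ok");
        return 0;
}
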
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2c4cfeaa8d5e..5cb783fc8224 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1057,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
1057} 1057}
1058EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 1058EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
1059 1059
1060static bool bpf_prog_get_ok(struct bpf_prog *prog, 1060bool bpf_prog_get_ok(struct bpf_prog *prog,
1061 enum bpf_prog_type *attach_type, bool attach_drv) 1061 enum bpf_prog_type *attach_type, bool attach_drv)
1062{ 1062{
1063 /* not an attachment, just a refcount inc, always allow */ 1063 /* not an attachment, just a refcount inc, always allow */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d4593571c404..b414d6b2d470 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1059 break; 1059 break;
1060 case PTR_TO_STACK: 1060 case PTR_TO_STACK:
1061 pointer_desc = "stack "; 1061 pointer_desc = "stack ";
1062 /* The stack spill tracking logic in check_stack_write()
1063 * and check_stack_read() relies on stack accesses being
1064 * aligned.
1065 */
1066 strict = true;
1062 break; 1067 break;
1063 default: 1068 default:
1064 break; 1069 break;
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1067 strict); 1072 strict);
1068} 1073}
1069 1074
1075/* truncate register to smaller size (in bytes)
1076 * must be called with size < BPF_REG_SIZE
1077 */
1078static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1079{
1080 u64 mask;
1081
1082 /* clear high bits in bit representation */
1083 reg->var_off = tnum_cast(reg->var_off, size);
1084
1085 /* fix arithmetic bounds */
1086 mask = ((u64)1 << (size * 8)) - 1;
1087 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1088 reg->umin_value &= mask;
1089 reg->umax_value &= mask;
1090 } else {
1091 reg->umin_value = 0;
1092 reg->umax_value = mask;
1093 }
1094 reg->smin_value = reg->umin_value;
1095 reg->smax_value = reg->umax_value;
1096}
1097
1070/* check whether memory at (regno + off) is accessible for t = (read | write) 1098/* check whether memory at (regno + off) is accessible for t = (read | write)
1071 * if t==write, value_regno is a register which value is stored into memory 1099 * if t==write, value_regno is a register which value is stored into memory
1072 * if t==read, value_regno is a register which will receive the value from memory 1100 * if t==read, value_regno is a register which will receive the value from memory
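
coerce_reg_to_size() is interval arithmetic for truncation: chopping a value to N bytes preserves [umin, umax] only when both bounds agree in the discarded high bits; otherwise the truncated interval wraps and the only sound result is the full [0, mask] range. A standalone model of that rule, with struct bounds standing in for the register state:

#include <stdint.h>
#include <stdio.h>

struct bounds { uint64_t umin, umax; };

static void coerce(struct bounds *b, int size)
{
        uint64_t mask = ((uint64_t)1 << (size * 8)) - 1;

        if ((b->umin & ~mask) == (b->umax & ~mask)) {
                b->umin &= mask;        /* high bits equal: range survives */
                b->umax &= mask;
        } else {
                b->umin = 0;            /* range wraps: give up precision */
                b->umax = mask;
        }
}

int main(void)
{
        struct bounds a = { 0x1ff00, 0x1ff10 }; /* same high bits: precise */
        struct bounds c = { 0xff, 0x100 };      /* crosses 2^8: collapses */

        coerce(&a, 2);
        coerce(&c, 1);
        printf("a: [%#llx, %#llx]\n", (unsigned long long)a.umin,
               (unsigned long long)a.umax);     /* [0xff00, 0xff10] */
        printf("c: [%#llx, %#llx]\n", (unsigned long long)c.umin,
               (unsigned long long)c.umax);     /* [0, 0xff] */
        return 0;
}
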
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1200 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 1228 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1201 regs[value_regno].type == SCALAR_VALUE) { 1229 regs[value_regno].type == SCALAR_VALUE) {
1202 /* b/h/w load zero-extends, mark upper bits as known 0 */ 1230 /* b/h/w load zero-extends, mark upper bits as known 0 */
1203 regs[value_regno].var_off = 1231 coerce_reg_to_size(&regs[value_regno], size);
1204 tnum_cast(regs[value_regno].var_off, size);
1205 __update_reg_bounds(&regs[value_regno]);
1206 } 1232 }
1207 return err; 1233 return err;
1208} 1234}
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1282 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); 1308 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
1283 verbose(env, "invalid variable stack read R%d var_off=%s\n", 1309 verbose(env, "invalid variable stack read R%d var_off=%s\n",
1284 regno, tn_buf); 1310 regno, tn_buf);
1311 return -EACCES;
1285 } 1312 }
1286 off = regs[regno].off + regs[regno].var_off.value; 1313 off = regs[regno].off + regs[regno].var_off.value;
1287 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || 1314 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1674 return -EINVAL; 1701 return -EINVAL;
1675 } 1702 }
1676 1703
1704 /* With LD_ABS/IND some JITs save/restore skb from r1. */
1677 changes_data = bpf_helper_changes_pkt_data(fn->func); 1705 changes_data = bpf_helper_changes_pkt_data(fn->func);
1706 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
1707 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
1708 func_id_name(func_id), func_id);
1709 return -EINVAL;
1710 }
1678 1711
1679 memset(&meta, 0, sizeof(meta)); 1712 memset(&meta, 0, sizeof(meta));
1680 meta.pkt_access = fn->pkt_access; 1713 meta.pkt_access = fn->pkt_access;
@@ -1696,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1696 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); 1729 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
1697 if (err) 1730 if (err)
1698 return err; 1731 return err;
1732 if (func_id == BPF_FUNC_tail_call) {
1733 if (meta.map_ptr == NULL) {
1734 verbose(env, "verifier bug\n");
1735 return -EINVAL;
1736 }
1737 env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
1738 }
1699 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); 1739 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
1700 if (err) 1740 if (err)
1701 return err; 1741 return err;
@@ -1766,14 +1806,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1766 return 0; 1806 return 0;
1767} 1807}
1768 1808
1769static void coerce_reg_to_32(struct bpf_reg_state *reg)
1770{
1771 /* clear high 32 bits */
1772 reg->var_off = tnum_cast(reg->var_off, 4);
1773 /* Update bounds */
1774 __update_reg_bounds(reg);
1775}
1776
1777static bool signed_add_overflows(s64 a, s64 b) 1809static bool signed_add_overflows(s64 a, s64 b)
1778{ 1810{
1779 /* Do the add in u64, where overflow is well-defined */ 1811 /* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1826,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
1794 return res > a; 1826 return res > a;
1795} 1827}
1796 1828
1829static bool check_reg_sane_offset(struct bpf_verifier_env *env,
1830 const struct bpf_reg_state *reg,
1831 enum bpf_reg_type type)
1832{
1833 bool known = tnum_is_const(reg->var_off);
1834 s64 val = reg->var_off.value;
1835 s64 smin = reg->smin_value;
1836
1837 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
1838 verbose(env, "math between %s pointer and %lld is not allowed\n",
1839 reg_type_str[type], val);
1840 return false;
1841 }
1842
1843 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
1844 verbose(env, "%s pointer offset %d is not allowed\n",
1845 reg_type_str[type], reg->off);
1846 return false;
1847 }
1848
1849 if (smin == S64_MIN) {
1850 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
1851 reg_type_str[type]);
1852 return false;
1853 }
1854
1855 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
1856 verbose(env, "value %lld makes %s pointer be out of bounds\n",
1857 smin, reg_type_str[type]);
1858 return false;
1859 }
1860
1861 return true;
1862}
1863
1797/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 1864/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
1798 * Caller should also handle BPF_MOV case separately. 1865 * Caller should also handle BPF_MOV case separately.
1799 * If we return -EACCES, caller may want to try again treating pointer as a 1866 * If we return -EACCES, caller may want to try again treating pointer as a
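
check_reg_sane_offset() rejects pointer arithmetic whose components could already be astronomically large, so the later min/max tracking never has to reason about overflow. A sketch of the checks, with MAX_VAR_OFF as an illustrative limit standing in for the kernel's BPF_MAX_VAR_OFF:

#include <stdint.h>
#include <stdio.h>

#define MAX_VAR_OFF (1LL << 29) /* illustrative; the kernel uses BPF_MAX_VAR_OFF */

static int sane_offset(int64_t off, int64_t smin)
{
        if (off >= MAX_VAR_OFF || off <= -MAX_VAR_OFF)
                return 0;       /* fixed offset already too large */
        if (smin == INT64_MIN)
                return 0;       /* unbounded minimum: anything may happen */
        if (smin >= MAX_VAR_OFF || smin <= -MAX_VAR_OFF)
                return 0;       /* variable part would leave bounds */
        return 1;
}

int main(void)
{
        printf("%d\n", sane_offset(4096, 0));           /* 1: fine */
        printf("%d\n", sane_offset(0, INT64_MIN));      /* 0: rejected */
        return 0;
}
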
@@ -1830,29 +1897,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1830 1897
1831 if (BPF_CLASS(insn->code) != BPF_ALU64) { 1898 if (BPF_CLASS(insn->code) != BPF_ALU64) {
1832 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 1899 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
1833 if (!env->allow_ptr_leaks) 1900 verbose(env,
1834 verbose(env, 1901 "R%d 32-bit pointer arithmetic prohibited\n",
1835 "R%d 32-bit pointer arithmetic prohibited\n", 1902 dst);
1836 dst);
1837 return -EACCES; 1903 return -EACCES;
1838 } 1904 }
1839 1905
1840 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { 1906 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
1841 if (!env->allow_ptr_leaks) 1907 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
1842 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", 1908 dst);
1843 dst);
1844 return -EACCES; 1909 return -EACCES;
1845 } 1910 }
1846 if (ptr_reg->type == CONST_PTR_TO_MAP) { 1911 if (ptr_reg->type == CONST_PTR_TO_MAP) {
1847 if (!env->allow_ptr_leaks) 1912 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
1848 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", 1913 dst);
1849 dst);
1850 return -EACCES; 1914 return -EACCES;
1851 } 1915 }
1852 if (ptr_reg->type == PTR_TO_PACKET_END) { 1916 if (ptr_reg->type == PTR_TO_PACKET_END) {
1853 if (!env->allow_ptr_leaks) 1917 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
1854 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", 1918 dst);
1855 dst);
1856 return -EACCES; 1919 return -EACCES;
1857 } 1920 }
1858 1921
@@ -1862,6 +1925,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1862 dst_reg->type = ptr_reg->type; 1925 dst_reg->type = ptr_reg->type;
1863 dst_reg->id = ptr_reg->id; 1926 dst_reg->id = ptr_reg->id;
1864 1927
1928 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
1929 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
1930 return -EINVAL;
1931
1865 switch (opcode) { 1932 switch (opcode) {
1866 case BPF_ADD: 1933 case BPF_ADD:
1867 /* We can take a fixed offset as long as it doesn't overflow 1934 /* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1982,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1915 case BPF_SUB: 1982 case BPF_SUB:
1916 if (dst_reg == off_reg) { 1983 if (dst_reg == off_reg) {
1917 /* scalar -= pointer. Creates an unknown scalar */ 1984 /* scalar -= pointer. Creates an unknown scalar */
1918 if (!env->allow_ptr_leaks) 1985 verbose(env, "R%d tried to subtract pointer from scalar\n",
1919 verbose(env, "R%d tried to subtract pointer from scalar\n", 1986 dst);
1920 dst);
1921 return -EACCES; 1987 return -EACCES;
1922 } 1988 }
1923 /* We don't allow subtraction from FP, because (according to 1989 /* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1991,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1925 * be able to deal with it. 1991 * be able to deal with it.
1926 */ 1992 */
1927 if (ptr_reg->type == PTR_TO_STACK) { 1993 if (ptr_reg->type == PTR_TO_STACK) {
1928 if (!env->allow_ptr_leaks) 1994 verbose(env, "R%d subtraction from stack pointer prohibited\n",
1929 verbose(env, "R%d subtraction from stack pointer prohibited\n", 1995 dst);
1930 dst);
1931 return -EACCES; 1996 return -EACCES;
1932 } 1997 }
1933 if (known && (ptr_reg->off - smin_val == 1998 if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2041,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1976 case BPF_AND: 2041 case BPF_AND:
1977 case BPF_OR: 2042 case BPF_OR:
1978 case BPF_XOR: 2043 case BPF_XOR:
1979 /* bitwise ops on pointers are troublesome, prohibit for now. 2044 /* bitwise ops on pointers are troublesome, prohibit. */
1980 * (However, in principle we could allow some cases, e.g. 2045 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
1981 * ptr &= ~3 which would reduce min_value by 3.) 2046 dst, bpf_alu_string[opcode >> 4]);
1982 */
1983 if (!env->allow_ptr_leaks)
1984 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
1985 dst, bpf_alu_string[opcode >> 4]);
1986 return -EACCES; 2047 return -EACCES;
1987 default: 2048 default:
1988 /* other operators (e.g. MUL,LSH) produce non-pointer results */ 2049 /* other operators (e.g. MUL,LSH) produce non-pointer results */
1989 if (!env->allow_ptr_leaks) 2050 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
1990 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 2051 dst, bpf_alu_string[opcode >> 4]);
1991 dst, bpf_alu_string[opcode >> 4]);
1992 return -EACCES; 2052 return -EACCES;
1993 } 2053 }
1994 2054
2055 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2056 return -EINVAL;
2057
1995 __update_reg_bounds(dst_reg); 2058 __update_reg_bounds(dst_reg);
1996 __reg_deduce_bounds(dst_reg); 2059 __reg_deduce_bounds(dst_reg);
1997 __reg_bound_offset(dst_reg); 2060 __reg_bound_offset(dst_reg);
1998 return 0; 2061 return 0;
1999} 2062}
2000 2063
2064/* WARNING: This function does calculations on 64-bit values, but the actual
2065 * execution may occur on 32-bit values. Therefore, things like bitshifts
2066 * need extra checks in the 32-bit case.
2067 */
2001static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 2068static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2002 struct bpf_insn *insn, 2069 struct bpf_insn *insn,
2003 struct bpf_reg_state *dst_reg, 2070 struct bpf_reg_state *dst_reg,
@@ -2008,12 +2075,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2008 bool src_known, dst_known; 2075 bool src_known, dst_known;
2009 s64 smin_val, smax_val; 2076 s64 smin_val, smax_val;
2010 u64 umin_val, umax_val; 2077 u64 umin_val, umax_val;
2078 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2011 2079
2012 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2013 /* 32-bit ALU ops are (32,32)->64 */
2014 coerce_reg_to_32(dst_reg);
2015 coerce_reg_to_32(&src_reg);
2016 }
2017 smin_val = src_reg.smin_value; 2080 smin_val = src_reg.smin_value;
2018 smax_val = src_reg.smax_value; 2081 smax_val = src_reg.smax_value;
2019 umin_val = src_reg.umin_value; 2082 umin_val = src_reg.umin_value;
@@ -2021,6 +2084,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2021 src_known = tnum_is_const(src_reg.var_off); 2084 src_known = tnum_is_const(src_reg.var_off);
2022 dst_known = tnum_is_const(dst_reg->var_off); 2085 dst_known = tnum_is_const(dst_reg->var_off);
2023 2086
2087 if (!src_known &&
2088 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2089 __mark_reg_unknown(dst_reg);
2090 return 0;
2091 }
2092
2024 switch (opcode) { 2093 switch (opcode) {
2025 case BPF_ADD: 2094 case BPF_ADD:
2026 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 2095 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2218,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2149 __update_reg_bounds(dst_reg); 2218 __update_reg_bounds(dst_reg);
2150 break; 2219 break;
2151 case BPF_LSH: 2220 case BPF_LSH:
2152 if (umax_val > 63) { 2221 if (umax_val >= insn_bitness) {
2153 /* Shifts greater than 63 are undefined. This includes 2222 /* Shifts greater than 31 or 63 are undefined.
2154 * shifts by a negative number. 2223 * This includes shifts by a negative number.
2155 */ 2224 */
2156 mark_reg_unknown(env, regs, insn->dst_reg); 2225 mark_reg_unknown(env, regs, insn->dst_reg);
2157 break; 2226 break;
@@ -2177,27 +2246,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2177 __update_reg_bounds(dst_reg); 2246 __update_reg_bounds(dst_reg);
2178 break; 2247 break;
2179 case BPF_RSH: 2248 case BPF_RSH:
2180 if (umax_val > 63) { 2249 if (umax_val >= insn_bitness) {
2181 /* Shifts greater than 63 are undefined. This includes 2250 /* Shifts greater than 31 or 63 are undefined.
2182 * shifts by a negative number. 2251 * This includes shifts by a negative number.
2183 */ 2252 */
2184 mark_reg_unknown(env, regs, insn->dst_reg); 2253 mark_reg_unknown(env, regs, insn->dst_reg);
2185 break; 2254 break;
2186 } 2255 }
2187 /* BPF_RSH is an unsigned shift, so make the appropriate casts */ 2256 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
2188 if (dst_reg->smin_value < 0) { 2257 * be negative, then either:
2189 if (umin_val) { 2258 * 1) src_reg might be zero, so the sign bit of the result is
2190 /* Sign bit will be cleared */ 2259 * unknown, so we lose our signed bounds
2191 dst_reg->smin_value = 0; 2260 * 2) it's known negative, thus the unsigned bounds capture the
2192 } else { 2261 * signed bounds
2193 /* Lost sign bit information */ 2262 * 3) the signed bounds cross zero, so they tell us nothing
2194 dst_reg->smin_value = S64_MIN; 2263 * about the result
2195 dst_reg->smax_value = S64_MAX; 2264 * If the value in dst_reg is known nonnegative, then again the
2196 } 2265 * unsigned bounds capture the signed bounds.
2197 } else { 2266 * Thus, in all cases it suffices to blow away our signed bounds
2198 dst_reg->smin_value = 2267 * and rely on inferring new ones from the unsigned bounds and
2199 (u64)(dst_reg->smin_value) >> umax_val; 2268 * var_off of the result.
2200 } 2269 */
2270 dst_reg->smin_value = S64_MIN;
2271 dst_reg->smax_value = S64_MAX;
2201 if (src_known) 2272 if (src_known)
2202 dst_reg->var_off = tnum_rshift(dst_reg->var_off, 2273 dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2203 umin_val); 2274 umin_val);
@@ -2213,6 +2284,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2213 break; 2284 break;
2214 } 2285 }
2215 2286
2287 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2288 /* 32-bit ALU ops are (32,32)->32 */
2289 coerce_reg_to_size(dst_reg, 4);
2290 coerce_reg_to_size(&src_reg, 4);
2291 }
2292
2216 __reg_deduce_bounds(dst_reg); 2293 __reg_deduce_bounds(dst_reg);
2217 __reg_bound_offset(dst_reg); 2294 __reg_bound_offset(dst_reg);
2218 return 0; 2295 return 0;
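
The rewritten BPF_RSH case is easiest to see with a concrete value: right-shifting a possibly negative quantity as unsigned either keeps the "negative" bit pattern (shift by zero) or yields a huge positive number, so no useful signed bound survives and the verifier re-derives everything from the unsigned side. For instance:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int64_t v = -8;                         /* smin < 0 */
        uint64_t by0 = (uint64_t)v >> 0;        /* keeps the sign bit pattern */
        uint64_t by1 = (uint64_t)v >> 1;        /* huge positive value */

        printf("%lld >> 0 = %llu\n", (long long)v, (unsigned long long)by0);
        printf("%lld >> 1 = %llu\n", (long long)v, (unsigned long long)by1);
        /* Since src may also be zero, the sign of the result is unknowable
         * in general; the verifier therefore sets [S64_MIN, S64_MAX] and
         * re-derives bounds from umin/umax and var_off. */
        return 0;
}
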
@@ -2227,7 +2304,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2227 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; 2304 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
2228 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 2305 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2229 u8 opcode = BPF_OP(insn->code); 2306 u8 opcode = BPF_OP(insn->code);
2230 int rc;
2231 2307
2232 dst_reg = &regs[insn->dst_reg]; 2308 dst_reg = &regs[insn->dst_reg];
2233 src_reg = NULL; 2309 src_reg = NULL;
@@ -2238,43 +2314,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2238 if (src_reg->type != SCALAR_VALUE) { 2314 if (src_reg->type != SCALAR_VALUE) {
2239 if (dst_reg->type != SCALAR_VALUE) { 2315 if (dst_reg->type != SCALAR_VALUE) {
2240 /* Combining two pointers by any ALU op yields 2316 /* Combining two pointers by any ALU op yields
2241 * an arbitrary scalar. 2317 * an arbitrary scalar. Disallow all math except
2318 * pointer subtraction
2242 */ 2319 */
2243 if (!env->allow_ptr_leaks) { 2320 if (opcode == BPF_SUB) {
2244 verbose(env, "R%d pointer %s pointer prohibited\n", 2321 mark_reg_unknown(env, regs, insn->dst_reg);
2245 insn->dst_reg, 2322 return 0;
2246 bpf_alu_string[opcode >> 4]);
2247 return -EACCES;
2248 } 2323 }
2249 mark_reg_unknown(env, regs, insn->dst_reg); 2324 verbose(env, "R%d pointer %s pointer prohibited\n",
2250 return 0; 2325 insn->dst_reg,
2326 bpf_alu_string[opcode >> 4]);
2327 return -EACCES;
2251 } else { 2328 } else {
2252 /* scalar += pointer 2329 /* scalar += pointer
2253 * This is legal, but we have to reverse our 2330 * This is legal, but we have to reverse our
2254 * src/dest handling in computing the range 2331 * src/dest handling in computing the range
2255 */ 2332 */
2256 rc = adjust_ptr_min_max_vals(env, insn, 2333 return adjust_ptr_min_max_vals(env, insn,
2257 src_reg, dst_reg); 2334 src_reg, dst_reg);
2258 if (rc == -EACCES && env->allow_ptr_leaks) {
2259 /* scalar += unknown scalar */
2260 __mark_reg_unknown(&off_reg);
2261 return adjust_scalar_min_max_vals(
2262 env, insn,
2263 dst_reg, off_reg);
2264 }
2265 return rc;
2266 } 2335 }
2267 } else if (ptr_reg) { 2336 } else if (ptr_reg) {
2268 /* pointer += scalar */ 2337 /* pointer += scalar */
2269 rc = adjust_ptr_min_max_vals(env, insn, 2338 return adjust_ptr_min_max_vals(env, insn,
2270 dst_reg, src_reg); 2339 dst_reg, src_reg);
2271 if (rc == -EACCES && env->allow_ptr_leaks) {
2272 /* unknown scalar += scalar */
2273 __mark_reg_unknown(dst_reg);
2274 return adjust_scalar_min_max_vals(
2275 env, insn, dst_reg, *src_reg);
2276 }
2277 return rc;
2278 } 2340 }
2279 } else { 2341 } else {
2280 /* Pretend the src is a reg with a known value, since we only 2342 /* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2345,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2283 off_reg.type = SCALAR_VALUE; 2345 off_reg.type = SCALAR_VALUE;
2284 __mark_reg_known(&off_reg, insn->imm); 2346 __mark_reg_known(&off_reg, insn->imm);
2285 src_reg = &off_reg; 2347 src_reg = &off_reg;
2286 if (ptr_reg) { /* pointer += K */ 2348 if (ptr_reg) /* pointer += K */
2287 rc = adjust_ptr_min_max_vals(env, insn, 2349 return adjust_ptr_min_max_vals(env, insn,
2288 ptr_reg, src_reg); 2350 ptr_reg, src_reg);
2289 if (rc == -EACCES && env->allow_ptr_leaks) {
2290 /* unknown scalar += K */
2291 __mark_reg_unknown(dst_reg);
2292 return adjust_scalar_min_max_vals(
2293 env, insn, dst_reg, off_reg);
2294 }
2295 return rc;
2296 }
2297 } 2351 }
2298 2352
2299 /* Got here implies adding two SCALAR_VALUEs */ 2353 /* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2444,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2390 return -EACCES; 2444 return -EACCES;
2391 } 2445 }
2392 mark_reg_unknown(env, regs, insn->dst_reg); 2446 mark_reg_unknown(env, regs, insn->dst_reg);
2393 /* high 32 bits are known zero. */ 2447 coerce_reg_to_size(&regs[insn->dst_reg], 4);
2394 regs[insn->dst_reg].var_off = tnum_cast(
2395 regs[insn->dst_reg].var_off, 4);
2396 __update_reg_bounds(&regs[insn->dst_reg]);
2397 } 2448 }
2398 } else { 2449 } else {
2399 /* case: R = imm 2450 /* case: R = imm
2400 * remember the value we stored into this reg 2451 * remember the value we stored into this reg
2401 */ 2452 */
2402 regs[insn->dst_reg].type = SCALAR_VALUE; 2453 regs[insn->dst_reg].type = SCALAR_VALUE;
2403 __mark_reg_known(regs + insn->dst_reg, insn->imm); 2454 if (BPF_CLASS(insn->code) == BPF_ALU64) {
2455 __mark_reg_known(regs + insn->dst_reg,
2456 insn->imm);
2457 } else {
2458 __mark_reg_known(regs + insn->dst_reg,
2459 (u32)insn->imm);
2460 }
2404 } 2461 }
2405 2462
2406 } else if (opcode > BPF_END) { 2463 } else if (opcode > BPF_END) {
@@ -3431,15 +3488,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3431 return range_within(rold, rcur) && 3488 return range_within(rold, rcur) &&
3432 tnum_in(rold->var_off, rcur->var_off); 3489 tnum_in(rold->var_off, rcur->var_off);
3433 } else { 3490 } else {
3434 /* if we knew anything about the old value, we're not 3491 /* We're trying to use a pointer in place of a scalar.
3435 * equal, because we can't know anything about the 3492 * Even if the scalar was unbounded, this could lead to
3436 * scalar value of the pointer in the new value. 3493 * pointer leaks because scalars are allowed to leak
3494 * while pointers are not. We could make this safe in
3495 * special cases if root is calling us, but it's
3496 * probably not worth the hassle.
3437 */ 3497 */
3438 return rold->umin_value == 0 && 3498 return false;
3439 rold->umax_value == U64_MAX &&
3440 rold->smin_value == S64_MIN &&
3441 rold->smax_value == S64_MAX &&
3442 tnum_is_unknown(rold->var_off);
3443 } 3499 }
3444 case PTR_TO_MAP_VALUE: 3500 case PTR_TO_MAP_VALUE:
3445 /* If the new min/max/var_off satisfy the old ones and 3501 /* If the new min/max/var_off satisfy the old ones and
@@ -4407,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
4407 */ 4463 */
4408 insn->imm = 0; 4464 insn->imm = 0;
4409 insn->code = BPF_JMP | BPF_TAIL_CALL; 4465 insn->code = BPF_JMP | BPF_TAIL_CALL;
4466
4467 /* instead of changing every JIT dealing with tail_call
4468 * emit two extra insns:
4469 * if (index >= max_entries) goto out;
4470 * index &= array->index_mask;
4471 * to avoid out-of-bounds cpu speculation
4472 */
4473 map_ptr = env->insn_aux_data[i + delta].map_ptr;
4474 if (map_ptr == BPF_MAP_PTR_POISON) {
4475 verbose(env, "tail_call abusing map_ptr\n");
4476 return -EINVAL;
4477 }
4478 if (!map_ptr->unpriv_array)
4479 continue;
4480 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
4481 map_ptr->max_entries, 2);
4482 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
4483 container_of(map_ptr,
4484 struct bpf_array,
4485 map)->index_mask);
4486 insn_buf[2] = *insn;
4487 cnt = 3;
4488 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
4489 if (!new_prog)
4490 return -ENOMEM;
4491
4492 delta += cnt - 1;
4493 env->prog = prog = new_prog;
4494 insn = new_prog->insnsi + i + delta;
4410 continue; 4495 continue;
4411 } 4496 }
4412 4497
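
The fixup_bpf_calls() hunk gives tail calls the same treatment the array lookups got: a bounds check followed by an AND with the power-of-two mask, so a mispredicted branch still cannot pull a program pointer from outside the array. The emitted two-instruction sequence behaves like this straight-line C (values illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t max_entries = 6;       /* map size as requested */
        uint32_t index_mask = 7;        /* pow2-rounded size minus one */
        uint32_t index = 5;             /* r3 at the tail call site */

        if (index >= max_entries)       /* insn_buf[0]: BPF_JGE, out */
                goto out;
        index &= index_mask;            /* insn_buf[1]: BPF_AND mask */
        printf("tail call into slot %u\n", index);
out:
        return 0;
}
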
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 024085daab1a..a2c05d2476ac 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -123,7 +123,11 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
123 */ 123 */
124 do { 124 do {
125 css_task_iter_start(&from->self, 0, &it); 125 css_task_iter_start(&from->self, 0, &it);
126 task = css_task_iter_next(&it); 126
127 do {
128 task = css_task_iter_next(&it);
129 } while (task && (task->flags & PF_EXITING));
130
127 if (task) 131 if (task)
128 get_task_struct(task); 132 get_task_struct(task);
129 css_task_iter_end(&it); 133 css_task_iter_end(&it);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 0b1ffe147f24..2cf06c274e4c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1397,7 +1397,7 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
1397 cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name, 1397 cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
1398 cft->name); 1398 cft->name);
1399 else 1399 else
1400 strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX); 1400 strlcpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
1401 return buf; 1401 return buf;
1402} 1402}
1403 1403
@@ -1864,9 +1864,9 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
1864 1864
1865 root->flags = opts->flags; 1865 root->flags = opts->flags;
1866 if (opts->release_agent) 1866 if (opts->release_agent)
1867 strcpy(root->release_agent_path, opts->release_agent); 1867 strlcpy(root->release_agent_path, opts->release_agent, PATH_MAX);
1868 if (opts->name) 1868 if (opts->name)
1869 strcpy(root->name, opts->name); 1869 strlcpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
1870 if (opts->cpuset_clone_children) 1870 if (opts->cpuset_clone_children)
1871 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); 1871 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
1872} 1872}
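
The strncpy-to-strlcpy conversions matter because strncpy() leaves the destination unterminated whenever the source fills the buffer, while strlcpy() always writes a NUL. glibc ships no strlcpy, so the demo below carries a local equivalent; demo_strlcpy is a stand-in, not the kernel function:

#include <stdio.h>
#include <string.h>

static size_t demo_strlcpy(char *dst, const char *src, size_t size)
{
        size_t len = strlen(src);

        if (size) {
                size_t n = len < size - 1 ? len : size - 1;

                memcpy(dst, src, n);
                dst[n] = '\0';  /* always terminated, even on truncation */
        }
        return len;
}

int main(void)
{
        char a[4], b[4];

        strncpy(a, "overflow", sizeof(a));      /* a is NOT NUL-terminated */
        demo_strlcpy(b, "overflow", sizeof(b)); /* b == "ove" */
        printf("strlcpy result: %s\n", b);
        /* printf("%s", a) here would read past the end of the buffer */
        (void)a;
        return 0;
}
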
@@ -4125,26 +4125,24 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
4125 4125
4126static void css_task_iter_advance(struct css_task_iter *it) 4126static void css_task_iter_advance(struct css_task_iter *it)
4127{ 4127{
4128 struct list_head *l = it->task_pos; 4128 struct list_head *next;
4129 4129
4130 lockdep_assert_held(&css_set_lock); 4130 lockdep_assert_held(&css_set_lock);
4131 WARN_ON_ONCE(!l);
4132
4133repeat: 4131repeat:
4134 /* 4132 /*
4135 * Advance iterator to find next entry. cset->tasks is consumed 4133 * Advance iterator to find next entry. cset->tasks is consumed
4136 * first and then ->mg_tasks. After ->mg_tasks, we move onto the 4134 * first and then ->mg_tasks. After ->mg_tasks, we move onto the
4137 * next cset. 4135 * next cset.
4138 */ 4136 */
4139 l = l->next; 4137 next = it->task_pos->next;
4140 4138
4141 if (l == it->tasks_head) 4139 if (next == it->tasks_head)
4142 l = it->mg_tasks_head->next; 4140 next = it->mg_tasks_head->next;
4143 4141
4144 if (l == it->mg_tasks_head) 4142 if (next == it->mg_tasks_head)
4145 css_task_iter_advance_css_set(it); 4143 css_task_iter_advance_css_set(it);
4146 else 4144 else
4147 it->task_pos = l; 4145 it->task_pos = next;
4148 4146
4149 /* if PROCS, skip over tasks which aren't group leaders */ 4147 /* if PROCS, skip over tasks which aren't group leaders */
4150 if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos && 4148 if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 41376c3ac93b..53f7dc65f9a3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map =
80 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); 80 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
81 81
82 82
83static void inline cpuhp_lock_acquire(bool bringup) 83static inline void cpuhp_lock_acquire(bool bringup)
84{ 84{
85 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); 85 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
86} 86}
87 87
88static void inline cpuhp_lock_release(bool bringup) 88static inline void cpuhp_lock_release(bool bringup)
89{ 89{
90 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); 90 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
91} 91}
92#else 92#else
93 93
94static void inline cpuhp_lock_acquire(bool bringup) { } 94static inline void cpuhp_lock_acquire(bool bringup) { }
95static void inline cpuhp_lock_release(bool bringup) { } 95static inline void cpuhp_lock_release(bool bringup) { }
96 96
97#endif 97#endif
98 98
@@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = {
1277 * before blk_mq_queue_reinit_notify() from notify_dead(), 1277 * before blk_mq_queue_reinit_notify() from notify_dead(),
1278 * otherwise a RCU stall occurs. 1278 * otherwise a RCU stall occurs.
1279 */ 1279 */
1280 [CPUHP_TIMERS_DEAD] = { 1280 [CPUHP_TIMERS_PREPARE] = {
1281 .name = "timers:dead", 1281 .name = "timers:dead",
1282 .startup.single = NULL, 1282 .startup.single = timers_prepare_cpu,
1283 .teardown.single = timers_dead_cpu, 1283 .teardown.single = timers_dead_cpu,
1284 }, 1284 },
1285 /* Kicks the plugged cpu into life */ 1285 /* Kicks the plugged cpu into life */
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278e..4f63597c824d 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void)
410 VMCOREINFO_SYMBOL(contig_page_data); 410 VMCOREINFO_SYMBOL(contig_page_data);
411#endif 411#endif
412#ifdef CONFIG_SPARSEMEM 412#ifdef CONFIG_SPARSEMEM
413 VMCOREINFO_SYMBOL(mem_section); 413 VMCOREINFO_SYMBOL_ARRAY(mem_section);
414 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); 414 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
415 VMCOREINFO_STRUCT_SIZE(mem_section); 415 VMCOREINFO_STRUCT_SIZE(mem_section);
416 VMCOREINFO_OFFSET(mem_section, section_mem_map); 416 VMCOREINFO_OFFSET(mem_section, section_mem_map);
diff --git a/kernel/exit.c b/kernel/exit.c
index df0c91d5606c..995453d9fb55 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1763,3 +1763,4 @@ __weak void abort(void)
1763 /* if that doesn't kill us, halt */ 1763 /* if that doesn't kill us, halt */
1764 panic("Oops failed to kill thread"); 1764 panic("Oops failed to kill thread");
1765} 1765}
1766EXPORT_SYMBOL(abort);
diff --git a/kernel/fork.c b/kernel/fork.c
index 432eadf6b58c..2295fc69717f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
721 goto out; 721 goto out;
722 } 722 }
723 /* a new mm has just been created */ 723 /* a new mm has just been created */
724 arch_dup_mmap(oldmm, mm); 724 retval = arch_dup_mmap(oldmm, mm);
725 retval = 0;
726out: 725out:
727 up_write(&mm->mmap_sem); 726 up_write(&mm->mmap_sem);
728 flush_tlb_mm(oldmm); 727 flush_tlb_mm(oldmm);
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index 17f05ef8f575..e4d3819a91cc 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -12,6 +12,11 @@
12 12
13static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) 13static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
14{ 14{
15 static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
16
17 if (!__ratelimit(&ratelimit))
18 return;
19
15 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", 20 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
16 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); 21 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
17 printk("->handle_irq(): %p, ", desc->handle_irq); 22 printk("->handle_irq(): %p, ", desc->handle_irq);
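
print_irq_desc() now goes through the standard __ratelimit() throttle: at most 5 messages per 5*HZ window, the rest silently dropped. The same pattern in userspace terms, with a hand-rolled limiter standing in for DEFINE_RATELIMIT_STATE:

#include <stdio.h>
#include <time.h>

struct ratelimit { time_t start; int interval; int burst; int printed; };

static int ratelimit_ok(struct ratelimit *rs)
{
        time_t now = time(NULL);

        if (now - rs->start >= rs->interval) {
                rs->start = now;        /* new window: reset the budget */
                rs->printed = 0;
        }
        return rs->printed++ < rs->burst;
}

int main(void)
{
        struct ratelimit rs = { time(NULL), 5, 5, 0 };  /* 5 msgs per 5 s */
        int i;

        for (i = 0; i < 20; i++)
                if (ratelimit_ok(&rs))
                        printf("irq debug line %d\n", i);
        return 0;
}
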
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 7f608ac39653..acfaaef8672a 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = {
113 BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), 113 BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
114 BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), 114 BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
115 BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), 115 BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
116 BIT_MASK_DESCR(IRQD_CAN_RESERVE),
116 117
117 BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), 118 BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
118 119
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index c26c5bb6b491..508c03dfef25 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
364EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); 364EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
365 365
366/* 366/*
367 * Separate lockdep class for interrupt chip which can nest irq_desc 367 * Separate lockdep classes for interrupt chip which can nest irq_desc
368 * lock. 368 * lock and request mutex.
369 */ 369 */
370static struct lock_class_key irq_nested_lock_class; 370static struct lock_class_key irq_nested_lock_class;
371static struct lock_class_key irq_nested_request_class;
371 372
372/* 373/*
373 * irq_map_generic_chip - Map a generic chip for an irq domain 374 * irq_map_generic_chip - Map a generic chip for an irq domain
@@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
409 set_bit(idx, &gc->installed); 410 set_bit(idx, &gc->installed);
410 411
411 if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) 412 if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK)
412 irq_set_lockdep_class(virq, &irq_nested_lock_class); 413 irq_set_lockdep_class(virq, &irq_nested_lock_class,
414 &irq_nested_request_class);
413 415
414 if (chip->irq_calc_mask) 416 if (chip->irq_calc_mask)
415 chip->irq_calc_mask(data); 417 chip->irq_calc_mask(data);
@@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
479 continue; 481 continue;
480 482
481 if (flags & IRQ_GC_INIT_NESTED_LOCK) 483 if (flags & IRQ_GC_INIT_NESTED_LOCK)
482 irq_set_lockdep_class(i, &irq_nested_lock_class); 484 irq_set_lockdep_class(i, &irq_nested_lock_class,
485 &irq_nested_request_class);
483 486
484 if (!(flags & IRQ_GC_NO_MASK)) { 487 if (!(flags & IRQ_GC_NO_MASK)) {
485 struct irq_data *d = irq_get_irq_data(i); 488 struct irq_data *d = irq_get_irq_data(i);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 07d08ca701ec..ab19371eab9b 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
440#endif /* !CONFIG_GENERIC_PENDING_IRQ */ 440#endif /* !CONFIG_GENERIC_PENDING_IRQ */
441 441
442#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) 442#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
443static inline int irq_domain_activate_irq(struct irq_data *data, bool early) 443static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve)
444{ 444{
445 irqd_set_activated(data); 445 irqd_set_activated(data);
446 return 0; 446 return 0;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4f4f60015e8a..62068ad46930 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
1693 } 1693 }
1694} 1694}
1695 1695
1696static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) 1696static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve)
1697{ 1697{
1698 int ret = 0; 1698 int ret = 0;
1699 1699
@@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
1702 1702
1703 if (irqd->parent_data) 1703 if (irqd->parent_data)
1704 ret = __irq_domain_activate_irq(irqd->parent_data, 1704 ret = __irq_domain_activate_irq(irqd->parent_data,
1705 early); 1705 reserve);
1706 if (!ret && domain->ops->activate) { 1706 if (!ret && domain->ops->activate) {
1707 ret = domain->ops->activate(domain, irqd, early); 1707 ret = domain->ops->activate(domain, irqd, reserve);
1708 /* Rollback in case of error */ 1708 /* Rollback in case of error */
1709 if (ret && irqd->parent_data) 1709 if (ret && irqd->parent_data)
1710 __irq_domain_deactivate_irq(irqd->parent_data); 1710 __irq_domain_deactivate_irq(irqd->parent_data);
@@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
1716/** 1716/**
1717 * irq_domain_activate_irq - Call domain_ops->activate recursively to activate 1717 * irq_domain_activate_irq - Call domain_ops->activate recursively to activate
1718 * interrupt 1718 * interrupt
1719 * @irq_data: outermost irq_data associated with interrupt 1719 * @irq_data: Outermost irq_data associated with interrupt
1720 * @reserve: If set only reserve an interrupt vector instead of assigning one
1720 * 1721 *
1721 * This is the second step to call domain_ops->activate to program interrupt 1722 * This is the second step to call domain_ops->activate to program interrupt
1722 * controllers, so the interrupt could actually get delivered. 1723 * controllers, so the interrupt could actually get delivered.
1723 */ 1724 */
1724int irq_domain_activate_irq(struct irq_data *irq_data, bool early) 1725int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve)
1725{ 1726{
1726 int ret = 0; 1727 int ret = 0;
1727 1728
1728 if (!irqd_is_activated(irq_data)) 1729 if (!irqd_is_activated(irq_data))
1729 ret = __irq_domain_activate_irq(irq_data, early); 1730 ret = __irq_domain_activate_irq(irq_data, reserve);
1730 if (!ret) 1731 if (!ret)
1731 irqd_set_activated(irq_data); 1732 irqd_set_activated(irq_data);
1732 return ret; 1733 return ret;
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index edb987b2c58d..2f3c4f5382cc 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
339 return ret; 339 return ret;
340} 340}
341 341
342/*
343 * Carefully check whether the device can use reservation mode. If
344 * reservation mode is enabled then the early activation will assign a
345 * dummy vector to the device. If the PCI/MSI device does not support
346 * masking of the entry then this can result in spurious interrupts when
347 * the device driver is not absolutely careful. But even then a malfunction
348 * of the hardware could result in a spurious interrupt on the dummy vector
349 * and render the device unusable. If the entry can be masked then the core
350 * logic will prevent the spurious interrupt and reservation mode can be
351 * used. For now reservation mode is restricted to PCI/MSI.
352 */
353static bool msi_check_reservation_mode(struct irq_domain *domain,
354 struct msi_domain_info *info,
355 struct device *dev)
356{
357 struct msi_desc *desc;
358
359 if (domain->bus_token != DOMAIN_BUS_PCI_MSI)
360 return false;
361
362 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
363 return false;
364
365 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
366 return false;
367
368 /*
369 * Checking the first MSI descriptor is sufficient. MSIX supports
370 * masking and MSI does so when the maskbit is set.
371 */
372 desc = first_msi_entry(dev);
373 return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
374}
375
342/** 376/**
343 * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain 377 * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
344 * @domain: The domain to allocate from 378 * @domain: The domain to allocate from
@@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
353{ 387{
354 struct msi_domain_info *info = domain->host_data; 388 struct msi_domain_info *info = domain->host_data;
355 struct msi_domain_ops *ops = info->ops; 389 struct msi_domain_ops *ops = info->ops;
356 msi_alloc_info_t arg; 390 struct irq_data *irq_data;
357 struct msi_desc *desc; 391 struct msi_desc *desc;
392 msi_alloc_info_t arg;
358 int i, ret, virq; 393 int i, ret, virq;
394 bool can_reserve;
359 395
360 ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); 396 ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
361 if (ret) 397 if (ret)
@@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
385 if (ops->msi_finish) 421 if (ops->msi_finish)
386 ops->msi_finish(&arg, 0); 422 ops->msi_finish(&arg, 0);
387 423
424 can_reserve = msi_check_reservation_mode(domain, info, dev);
425
388 for_each_msi_entry(desc, dev) { 426 for_each_msi_entry(desc, dev) {
389 virq = desc->irq; 427 virq = desc->irq;
390 if (desc->nvec_used == 1) 428 if (desc->nvec_used == 1)
@@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
397 * the MSI entries before the PCI layer enables MSI in the 435 * the MSI entries before the PCI layer enables MSI in the
398 * card. Otherwise the card latches a random msi message. 436 * card. Otherwise the card latches a random msi message.
399 */ 437 */
400 if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { 438 if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
401 struct irq_data *irq_data; 439 continue;
402 440
441 irq_data = irq_domain_get_irq_data(domain, desc->irq);
442 if (!can_reserve)
443 irqd_clr_can_reserve(irq_data);
444 ret = irq_domain_activate_irq(irq_data, can_reserve);
445 if (ret)
446 goto cleanup;
447 }
448
449 /*
450 * If these interrupts use reservation mode, clear the activated bit
451 * so request_irq() will assign the final vector.
452 */
453 if (can_reserve) {
454 for_each_msi_entry(desc, dev) {
403 irq_data = irq_domain_get_irq_data(domain, desc->irq); 455 irq_data = irq_domain_get_irq_data(domain, desc->irq);
404 ret = irq_domain_activate_irq(irq_data, true); 456 irqd_clr_activated(irq_data);
405 if (ret)
406 goto cleanup;
407 if (info->flags & MSI_FLAG_MUST_REACTIVATE)
408 irqd_clr_activated(irq_data);
409 } 457 }
410 } 458 }
411 return 0; 459 return 0;
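
msi_check_reservation_mode() reduces to a short decision table: reservation mode is attempted only for PCI/MSI domains that must reactivate, only when mask-ignoring is off, and only when the entry itself is maskable (MSI-X always is; plain MSI only with the maskbit set). The same table in plain C, with msi_stub as a stand-in for the descriptor fields:

#include <stdbool.h>
#include <stdio.h>

struct msi_stub {
        bool is_pci_msi, must_reactivate, ignore_mask, is_msix, maskbit;
};

static bool can_use_reservation(const struct msi_stub *m)
{
        if (!m->is_pci_msi)
                return false;
        if (!m->must_reactivate)
                return false;
        if (m->ignore_mask)
                return false;
        /* The entry must be maskable so a spurious interrupt on the
         * dummy vector can be suppressed. */
        return m->is_msix || m->maskbit;
}

int main(void)
{
        struct msi_stub msix       = { true, true, false, true,  false };
        struct msi_stub unmaskable = { true, true, false, false, false };

        printf("msix: %d, unmaskable msi: %d\n",
               can_use_reservation(&msix), can_use_reservation(&unmaskable));
        return 0;
}
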
diff --git a/kernel/pid.c b/kernel/pid.c
index b13b624e2c49..1e8bb6550ec4 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -193,10 +193,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
193 } 193 }
194 194
195 if (unlikely(is_child_reaper(pid))) { 195 if (unlikely(is_child_reaper(pid))) {
196 if (pid_ns_prepare_proc(ns)) { 196 if (pid_ns_prepare_proc(ns))
197 disable_pid_allocation(ns);
198 goto out_free; 197 goto out_free;
199 }
200 } 198 }
201 199
202 get_pid_ns(ns); 200 get_pid_ns(ns);
@@ -226,6 +224,10 @@ out_free:
226 while (++i <= ns->level) 224 while (++i <= ns->level)
227 idr_remove(&ns->idr, (pid->numbers + i)->nr); 225 idr_remove(&ns->idr, (pid->numbers + i)->nr);
228 226
227 /* On failure to allocate the first pid, reset the state */
228 if (ns->pid_allocated == PIDNS_ADDING)
229 idr_set_cursor(&ns->idr, 0);
230
229 spin_unlock_irq(&pidmap_lock); 231 spin_unlock_irq(&pidmap_lock);
230 232
231 kmem_cache_free(ns->pid_cachep, pid); 233 kmem_cache_free(ns->pid_cachep, pid);
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 2ddaec40956f..0926aef10dad 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -34,11 +34,6 @@ void complete(struct completion *x)
34 34
35 spin_lock_irqsave(&x->wait.lock, flags); 35 spin_lock_irqsave(&x->wait.lock, flags);
36 36
37 /*
38 * Perform commit of crossrelease here.
39 */
40 complete_release_commit(x);
41
42 if (x->done != UINT_MAX) 37 if (x->done != UINT_MAX)
43 x->done++; 38 x->done++;
44 __wake_up_locked(&x->wait, TASK_NORMAL, 1); 39 __wake_up_locked(&x->wait, TASK_NORMAL, 1);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2f52ec0f1539..d6717a3331a1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
244#ifdef CONFIG_NO_HZ_COMMON 244#ifdef CONFIG_NO_HZ_COMMON
245static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) 245static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
246{ 246{
247 unsigned long idle_calls = tick_nohz_get_idle_calls(); 247 unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
248 bool ret = idle_calls == sg_cpu->saved_idle_calls; 248 bool ret = idle_calls == sg_cpu->saved_idle_calls;
249 249
250 sg_cpu->saved_idle_calls = idle_calls; 250 sg_cpu->saved_idle_calls = idle_calls;
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index dd7908743dab..9bcbacba82a8 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -89,7 +89,9 @@ static int membarrier_private_expedited(void)
89 rcu_read_unlock(); 89 rcu_read_unlock();
90 } 90 }
91 if (!fallback) { 91 if (!fallback) {
92 preempt_disable();
92 smp_call_function_many(tmpmask, ipi_mb, NULL, 1); 93 smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
94 preempt_enable();
93 free_cpumask_var(tmpmask); 95 free_cpumask_var(tmpmask);
94 } 96 }
95 cpus_read_unlock(); 97 cpus_read_unlock();
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e776fc8cc1df..f6b5f19223d6 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -95,6 +95,7 @@ config NO_HZ_FULL
95 select RCU_NOCB_CPU 95 select RCU_NOCB_CPU
96 select VIRT_CPU_ACCOUNTING_GEN 96 select VIRT_CPU_ACCOUNTING_GEN
97 select IRQ_WORK 97 select IRQ_WORK
98 select CPU_ISOLATION
98 help 99 help
99 Adaptively try to shutdown the tick whenever possible, even when 100 Adaptively try to shutdown the tick whenever possible, even when
100 the CPU is running tasks. Typically this requires running a single 101 the CPU is running tasks. Typically this requires running a single
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 99578f06c8d4..f7cc7abfcf25 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
650 ts->next_tick = 0; 650 ts->next_tick = 0;
651} 651}
652 652
653static inline bool local_timer_softirq_pending(void)
654{
655 return local_softirq_pending() & TIMER_SOFTIRQ;
656}
657
653static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 658static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
654 ktime_t now, int cpu) 659 ktime_t now, int cpu)
655{ 660{
@@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
666 } while (read_seqretry(&jiffies_lock, seq)); 671 } while (read_seqretry(&jiffies_lock, seq));
667 ts->last_jiffies = basejiff; 672 ts->last_jiffies = basejiff;
668 673
669 if (rcu_needs_cpu(basemono, &next_rcu) || 674 /*
670 arch_needs_cpu() || irq_work_needs_cpu()) { 675 * Keep the periodic tick when RCU, architecture or irq_work
676 * requests it.
677 * Aside from that, check whether the local timer softirq is
678 * pending. If so, it's a bad idea to call get_next_timer_interrupt()
679 * because there is an already expired timer, so it will request
680 * immediate expiry, which rearms the hardware timer with a
681 * minimal delta which brings us back to this place
682 * immediately. Lather, rinse and repeat...
683 */
684 if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
685 irq_work_needs_cpu() || local_timer_softirq_pending()) {
671 next_tick = basemono + TICK_NSEC; 686 next_tick = basemono + TICK_NSEC;
672 } else { 687 } else {
673 /* 688 /*
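
One subtlety in local_timer_softirq_pending() above: the softirq pending word is a bitmask indexed by softirq number, and TIMER_SOFTIRQ is the number 1, not a mask, so "pending & TIMER_SOFTIRQ" in fact tests bit 0 (HI_SOFTIRQ). As far as I can tell a later upstream fix switched the helper to BIT(TIMER_SOFTIRQ). The difference in a few lines:

#include <stdio.h>

enum { HI_SOFTIRQ = 0, TIMER_SOFTIRQ = 1 };     /* first two softirq numbers */

int main(void)
{
        unsigned int pending = 1u << TIMER_SOFTIRQ;     /* timer softirq raised */

        /* Number used as a mask: misses the raised timer softirq. */
        printf("& TIMER_SOFTIRQ:      %u\n", pending & TIMER_SOFTIRQ);
        /* Proper mask test: sees it. */
        printf("& BIT(TIMER_SOFTIRQ): %u\n", pending & (1u << TIMER_SOFTIRQ));
        return 0;
}
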
@@ -986,6 +1001,19 @@ ktime_t tick_nohz_get_sleep_length(void)
986} 1001}
987 1002
988/** 1003/**
1004 * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
1005 * for a particular CPU.
1006 *
1007 * Called from the schedutil frequency scaling governor in scheduler context.
1008 */
1009unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
1010{
1011 struct tick_sched *ts = tick_get_tick_sched(cpu);
1012
1013 return ts->idle_calls;
1014}
1015
1016/**
989 * tick_nohz_get_idle_calls - return the current idle calls counter value 1017 * tick_nohz_get_idle_calls - return the current idle calls counter value
990 * 1018 *
991 * Called from the schedutil frequency scaling governor in scheduler context. 1019 * Called from the schedutil frequency scaling governor in scheduler context.
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ffebcf878fba..89a9e1b4264a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
823 struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); 823 struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
824 824
825 /* 825 /*
826 * If the timer is deferrable and nohz is active then we need to use 826 * If the timer is deferrable and NO_HZ_COMMON is set then we need
827 * the deferrable base. 827 * to use the deferrable base.
828 */ 828 */
829 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && 829 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
830 (tflags & TIMER_DEFERRABLE))
831 base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); 830 base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
832 return base; 831 return base;
833} 832}
@@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
837 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 836 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
838 837
839 /* 838 /*
840 * If the timer is deferrable and nohz is active then we need to use 839 * If the timer is deferrable and NO_HZ_COMMON is set then we need
841 * the deferrable base. 840 * to use the deferrable base.
842 */ 841 */
843 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && 842 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
844 (tflags & TIMER_DEFERRABLE))
845 base = this_cpu_ptr(&timer_bases[BASE_DEF]); 843 base = this_cpu_ptr(&timer_bases[BASE_DEF]);
846 return base; 844 return base;
847} 845}
@@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
1009 if (!ret && (options & MOD_TIMER_PENDING_ONLY)) 1007 if (!ret && (options & MOD_TIMER_PENDING_ONLY))
1010 goto out_unlock; 1008 goto out_unlock;
1011 1009
1012 debug_activate(timer, expires);
1013
1014 new_base = get_target_base(base, timer->flags); 1010 new_base = get_target_base(base, timer->flags);
1015 1011
1016 if (base != new_base) { 1012 if (base != new_base) {
@@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
1034 } 1030 }
1035 } 1031 }
1036 1032
1033 debug_activate(timer, expires);
1034
1037 timer->expires = expires; 1035 timer->expires = expires;
1038 /* 1036 /*
1039 * If 'idx' was calculated above and the base time did not advance 1037 * If 'idx' was calculated above and the base time did not advance
@@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
1684 base->must_forward_clk = false; 1682 base->must_forward_clk = false;
1685 1683
1686 __run_timers(base); 1684 __run_timers(base);
1687 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) 1685 if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
1688 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); 1686 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
1689} 1687}
1690 1688
@@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h
1855 } 1853 }
1856} 1854}
1857 1855
1856int timers_prepare_cpu(unsigned int cpu)
1857{
1858 struct timer_base *base;
1859 int b;
1860
1861 for (b = 0; b < NR_BASES; b++) {
1862 base = per_cpu_ptr(&timer_bases[b], cpu);
1863 base->clk = jiffies;
1864 base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
1865 base->is_idle = false;
1866 base->must_forward_clk = true;
1867 }
1868 return 0;
1869}
1870
1858int timers_dead_cpu(unsigned int cpu) 1871int timers_dead_cpu(unsigned int cpu)
1859{ 1872{
1860 struct timer_base *old_base; 1873 struct timer_base *old_base;
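timers_prepare_cpu() above resets every per-CPU timer base when a CPU comes back up, so a stale clk value from before the hotplug cannot skew expiry decisions. A sketch of the same reset, with an illustrative struct and constants rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

#define NR_BASES             2
#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1)

struct timer_base {
        unsigned long clk;
        unsigned long next_expiry;
        bool is_idle;
        bool must_forward_clk;
};

static struct timer_base timer_bases[4][NR_BASES];  /* [cpu][base] */
static unsigned long jiffies = 100000;              /* stand-in clock */

static int timers_prepare_cpu(unsigned int cpu)
{
        for (int b = 0; b < NR_BASES; b++) {
                struct timer_base *base = &timer_bases[cpu][b];

                base->clk = jiffies;    /* forward the stale clock */
                base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
                base->is_idle = false;
                base->must_forward_clk = true;
        }
        return 0;
}

int main(void)
{
        timers_prepare_cpu(1);
        printf("cpu1 base0 clk=%lu\n", timer_bases[1][0].clk);
        return 0;
}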
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c87766c1c204..9ab18995ff1e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
280/* Missed count stored at end */ 280/* Missed count stored at end */
281#define RB_MISSED_STORED (1 << 30) 281#define RB_MISSED_STORED (1 << 30)
282 282
283#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED)
284
283struct buffer_data_page { 285struct buffer_data_page {
284 u64 time_stamp; /* page time stamp */ 286 u64 time_stamp; /* page time stamp */
285 local_t commit; /* write committed index */ 287 local_t commit; /* write committed index */
@@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
331 */ 333 */
332size_t ring_buffer_page_len(void *page) 334size_t ring_buffer_page_len(void *page)
333{ 335{
334 return local_read(&((struct buffer_data_page *)page)->commit) 336 struct buffer_data_page *bpage = page;
337
338 return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
335 + BUF_PAGE_HDR_SIZE; 339 + BUF_PAGE_HDR_SIZE;
336} 340}
337 341
@@ -4400,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4400{ 4404{
4401 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 4405 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4402 struct buffer_data_page *bpage = data; 4406 struct buffer_data_page *bpage = data;
4407 struct page *page = virt_to_page(bpage);
4403 unsigned long flags; 4408 unsigned long flags;
4404 4409
4410 /* If the page is still in use someplace else, we can't reuse it */
4411 if (page_ref_count(page) > 1)
4412 goto out;
4413
4405 local_irq_save(flags); 4414 local_irq_save(flags);
4406 arch_spin_lock(&cpu_buffer->lock); 4415 arch_spin_lock(&cpu_buffer->lock);
4407 4416
@@ -4413,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4413 arch_spin_unlock(&cpu_buffer->lock); 4422 arch_spin_unlock(&cpu_buffer->lock);
4414 local_irq_restore(flags); 4423 local_irq_restore(flags);
4415 4424
4425 out:
4416 free_page((unsigned long)bpage); 4426 free_page((unsigned long)bpage);
4417} 4427}
4418EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); 4428EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
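The ring_buffer_free_read_page() hunk refuses to recycle a page whose page_ref_count() is still above one, e.g. because a splice reader still maps it. A userspace analogue with a plain counter standing in for the page refcount (names hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct read_page {
        int refcount;           /* stand-in for page_ref_count() */
        void *data;
};

static void put_read_page(struct read_page *p)
{
        /* If the page is still in use someplace else, just drop our
         * reference; only the last holder really frees it. */
        if (--p->refcount > 0)
                return;
        free(p->data);
        free(p);
}

int main(void)
{
        struct read_page *p = malloc(sizeof(*p));

        p->refcount = 2;        /* a splice reader still maps the page */
        p->data = malloc(4096);
        put_read_page(p);       /* skipped: someone else holds a ref */
        put_read_page(p);       /* last reference: actually freed */
        printf("done\n");
        return 0;
}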
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 59518b8126d0..2a8d8a294345 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6769,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6769 .spd_release = buffer_spd_release, 6769 .spd_release = buffer_spd_release,
6770 }; 6770 };
6771 struct buffer_ref *ref; 6771 struct buffer_ref *ref;
6772 int entries, size, i; 6772 int entries, i;
6773 ssize_t ret = 0; 6773 ssize_t ret = 0;
6774 6774
6775#ifdef CONFIG_TRACER_MAX_TRACE 6775#ifdef CONFIG_TRACER_MAX_TRACE
@@ -6823,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6823 break; 6823 break;
6824 } 6824 }
6825 6825
6826 /*
6827 * zero out any left over data, this is going to
6828 * user land.
6829 */
6830 size = ring_buffer_page_len(ref->page);
6831 if (size < PAGE_SIZE)
6832 memset(ref->page + size, 0, PAGE_SIZE - size);
6833
6834 page = virt_to_page(ref->page); 6826 page = virt_to_page(ref->page);
6835 6827
6836 spd.pages[i] = page; 6828 spd.pages[i] = page;
@@ -7588,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
7588 buf->data = alloc_percpu(struct trace_array_cpu); 7580 buf->data = alloc_percpu(struct trace_array_cpu);
7589 if (!buf->data) { 7581 if (!buf->data) {
7590 ring_buffer_free(buf->buffer); 7582 ring_buffer_free(buf->buffer);
7583 buf->buffer = NULL;
7591 return -ENOMEM; 7584 return -ENOMEM;
7592 } 7585 }
7593 7586
@@ -7611,7 +7604,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
7611 allocate_snapshot ? size : 1); 7604 allocate_snapshot ? size : 1);
7612 if (WARN_ON(ret)) { 7605 if (WARN_ON(ret)) {
7613 ring_buffer_free(tr->trace_buffer.buffer); 7606 ring_buffer_free(tr->trace_buffer.buffer);
7607 tr->trace_buffer.buffer = NULL;
7614 free_percpu(tr->trace_buffer.data); 7608 free_percpu(tr->trace_buffer.data);
7609 tr->trace_buffer.data = NULL;
7615 return -ENOMEM; 7610 return -ENOMEM;
7616 } 7611 }
7617 tr->allocated_snapshot = allocate_snapshot; 7612 tr->allocated_snapshot = allocate_snapshot;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index c3e84edc47c9..2615074d3de5 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -346,7 +346,8 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
346static void zap_modalias_env(struct kobj_uevent_env *env) 346static void zap_modalias_env(struct kobj_uevent_env *env)
347{ 347{
348 static const char modalias_prefix[] = "MODALIAS="; 348 static const char modalias_prefix[] = "MODALIAS=";
349 int i; 349 size_t len;
350 int i, j;
350 351
351 for (i = 0; i < env->envp_idx;) { 352 for (i = 0; i < env->envp_idx;) {
352 if (strncmp(env->envp[i], modalias_prefix, 353 if (strncmp(env->envp[i], modalias_prefix,
@@ -355,11 +356,18 @@ static void zap_modalias_env(struct kobj_uevent_env *env)
355 continue; 356 continue;
356 } 357 }
357 358
358 if (i != env->envp_idx - 1) 359 len = strlen(env->envp[i]) + 1;
359 memmove(&env->envp[i], &env->envp[i + 1], 360
360 sizeof(env->envp[i]) * env->envp_idx - 1); 361 if (i != env->envp_idx - 1) {
362 memmove(env->envp[i], env->envp[i + 1],
363 env->buflen - len);
364
365 for (j = i; j < env->envp_idx - 1; j++)
366 env->envp[j] = env->envp[j + 1] - len;
367 }
361 368
362 env->envp_idx--; 369 env->envp_idx--;
370 env->buflen -= len;
363 } 371 }
364} 372}
365 373
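The zap_modalias_env() fix moves the tail of the packed env buffer down by the removed entry's full string length and rebases every later envp pointer by that same length; the old code moved the wrong byte count. A self-contained sketch of the corrected compaction, with a structure simplified from the kernel's kobj_uevent_env:

#include <stdio.h>
#include <string.h>

#define MAX_ENV 8

struct env {
        char buf[256];          /* packed "KEY=val\0KEY=val\0..." */
        size_t buflen;
        char *envp[MAX_ENV];    /* pointers into buf */
        int envp_idx;
};

static void zap_entry(struct env *env, int i)
{
        size_t len = strlen(env->envp[i]) + 1;

        if (i != env->envp_idx - 1) {
                /* slide the remaining strings down over the hole ... */
                memmove(env->envp[i], env->envp[i] + len,
                        env->buflen - (env->envp[i] - env->buf) - len);
                /* ... and rebase every later pointer by len */
                for (int j = i; j < env->envp_idx - 1; j++)
                        env->envp[j] = env->envp[j + 1] - len;
        }
        env->envp_idx--;
        env->buflen -= len;
}

int main(void)
{
        struct env e = { .buflen = 0 };
        const char *items[] = { "A=1", "MODALIAS=x", "B=2" };

        for (int i = 0; i < 3; i++) {
                e.envp[e.envp_idx++] = e.buf + e.buflen;
                strcpy(e.buf + e.buflen, items[i]);
                e.buflen += strlen(items[i]) + 1;
        }
        zap_entry(&e, 1);
        for (int i = 0; i < e.envp_idx; i++)
                printf("%s\n", e.envp[i]);      /* A=1 then B=2 */
        return 0;
}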
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index 57fd45ab7af1..08c60d10747f 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -671,7 +671,23 @@ do { \
671 ************** MIPS/64 ************** 671 ************** MIPS/64 **************
672 ***************************************/ 672 ***************************************/
673#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 673#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
674#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) 674#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
675/*
676 * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
677 * code below, so we special-case MIPS64r6 until the compiler can do better.
678 */
679#define umul_ppmm(w1, w0, u, v) \
680do { \
681 __asm__ ("dmulu %0,%1,%2" \
682 : "=d" ((UDItype)(w0)) \
683 : "d" ((UDItype)(u)), \
684 "d" ((UDItype)(v))); \
685 __asm__ ("dmuhu %0,%1,%2" \
686 : "=d" ((UDItype)(w1)) \
687 : "d" ((UDItype)(u)), \
688 "d" ((UDItype)(v))); \
689} while (0)
690#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
675#define umul_ppmm(w1, w0, u, v) \ 691#define umul_ppmm(w1, w0, u, v) \
676do { \ 692do { \
677 typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ 693 typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
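The new MIPS64r6 block computes a 64x64->128 multiply as a low word (dmulu) and a high word (dmuhu). On compilers that provide a 128-bit integer type (a GCC/Clang extension, not ISO C) the same split can be written portably; a sketch:

#include <stdint.h>
#include <stdio.h>

static void umul_ppmm(uint64_t *w1, uint64_t *w0, uint64_t u, uint64_t v)
{
        unsigned __int128 p = (unsigned __int128)u * v;

        *w0 = (uint64_t)p;          /* low 64 bits, what dmulu yields */
        *w1 = (uint64_t)(p >> 64);  /* high 64 bits, what dmuhu yields */
}

int main(void)
{
        uint64_t hi, lo;

        umul_ppmm(&hi, &lo, 0xffffffffffffffffULL, 2);
        /* 0xffff...f * 2 = 0x1_ffff_ffff_ffff_fffe */
        printf("hi=%llu lo=%llu\n",
               (unsigned long long)hi, (unsigned long long)lo);
        return 0;
}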
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index aa8812ae6776..f369889e521d 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -435,6 +435,41 @@ loop:
435 return 0; 435 return 0;
436} 436}
437 437
438static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
439{
440 struct bpf_insn *insn;
441
442 insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
443 if (!insn)
444 return -ENOMEM;
445
446 /* Due to func address being non-const, we need to
447 * assemble this here.
448 */
449 insn[0] = BPF_MOV64_REG(R6, R1);
450 insn[1] = BPF_LD_ABS(BPF_B, 0);
451 insn[2] = BPF_LD_ABS(BPF_H, 0);
452 insn[3] = BPF_LD_ABS(BPF_W, 0);
453 insn[4] = BPF_MOV64_REG(R7, R6);
454 insn[5] = BPF_MOV64_IMM(R6, 0);
455 insn[6] = BPF_MOV64_REG(R1, R7);
456 insn[7] = BPF_MOV64_IMM(R2, 1);
457 insn[8] = BPF_MOV64_IMM(R3, 2);
458 insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
459 bpf_skb_vlan_push_proto.func - __bpf_call_base);
460 insn[10] = BPF_MOV64_REG(R6, R7);
461 insn[11] = BPF_LD_ABS(BPF_B, 0);
462 insn[12] = BPF_LD_ABS(BPF_H, 0);
463 insn[13] = BPF_LD_ABS(BPF_W, 0);
464 insn[14] = BPF_MOV64_IMM(R0, 42);
465 insn[15] = BPF_EXIT_INSN();
466
467 self->u.ptr.insns = insn;
468 self->u.ptr.len = 16;
469
470 return 0;
471}
472
438static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) 473static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
439{ 474{
440 unsigned int len = BPF_MAXINSNS; 475 unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
6066 {}, 6101 {},
6067 { {0x1, 0x42 } }, 6102 { {0x1, 0x42 } },
6068 }, 6103 },
6104 {
6105 "LD_ABS with helper changing skb data",
6106 { },
6107 INTERNAL,
6108 { 0x34 },
6109 { { ETH_HLEN, 42 } },
6110 .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
6111 },
6069}; 6112};
6070 6113
6071static struct net_device dev; 6114static struct net_device dev;
@@ -6207,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
6207 return NULL; 6250 return NULL;
6208 } 6251 }
6209 } 6252 }
6210 /* We don't expect to fail. */
6211 if (*err) { 6253 if (*err) {
6212 pr_cont("FAIL to attach err=%d len=%d\n", 6254 pr_cont("FAIL to prog_create err=%d len=%d\n",
6213 *err, fprog.len); 6255 *err, fprog.len);
6214 return NULL; 6256 return NULL;
6215 } 6257 }
@@ -6233,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
6233 * checks. 6275 * checks.
6234 */ 6276 */
6235 fp = bpf_prog_select_runtime(fp, err); 6277 fp = bpf_prog_select_runtime(fp, err);
6278 if (*err) {
6279 pr_cont("FAIL to select_runtime err=%d\n", *err);
6280 return NULL;
6281 }
6236 break; 6282 break;
6237 } 6283 }
6238 6284
@@ -6418,8 +6464,8 @@ static __init int test_bpf(void)
6418 pass_cnt++; 6464 pass_cnt++;
6419 continue; 6465 continue;
6420 } 6466 }
6421 6467 err_cnt++;
6422 return err; 6468 continue;
6423 } 6469 }
6424 6470
6425 pr_cont("jited:%u ", fp->jited); 6471 pr_cont("jited:%u ", fp->jited);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 4a720ed4fdaf..0d54bcbc8170 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -33,8 +33,9 @@
33 * @head: head of timerqueue 33 * @head: head of timerqueue
34 * @node: timer node to be added 34 * @node: timer node to be added
35 * 35 *
36 * Adds the timer node to the timerqueue, sorted by the 36 * Adds the timer node to the timerqueue, sorted by the node's expires
37 * node's expires value. 37 * value. Returns true if the newly added timer is the first expiring timer in
38 * the queue.
38 */ 39 */
39bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) 40bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
40{ 41{
@@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
70 * @head: head of timerqueue 71 * @head: head of timerqueue
71 * @node: timer node to be removed 72 * @node: timer node to be removed
72 * 73 *
73 * Removes the timer node from the timerqueue. 74 * Removes the timer node from the timerqueue. Returns true if the queue is
75 * not empty after the remove.
74 */ 76 */
75bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) 77bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
76{ 78{
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 84b2dc76f140..b5f940ce0143 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
882 if (IS_ERR(dev)) 882 if (IS_ERR(dev))
883 return PTR_ERR(dev); 883 return PTR_ERR(dev);
884 884
885 if (bdi_debug_register(bdi, dev_name(dev))) {
886 device_destroy(bdi_class, dev->devt);
887 return -ENOMEM;
888 }
889 cgwb_bdi_register(bdi); 885 cgwb_bdi_register(bdi);
890 bdi->dev = dev; 886 bdi->dev = dev;
891 887
888 bdi_debug_register(bdi, dev_name(dev));
892 set_bit(WB_registered, &bdi->wb.state); 889 set_bit(WB_registered, &bdi->wb.state);
893 890
894 spin_lock_bh(&bdi_lock); 891 spin_lock_bh(&bdi_lock);
diff --git a/mm/debug.c b/mm/debug.c
index d947f3e03b0d..56e2d9125ea5 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -50,7 +50,7 @@ void __dump_page(struct page *page, const char *reason)
50 */ 50 */
51 int mapcount = PageSlab(page) ? 0 : page_mapcount(page); 51 int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
52 52
53 pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", 53 pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
54 page, page_ref_count(page), mapcount, 54 page, page_ref_count(page), mapcount,
55 page->mapping, page_to_pgoff(page)); 55 page->mapping, page_to_pgoff(page));
56 if (PageCompound(page)) 56 if (PageCompound(page))
@@ -69,7 +69,7 @@ void __dump_page(struct page *page, const char *reason)
69 69
70#ifdef CONFIG_MEMCG 70#ifdef CONFIG_MEMCG
71 if (page->mem_cgroup) 71 if (page->mem_cgroup)
72 pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); 72 pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
73#endif 73#endif
74} 74}
75 75
@@ -84,10 +84,10 @@ EXPORT_SYMBOL(dump_page);
84 84
85void dump_vma(const struct vm_area_struct *vma) 85void dump_vma(const struct vm_area_struct *vma)
86{ 86{
87 pr_emerg("vma %p start %p end %p\n" 87 pr_emerg("vma %px start %px end %px\n"
88 "next %p prev %p mm %p\n" 88 "next %px prev %px mm %px\n"
89 "prot %lx anon_vma %p vm_ops %p\n" 89 "prot %lx anon_vma %px vm_ops %px\n"
90 "pgoff %lx file %p private_data %p\n" 90 "pgoff %lx file %px private_data %px\n"
91 "flags: %#lx(%pGv)\n", 91 "flags: %#lx(%pGv)\n",
92 vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next, 92 vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
93 vma->vm_prev, vma->vm_mm, 93 vma->vm_prev, vma->vm_mm,
@@ -100,27 +100,27 @@ EXPORT_SYMBOL(dump_vma);
100 100
101void dump_mm(const struct mm_struct *mm) 101void dump_mm(const struct mm_struct *mm)
102{ 102{
103 pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n" 103 pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n"
104#ifdef CONFIG_MMU 104#ifdef CONFIG_MMU
105 "get_unmapped_area %p\n" 105 "get_unmapped_area %px\n"
106#endif 106#endif
107 "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" 107 "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
108 "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" 108 "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
109 "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" 109 "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
110 "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" 110 "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
111 "start_code %lx end_code %lx start_data %lx end_data %lx\n" 111 "start_code %lx end_code %lx start_data %lx end_data %lx\n"
112 "start_brk %lx brk %lx start_stack %lx\n" 112 "start_brk %lx brk %lx start_stack %lx\n"
113 "arg_start %lx arg_end %lx env_start %lx env_end %lx\n" 113 "arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
114 "binfmt %p flags %lx core_state %p\n" 114 "binfmt %px flags %lx core_state %px\n"
115#ifdef CONFIG_AIO 115#ifdef CONFIG_AIO
116 "ioctx_table %p\n" 116 "ioctx_table %px\n"
117#endif 117#endif
118#ifdef CONFIG_MEMCG 118#ifdef CONFIG_MEMCG
119 "owner %p " 119 "owner %px "
120#endif 120#endif
121 "exe_file %p\n" 121 "exe_file %px\n"
122#ifdef CONFIG_MMU_NOTIFIER 122#ifdef CONFIG_MMU_NOTIFIER
123 "mmu_notifier_mm %p\n" 123 "mmu_notifier_mm %px\n"
124#endif 124#endif
125#ifdef CONFIG_NUMA_BALANCING 125#ifdef CONFIG_NUMA_BALANCING
126 "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" 126 "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index d73c14294f3a..f656ca27f6c2 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -127,7 +127,7 @@
127/* GFP bitmask for kmemleak internal allocations */ 127/* GFP bitmask for kmemleak internal allocations */
128#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \ 128#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
129 __GFP_NORETRY | __GFP_NOMEMALLOC | \ 129 __GFP_NORETRY | __GFP_NOMEMALLOC | \
130 __GFP_NOWARN) 130 __GFP_NOWARN | __GFP_NOFAIL)
131 131
132/* scanning area inside a memory block */ 132/* scanning area inside a memory block */
133struct kmemleak_scan_area { 133struct kmemleak_scan_area {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ec39f730a0bf..58b629bb70de 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -166,7 +166,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
166 next = pmd_addr_end(addr, end); 166 next = pmd_addr_end(addr, end);
167 if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) 167 if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
168 && pmd_none_or_clear_bad(pmd)) 168 && pmd_none_or_clear_bad(pmd))
169 continue; 169 goto next;
170 170
171 /* invoke the mmu notifier if the pmd is populated */ 171 /* invoke the mmu notifier if the pmd is populated */
172 if (!mni_start) { 172 if (!mni_start) {
@@ -188,7 +188,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
188 } 188 }
189 189
190 /* huge pmd was handled */ 190 /* huge pmd was handled */
191 continue; 191 goto next;
192 } 192 }
193 } 193 }
194 /* fall through, the trans huge pmd just split */ 194 /* fall through, the trans huge pmd just split */
@@ -196,6 +196,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
196 this_pages = change_pte_range(vma, pmd, addr, next, newprot, 196 this_pages = change_pte_range(vma, pmd, addr, next, newprot,
197 dirty_accountable, prot_numa); 197 dirty_accountable, prot_numa);
198 pages += this_pages; 198 pages += this_pages;
199next:
200 cond_resched();
199 } while (pmd++, addr = next, addr != end); 201 } while (pmd++, addr = next, addr != end);
200 202
201 if (mni_start) 203 if (mni_start)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7e5e775e97f4..76c9688b6a0a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6260,6 +6260,8 @@ void __paginginit zero_resv_unavail(void)
6260 pgcnt = 0; 6260 pgcnt = 0;
6261 for_each_resv_unavail_range(i, &start, &end) { 6261 for_each_resv_unavail_range(i, &start, &end) {
6262 for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) { 6262 for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
6263 if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
6264 continue;
6263 mm_zero_struct_page(pfn_to_page(pfn)); 6265 mm_zero_struct_page(pfn_to_page(pfn));
6264 pgcnt++; 6266 pgcnt++;
6265 } 6267 }
diff --git a/mm/sparse.c b/mm/sparse.c
index 7a5dacaa06e3..2609aba121e8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -211,7 +211,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
211 if (unlikely(!mem_section)) { 211 if (unlikely(!mem_section)) {
212 unsigned long size, align; 212 unsigned long size, align;
213 213
214 size = sizeof(struct mem_section) * NR_SECTION_ROOTS; 214 size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
215 align = 1 << (INTERNODE_CACHE_SHIFT); 215 align = 1 << (INTERNODE_CACHE_SHIFT);
216 mem_section = memblock_virt_alloc(size, align); 216 mem_section = memblock_virt_alloc(size, align);
217 } 217 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c02c850ea349..47d5ced51f2d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -297,10 +297,13 @@ EXPORT_SYMBOL(register_shrinker);
297 */ 297 */
298void unregister_shrinker(struct shrinker *shrinker) 298void unregister_shrinker(struct shrinker *shrinker)
299{ 299{
300 if (!shrinker->nr_deferred)
301 return;
300 down_write(&shrinker_rwsem); 302 down_write(&shrinker_rwsem);
301 list_del(&shrinker->list); 303 list_del(&shrinker->list);
302 up_write(&shrinker_rwsem); 304 up_write(&shrinker_rwsem);
303 kfree(shrinker->nr_deferred); 305 kfree(shrinker->nr_deferred);
306 shrinker->nr_deferred = NULL;
304} 307}
305EXPORT_SYMBOL(unregister_shrinker); 308EXPORT_SYMBOL(unregister_shrinker);
306 309
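The unregister_shrinker() change uses nr_deferred itself as the "registered" flag: clear it after freeing and return early on a repeat call, which makes a double unregister harmless. The idiom in a userspace sketch:

#include <stdio.h>
#include <stdlib.h>

struct shrinker {
        long *nr_deferred;
};

static void unregister_shrinker(struct shrinker *s)
{
        if (!s->nr_deferred)        /* never registered, or already gone */
                return;
        free(s->nr_deferred);
        s->nr_deferred = NULL;      /* make a repeat call harmless */
}

int main(void)
{
        struct shrinker s = { .nr_deferred = malloc(sizeof(long)) };

        unregister_shrinker(&s);
        unregister_shrinker(&s);    /* safe: no double free */
        printf("ok\n");
        return 0;
}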
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 685049a9048d..683c0651098c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -53,6 +53,7 @@
53#include <linux/mount.h> 53#include <linux/mount.h>
54#include <linux/migrate.h> 54#include <linux/migrate.h>
55#include <linux/pagemap.h> 55#include <linux/pagemap.h>
56#include <linux/fs.h>
56 57
57#define ZSPAGE_MAGIC 0x58 58#define ZSPAGE_MAGIC 0x58
58 59
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8dfdd94e430f..bad01b14a4ad 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
111 vlan_gvrp_uninit_applicant(real_dev); 111 vlan_gvrp_uninit_applicant(real_dev);
112 } 112 }
113 113
114 /* Take it out of our own structures, but be sure to interlock with 114 vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
115 * HW accelerating devices or SW vlan input packet processing if
116 * VLAN is not 0 (leave it there for 802.1p).
117 */
118 if (vlan_id)
119 vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
120 115
121 /* Get rid of the vlan's reference to real_dev */ 116 /* Get rid of the vlan's reference to real_dev */
122 dev_put(real_dev); 117 dev_put(real_dev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 43ba91c440bc..fc6615d59165 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
3363 break; 3363 break;
3364 3364
3365 case L2CAP_CONF_EFS: 3365 case L2CAP_CONF_EFS:
3366 remote_efs = 1; 3366 if (olen == sizeof(efs)) {
3367 if (olen == sizeof(efs)) 3367 remote_efs = 1;
3368 memcpy(&efs, (void *) val, olen); 3368 memcpy(&efs, (void *) val, olen);
3369 }
3369 break; 3370 break;
3370 3371
3371 case L2CAP_CONF_EWS: 3372 case L2CAP_CONF_EWS:
@@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
3584 break; 3585 break;
3585 3586
3586 case L2CAP_CONF_EFS: 3587 case L2CAP_CONF_EFS:
3587 if (olen == sizeof(efs)) 3588 if (olen == sizeof(efs)) {
3588 memcpy(&efs, (void *)val, olen); 3589 memcpy(&efs, (void *)val, olen);
3589 3590
3590 if (chan->local_stype != L2CAP_SERV_NOTRAFIC && 3591 if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
3591 efs.stype != L2CAP_SERV_NOTRAFIC && 3592 efs.stype != L2CAP_SERV_NOTRAFIC &&
3592 efs.stype != chan->local_stype) 3593 efs.stype != chan->local_stype)
3593 return -ECONNREFUSED; 3594 return -ECONNREFUSED;
3594 3595
3595 l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), 3596 l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
3596 (unsigned long) &efs, endptr - ptr); 3597 (unsigned long) &efs, endptr - ptr);
3598 }
3597 break; 3599 break;
3598 3600
3599 case L2CAP_CONF_FCS: 3601 case L2CAP_CONF_FCS:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
1262 struct net_bridge *br = netdev_priv(dev); 1262 struct net_bridge *br = netdev_priv(dev);
1263 int err; 1263 int err;
1264 1264
1265 err = register_netdevice(dev);
1266 if (err)
1267 return err;
1268
1265 if (tb[IFLA_ADDRESS]) { 1269 if (tb[IFLA_ADDRESS]) {
1266 spin_lock_bh(&br->lock); 1270 spin_lock_bh(&br->lock);
1267 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); 1271 br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
1268 spin_unlock_bh(&br->lock); 1272 spin_unlock_bh(&br->lock);
1269 } 1273 }
1270 1274
1271 err = register_netdevice(dev);
1272 if (err)
1273 return err;
1274
1275 err = br_changelink(dev, tb, data, extack); 1275 err = br_changelink(dev, tb, data, extack);
1276 if (err) 1276 if (err)
1277 unregister_netdevice(dev); 1277 br_dev_delete(dev, NULL);
1278
1278 return err; 1279 return err;
1279} 1280}
1280 1281
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 2d38b6e34203..e0adcd123f48 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
334 mutex_lock(&caifdevs->lock); 334 mutex_lock(&caifdevs->lock);
335 list_add_rcu(&caifd->list, &caifdevs->list); 335 list_add_rcu(&caifd->list, &caifdevs->list);
336 336
337 strncpy(caifd->layer.name, dev->name, 337 strlcpy(caifd->layer.name, dev->name,
338 sizeof(caifd->layer.name) - 1); 338 sizeof(caifd->layer.name));
339 caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
340 caifd->layer.transmit = transmit; 339 caifd->layer.transmit = transmit;
341 cfcnfg_add_phy_layer(cfg, 340 cfcnfg_add_phy_layer(cfg,
342 dev, 341 dev,
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 5cd44f001f64..1a082a946045 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
176 dev_add_pack(&caif_usb_type); 176 dev_add_pack(&caif_usb_type);
177 pack_added = true; 177 pack_added = true;
178 178
179 strncpy(layer->name, dev->name, 179 strlcpy(layer->name, dev->name, sizeof(layer->name));
180 sizeof(layer->name) - 1);
181 layer->name[sizeof(layer->name) - 1] = 0;
182 180
183 return 0; 181 return 0;
184} 182}
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 273cb07f57d8..8f00bea093b9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
268 case CAIFPROTO_RFM: 268 case CAIFPROTO_RFM:
269 l->linktype = CFCTRL_SRV_RFM; 269 l->linktype = CFCTRL_SRV_RFM;
270 l->u.datagram.connid = s->sockaddr.u.rfm.connection_id; 270 l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
271 strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume, 271 strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
272 sizeof(l->u.rfm.volume)-1); 272 sizeof(l->u.rfm.volume));
273 l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
274 break; 273 break;
275 case CAIFPROTO_UTIL: 274 case CAIFPROTO_UTIL:
276 l->linktype = CFCTRL_SRV_UTIL; 275 l->linktype = CFCTRL_SRV_UTIL;
277 l->endpoint = 0x00; 276 l->endpoint = 0x00;
278 l->chtype = 0x00; 277 l->chtype = 0x00;
279 strncpy(l->u.utility.name, s->sockaddr.u.util.service, 278 strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
280 sizeof(l->u.utility.name)-1); 279 sizeof(l->u.utility.name));
281 l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
282 caif_assert(sizeof(l->u.utility.name) > 10); 280 caif_assert(sizeof(l->u.utility.name) > 10);
283 l->u.utility.paramlen = s->param.size; 281 l->u.utility.paramlen = s->param.size;
284 if (l->u.utility.paramlen > sizeof(l->u.utility.params)) 282 if (l->u.utility.paramlen > sizeof(l->u.utility.params))
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index f5afda1abc76..655ed7032150 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
258 tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs); 258 tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
259 cfpkt_add_body(pkt, &tmp16, 2); 259 cfpkt_add_body(pkt, &tmp16, 2);
260 memset(utility_name, 0, sizeof(utility_name)); 260 memset(utility_name, 0, sizeof(utility_name));
261 strncpy(utility_name, param->u.utility.name, 261 strlcpy(utility_name, param->u.utility.name,
262 UTILITY_NAME_LENGTH - 1); 262 UTILITY_NAME_LENGTH);
263 cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH); 263 cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
264 tmp8 = param->u.utility.paramlen; 264 tmp8 = param->u.utility.paramlen;
265 cfpkt_add_body(pkt, &tmp8, 1); 265 cfpkt_add_body(pkt, &tmp8, 1);
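The caif hunks above replace the strncpy()-plus-manual-NUL pattern with strlcpy(), which always NUL-terminates the destination. strlcpy() is not in ISO C, so this sketch carries a minimal local version for illustration:

#include <stdio.h>
#include <string.h>

static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
        size_t len = strlen(src);

        if (size) {
                size_t n = len < size - 1 ? len : size - 1;

                memcpy(dst, src, n);
                dst[n] = '\0';      /* unconditional termination */
        }
        return len;                 /* would-be length, for truncation checks */
}

int main(void)
{
        char name[8];

        my_strlcpy(name, "very-long-device-name", sizeof(name));
        printf("%s\n", name);       /* "very-lo", always terminated */
        return 0;
}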
diff --git a/net/core/dev.c b/net/core/dev.c
index f47e96b62308..0e0ba36eeac9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
1146int dev_get_valid_name(struct net *net, struct net_device *dev, 1146int dev_get_valid_name(struct net *net, struct net_device *dev,
1147 const char *name) 1147 const char *name)
1148{ 1148{
1149 return dev_alloc_name_ns(net, dev, name); 1149 BUG_ON(!net);
1150
1151 if (!dev_valid_name(name))
1152 return -EINVAL;
1153
1154 if (strchr(name, '%'))
1155 return dev_alloc_name_ns(net, dev, name);
1156 else if (__dev_get_by_name(net, name))
1157 return -EEXIST;
1158 else if (dev->name != name)
1159 strlcpy(dev->name, name, IFNAMSIZ);
1160
1161 return 0;
1150} 1162}
1151EXPORT_SYMBOL(dev_get_valid_name); 1163EXPORT_SYMBOL(dev_get_valid_name);
1152 1164
@@ -3904,7 +3916,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3904 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, 3916 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
3905 troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) 3917 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
3906 goto do_drop; 3918 goto do_drop;
3907 if (troom > 0 && __skb_linearize(skb)) 3919 if (skb_linearize(skb))
3908 goto do_drop; 3920 goto do_drop;
3909 } 3921 }
3910 3922
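The restored dev_get_valid_name() logic rejects invalid names, routes '%' templates through the allocator, refuses duplicates, and otherwise copies the name in. A sketch of that decision flow with stand-in helpers; none of the helpers below are the kernel API:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#define IFNAMSIZ 16

static bool name_valid(const char *n) { return n[0] && !strchr(n, '/'); }
static bool name_taken(const char *n) { return strcmp(n, "eth0") == 0; }

static int alloc_templated(char *dst, const char *tmpl)
{
        /* stand-in for dev_alloc_name_ns(): expand "%d" to a free index */
        snprintf(dst, IFNAMSIZ, tmpl, 1);
        return 0;
}

static int get_valid_name(char *dst, const char *name)
{
        if (!name_valid(name))
                return -EINVAL;
        if (strchr(name, '%'))
                return alloc_templated(dst, name);
        if (name_taken(name))
                return -EEXIST;
        strncpy(dst, name, IFNAMSIZ - 1);
        dst[IFNAMSIZ - 1] = '\0';
        return 0;
}

int main(void)
{
        char buf[IFNAMSIZ];

        printf("%d\n", get_valid_name(buf, "veth%d")); /* 0, buf="veth1" */
        printf("%d\n", get_valid_name(buf, "eth0"));   /* -EEXIST */
        return 0;
}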
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f8fcf450a36e..8225416911ae 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
770 return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); 770 return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
771} 771}
772 772
773static void
774warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
775{
776 char name[sizeof(current->comm)];
777
778 pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
779 get_task_comm(name, current), details);
780}
781
782/* Query device for its ethtool_cmd settings. 773/* Query device for its ethtool_cmd settings.
783 * 774 *
784 * Backward compatibility note: for compatibility with legacy ethtool, 775 * Backward compatibility note: for compatibility with legacy ethtool,
@@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
805 &link_ksettings); 796 &link_ksettings);
806 if (err < 0) 797 if (err < 0)
807 return err; 798 return err;
808 if (!convert_link_ksettings_to_legacy_settings(&cmd, 799 convert_link_ksettings_to_legacy_settings(&cmd,
809 &link_ksettings)) 800 &link_ksettings);
810 warn_incomplete_ethtool_legacy_settings_conversion(
811 "link modes are only partially reported");
812 801
813 /* send a sensible cmd tag back to user */ 802 /* send a sensible cmd tag back to user */
814 cmd.cmd = ETHTOOL_GSET; 803 cmd.cmd = ETHTOOL_GSET;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..d339ef170df6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1054 */ 1054 */
1055 goto out_err_free; 1055 goto out_err_free;
1056 1056
1057 /* We are guaranteed to never error here with cBPF to eBPF
1058 * transitions, since there's no issue with type compatibility
1059 * checks on program arrays.
1060 */
1061 fp = bpf_prog_select_runtime(fp, &err); 1057 fp = bpf_prog_select_runtime(fp, &err);
1058 if (err)
1059 goto out_err_free;
1062 1060
1063 kfree(old_prog); 1061 kfree(old_prog);
1064 return fp; 1062 return fp;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..60a71be75aea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
267 spin_lock_bh(&net->nsid_lock); 267 spin_lock_bh(&net->nsid_lock);
268 peer = idr_find(&net->netns_ids, id); 268 peer = idr_find(&net->netns_ids, id);
269 if (peer) 269 if (peer)
270 get_net(peer); 270 peer = maybe_get_net(peer);
271 spin_unlock_bh(&net->nsid_lock); 271 spin_unlock_bh(&net->nsid_lock);
272 rcu_read_unlock(); 272 rcu_read_unlock();
273 273
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8..778d7f03404a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev,
1681 return false; 1681 return false;
1682} 1682}
1683 1683
1684static struct net *get_target_net(struct sk_buff *skb, int netnsid) 1684static struct net *get_target_net(struct sock *sk, int netnsid)
1685{ 1685{
1686 struct net *net; 1686 struct net *net;
1687 1687
1688 net = get_net_ns_by_id(sock_net(skb->sk), netnsid); 1688 net = get_net_ns_by_id(sock_net(sk), netnsid);
1689 if (!net) 1689 if (!net)
1690 return ERR_PTR(-EINVAL); 1690 return ERR_PTR(-EINVAL);
1691 1691
1692 /* For now, the caller is required to have CAP_NET_ADMIN in 1692 /* For now, the caller is required to have CAP_NET_ADMIN in
1693 * the user namespace owning the target net ns. 1693 * the user namespace owning the target net ns.
1694 */ 1694 */
1695 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { 1695 if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
1696 put_net(net); 1696 put_net(net);
1697 return ERR_PTR(-EACCES); 1697 return ERR_PTR(-EACCES);
1698 } 1698 }
@@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1733 ifla_policy, NULL) >= 0) { 1733 ifla_policy, NULL) >= 0) {
1734 if (tb[IFLA_IF_NETNSID]) { 1734 if (tb[IFLA_IF_NETNSID]) {
1735 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); 1735 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
1736 tgt_net = get_target_net(skb, netnsid); 1736 tgt_net = get_target_net(skb->sk, netnsid);
1737 if (IS_ERR(tgt_net)) { 1737 if (IS_ERR(tgt_net)) {
1738 tgt_net = net; 1738 tgt_net = net;
1739 netnsid = -1; 1739 netnsid = -1;
@@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2883 2883
2884 if (tb[IFLA_IF_NETNSID]) { 2884 if (tb[IFLA_IF_NETNSID]) {
2885 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); 2885 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
2886 tgt_net = get_target_net(skb, netnsid); 2886 tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
2887 if (IS_ERR(tgt_net)) 2887 if (IS_ERR(tgt_net))
2888 return PTR_ERR(tgt_net); 2888 return PTR_ERR(tgt_net);
2889 } 2889 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a592ca025fc4..08f574081315 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1177 int i, new_frags; 1177 int i, new_frags;
1178 u32 d_off; 1178 u32 d_off;
1179 1179
1180 if (!num_frags)
1181 return 0;
1182
1183 if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) 1180 if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
1184 return -EINVAL; 1181 return -EINVAL;
1185 1182
1183 if (!num_frags)
1184 goto release;
1185
1186 new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; 1186 new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1187 for (i = 0; i < new_frags; i++) { 1187 for (i = 0; i < new_frags; i++) {
1188 page = alloc_page(gfp_mask); 1188 page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); 1238 __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
1239 skb_shinfo(skb)->nr_frags = new_frags; 1239 skb_shinfo(skb)->nr_frags = new_frags;
1240 1240
1241release:
1241 skb_zcopy_clear(skb, false); 1242 skb_zcopy_clear(skb, false);
1242 return 0; 1243 return 0;
1243} 1244}
@@ -3654,8 +3655,6 @@ normal:
3654 3655
3655 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & 3656 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
3656 SKBTX_SHARED_FRAG; 3657 SKBTX_SHARED_FRAG;
3657 if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
3658 goto err;
3659 3658
3660 while (pos < offset + len) { 3659 while (pos < offset + len) {
3661 if (i >= nfrags) { 3660 if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
3681 3680
3682 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) 3681 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
3683 goto err; 3682 goto err;
3683 if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
3684 goto err;
3684 3685
3685 *nskb_frag = *frag; 3686 *nskb_frag = *frag;
3686 __skb_frag_ref(nskb_frag); 3687 __skb_frag_ref(nskb_frag);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 217f4e3b82f6..146b50e30659 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
288 case SKNLGRP_INET6_UDP_DESTROY: 288 case SKNLGRP_INET6_UDP_DESTROY:
289 if (!sock_diag_handlers[AF_INET6]) 289 if (!sock_diag_handlers[AF_INET6])
290 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, 290 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
291 NETLINK_SOCK_DIAG, AF_INET); 291 NETLINK_SOCK_DIAG, AF_INET6);
292 break; 292 break;
293 } 293 }
294 return 0; 294 return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcc..a47ad6cd41c0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
325 .data = &bpf_jit_enable, 325 .data = &bpf_jit_enable,
326 .maxlen = sizeof(int), 326 .maxlen = sizeof(int),
327 .mode = 0644, 327 .mode = 0644,
328#ifndef CONFIG_BPF_JIT_ALWAYS_ON
328 .proc_handler = proc_dointvec 329 .proc_handler = proc_dointvec
330#else
331 .proc_handler = proc_dointvec_minmax,
332 .extra1 = &one,
333 .extra2 = &one,
334#endif
329 }, 335 },
330# ifdef CONFIG_HAVE_EBPF_JIT 336# ifdef CONFIG_HAVE_EBPF_JIT
331 { 337 {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..08259d078b1c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
1298 1298
1299static void ip_fib_net_exit(struct net *net) 1299static void ip_fib_net_exit(struct net *net)
1300{ 1300{
1301 unsigned int i; 1301 int i;
1302 1302
1303 rtnl_lock(); 1303 rtnl_lock();
1304#ifdef CONFIG_IP_MULTIPLE_TABLES 1304#ifdef CONFIG_IP_MULTIPLE_TABLES
1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL); 1305 RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL); 1306 RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
1307#endif 1307#endif
1308 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1308 /* Destroy the tables in reverse order to guarantee that the
1309 * local table, ID 255, is destroyed before the main table, ID
1310 * 254. This is necessary as the local table may contain
1311 * references to data contained in the main table.
1312 */
1313 for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
1309 struct hlist_head *head = &net->ipv4.fib_table_hash[i]; 1314 struct hlist_head *head = &net->ipv4.fib_table_hash[i];
1310 struct hlist_node *tmp; 1315 struct hlist_node *tmp;
1311 struct fib_table *tb; 1316 struct fib_table *tb;
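The fib teardown above now walks the hash from the highest index down so the local table (255) dies before the main table (254) whose data it may reference. The general pattern in miniature: free in reverse creation order when later entries may reference earlier ones.

#include <stdio.h>

#define N 4

int main(void)
{
        const char *tables[N] = { "t0", "t1", "main (254)", "local (255)" };

        /* local may reference main, so walk from the highest index down */
        for (int i = N - 1; i >= 0; i--)
                printf("destroy %s\n", tables[i]);
        return 0;
}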
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe..c586597da20d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
698 698
699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 699 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
700 int type = nla_type(nla); 700 int type = nla_type(nla);
701 u32 val; 701 u32 fi_val, val;
702 702
703 if (!type) 703 if (!type)
704 continue; 704 continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
715 val = nla_get_u32(nla); 715 val = nla_get_u32(nla);
716 } 716 }
717 717
718 if (fi->fib_metrics->metrics[type - 1] != val) 718 fi_val = fi->fib_metrics->metrics[type - 1];
719 if (type == RTAX_FEATURES)
720 fi_val &= ~DST_FEATURE_ECN_CA;
721
722 if (fi_val != val)
719 return false; 723 return false;
720 } 724 }
721 725
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9c1735632c8c..45ffd3d045d2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
1310static void ipgre_tap_setup(struct net_device *dev) 1310static void ipgre_tap_setup(struct net_device *dev)
1311{ 1311{
1312 ether_setup(dev); 1312 ether_setup(dev);
1313 dev->max_mtu = 0;
1313 dev->netdev_ops = &gre_tap_netdev_ops; 1314 dev->netdev_ops = &gre_tap_netdev_ops;
1314 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1315 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1315 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1316 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 125c1eab3eaa..5e570aa9e43b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -520,9 +520,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
520 goto out; 520 goto out;
521 521
522 /* hdrincl should be READ_ONCE(inet->hdrincl) 522 /* hdrincl should be READ_ONCE(inet->hdrincl)
523 * but READ_ONCE() doesn't work with bit fields 523 * but READ_ONCE() doesn't work with bit fields.
524 * Doing this indirectly yields the same result.
524 */ 525 */
525 hdrincl = inet->hdrincl; 526 hdrincl = inet->hdrincl;
527 hdrincl = READ_ONCE(hdrincl);
526 /* 528 /*
527 * Check the flags. 529 * Check the flags.
528 */ 530 */
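READ_ONCE() cannot take the address of a C bitfield, so the raw.c hunk widens inet->hdrincl into a full-width local first and then forces a single load of that local. A userspace sketch using a volatile access; __typeof__ is a GCC/Clang extension:

#include <stdio.h>

#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct inet_flags {
        unsigned int hdrincl : 1;   /* &inet.hdrincl would not compile */
        unsigned int other   : 1;
};

int main(void)
{
        struct inet_flags inet = { .hdrincl = 1 };
        int hdrincl;

        hdrincl = inet.hdrincl;        /* widen the bitfield ... */
        hdrincl = READ_ONCE(hdrincl);  /* ... then pin a single load */
        printf("%d\n", hdrincl);
        return 0;
}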
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fea57ee..bcfc00e88756 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
23 return xfrm4_extract_header(skb); 23 return xfrm4_extract_header(skb);
24} 24}
25 25
26static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
27 struct sk_buff *skb)
28{
29 return dst_input(skb);
30}
31
26static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, 32static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
27 struct sk_buff *skb) 33 struct sk_buff *skb)
28{ 34{
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
33 iph->tos, skb->dev)) 39 iph->tos, skb->dev))
34 goto drop; 40 goto drop;
35 } 41 }
36 return dst_input(skb); 42
43 if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
44 goto drop;
45
46 return 0;
37drop: 47drop:
38 kfree_skb(skb); 48 kfree_skb(skb);
39 return NET_RX_DROP; 49 return NET_RX_DROP;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..c9441ca45399 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; 210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
211 np->mc_loop = 1; 211 np->mc_loop = 1;
212 np->pmtudisc = IPV6_PMTUDISC_WANT; 212 np->pmtudisc = IPV6_PMTUDISC_WANT;
213 np->autoflowlabel = ip6_default_np_autolabel(net);
214 np->repflow = net->ipv6.sysctl.flowlabel_reflect; 213 np->repflow = net->ipv6.sysctl.flowlabel_reflect;
215 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; 214 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
216 215
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 83bd75713535..bc68eb661970 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
925 sr_phdr->segments[0] = **addr_p; 925 sr_phdr->segments[0] = **addr_p;
926 *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left]; 926 *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
927 927
928 if (sr_ihdr->hdrlen > hops * 2) {
929 int tlvs_offset, tlvs_length;
930
931 tlvs_offset = (1 + hops * 2) << 3;
932 tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
933 memcpy((char *)sr_phdr + tlvs_offset,
934 (char *)sr_ihdr + tlvs_offset, tlvs_length);
935 }
936
928#ifdef CONFIG_IPV6_SEG6_HMAC 937#ifdef CONFIG_IPV6_SEG6_HMAC
929 if (sr_has_hmac(sr_phdr)) { 938 if (sr_has_hmac(sr_phdr)) {
930 struct net *net = NULL; 939 struct net *net = NULL;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f5285f4e1d08..9dcc3924a975 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
640 if (!(fn->fn_flags & RTN_RTINFO)) { 640 if (!(fn->fn_flags & RTN_RTINFO)) {
641 RCU_INIT_POINTER(fn->leaf, NULL); 641 RCU_INIT_POINTER(fn->leaf, NULL);
642 rt6_release(leaf); 642 rt6_release(leaf);
643 /* remove null_entry in the root node */
644 } else if (fn->fn_flags & RTN_TL_ROOT &&
645 rcu_access_pointer(fn->leaf) ==
646 net->ipv6.ip6_null_entry) {
647 RCU_INIT_POINTER(fn->leaf, NULL);
643 } 648 }
644 649
645 return fn; 650 return fn;
@@ -1241,23 +1246,28 @@ out:
1241 * If fib6_add_1 has cleared the old leaf pointer in the 1246 * If fib6_add_1 has cleared the old leaf pointer in the
1242 * super-tree leaf node we have to find a new one for it. 1247 * super-tree leaf node we have to find a new one for it.
1243 */ 1248 */
1244 struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, 1249 if (pn != fn) {
1245 lockdep_is_held(&table->tb6_lock)); 1250 struct rt6_info *pn_leaf =
1246 if (pn != fn && pn_leaf == rt) { 1251 rcu_dereference_protected(pn->leaf,
1247 pn_leaf = NULL; 1252 lockdep_is_held(&table->tb6_lock));
1248 RCU_INIT_POINTER(pn->leaf, NULL); 1253 if (pn_leaf == rt) {
1249 atomic_dec(&rt->rt6i_ref); 1254 pn_leaf = NULL;
1250 } 1255 RCU_INIT_POINTER(pn->leaf, NULL);
1251 if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { 1256 atomic_dec(&rt->rt6i_ref);
1252 pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
1253#if RT6_DEBUG >= 2
1254 if (!pn_leaf) {
1255 WARN_ON(!pn_leaf);
1256 pn_leaf = info->nl_net->ipv6.ip6_null_entry;
1257 } 1257 }
1258 if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
1259 pn_leaf = fib6_find_prefix(info->nl_net, table,
1260 pn);
1261#if RT6_DEBUG >= 2
1262 if (!pn_leaf) {
1263 WARN_ON(!pn_leaf);
1264 pn_leaf =
1265 info->nl_net->ipv6.ip6_null_entry;
1266 }
1258#endif 1267#endif
1259 atomic_inc(&pn_leaf->rt6i_ref); 1268 atomic_inc(&pn_leaf->rt6i_ref);
1260 rcu_assign_pointer(pn->leaf, pn_leaf); 1269 rcu_assign_pointer(pn->leaf, pn_leaf);
1270 }
1261 } 1271 }
1262#endif 1272#endif
1263 goto failure; 1273 goto failure;
@@ -1265,13 +1275,17 @@ out:
1265 return err; 1275 return err;
1266 1276
1267failure: 1277failure:
1268 /* fn->leaf could be NULL if fn is an intermediate node and we 1278 /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
1269 * failed to add the new route to it in both subtree creation 1279 * 1. fn is an intermediate node and we failed to add the new
1270 * failure and fib6_add_rt2node() failure case. 1280 * route to it in both subtree creation failure and fib6_add_rt2node()
1271 * In both cases, fib6_repair_tree() should be called to fix 1281 * failure case.
1272 * fn->leaf. 1282 * 2. fn is the root node in the table and we fail to add the first
1283 * default route to it.
1273 */ 1284 */
1274 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 1285 if (fn &&
1286 (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
1287 (fn->fn_flags & RTN_TL_ROOT &&
1288 !rcu_access_pointer(fn->leaf))))
1275 fib6_repair_tree(info->nl_net, table, fn); 1289 fib6_repair_tree(info->nl_net, table, fn);
1276 /* Always release dst as dst->__refcnt is guaranteed 1290 /* Always release dst as dst->__refcnt is guaranteed
1277 * to be taken before entering this function 1291 * to be taken before entering this function
@@ -1526,6 +1540,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1526 struct fib6_walker *w; 1540 struct fib6_walker *w;
1527 int iter = 0; 1541 int iter = 0;
1528 1542
1543 /* Set fn->leaf to null_entry for root node. */
1544 if (fn->fn_flags & RTN_TL_ROOT) {
1545 rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
1546 return fn;
1547 }
1548
1529 for (;;) { 1549 for (;;) {
1530 struct fib6_node *fn_r = rcu_dereference_protected(fn->right, 1550 struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
1531 lockdep_is_held(&table->tb6_lock)); 1551 lockdep_is_held(&table->tb6_lock));
@@ -1680,10 +1700,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1680 } 1700 }
1681 read_unlock(&net->ipv6.fib6_walker_lock); 1701 read_unlock(&net->ipv6.fib6_walker_lock);
1682 1702
1683 /* If it was last route, expunge its radix tree node */ 1703 /* If it was last route, call fib6_repair_tree() to:
1704 * 1. For root node, put back null_entry as how the table was created.
1705 * 2. For other nodes, expunge its radix tree node.
1706 */
1684 if (!rcu_access_pointer(fn->leaf)) { 1707 if (!rcu_access_pointer(fn->leaf)) {
1685 fn->fn_flags &= ~RTN_RTINFO; 1708 if (!(fn->fn_flags & RTN_TL_ROOT)) {
1686 net->ipv6.rt6_stats->fib_route_nodes--; 1709 fn->fn_flags &= ~RTN_RTINFO;
1710 net->ipv6.rt6_stats->fib_route_nodes--;
1711 }
1687 fn = fib6_repair_tree(net, table, fn); 1712 fn = fib6_repair_tree(net, table, fn);
1688 } 1713 }
1689 1714
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe..772695960890 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1014,6 +1014,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
1014 eth_random_addr(dev->perm_addr); 1014 eth_random_addr(dev->perm_addr);
1015} 1015}
1016 1016
1017#define GRE6_FEATURES (NETIF_F_SG | \
1018 NETIF_F_FRAGLIST | \
1019 NETIF_F_HIGHDMA | \
1020 NETIF_F_HW_CSUM)
1021
1022static void ip6gre_tnl_init_features(struct net_device *dev)
1023{
1024 struct ip6_tnl *nt = netdev_priv(dev);
1025
1026 dev->features |= GRE6_FEATURES;
1027 dev->hw_features |= GRE6_FEATURES;
1028
1029 if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
1030 /* TCP offload with GRE SEQ is not supported, nor
1031 * can we support 2 levels of outer headers requiring
1032 * an update.
1033 */
1034 if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
1035 nt->encap.type == TUNNEL_ENCAP_NONE) {
1036 dev->features |= NETIF_F_GSO_SOFTWARE;
1037 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1038 }
1039
1040 /* Can use a lockless transmit, unless we generate
1041 * output sequences
1042 */
1043 dev->features |= NETIF_F_LLTX;
1044 }
1045}
1046
1017static int ip6gre_tunnel_init_common(struct net_device *dev) 1047static int ip6gre_tunnel_init_common(struct net_device *dev)
1018{ 1048{
1019 struct ip6_tnl *tunnel; 1049 struct ip6_tnl *tunnel;
@@ -1048,6 +1078,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
1048 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1078 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1049 dev->mtu -= 8; 1079 dev->mtu -= 8;
1050 1080
1081 ip6gre_tnl_init_features(dev);
1082
1051 return 0; 1083 return 0;
1052} 1084}
1053 1085
@@ -1298,16 +1330,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
1298 .ndo_get_iflink = ip6_tnl_get_iflink, 1330 .ndo_get_iflink = ip6_tnl_get_iflink,
1299}; 1331};
1300 1332
1301#define GRE6_FEATURES (NETIF_F_SG | \
1302 NETIF_F_FRAGLIST | \
1303 NETIF_F_HIGHDMA | \
1304 NETIF_F_HW_CSUM)
1305
1306static void ip6gre_tap_setup(struct net_device *dev) 1333static void ip6gre_tap_setup(struct net_device *dev)
1307{ 1334{
1308 1335
1309 ether_setup(dev); 1336 ether_setup(dev);
1310 1337
1338 dev->max_mtu = 0;
1311 dev->netdev_ops = &ip6gre_tap_netdev_ops; 1339 dev->netdev_ops = &ip6gre_tap_netdev_ops;
1312 dev->needs_free_netdev = true; 1340 dev->needs_free_netdev = true;
1313 dev->priv_destructor = ip6gre_dev_free; 1341 dev->priv_destructor = ip6gre_dev_free;
@@ -1382,26 +1410,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1382 nt->net = dev_net(dev); 1410 nt->net = dev_net(dev);
1383 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); 1411 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1384 1412
1385 dev->features |= GRE6_FEATURES;
1386 dev->hw_features |= GRE6_FEATURES;
1387
1388 if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
1389 /* TCP offload with GRE SEQ is not supported, nor
1390 * can we support 2 levels of outer headers requiring
1391 * an update.
1392 */
1393 if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
1394 (nt->encap.type == TUNNEL_ENCAP_NONE)) {
1395 dev->features |= NETIF_F_GSO_SOFTWARE;
1396 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1397 }
1398
1399 /* Can use a lockless transmit, unless we generate
1400 * output sequences
1401 */
1402 dev->features |= NETIF_F_LLTX;
1403 }
1404
1405 err = register_netdevice(dev); 1413 err = register_netdevice(dev);
1406 if (err) 1414 if (err)
1407 goto out; 1415 goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..688ba5f7516b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+	if (!np->autoflowlabel_set)
+		return ip6_default_np_autolabel(net);
+	else
+		return np->autoflowlabel;
+}
+
 /*
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 		hlimit = ip6_dst_hoplimit(dst);
 
 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-						     np->autoflowlabel, fl6));
+				ip6_autoflowlabel(net, np), fl6));
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
 	ip6_flow_hdr(hdr, v6_cork->tclass,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-					np->autoflowlabel, fl6));
+					ip6_autoflowlabel(net, np), fl6));
 	hdr->hop_limit = v6_cork->hop_limit;
 	hdr->nexthdr = proto;
 	hdr->saddr = fl6->saddr;
@@ -1727,9 +1735,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	cork.base.opt = NULL;
 	v6_cork.opt = NULL;
 	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
-	if (err)
+	if (err) {
+		ip6_cork_release(&cork, &v6_cork);
 		return ERR_PTR(err);
-
+	}
 	if (ipc6->dontfrag < 0)
 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index db84f523656d..9a7cf355bc8c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1074,10 +1074,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
 			neigh_release(neigh);
 		}
-	} else if (!(t->parms.flags &
-		     (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
-		/* enable the cache only only if the routing decision does
-		 * not depend on the current inner header value
+	} else if (t->parms.proto != 0 && !(t->parms.flags &
+					    (IP6_TNL_F_USE_ORIG_TCLASS |
+					     IP6_TNL_F_USE_ORIG_FWMARK))) {
+		/* enable the cache only if neither the outer protocol nor the
+		 * routing decision depends on the current inner header value
 		 */
 		use_cache = true;
 	}
@@ -1123,8 +1124,13 @@ route_lookup:
 		max_headroom += 8;
 		mtu -= 8;
 	}
-	if (mtu < IPV6_MIN_MTU)
-		mtu = IPV6_MIN_MTU;
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
+	} else if (mtu < 576) {
+		mtu = 576;
+	}
+
 	if (skb_dst(skb) && !t->parms.collect_md)
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
@@ -1671,11 +1677,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ip6_tnl *tnl = netdev_priv(dev);
 
-	if (tnl->parms.proto == IPPROTO_IPIP) {
-		if (new_mtu < ETH_MIN_MTU)
+	if (tnl->parms.proto == IPPROTO_IPV6) {
+		if (new_mtu < IPV6_MIN_MTU)
 			return -EINVAL;
 	} else {
-		if (new_mtu < IPV6_MIN_MTU)
+		if (new_mtu < ETH_MIN_MTU)
 			return -EINVAL;
 	}
 	if (new_mtu > 0xFFF8 - dev->hard_header_len)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..2d4680e0376f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
 		break;
 	case IPV6_AUTOFLOWLABEL:
 		np->autoflowlabel = valbool;
+		np->autoflowlabel_set = 1;
 		retv = 0;
 		break;
 	case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374..0458b761f3c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	}
 
 	rt->dst.flags |= DST_HOST;
+	rt->dst.input = ip6_input;
 	rt->dst.output = ip6_output;
 	rt->rt6i_gateway = fl6->daddr;
 	rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		if (!ipv6_addr_any(&fl6.saddr))
 			flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-		if (!fibmatch)
-			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
-		else
-			dst = ip6_route_lookup(net, &fl6, 0);
+		dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 
 		rcu_read_unlock();
 	} else {
 		fl6.flowi6_oif = oif;
 
-		if (!fibmatch)
-			dst = ip6_route_output(net, NULL, &fl6);
-		else
-			dst = ip6_route_lookup(net, &fl6, 0);
+		dst = ip6_route_output(net, NULL, &fl6);
 	}
 
 
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
+	if (fibmatch && rt->dst.from) {
+		struct rt6_info *ort = container_of(rt->dst.from,
+						    struct rt6_info, dst);
+
+		dst_hold(&ort->dst);
+		ip6_rt_put(rt);
+		rt = ort;
+	}
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb) {
 		ip6_rt_put(rt);
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af986..841f4a07438e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 }
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+				   struct sk_buff *skb)
+{
+	if (xfrm_trans_queue(skb, ip6_rcv_finish))
+		__kfree_skb(skb);
+	return -1;
+}
+
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
 	struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 
 	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
 		dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-		ip6_rcv_finish);
+		xfrm6_transport_finish2);
 	return -1;
 }
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 70e9d2ca8bbe..4daafb07602f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
 		}
 		return true;
 	case NL80211_IFTYPE_MESH_POINT:
+		if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
+			return false;
 		if (multicast)
 			return true;
 		return ether_addr_equal(sdata->vif.addr, hdr->addr1);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 10798b357481..07bd4138c84e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 			continue;
 
 		list_for_each_entry_rcu(chain, &table->chains, list) {
-			if (ctx && ctx->chain[0] &&
+			if (ctx && ctx->chain &&
 			    strcmp(ctx->chain, chain->name) != 0)
 				continue;
 
@@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 {
 	struct nft_obj_filter *filter = cb->data;
 
-	kfree(filter->table);
-	kfree(filter);
+	if (filter) {
+		kfree(filter->table);
+		kfree(filter);
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 1f7fbd3c7e5a..06b090d8e901 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -55,21 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
 
 static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
 {
-	mm_segment_t oldfs = get_fs();
-	int retval, fd;
-
 	if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
 		return -EINVAL;
 
-	set_fs(KERNEL_DS);
-	fd = bpf_obj_get_user(path, 0);
-	set_fs(oldfs);
-	if (fd < 0)
-		return fd;
-
-	retval = __bpf_mt_check_fd(fd, ret);
-	sys_close(fd);
-	return retval;
+	*ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+	return PTR_ERR_OR_ZERO(*ret);
 }
 
 static int bpf_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..f039064ce922 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 			return -EINVAL;
 
 		skb_reset_network_header(skb);
+		key->eth.type = skb->protocol;
 	} else {
 		eth = eth_hdr(skb);
 		ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 		if (unlikely(parse_vlan(skb, key)))
 			return -ENOMEM;
 
-		skb->protocol = parse_ethertype(skb);
-		if (unlikely(skb->protocol == htons(0)))
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
 			return -ENOMEM;
 
+		/* Multiple tagged packets need to retain TPID to satisfy
+		 * skb_vlan_pop(), which will later shift the ethertype into
+		 * skb->protocol.
+		 */
+		if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+			skb->protocol = key->eth.cvlan.tpid;
+		else
+			skb->protocol = key->eth.type;
+
 		skb_reset_network_header(skb);
 		__skb_push(skb, skb->data - skb_mac_header(skb));
 	}
 	skb_reset_mac_len(skb);
-	key->eth.type = skb->protocol;
 
 	/* Network layer. */
 	if (key->eth.type == htons(ETH_P_IP)) {
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index bc2f1e0977d6..634cfcb7bba6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
 
 	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
 
+	if (args->nr_local == 0)
+		return -EINVAL;
+
 	/* figure out the number of pages in the vector */
 	for (i = 0; i < args->nr_local; i++) {
 		if (copy_from_user(&vec, &local_vec[i],
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
 err:
 	if (page)
 		put_page(page);
+	rm->atomic.op_active = 0;
 	kfree(rm->atomic.op_notifier);
 
 	return ret;
diff --git a/net/rds/send.c b/net/rds/send.c
index b52cdc8ae428..f72466c63f0c 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
 			continue;
 
 		if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+			if (cmsg->cmsg_len <
+			    CMSG_LEN(sizeof(struct rds_rdma_args)))
+				return -EINVAL;
 			args = CMSG_DATA(cmsg);
 			*rdma_bytes += args->remote_vec.bytes;
 		}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e29a48ef7fc3..a0ac42b3ed06 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
 	if (action == TC_ACT_SHOT)
 		this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
 
-	tm->lastuse = lastuse;
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8b3e59388480..08b61849c2a2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
 	struct tcf_t *tm = &m->tcf_tm;
 
 	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-	tm->lastuse = lastuse;
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b91ea03e3afa..b9d63d2246e6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -379,6 +379,8 @@ void tcf_block_put(struct tcf_block *block)
 {
 	struct tcf_block_ext_info ei = {0, };
 
+	if (!block)
+		return;
 	tcf_block_put_ext(block, block->q, &ei);
 }
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6fe798c2df1a..8d78e7f4ecc3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
 	struct list_head link;
 	struct tcf_result res;
 	bool exts_integrated;
-	bool offloaded;
 	u32 gen_flags;
 	struct tcf_exts exts;
 	u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 }
 
 static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
-			       enum tc_clsbpf_command cmd)
+			       struct cls_bpf_prog *oldprog)
 {
-	bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
 	struct tcf_block *block = tp->chain->block;
-	bool skip_sw = tc_skip_sw(prog->gen_flags);
 	struct tc_cls_bpf_offload cls_bpf = {};
+	struct cls_bpf_prog *obj;
+	bool skip_sw;
 	int err;
 
+	skip_sw = prog && tc_skip_sw(prog->gen_flags);
+	obj = prog ?: oldprog;
+
 	tc_cls_common_offload_init(&cls_bpf.common, tp);
-	cls_bpf.command = cmd;
-	cls_bpf.exts = &prog->exts;
-	cls_bpf.prog = prog->filter;
-	cls_bpf.name = prog->bpf_name;
-	cls_bpf.exts_integrated = prog->exts_integrated;
-	cls_bpf.gen_flags = prog->gen_flags;
+	cls_bpf.command = TC_CLSBPF_OFFLOAD;
+	cls_bpf.exts = &obj->exts;
+	cls_bpf.prog = prog ? prog->filter : NULL;
+	cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+	cls_bpf.name = obj->bpf_name;
+	cls_bpf.exts_integrated = obj->exts_integrated;
+	cls_bpf.gen_flags = obj->gen_flags;
 
 	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-	if (addorrep) {
+	if (prog) {
 		if (err < 0) {
-			cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+			cls_bpf_offload_cmd(tp, oldprog, prog);
 			return err;
 		} else if (err > 0) {
 			prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
 		}
 	}
 
-	if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+	if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
 		return -EINVAL;
 
 	return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			   struct cls_bpf_prog *oldprog)
 {
-	struct cls_bpf_prog *obj = prog;
-	enum tc_clsbpf_command cmd;
-	bool skip_sw;
-	int ret;
-
-	skip_sw = tc_skip_sw(prog->gen_flags) ||
-		(oldprog && tc_skip_sw(oldprog->gen_flags));
-
-	if (oldprog && oldprog->offloaded) {
-		if (!tc_skip_hw(prog->gen_flags)) {
-			cmd = TC_CLSBPF_REPLACE;
-		} else if (!tc_skip_sw(prog->gen_flags)) {
-			obj = oldprog;
-			cmd = TC_CLSBPF_DESTROY;
-		} else {
-			return -EINVAL;
-		}
-	} else {
-		if (tc_skip_hw(prog->gen_flags))
-			return skip_sw ? -EINVAL : 0;
-		cmd = TC_CLSBPF_ADD;
-	}
-
-	ret = cls_bpf_offload_cmd(tp, obj, cmd);
-	if (ret)
-		return ret;
+	if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+		return -EINVAL;
 
-	obj->offloaded = true;
-	if (oldprog)
-		oldprog->offloaded = false;
+	if (prog && tc_skip_hw(prog->gen_flags))
+		prog = NULL;
+	if (oldprog && tc_skip_hw(oldprog->gen_flags))
+		oldprog = NULL;
+	if (!prog && !oldprog)
+		return 0;
 
-	return 0;
+	return cls_bpf_offload_cmd(tp, prog, oldprog);
 }
 
 static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
 {
 	int err;
 
-	if (!prog->offloaded)
-		return;
-
-	err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
-	if (err) {
+	err = cls_bpf_offload_cmd(tp, NULL, prog);
+	if (err)
 		pr_err("Stopping hardware offload failed: %d\n", err);
-		return;
-	}
-
-	prog->offloaded = false;
 }
 
 static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
 					 struct cls_bpf_prog *prog)
 {
-	if (!prog->offloaded)
-		return;
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_bpf_offload cls_bpf = {};
+
+	tc_cls_common_offload_init(&cls_bpf.common, tp);
+	cls_bpf.command = TC_CLSBPF_STATS;
+	cls_bpf.exts = &prog->exts;
+	cls_bpf.prog = prog->filter;
+	cls_bpf.name = prog->bpf_name;
+	cls_bpf.exts_integrated = prog->exts_integrated;
+	cls_bpf.gen_flags = prog->gen_flags;
 
-	cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cd1b200acae7..661c7144b53a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1040,6 +1040,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 
 	if (!tp_head) {
 		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+		/* Wait for flying RCU callback before it is freed. */
+		rcu_barrier_bh();
 		return;
 	}
 
@@ -1055,7 +1057,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 	rcu_assign_pointer(*miniqp->p_miniq, miniq);
 
 	if (miniq_old)
-		/* This is counterpart of the rcu barrier above. We need to
+		/* This is counterpart of the rcu barriers above. We need to
 		 * block potential new user of miniq_old until all readers
 		 * are not seeing it.
 		 */
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a..291c97b07058 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
 	case SCTP_CID_AUTH:
 		return "AUTH";
 
+	case SCTP_CID_RECONF:
+		return "RECONF";
+
 	default:
 		break;
 	}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 621b5ca3fd1c..141c9c466ec1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 		return;
 	}
 
-	if (t->param_flags & SPP_PMTUD_ENABLE) {
-		/* Update transports view of the MTU */
-		sctp_transport_update_pmtu(t, pmtu);
-
-		/* Update association pmtu. */
-		sctp_assoc_sync_pmtu(asoc);
-	}
+	if (!(t->param_flags & SPP_PMTUD_ENABLE))
+		/* We can't allow retransmitting in such case, as the
+		 * retransmission would be sized just as before, and thus we
+		 * would get another icmp, and retransmit again.
+		 */
+		return;
 
-	/* Retransmit with the new pmtu setting.
-	 * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
-	 * Needed will never be sent, but if a message was sent before
-	 * PMTU discovery was disabled that was larger than the PMTU, it
-	 * would not be fragmented, so it must be re-transmitted fragmented.
+	/* Update transports view of the MTU. Return if no update was needed.
+	 * If an update wasn't needed/possible, it also doesn't make sense to
+	 * try to retransmit now.
 	 */
+	if (!sctp_transport_update_pmtu(t, pmtu))
+		return;
+
+	/* Update association pmtu. */
+	sctp_assoc_sync_pmtu(asoc);
+
+	/* Retransmit with the new pmtu setting. */
 	sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
 }
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3253f724a995..9b01e994f661 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2277,7 +2277,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
 
 	if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
 		event = sctp_ulpevent_make_sender_dry_event(asoc,
-				GFP_ATOMIC);
+				GFP_USER | __GFP_NOWARN);
 		if (!event)
 			return -ENOMEM;
 
@@ -3498,6 +3498,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
 
 	if (optlen < sizeof(struct sctp_hmacalgo))
 		return -EINVAL;
+	optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
+					     SCTP_AUTH_NUM_HMACS * sizeof(u16));
 
 	hmacs = memdup_user(optval, optlen);
 	if (IS_ERR(hmacs))
@@ -3536,6 +3538,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 
 	if (optlen <= sizeof(struct sctp_authkey))
 		return -EINVAL;
+	/* authkey->sca_keylength is u16, so optlen can't be bigger than
+	 * this.
+	 */
+	optlen = min_t(unsigned int, optlen, USHRT_MAX +
+					     sizeof(struct sctp_authkey));
 
 	authkey = memdup_user(optval, optlen);
 	if (IS_ERR(authkey))
@@ -3893,6 +3900,9 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
 
 	if (optlen < sizeof(*params))
 		return -EINVAL;
+	/* srs_number_streams is u16, so optlen can't be bigger than this. */
+	optlen = min_t(unsigned int, optlen, USHRT_MAX +
+					     sizeof(__u16) * sizeof(*params));
 
 	params = memdup_user(optval, optlen);
 	if (IS_ERR(params))
@@ -4498,7 +4508,7 @@ static int sctp_init_sock(struct sock *sk)
 	SCTP_DBG_OBJCNT_INC(sock);
 
 	local_bh_disable();
-	percpu_counter_inc(&sctp_sockets_allocated);
+	sk_sockets_allocated_inc(sk);
 	sock_prot_inuse_add(net, sk->sk_prot, 1);
 
 	/* Nothing can fail after this block, otherwise
@@ -4542,7 +4552,7 @@ static void sctp_destroy_sock(struct sock *sk)
 	}
 	sctp_endpoint_free(sp->ep);
 	local_bh_disable();
-	percpu_counter_dec(&sctp_sockets_allocated);
+	sk_sockets_allocated_dec(sk);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	local_bh_enable();
 }
@@ -5015,7 +5025,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
 	len = sizeof(int);
 	if (put_user(len, optlen))
 		return -EFAULT;
-	if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int)))
+	if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
 		return -EFAULT;
 	return 0;
 }
@@ -5645,6 +5655,9 @@ copy_getaddrs:
 		err = -EFAULT;
 		goto out;
 	}
+	/* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
+	 * but we can't change it anymore.
+	 */
 	if (put_user(bytes_copied, optlen))
 		err = -EFAULT;
 out:
@@ -6081,7 +6094,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
 		params.assoc_id = 0;
 	} else if (len >= sizeof(struct sctp_assoc_value)) {
 		len = sizeof(struct sctp_assoc_value);
-		if (copy_from_user(&params, optval, sizeof(params)))
+		if (copy_from_user(&params, optval, len))
 			return -EFAULT;
 	} else
 		return -EINVAL;
@@ -6251,7 +6264,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 
 	if (len < sizeof(struct sctp_authkeyid))
 		return -EINVAL;
-	if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
+
+	len = sizeof(struct sctp_authkeyid);
+	if (copy_from_user(&val, optval, len))
 		return -EFAULT;
 
 	asoc = sctp_id2assoc(sk, val.scact_assoc_id);
@@ -6263,7 +6278,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 	else
 		val.scact_keynumber = ep->active_key_id;
 
-	len = sizeof(struct sctp_authkeyid);
 	if (put_user(len, optlen))
 		return -EFAULT;
 	if (copy_to_user(optval, &val, len))
@@ -6289,7 +6303,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 	if (len < sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
-	if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+	if (copy_from_user(&val, optval, sizeof(val)))
 		return -EFAULT;
 
 	to = p->gauth_chunks;
@@ -6334,7 +6348,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	if (len < sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
-	if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+	if (copy_from_user(&val, optval, sizeof(val)))
 		return -EFAULT;
 
 	to = p->gauth_chunks;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 76ea66be0bbe..524dfeb94c41 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 	sctp_stream_outq_migrate(stream, NULL, outcnt);
 	sched->sched_all(stream);
 
-	i = sctp_stream_alloc_out(stream, outcnt, gfp);
-	if (i)
-		return i;
+	ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+	if (ret)
+		goto out;
 
 	stream->outcnt = outcnt;
 	for (i = 0; i < stream->outcnt; i++)
@@ -170,19 +170,17 @@ in:
 	if (!incnt)
 		goto out;
 
-	i = sctp_stream_alloc_in(stream, incnt, gfp);
-	if (i) {
-		ret = -ENOMEM;
-		goto free;
+	ret = sctp_stream_alloc_in(stream, incnt, gfp);
+	if (ret) {
+		sched->free(stream);
+		kfree(stream->out);
+		stream->out = NULL;
+		stream->outcnt = 0;
+		goto out;
 	}
 
 	stream->incnt = incnt;
-	goto out;
 
-free:
-	sched->free(stream);
-	kfree(stream->out);
-	stream->out = NULL;
 out:
 	return ret;
 }
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1e5a22430cf5..47f82bd794d9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 {
 	struct dst_entry *dst = sctp_transport_dst_check(t);
+	bool change = true;
 
 	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
-		pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
-			__func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
-		/* Use default minimum segment size and disable
-		 * pmtu discovery on this transport.
-		 */
-		t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
-	} else {
-		t->pathmtu = pmtu;
+		pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+				    __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+		/* Use default minimum segment instead */
+		pmtu = SCTP_DEFAULT_MINSEGMENT;
 	}
+	pmtu = SCTP_TRUNC4(pmtu);
 
 	if (dst) {
 		dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
 		dst = sctp_transport_dst_check(t);
 	}
 
-	if (!dst)
+	if (!dst) {
 		t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+		dst = t->dst;
+	}
+
+	if (dst) {
+		/* Re-fetch, as under layers may have a higher minimum size */
+		pmtu = SCTP_TRUNC4(dst_mtu(dst));
+		change = t->pathmtu != pmtu;
+	}
+	t->pathmtu = pmtu;
+
+	return change;
 }
 
 /* Caches the dst entry and source address for a transport's destination
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afe..e36ec5dd64c6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 		      gfp_t gfp)
 {
-	struct sctp_association *asoc;
-	__u16 needed, freed;
-
-	asoc = ulpq->asoc;
+	struct sctp_association *asoc = ulpq->asoc;
+	__u32 freed = 0;
+	__u16 needed;
 
-	if (chunk) {
-		needed = ntohs(chunk->chunk_hdr->length);
-		needed -= sizeof(struct sctp_data_chunk);
-	} else
-		needed = SCTP_DEFAULT_MAXWINDOW;
-
-	freed = 0;
+	needed = ntohs(chunk->chunk_hdr->length) -
+		 sizeof(struct sctp_data_chunk);
 
 	if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
 		freed = sctp_ulpq_renege_order(ulpq, needed);
-		if (freed < needed) {
+		if (freed < needed)
 			freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
-		}
 	}
 	/* If able to free enough room, accept this chunk. */
-	if (chunk && (freed >= needed)) {
-		int retval;
-		retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+	if (freed >= needed) {
+		int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
 		/*
 		 * Enter partial delivery if chunk has not been
 		 * delivered; otherwise, drain the reassembly queue.
diff --git a/net/socket.c b/net/socket.c
index 05f361faec45..6f05d5c4bf30 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -436,8 +436,10 @@ static int sock_map_fd(struct socket *sock, int flags)
 {
 	struct file *newfile;
 	int fd = get_unused_fd_flags(flags);
-	if (unlikely(fd < 0))
+	if (unlikely(fd < 0)) {
+		sock_release(sock);
 		return fd;
+	}
 
 	newfile = sock_alloc_file(sock, flags, NULL);
 	if (likely(!IS_ERR(newfile))) {
@@ -2619,6 +2621,15 @@ out_fs:
 
 core_initcall(sock_init);	/* early initcall */
 
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	bpf_jit_enable = 1;
+#endif
+	return 0;
+}
+pure_initcall(jit_init);
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index c5fda15ba319..1fdab5c4eda8 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
 	 * allows a thread in BH context to safely check if the process
 	 * lock is held. In this case, if the lock is held, queue work.
 	 */
-	if (sock_owned_by_user(strp->sk)) {
+	if (sock_owned_by_user_nocheck(strp->sk)) {
 		queue_work(strp_wq, &strp->work);
 		return;
 	}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121574ce..c8001471da6c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -324,6 +324,7 @@ restart:
 	if (res) {
 		pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
 			name, -res);
+		kfree(b);
 		return -EINVAL;
 	}
 
@@ -347,8 +348,10 @@
 	if (skb)
 		tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
 
-	if (tipc_mon_create(net, bearer_id))
+	if (tipc_mon_create(net, bearer_id)) {
+		bearer_disable(net, b);
 		return -ENOMEM;
+	}
 
 	pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
 		name,
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 95fec2c057d6..5f4ffae807ee 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
 static void tipc_group_decr_active(struct tipc_group *grp,
 				   struct tipc_member *m)
 {
-	if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+	if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
+	    m->state == MBR_REMITTED)
 		grp->active_cnt--;
 }
 
@@ -351,8 +352,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
 	if (m->window >= ADV_IDLE)
 		return;
 
-	if (!list_empty(&m->congested))
-		return;
+	list_del_init(&m->congested);
 
 	/* Sort member into congested members' list */
 	list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -369,18 +369,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
 	u16 prev = grp->bc_snd_nxt - 1;
 	struct tipc_member *m;
 	struct rb_node *n;
+	u16 ackers = 0;
 
 	for (n = rb_first(&grp->members); n; n = rb_next(n)) {
 		m = container_of(n, struct tipc_member, tree_node);
 		if (tipc_group_is_enabled(m)) {
 			tipc_group_update_member(m, len);
 			m->bc_acked = prev;
+			ackers++;
 		}
 	}
 
 	/* Mark number of acknowledges to expect, if any */
 	if (ack)
-		grp->bc_ackers = grp->member_cnt;
+		grp->bc_ackers = ackers;
 	grp->bc_snd_nxt++;
 }
 
@@ -561,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 	int max_active = grp->max_active;
 	int reclaim_limit = max_active * 3 / 4;
 	int active_cnt = grp->active_cnt;
-	struct tipc_member *m, *rm;
+	struct tipc_member *m, *rm, *pm;
 
 	m = tipc_group_find_member(grp, node, port);
 	if (!m)
@@ -604,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 			pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
 			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
 		}
+		grp->active_cnt--;
+		list_del_init(&m->list);
+		if (list_empty(&grp->pending))
+			return;
+
+		/* Set oldest pending member to active and advertise */
+		pm = list_first_entry(&grp->pending, struct tipc_member, list);
+		pm->state = MBR_ACTIVE;
+		list_move_tail(&pm->list, &grp->active);
+		grp->active_cnt++;
+		tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
 		break;
 	case MBR_RECLAIMING:
 	case MBR_DISCOVERED:
@@ -648,6 +661,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
 	} else if (mtyp == GRP_REMIT_MSG) {
 		msg_set_grp_remitted(hdr, m->window);
 	}
+	msg_set_dest_droppable(hdr, true);
 	__skb_queue_tail(xmitq, skb);
 }
 
@@ -689,15 +703,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 			msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
 			__skb_queue_tail(inputq, m->event_msg);
 		}
-		if (m->window < ADV_IDLE)
-			tipc_group_update_member(m, 0);
-		else
-			list_del_init(&m->congested);
+		list_del_init(&m->congested);
+		tipc_group_update_member(m, 0);
 		return;
 	case GRP_LEAVE_MSG:
 		if (!m)
 			return;
 		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+		list_del_init(&m->list);
+		list_del_init(&m->congested);
+		*usr_wakeup = true;
 
 		/* Wait until WITHDRAW event is received */
 		if (m->state != MBR_LEAVING) {
@@ -709,8 +724,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 		ehdr = buf_msg(m->event_msg);
 		msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
 		__skb_queue_tail(inputq, m->event_msg);
-		*usr_wakeup = true;
-		list_del_init(&m->congested);
 		return;
 	case GRP_ADV_MSG:
 		if (!m)
@@ -741,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 		if (!m || m->state != MBR_RECLAIMING)
 			return;
 
-		list_del_init(&m->list);
-		grp->active_cnt--;
 		remitted = msg_grp_remitted(hdr);
 
 		/* Messages preceding the REMIT still in receive queue */
 		if (m->advertised > remitted) {
 			m->state = MBR_REMITTED;
 			in_flight = m->advertised - remitted;
+			m->advertised = ADV_IDLE + in_flight;
+			return;
 		}
 		/* All messages preceding the REMIT have been read */
 		if (m->advertised <= remitted) {
@@ -760,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 		tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
 
 		m->advertised = ADV_IDLE + in_flight;
+		grp->active_cnt--;
+		list_del_init(&m->list);
 
 		/* Set oldest pending member to active and advertise */
 		if (list_empty(&grp->pending))
@@ -849,19 +864,29 @@ void tipc_group_member_evt(struct tipc_group *grp,
 		*usr_wakeup = true;
 		m->usr_pending = false;
 		node_up = tipc_node_is_up(net, node);
-
-		/* Hold back event if more messages might be expected */
-		if (m->state != MBR_LEAVING && node_up) {
-			m->event_msg = skb;
-			tipc_group_decr_active(grp, m);
-			m->state = MBR_LEAVING;
-		} else {
-			if (node_up)
+		m->event_msg = NULL;
+
+		if (node_up) {
+			/* Hold back event if a LEAVE msg should be expected */
+			if (m->state != MBR_LEAVING) {
+				m->event_msg = skb;
+				tipc_group_decr_active(grp, m);
+				m->state = MBR_LEAVING;
+			} else {
 				msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
-			else
+				__skb_queue_tail(inputq, skb);
+			}
+		} else {
+			if (m->state != MBR_LEAVING) {
+				tipc_group_decr_active(grp, m);
+				m->state = MBR_LEAVING;
 				msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+			} else {
+				msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+			}
 			__skb_queue_tail(inputq, skb);
 		}
+		list_del_init(&m->list);
 		list_del_init(&m->congested);
 	}
 	*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 8e884ed06d4b..32dc33a94bc7 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
 {
 	struct tipc_net *tn = tipc_net(net);
 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
-	struct tipc_peer *self = get_self(net, bearer_id);
+	struct tipc_peer *self;
 	struct tipc_peer *peer, *tmp;
 
+	if (!mon)
+		return;
+
+	self = get_self(net, bearer_id);
 	write_lock_bh(&mon->lock);
 	tn->monitors[bearer_id] = NULL;
 	list_for_each_entry_safe(peer, tmp, &self->list, list) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 41127d0b925e..3b4084480377 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 
 	switch (sk->sk_state) {
 	case TIPC_ESTABLISHED:
+	case TIPC_CONNECTING:
 		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
 			revents |= POLLOUT;
 		/* fall thru' */
 	case TIPC_LISTEN:
-	case TIPC_CONNECTING:
 		if (!skb_queue_empty(&sk->sk_receive_queue))
 			revents |= POLLIN | POLLRDNORM;
 		break;
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d7d6cb00c47b..1d84f91bbfb0 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,27 +23,14 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
 cfg80211-y += extra-certs.o
 endif
 
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
 	@$(kecho) "  GEN     $@"
-	@(set -e; \
-	  allf=""; \
-	  for f in $^ ; do \
-	      # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
-	      thisf=$$(od -An -v -tx1 < $$f | \
-	                   sed -e 's/ /\n/g' | \
-	                   sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
-	                   sed -e 's/^/0x/;s/$$/,/'); \
-	      # file should not be empty - maybe command substitution failed? \
-	      test ! -z "$$thisf";\
-	      allf=$$allf$$thisf;\
-	  done; \
-	  ( \
-	      echo '#include "reg.h"'; \
-	      echo 'const u8 shipped_regdb_certs[] = {'; \
-	      echo "$$allf"; \
-	      echo '};'; \
-	      echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
-	  ) >> $@)
+	@(echo '#include "reg.h"'; \
+	  echo 'const u8 shipped_regdb_certs[] = {'; \
+	  cat $^ ; \
+	  echo '};'; \
+	  echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+	 ) > $@
 
 $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
 		      $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@@ -66,4 +53,6 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
 	      echo "$$allf"; \
 	      echo '};'; \
 	      echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
-	  ) >> $@)
+	  ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 000000000000..14ea66643ffa
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b988..000000000000
--- a/net/wireless/certs/sforshee.x509
+++ /dev/null
Binary files differ
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c8..2b3dbcd40e46 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 	case NL80211_IFTYPE_AP:
 		if (wdev->ssid_len &&
 		    nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
-			goto nla_put_failure;
+			goto nla_put_failure_locked;
 		break;
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 		if (!ssid_ie)
 			break;
 		if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
-			goto nla_put_failure;
+			goto nla_put_failure_locked;
 		break;
 	}
 	default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 	genlmsg_end(msg, hdr);
 	return 0;
 
+ nla_put_failure_locked:
+	wdev_unlock(wdev);
  nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
@@ -11359,7 +11361,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
 		break;
 	case NL80211_NAN_FUNC_FOLLOW_UP:
 		if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
-		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
+		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
 			err = -EINVAL;
 			goto out;
 		}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31574d5..3f6f6f8c9fa5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
  *
  */
 
+#include <linux/bottom_half.h>
+#include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/percpu.h>
 #include <net/dst.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ip_tunnels.h>
 #include <net/ip6_tunnel.h>
 
+struct xfrm_trans_tasklet {
+	struct tasklet_struct tasklet;
+	struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
 static struct kmem_cache *secpath_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
 static struct gro_cells gro_cells;
 static struct net_device xfrm_napi_dev;
 
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
 {
 	int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	xfrm_address_t *daddr;
 	struct xfrm_mode *inner_mode;
 	u32 mark = skb->mark;
-	unsigned int family;
+	unsigned int family = AF_UNSPEC;
 	int decaps = 0;
 	int async = 0;
 	bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 	if (encap_type < 0) {
 		x = xfrm_input_state(skb);
+
+		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+			if (x->km.state == XFRM_STATE_ACQ)
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+			else
+				XFRM_INC_STATS(net,
+					       LINUX_MIB_XFRMINSTATEINVALID);
+			goto drop;
+		}
+
 		family = x->outer_mode->afinfo->family;
 
 		/* An encap_type of -1 indicates async resumption. */
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
 }
 EXPORT_SYMBOL(xfrm_input_resume);
 
+static void xfrm_trans_reinject(unsigned long data)
+{
+	struct xfrm_trans_tasklet *trans = (void *)data;
+	struct sk_buff_head queue;
+	struct sk_buff *skb;
+
+	__skb_queue_head_init(&queue);
+	skb_queue_splice_init(&trans->queue, &queue);
+
+	while ((skb = __skb_dequeue(&queue)))
+		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+}
+
+int xfrm_trans_queue(struct sk_buff *skb,
+		     int (*finish)(struct net *, struct sock *,
+				   struct sk_buff *))
+{
+	struct xfrm_trans_tasklet *trans;
+
+	trans = this_cpu_ptr(&xfrm_trans_tasklet);
+
+	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+		return -ENOBUFS;
+
+	XFRM_TRANS_SKB_CB(skb)->finish = finish;
+	skb_queue_tail(&trans->queue, skb);
+	tasklet_schedule(&trans->tasklet);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_trans_queue);
+
 void __init xfrm_input_init(void)
 {
 	int err;
+	int i;
 
 	init_dummy_netdev(&xfrm_napi_dev);
 	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void)
 					   sizeof(struct sec_path),
 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL);
+
+	for_each_possible_cpu(i) {
+		struct xfrm_trans_tasklet *trans;
+
+		trans = &per_cpu(xfrm_trans_tasklet, i);
+		__skb_queue_head_init(&trans->queue);
+		tasklet_init(&trans->tasklet, xfrm_trans_reinject,
+			     (unsigned long)trans);
+	}
 }
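xfrm_trans_queue() above defers packets to a per-CPU tasklet with a hard backlog cap, refusing new work with -ENOBUFS once the queue is full and draining it later in FIFO order. A userspace sketch of that bounded deferral pattern, with invented names and an arbitrary MAX_BACKLOG, not the kernel API:

#include <stdio.h>
#include <errno.h>

#define MAX_BACKLOG 4

struct pkt {
	int id;
	int (*finish)(struct pkt *p);	/* stashed per-packet callback */
};

static struct pkt queue[MAX_BACKLOG];
static int q_len;

static int finish_print(struct pkt *p)
{
	printf("finished packet %d\n", p->id);
	return 0;
}

static int trans_queue(struct pkt *p, int (*finish)(struct pkt *))
{
	if (q_len >= MAX_BACKLOG)
		return -ENOBUFS;	/* backlog full, caller drops */
	p->finish = finish;
	queue[q_len++] = *p;
	return 0;
}

static void trans_reinject(void)	/* plays the role of the tasklet */
{
	for (int i = 0; i < q_len; i++)
		queue[i].finish(&queue[i]);
	q_len = 0;
}

int main(void)
{
	for (int i = 0; i < 6; i++) {
		struct pkt p = { .id = i };
		if (trans_queue(&p, finish_print))
			printf("dropped packet %d (backlog full)\n", i);
	}
	trans_reinject();
	return 0;
}

Capping the queue before enqueueing is what keeps a flood of deferred packets from consuming unbounded memory; the real code reuses netdev_max_backlog as that limit.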
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9542975eb2f9..70aa5cb0c659 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 again:
 	pol = rcu_dereference(sk->sk_policy[dir]);
 	if (pol != NULL) {
-		bool match = xfrm_selector_match(&pol->selector, fl, family);
+		bool match;
 		int err = 0;
 
+		if (pol->family != family) {
+			pol = NULL;
+			goto out;
+		}
+
+		match = xfrm_selector_match(&pol->selector, fl, family);
 		if (match) {
 			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
 				pol = NULL;
@@ -1833,6 +1839,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 		    sizeof(struct xfrm_policy *) * num_pols) == 0 &&
 	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
 		dst_hold(&xdst->u.dst);
+		xfrm_pols_put(pols, num_pols);
 		while (err > 0)
 			xfrm_state_put(xfrm[--err]);
 		return xdst;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 065d89606888..500b3391f474 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1343,6 +1343,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 
 	if (orig->aead) {
 		x->aead = xfrm_algo_aead_clone(orig->aead);
+		x->geniv = orig->geniv;
 		if (!x->aead)
 			goto error;
 	}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 983b0233767b..bdb48e5dba04 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
 
 static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 {
+	u16 prev_family;
 	int i;
 
 	if (nr > XFRM_MAX_DEPTH)
 		return -EINVAL;
 
+	prev_family = family;
+
 	for (i = 0; i < nr; i++) {
 		/* We never validated the ut->family value, so many
 		 * applications simply leave it at zero. The check was
@@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 		if (!ut[i].family)
 			ut[i].family = family;
 
+		if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
+		    (ut[i].family != prev_family))
+			return -EINVAL;
+
+		prev_family = ut[i].family;
+
 		switch (ut[i].family) {
 		case AF_INET:
 			break;
@@ -1445,6 +1454,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 		default:
 			return -EINVAL;
 		}
+
+		switch (ut[i].id.proto) {
+		case IPPROTO_AH:
+		case IPPROTO_ESP:
+		case IPPROTO_COMP:
+#if IS_ENABLED(CONFIG_IPV6)
+		case IPPROTO_ROUTING:
+		case IPPROTO_DSTOPTS:
+#endif
+		case IPSEC_PROTO_ANY:
+			break;
+		default:
+			return -EINVAL;
+		}
+
 	}
 
 	return 0;
@@ -2470,7 +2494,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_PROTO]		= { .type = NLA_U8 },
 	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },
 	[XFRMA_OFFLOAD_DEV]	= { .len = sizeof(struct xfrm_user_offload) },
-	[XFRMA_OUTPUT_MARK]	= { .len = NLA_U32 },
+	[XFRMA_OUTPUT_MARK]	= { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
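The validate_tmpl() hardening above enforces two rules per template: transport-mode entries must keep the address family of the preceding entry, and only a short whitelist of protocols is accepted. A pure-function sketch of that check, with simplified stand-in constants and struct fields rather than the kernel's xfrm_user_tmpl:

#include <stdio.h>

enum { MODE_TRANSPORT, MODE_TUNNEL };
enum { PROTO_ESP = 50, PROTO_AH = 51, PROTO_COMP = 108, PROTO_ANY = 255 };

struct tmpl { int mode; unsigned short family; int proto; };

static int validate_tmpl(int nr, const struct tmpl *ut, unsigned short family)
{
	unsigned short prev_family = family;

	for (int i = 0; i < nr; i++) {
		/* transport mode must not switch families mid-chain */
		if (ut[i].mode == MODE_TRANSPORT &&
		    ut[i].family != prev_family)
			return -1;
		prev_family = ut[i].family;

		switch (ut[i].proto) {
		case PROTO_AH:
		case PROTO_ESP:
		case PROTO_COMP:
		case PROTO_ANY:
			break;
		default:
			return -1;	/* unknown proto rejected */
		}
	}
	return 0;
}

int main(void)
{
	struct tmpl ok[]  = { { MODE_TRANSPORT, 2, PROTO_ESP } };
	struct tmpl bad[] = { { MODE_TRANSPORT, 10, PROTO_ESP } };

	printf("%d %d\n", validate_tmpl(1, ok, 2), validate_tmpl(1, bad, 2));
	return 0;
}

Seeding prev_family with the caller's family means the first template is checked against the policy's own family, so a chain cannot smuggle in a mismatched transport-mode entry at position zero either.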
diff --git a/scripts/genksyms/.gitignore b/scripts/genksyms/.gitignore
index 86dc07a01b43..e7836b47f060 100644
--- a/scripts/genksyms/.gitignore
+++ b/scripts/genksyms/.gitignore
@@ -1,4 +1,3 @@
-*.hash.c
 *.lex.c
 *.tab.c
 *.tab.h
diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c
index cbf4996dd9c1..8cee597d33a5 100644
--- a/scripts/kconfig/expr.c
+++ b/scripts/kconfig/expr.c
@@ -893,7 +893,10 @@ static enum string_value_kind expr_parse_string(const char *str,
 	switch (type) {
 	case S_BOOLEAN:
 	case S_TRISTATE:
-		return k_string;
+		val->s = !strcmp(str, "n") ? 0 :
+			 !strcmp(str, "m") ? 1 :
+			 !strcmp(str, "y") ? 2 : -1;
+		return k_signed;
 	case S_INT:
 		val->s = strtoll(str, &tail, 10);
 		kind = k_signed;
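The kconfig change above stops comparing tristate values as strings and maps "n"/"m"/"y" to the ordered integers 0/1/2 so relational operators reflect the intended n < m < y ordering. A minimal sketch of the mapping:

#include <stdio.h>
#include <string.h>

static long long tristate_value(const char *str)
{
	return !strcmp(str, "n") ? 0 :
	       !strcmp(str, "m") ? 1 :
	       !strcmp(str, "y") ? 2 : -1;
}

int main(void)
{
	/* String comparison gets "m" < "y" right by accident, but
	 * "n" < "m" fails because 'n' sorts after 'm'; the numeric
	 * mapping fixes that. Anything unrecognized maps to -1. */
	printf("n=%lld m=%lld y=%lld ?=%lld\n", tristate_value("n"),
	       tristate_value("m"), tristate_value("y"), tristate_value("x"));
	return 0;
}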
diff --git a/security/Kconfig b/security/Kconfig
index e8e449444e65..b0cb9a5f9448 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -54,6 +54,17 @@ config SECURITY_NETWORK
 	  implement socket and networking access controls.
 	  If you are unsure how to answer this question, answer N.
 
+config PAGE_TABLE_ISOLATION
+	bool "Remove the kernel mapping in user mode"
+	default y
+	depends on X86_64 && !UML
+	help
+	  This feature reduces the number of hardware side channels by
+	  ensuring that the majority of kernel addresses are not mapped
+	  into userspace.
+
+	  See Documentation/x86/pti.txt for more details.
+
 config SECURITY_INFINIBAND
 	bool "Infiniband Security Hooks"
 	depends on SECURITY && INFINIBAND
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 04ba9d0718ea..6a54d2ffa840 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -330,10 +330,7 @@ static struct aa_profile *__attach_match(const char *name,
 			continue;
 
 		if (profile->xmatch) {
-			if (profile->xmatch_len == len) {
-				conflict = true;
-				continue;
-			} else if (profile->xmatch_len > len) {
+			if (profile->xmatch_len >= len) {
 				unsigned int state;
 				u32 perm;
 
@@ -342,6 +339,10 @@ static struct aa_profile *__attach_match(const char *name,
 				perm = dfa_user_allow(profile->xmatch, state);
 				/* any accepting state means a valid match. */
 				if (perm & MAY_EXEC) {
+					if (profile->xmatch_len == len) {
+						conflict = true;
+						continue;
+					}
 					candidate = profile;
 					len = profile->xmatch_len;
 					conflict = false;
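The domain.c fix above moves the equal-length conflict test inside the "does this profile actually match" branch, so a same-length profile that never matches no longer poisons attachment. A simplified userspace sketch of the resulting longest-match selection, with an invented profile struct where a boolean stands in for the DFA match:

#include <stdio.h>

struct profile { const char *name; int xmatch_len; int matches; };

static const struct profile *attach_match(const struct profile *p, int n)
{
	const struct profile *candidate = NULL;
	int len = 0, conflict = 0;

	for (int i = 0; i < n; i++) {
		if (p[i].xmatch_len >= len && p[i].matches) {
			if (candidate && p[i].xmatch_len == len) {
				conflict = 1;	/* ambiguous, same length */
				continue;
			}
			candidate = &p[i];
			len = p[i].xmatch_len;
			conflict = 0;
		}
	}
	return conflict ? NULL : candidate;
}

int main(void)
{
	struct profile p[] = {
		{ "short", 3, 1 },
		{ "longer", 5, 1 },
		{ "nomatch", 7, 0 },	/* longest, but does not match */
	};
	const struct profile *best = attach_match(p, 3);

	printf("best: %s\n", best ? best->name : "(conflict)");
	return 0;
}

Only profiles that match may declare a conflict, and a strictly longer match always displaces a shorter one; "nomatch" above is ignored despite having the longest pattern.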
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 2b27bb79aec4..d7b7e7115160 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -133,6 +133,9 @@ extern struct aa_perms allperms;
 #define xcheck_labels_profiles(L1, L2, FN, args...)		\
 	xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args)
 
+#define xcheck_labels(L1, L2, P, FN1, FN2)			\
+	xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2)))
+
 
 void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
 void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 7ca0032e7ba9..b40678f3c1d5 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -64,40 +64,48 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va)
 		  FLAGS_NONE, GFP_ATOMIC);
 }
 
+/* assumes check for PROFILE_MEDIATES is already done */
 /* TODO: conditionals */
 static int profile_ptrace_perm(struct aa_profile *profile,
-			       struct aa_profile *peer, u32 request,
+			       struct aa_label *peer, u32 request,
 			       struct common_audit_data *sa)
 {
 	struct aa_perms perms = { };
 
-	/* need because of peer in cross check */
-	if (profile_unconfined(profile) ||
-	    !PROFILE_MEDIATES(profile, AA_CLASS_PTRACE))
-		return 0;
-
-	aad(sa)->peer = &peer->label;
-	aa_profile_match_label(profile, &peer->label, AA_CLASS_PTRACE, request,
+	aad(sa)->peer = peer;
+	aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request,
 			       &perms);
 	aa_apply_modes_to_perms(profile, &perms);
 	return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
 }
 
-static int cross_ptrace_perm(struct aa_profile *tracer,
-			     struct aa_profile *tracee, u32 request,
-			     struct common_audit_data *sa)
+static int profile_tracee_perm(struct aa_profile *tracee,
+			       struct aa_label *tracer, u32 request,
+			       struct common_audit_data *sa)
 {
+	if (profile_unconfined(tracee) || unconfined(tracer) ||
+	    !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE))
+		return 0;
+
+	return profile_ptrace_perm(tracee, tracer, request, sa);
+}
+
+static int profile_tracer_perm(struct aa_profile *tracer,
+			       struct aa_label *tracee, u32 request,
+			       struct common_audit_data *sa)
+{
+	if (profile_unconfined(tracer))
+		return 0;
+
 	if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE))
-		return xcheck(profile_ptrace_perm(tracer, tracee, request, sa),
-			      profile_ptrace_perm(tracee, tracer,
-						  request << PTRACE_PERM_SHIFT,
-						  sa));
-	/* policy uses the old style capability check for ptrace */
-	if (profile_unconfined(tracer) || tracer == tracee)
+		return profile_ptrace_perm(tracer, tracee, request, sa);
+
+	/* profile uses the old style capability check for ptrace */
+	if (&tracer->label == tracee)
 		return 0;
 
 	aad(sa)->label = &tracer->label;
-	aad(sa)->peer = &tracee->label;
+	aad(sa)->peer = tracee;
 	aad(sa)->request = 0;
 	aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1);
 
@@ -115,10 +123,13 @@ static int cross_ptrace_perm(struct aa_profile *tracer,
 int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
 		  u32 request)
 {
+	struct aa_profile *profile;
+	u32 xrequest = request << PTRACE_PERM_SHIFT;
 	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE);
 
-	return xcheck_labels_profiles(tracer, tracee, cross_ptrace_perm,
-				      request, &sa);
+	return xcheck_labels(tracer, tracee, profile,
+			profile_tracer_perm(profile, tracee, request, &sa),
+			profile_tracee_perm(profile, tracer, xrequest, &sa));
 }
 
 
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index ed9b4d0f9f7e..8c558cbce930 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -329,6 +329,9 @@ static int match_mnt_path_str(struct aa_profile *profile,
 	AA_BUG(!mntpath);
 	AA_BUG(!buffer);
 
+	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+		return 0;
+
 	error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer,
 			     &mntpnt, &info, profile->disconnected);
 	if (error)
@@ -380,6 +383,9 @@ static int match_mnt(struct aa_profile *profile, const struct path *path,
 	AA_BUG(!profile);
 	AA_BUG(devpath && !devbuffer);
 
+	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+		return 0;
+
 	if (devpath) {
 		error = aa_path_name(devpath, path_flags(profile, devpath),
 				     devbuffer, &devname, &info,
@@ -558,6 +564,9 @@ static int profile_umount(struct aa_profile *profile, struct path *path,
 	AA_BUG(!profile);
 	AA_BUG(!path);
 
+	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+		return 0;
+
 	error = aa_path_name(path, path_flags(profile, path), buffer, &name,
 			     &info, profile->disconnected);
 	if (error)
@@ -613,7 +622,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 	AA_BUG(!new_path);
 	AA_BUG(!old_path);
 
-	if (profile_unconfined(profile))
+	if (profile_unconfined(profile) ||
+	    !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
 		return aa_get_newest_label(&profile->label);
 
 	error = aa_path_name(old_path, path_flags(profile, old_path),
diff --git a/security/commoncap.c b/security/commoncap.c
index 4f8e09340956..48620c93d697 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -348,21 +348,18 @@ static __u32 sansflags(__u32 m)
 	return m & ~VFS_CAP_FLAGS_EFFECTIVE;
 }
 
-static bool is_v2header(size_t size, __le32 magic)
+static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
 {
-	__u32 m = le32_to_cpu(magic);
 	if (size != XATTR_CAPS_SZ_2)
 		return false;
-	return sansflags(m) == VFS_CAP_REVISION_2;
+	return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
 }
 
-static bool is_v3header(size_t size, __le32 magic)
+static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
 {
-	__u32 m = le32_to_cpu(magic);
-
 	if (size != XATTR_CAPS_SZ_3)
 		return false;
-	return sansflags(m) == VFS_CAP_REVISION_3;
+	return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
 }
 
 /*
@@ -405,7 +402,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 
 	fs_ns = inode->i_sb->s_user_ns;
 	cap = (struct vfs_cap_data *) tmpbuf;
-	if (is_v2header((size_t) ret, cap->magic_etc)) {
+	if (is_v2header((size_t) ret, cap)) {
 		/* If this is sizeof(vfs_cap_data) then we're ok with the
 		 * on-disk value, so return that. */
 		if (alloc)
@@ -413,7 +410,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 		else
 			kfree(tmpbuf);
 		return ret;
-	} else if (!is_v3header((size_t) ret, cap->magic_etc)) {
+	} else if (!is_v3header((size_t) ret, cap)) {
 		kfree(tmpbuf);
 		return -EINVAL;
 	}
@@ -470,9 +467,9 @@ static kuid_t rootid_from_xattr(const void *value, size_t size,
 	return make_kuid(task_ns, rootid);
 }
 
-static bool validheader(size_t size, __le32 magic)
+static bool validheader(size_t size, const struct vfs_cap_data *cap)
 {
-	return is_v2header(size, magic) || is_v3header(size, magic);
+	return is_v2header(size, cap) || is_v3header(size, cap);
 }
 
 /*
@@ -495,7 +492,7 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
 
 	if (!*ivalue)
 		return -EINVAL;
-	if (!validheader(size, cap->magic_etc))
+	if (!validheader(size, cap))
 		return -EINVAL;
 	if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
 		return -EPERM;
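The commoncap.c change above alters the helpers' interface: they take a pointer to the whole xattr value instead of a pre-extracted magic word, so the size check happens before any field is read. A standalone sketch of the pattern with simplified placeholder sizes and constants:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define CAP_REVISION_2	0x02000000u
#define CAP_REVISION_3	0x03000000u
#define FLAG_EFFECTIVE	0x000001u
#define SZ_2		12u
#define SZ_3		16u

struct cap_data { uint32_t magic_etc; unsigned char payload[12]; };

static uint32_t sansflags(uint32_t m) { return m & ~FLAG_EFFECTIVE; }

static int is_v2header(size_t size, const struct cap_data *cap)
{
	if (size != SZ_2)
		return 0;	/* size gate first, then touch fields */
	return sansflags(cap->magic_etc) == CAP_REVISION_2;
}

static int is_v3header(size_t size, const struct cap_data *cap)
{
	if (size != SZ_3)
		return 0;
	return sansflags(cap->magic_etc) == CAP_REVISION_3;
}

static int validheader(size_t size, const struct cap_data *cap)
{
	return is_v2header(size, cap) || is_v3header(size, cap);
}

int main(void)
{
	struct cap_data v2 = { .magic_etc = CAP_REVISION_2 | FLAG_EFFECTIVE };
	struct cap_data v3 = { .magic_etc = CAP_REVISION_3 };

	printf("%d %d %d\n", validheader(SZ_2, &v2), validheader(SZ_3, &v3),
	       validheader(SZ_2, &v3));
	return 0;
}

Passing the struct rather than one of its fields keeps every caller from dereferencing into a buffer whose length has not yet been validated.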
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index e49f448ee04f..c2db7e905f7d 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -455,7 +455,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm,
 		v = snd_pcm_hw_param_last(pcm, params, var, dir);
 	else
 		v = snd_pcm_hw_param_first(pcm, params, var, dir);
-	snd_BUG_ON(v < 0);
 	return v;
 }
 
@@ -1335,8 +1334,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
 
 	if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
 		return tmp;
-	mutex_lock(&runtime->oss.params_lock);
 	while (bytes > 0) {
+		if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+			tmp = -ERESTARTSYS;
+			break;
+		}
 		if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
 			tmp = bytes;
 			if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
@@ -1380,14 +1382,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
 			xfer += tmp;
 			if ((substream->f_flags & O_NONBLOCK) != 0 &&
 			    tmp != runtime->oss.period_bytes)
-				break;
+				tmp = -EAGAIN;
 		}
-	}
-	mutex_unlock(&runtime->oss.params_lock);
-	return xfer;
-
  err:
 	mutex_unlock(&runtime->oss.params_lock);
+		if (tmp < 0)
+			break;
+		if (signal_pending(current)) {
+			tmp = -ERESTARTSYS;
+			break;
+		}
+		tmp = 0;
+	}
 	return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
 }
 
@@ -1435,8 +1441,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
 
 	if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
 		return tmp;
-	mutex_lock(&runtime->oss.params_lock);
 	while (bytes > 0) {
+		if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+			tmp = -ERESTARTSYS;
+			break;
+		}
 		if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
 			if (runtime->oss.buffer_used == 0) {
 				tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
@@ -1467,12 +1476,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
 			bytes -= tmp;
 			xfer += tmp;
 		}
-	}
-	mutex_unlock(&runtime->oss.params_lock);
-	return xfer;
-
  err:
 	mutex_unlock(&runtime->oss.params_lock);
+		if (tmp < 0)
+			break;
+		if (signal_pending(current)) {
+			tmp = -ERESTARTSYS;
+			break;
+		}
+		tmp = 0;
+	}
 	return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
 }
 
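The OSS read/write fix above restructures the copy loop so the params lock is taken interruptibly at the top of each iteration and released at the bottom, letting a pending signal or a failed lock abort the transfer instead of stalling it. A skeleton of that loop shape with stubbed stand-ins, lock_interruptible() and signal_pending() here are local fakes, not real APIs:

#include <stdio.h>

static int lock_interruptible(void) { return 0; }	/* 0 = acquired */
static void unlock(void) { }
static int signal_pending(int iter) { return iter == 2; }	/* fake */

static long write_loop(long bytes, long period)
{
	long xfer = 0, tmp = 0;
	int iter = 0;

	while (bytes > 0) {
		if (lock_interruptible()) {
			tmp = -4;	/* -ERESTARTSYS in the real code */
			break;
		}
		tmp = bytes < period ? bytes : period;	/* copy one chunk */
		bytes -= tmp;
		xfer += tmp;
		unlock();
		if (tmp < 0)
			break;
		if (signal_pending(iter++)) {
			tmp = -4;
			break;		/* abort mid-transfer on a signal */
		}
		tmp = 0;
	}
	return xfer > 0 ? xfer : tmp;
}

int main(void)
{
	printf("transferred %ld of 10 bytes\n", write_loop(10, 3));
	return 0;
}

Returning the partial transfer count when anything was copied, and the error only when nothing was, preserves the usual short-write semantics while still letting signals through.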
diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
index cadc93792868..85a56af104bd 100644
--- a/sound/core/oss/pcm_plugin.c
+++ b/sound/core/oss/pcm_plugin.c
@@ -592,18 +592,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st
 	snd_pcm_sframes_t frames = size;
 
 	plugin = snd_pcm_plug_first(plug);
-	while (plugin && frames > 0) {
+	while (plugin) {
+		if (frames <= 0)
+			return frames;
 		if ((next = plugin->next) != NULL) {
 			snd_pcm_sframes_t frames1 = frames;
-			if (plugin->dst_frames)
+			if (plugin->dst_frames) {
 				frames1 = plugin->dst_frames(plugin, frames);
+				if (frames1 <= 0)
+					return frames1;
+			}
 			if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
 				return err;
 			}
 			if (err != frames1) {
 				frames = err;
-				if (plugin->src_frames)
+				if (plugin->src_frames) {
 					frames = plugin->src_frames(plugin, frames1);
+					if (frames <= 0)
+						return frames;
+				}
 			}
 		} else
 			dst_channels = NULL;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 10e7ef7a8804..db7894bb028c 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1632,7 +1632,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
 		return changed;
 	if (params->rmask) {
 		int err = snd_pcm_hw_refine(pcm, params);
-		if (snd_BUG_ON(err < 0))
+		if (err < 0)
 			return err;
 	}
 	return snd_pcm_hw_param_value(params, var, dir);
@@ -1678,7 +1678,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
 		return changed;
 	if (params->rmask) {
 		int err = snd_pcm_hw_refine(pcm, params);
-		if (snd_BUG_ON(err < 0))
+		if (err < 0)
 			return err;
 	}
 	return snd_pcm_hw_param_value(params, var, dir);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index a4d92e46c459..f08772568c17 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -2580,7 +2580,7 @@ static snd_pcm_sframes_t forward_appl_ptr(struct snd_pcm_substream *substream,
 	return ret < 0 ? ret : frames;
 }
 
-/* decrease the appl_ptr; returns the processed frames or a negative error */
+/* decrease the appl_ptr; returns the processed frames or zero for error */
 static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
 					 snd_pcm_uframes_t frames,
 					 snd_pcm_sframes_t avail)
@@ -2597,7 +2597,12 @@ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
 	if (appl_ptr < 0)
 		appl_ptr += runtime->boundary;
 	ret = pcm_lib_apply_appl_ptr(substream, appl_ptr);
-	return ret < 0 ? ret : frames;
+	/* NOTE: we return zero for errors because PulseAudio gets depressed
+	 * upon receiving an error from rewind ioctl and stops processing
+	 * any longer.  Returning zero means that no rewind is done, so
+	 * it's not absolutely wrong to answer like that.
+	 */
+	return ret < 0 ? 0 : frames;
 }
 
 static snd_pcm_sframes_t snd_pcm_playback_rewind(struct snd_pcm_substream *substream,
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index b3b353d72527..f055ca10bbc1 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
 	return 0;
 }
 
-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+static int __snd_rawmidi_info_select(struct snd_card *card,
+				     struct snd_rawmidi_info *info)
 {
 	struct snd_rawmidi *rmidi;
 	struct snd_rawmidi_str *pstr;
 	struct snd_rawmidi_substream *substream;
 
-	mutex_lock(&register_mutex);
 	rmidi = snd_rawmidi_search(card, info->device);
-	mutex_unlock(&register_mutex);
 	if (!rmidi)
 		return -ENXIO;
 	if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
 	}
 	return -ENXIO;
 }
+
+int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+{
+	int ret;
+
+	mutex_lock(&register_mutex);
+	ret = __snd_rawmidi_info_select(card, info);
+	mutex_unlock(&register_mutex);
+	return ret;
+}
 EXPORT_SYMBOL(snd_rawmidi_info_select);
 
 static int snd_rawmidi_info_select_user(struct snd_card *card,
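The rawmidi change above is the classic locked-wrapper refactoring: the body moves into a double-underscore helper that assumes the lock is held, and the exported entry point becomes a thin wrapper that holds the lock across the whole lookup-and-use, not just the lookup. A pthread-based sketch of the shape, with invented names:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t register_mutex = PTHREAD_MUTEX_INITIALIZER;
static int registry[4] = { 10, 20, 30, 40 };

static int __info_select(int device, int *out)	/* caller holds the lock */
{
	if (device < 0 || device >= 4)
		return -1;
	*out = registry[device];	/* safe: cannot vanish under us */
	return 0;
}

static int info_select(int device, int *out)
{
	int ret;

	pthread_mutex_lock(&register_mutex);
	ret = __info_select(device, out);
	pthread_mutex_unlock(&register_mutex);
	return ret;
}

int main(void)
{
	int val;

	if (info_select(2, &val) == 0)
		printf("device 2 -> %d\n", val);
	return 0;
}

Dropping the lock between lookup and use, as the pre-fix code did, is the race the wrapper eliminates: the object found by the search could be unregistered before it was dereferenced.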
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
index afac886ffa28..0333143a1fa7 100644
--- a/sound/drivers/aloop.c
+++ b/sound/drivers/aloop.c
@@ -39,6 +39,7 @@
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/pcm.h>
+#include <sound/pcm_params.h>
 #include <sound/info.h>
 #include <sound/initval.h>
 
@@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd)
 	return 0;
 }
 
-static void params_change_substream(struct loopback_pcm *dpcm,
-				    struct snd_pcm_runtime *runtime)
-{
-	struct snd_pcm_runtime *dst_runtime;
-
-	if (dpcm == NULL || dpcm->substream == NULL)
-		return;
-	dst_runtime = dpcm->substream->runtime;
-	if (dst_runtime == NULL)
-		return;
-	dst_runtime->hw = dpcm->cable->hw;
-}
-
 static void params_change(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream)
 	cable->hw.rate_max = runtime->rate;
 	cable->hw.channels_min = runtime->channels;
 	cable->hw.channels_max = runtime->channels;
-	params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
-				runtime);
-	params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
-				runtime);
 }
 
 static int loopback_prepare(struct snd_pcm_substream *substream)
@@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream)
 static int rule_format(struct snd_pcm_hw_params *params,
 		       struct snd_pcm_hw_rule *rule)
 {
+	struct loopback_pcm *dpcm = rule->private;
+	struct loopback_cable *cable = dpcm->cable;
+	struct snd_mask m;
 
-	struct snd_pcm_hardware *hw = rule->private;
-	struct snd_mask *maskp = hw_param_mask(params, rule->var);
-
-	maskp->bits[0] &= (u_int32_t)hw->formats;
-	maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
-	memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
-	if (! maskp->bits[0] && ! maskp->bits[1])
-		return -EINVAL;
-	return 0;
+	snd_mask_none(&m);
+	mutex_lock(&dpcm->loopback->cable_lock);
+	m.bits[0] = (u_int32_t)cable->hw.formats;
+	m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
+	mutex_unlock(&dpcm->loopback->cable_lock);
+	return snd_mask_refine(hw_param_mask(params, rule->var), &m);
 }
 
 static int rule_rate(struct snd_pcm_hw_params *params,
 		     struct snd_pcm_hw_rule *rule)
 {
-	struct snd_pcm_hardware *hw = rule->private;
+	struct loopback_pcm *dpcm = rule->private;
+	struct loopback_cable *cable = dpcm->cable;
 	struct snd_interval t;
 
-	t.min = hw->rate_min;
-	t.max = hw->rate_max;
+	mutex_lock(&dpcm->loopback->cable_lock);
+	t.min = cable->hw.rate_min;
+	t.max = cable->hw.rate_max;
+	mutex_unlock(&dpcm->loopback->cable_lock);
 	t.openmin = t.openmax = 0;
 	t.integer = 0;
 	return snd_interval_refine(hw_param_interval(params, rule->var), &t);
@@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params,
 static int rule_channels(struct snd_pcm_hw_params *params,
 			 struct snd_pcm_hw_rule *rule)
 {
-	struct snd_pcm_hardware *hw = rule->private;
+	struct loopback_pcm *dpcm = rule->private;
+	struct loopback_cable *cable = dpcm->cable;
 	struct snd_interval t;
 
-	t.min = hw->channels_min;
-	t.max = hw->channels_max;
+	mutex_lock(&dpcm->loopback->cable_lock);
+	t.min = cable->hw.channels_min;
+	t.max = cable->hw.channels_max;
+	mutex_unlock(&dpcm->loopback->cable_lock);
 	t.openmin = t.openmax = 0;
 	t.integer = 0;
 	return snd_interval_refine(hw_param_interval(params, rule->var), &t);
 }
 
+static void free_cable(struct snd_pcm_substream *substream)
+{
+	struct loopback *loopback = substream->private_data;
+	int dev = get_cable_index(substream);
+	struct loopback_cable *cable;
+
+	cable = loopback->cables[substream->number][dev];
+	if (!cable)
+		return;
+	if (cable->streams[!substream->stream]) {
+		/* other stream is still alive */
+		cable->streams[substream->stream] = NULL;
+	} else {
+		/* free the cable */
+		loopback->cables[substream->number][dev] = NULL;
+		kfree(cable);
+	}
+}
+
 static int loopback_open(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct loopback *loopback = substream->private_data;
 	struct loopback_pcm *dpcm;
-	struct loopback_cable *cable;
+	struct loopback_cable *cable = NULL;
 	int err = 0;
 	int dev = get_cable_index(substream);
 
@@ -681,7 +690,6 @@ static int loopback_open(struct snd_pcm_substream *substream)
 	if (!cable) {
 		cable = kzalloc(sizeof(*cable), GFP_KERNEL);
 		if (!cable) {
-			kfree(dpcm);
 			err = -ENOMEM;
 			goto unlock;
 		}
@@ -699,19 +707,19 @@ static int loopback_open(struct snd_pcm_substream *substream)
 	/* are cached -> they do not reflect the actual state */
 	err = snd_pcm_hw_rule_add(runtime, 0,
 				  SNDRV_PCM_HW_PARAM_FORMAT,
-				  rule_format, &runtime->hw,
+				  rule_format, dpcm,
 				  SNDRV_PCM_HW_PARAM_FORMAT, -1);
 	if (err < 0)
 		goto unlock;
 	err = snd_pcm_hw_rule_add(runtime, 0,
 				  SNDRV_PCM_HW_PARAM_RATE,
-				  rule_rate, &runtime->hw,
+				  rule_rate, dpcm,
 				  SNDRV_PCM_HW_PARAM_RATE, -1);
 	if (err < 0)
 		goto unlock;
 	err = snd_pcm_hw_rule_add(runtime, 0,
 				  SNDRV_PCM_HW_PARAM_CHANNELS,
-				  rule_channels, &runtime->hw,
+				  rule_channels, dpcm,
 				  SNDRV_PCM_HW_PARAM_CHANNELS, -1);
 	if (err < 0)
 		goto unlock;
@@ -723,6 +731,10 @@ static int loopback_open(struct snd_pcm_substream *substream)
 	else
 		runtime->hw = cable->hw;
  unlock:
+	if (err < 0) {
+		free_cable(substream);
+		kfree(dpcm);
+	}
 	mutex_unlock(&loopback->cable_lock);
 	return err;
 }
@@ -731,20 +743,10 @@ static int loopback_close(struct snd_pcm_substream *substream)
 {
 	struct loopback *loopback = substream->private_data;
 	struct loopback_pcm *dpcm = substream->runtime->private_data;
-	struct loopback_cable *cable;
-	int dev = get_cable_index(substream);
 
 	loopback_timer_stop(dpcm);
 	mutex_lock(&loopback->cable_lock);
-	cable = loopback->cables[substream->number][dev];
-	if (cable->streams[!substream->stream]) {
-		/* other stream is still alive */
-		cable->streams[substream->stream] = NULL;
-	} else {
-		/* free the cable */
-		loopback->cables[substream->number][dev] = NULL;
-		kfree(cable);
-	}
+	free_cable(substream);
 	mutex_unlock(&loopback->cable_lock);
 	return 0;
 }
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 038a180d3f81..cbe818eda336 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
  */
 int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
 {
-	if (WARN_ON(!hdac_acomp))
+	if (!hdac_acomp)
 		return -ENODEV;
 
 	hdac_acomp->audio_ops = aops;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index a81aacf684b2..37e1cf8218ff 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -271,6 +271,8 @@ enum {
 	CXT_FIXUP_HP_SPECTRE,
 	CXT_FIXUP_HP_GATE_MIC,
 	CXT_FIXUP_MUTE_LED_GPIO,
+	CXT_FIXUP_HEADSET_MIC,
+	CXT_FIXUP_HP_MIC_NO_PRESENCE,
 };
 
 /* for hda_fixup_thinkpad_acpi() */
@@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec,
 	}
 }
 
+static void cxt_fixup_headset_mic(struct hda_codec *codec,
+				  const struct hda_fixup *fix, int action)
+{
+	struct conexant_spec *spec = codec->spec;
+
+	switch (action) {
+	case HDA_FIXUP_ACT_PRE_PROBE:
+		spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
+		break;
+	}
+}
+
 /* OPLC XO 1.5 fixup */
 
 /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors)
@@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = cxt_fixup_mute_led_gpio,
 	},
+	[CXT_FIXUP_HEADSET_MIC] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = cxt_fixup_headset_mic,
+	},
+	[CXT_FIXUP_HP_MIC_NO_PRESENCE] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1a, 0x02a1113c },
+			{ }
+		},
+		.chained = true,
+		.chain_id = CXT_FIXUP_HEADSET_MIC,
+	},
 };
 
 static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
 	SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
 	SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO),
 	SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO),
+	SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
 	SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index c19c81d230bd..b4f1b6e88305 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
 #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
 				((codec)->core.vendor_id == 0x80862800))
+#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
 				|| is_skylake(codec) || is_broxton(codec) \
-				|| is_kabylake(codec)) || is_geminilake(codec)
-
+				|| is_kabylake(codec)) || is_geminilake(codec) \
+				|| is_cannonlake(codec)
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
 #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",	patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI",	patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI",	patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI",	patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI",	patch_generic_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4b21f71d685c..8fd2d9c62c96 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0292:
 		alc_update_coef_idx(codec, 0x4, 1<<15, 0);
 		break;
-	case 0x10ec0215:
 	case 0x10ec0225:
+	case 0x10ec0295:
+	case 0x10ec0299:
+		alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
+		/* fallthrough */
+	case 0x10ec0215:
 	case 0x10ec0233:
 	case 0x10ec0236:
 	case 0x10ec0255:
@@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0286:
 	case 0x10ec0288:
 	case 0x10ec0285:
-	case 0x10ec0295:
 	case 0x10ec0298:
 	case 0x10ec0289:
-	case 0x10ec0299:
 		alc_update_coef_idx(codec, 0x10, 1<<9, 0);
 		break;
 	case 0x10ec0275:
@@ -5185,6 +5187,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
 	}
 }
 
+/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
+static void alc274_fixup_bind_dacs(struct hda_codec *codec,
+				    const struct hda_fixup *fix, int action)
+{
+	struct alc_spec *spec = codec->spec;
+	static hda_nid_t preferred_pairs[] = {
+		0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
+		0
+	};
+
+	if (action != HDA_FIXUP_ACT_PRE_PROBE)
+		return;
+
+	spec->gen.preferred_dacs = preferred_pairs;
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -5302,6 +5320,8 @@ enum {
 	ALC233_FIXUP_LENOVO_MULTI_CODECS,
 	ALC294_FIXUP_LENOVO_MIC_LOCATION,
 	ALC700_FIXUP_INTEL_REFERENCE,
+	ALC274_FIXUP_DELL_BIND_DACS,
+	ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6112,6 +6132,21 @@ static const struct hda_fixup alc269_fixups[] = {
 			{}
 		}
 	},
+	[ALC274_FIXUP_DELL_BIND_DACS] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc274_fixup_bind_dacs,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
+	[ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1b, 0x0401102f },
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC274_FIXUP_DELL_BIND_DACS
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6295,6 +6330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+	SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
@@ -6553,6 +6589,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x1b, 0x01011020},
 		{0x21, 0x02211010}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+		{0x12, 0x90a60130},
+		{0x14, 0x90170110},
+		{0x1b, 0x01011020},
+		{0x21, 0x0221101f}),
+	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		{0x12, 0x90a60160},
 		{0x14, 0x90170120},
 		{0x21, 0x02211030}),
@@ -6578,7 +6619,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x14, 0x90170110},
 		{0x1b, 0x90a70130},
 		{0x21, 0x03211020}),
-	SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+	SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
 		{0x12, 0xb7a60130},
 		{0x13, 0xb8a61140},
 		{0x16, 0x90170110},
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index 9f521a55d610..b5e41df6bb3a 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev)
 	struct resource *res;
 	const u32 *pdata = pdev->dev.platform_data;
 
+	if (!pdata) {
+		dev_err(&pdev->dev, "Missing platform data\n");
+		return -ENODEV;
+	}
+
 	audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data),
 					GFP_KERNEL);
 	if (audio_drv_data == NULL)
@@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(audio_drv_data->acp_mmio))
+		return PTR_ERR(audio_drv_data->acp_mmio);
 
 	/* The following members gets populated in device 'open'
 	 * function. Till then interrupts are disabled in 'acp_init'
diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
index 4a56f3dfba51..dcee145dd179 100644
--- a/sound/soc/atmel/Kconfig
+++ b/sound/soc/atmel/Kconfig
@@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731
 config SND_ATMEL_SOC_CLASSD
 	tristate "Atmel ASoC driver for boards using CLASSD"
 	depends on ARCH_AT91 || COMPILE_TEST
-	select SND_ATMEL_SOC_DMA
+	select SND_SOC_GENERIC_DMAENGINE_PCM
 	select REGMAP_MMIO
 	help
 	  Say Y if you want to add support for Atmel ASoC driver for boards using
diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c
index b2d42ec1dcd9..56564ce90cb6 100644
--- a/sound/soc/codecs/da7218.c
+++ b/sound/soc/codecs/da7218.c
@@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec)
 	}
 
 	if (da7218->dev_id == DA7218_DEV_ID) {
-		hpldet_np = of_find_node_by_name(np, "da7218_hpldet");
+		hpldet_np = of_get_child_by_name(np, "da7218_hpldet");
 		if (!hpldet_np)
 			return pdata;
 
diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c
index 5f3c42c4f74a..066ea2f4ce7b 100644
--- a/sound/soc/codecs/msm8916-wcd-analog.c
+++ b/sound/soc/codecs/msm8916-wcd-analog.c
@@ -267,7 +267,7 @@
 #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
 				  SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-				    SNDRV_PCM_FMTBIT_S24_LE)
+				    SNDRV_PCM_FMTBIT_S32_LE)
 
 static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
 		SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4;
diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c
index a10a724eb448..13354d6304a8 100644
--- a/sound/soc/codecs/msm8916-wcd-digital.c
+++ b/sound/soc/codecs/msm8916-wcd-digital.c
@@ -194,7 +194,7 @@
 				    SNDRV_PCM_RATE_32000 | \
 				    SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-				     SNDRV_PCM_FMTBIT_S24_LE)
+				     SNDRV_PCM_FMTBIT_S32_LE)
 
 struct msm8916_wcd_digital_priv {
 	struct clk *ahbclk, *mclk;
@@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream,
 				    RX_I2S_CTL_RX_I2S_MODE_MASK,
 				    RX_I2S_CTL_RX_I2S_MODE_16);
 		break;
-	case SNDRV_PCM_FORMAT_S24_LE:
+	case SNDRV_PCM_FORMAT_S32_LE:
 		snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL,
 				    TX_I2S_CTL_TX_I2S_MODE_MASK,
 				    TX_I2S_CTL_TX_I2S_MODE_32);
diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index 714ce17da717..e853a6dfd33b 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w,
 
 	switch (event) {
 	case SND_SOC_DAPM_POST_PMU:
+		msleep(125);
 		regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL,
 			NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC);
 		break;
diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c
index 2df91db765ac..64bf26cec20d 100644
--- a/sound/soc/codecs/rt5514-spi.c
+++ b/sound/soc/codecs/rt5514-spi.c
@@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform)
 			dev_err(&rt5514_spi->dev,
 				"%s Failed to reguest IRQ: %d\n", __func__,
 				ret);
+		else
+			device_init_wakeup(rt5514_dsp->dev, true);
 	}
 
 	return 0;
@@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi)
 		return ret;
 	}
 
-	device_init_wakeup(&spi->dev, true);
-
 	return 0;
 }
 
@@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev)
 	if (device_may_wakeup(dev))
 		disable_irq_wake(irq);
 
-	if (rt5514_dsp->substream) {
-		rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf));
-		if (buf[0] & RT5514_IRQ_STATUS_BIT)
-			rt5514_schedule_copy(rt5514_dsp);
+	if (rt5514_dsp) {
+		if (rt5514_dsp->substream) {
+			rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf,
+				sizeof(buf));
+			if (buf[0] & RT5514_IRQ_STATUS_BIT)
+				rt5514_schedule_copy(rt5514_dsp);
+		}
 	}
 
 	return 0;
diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c
index 2a5b5d74e697..2dd6e9f990a4 100644
--- a/sound/soc/codecs/rt5514.c
+++ b/sound/soc/codecs/rt5514.c
@@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = {
 	SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0),
 	SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0),
 
-	SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0,
+	SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0,
 		rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU),
 
 	SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1,
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index f020d2d1eef4..edc152c8a1fe 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
 	regmap_read(regmap, RT5645_VENDOR_ID, &val);
 	rt5645->v_id = val & 0xff;
 
+	regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080);
+
 	ret = regmap_register_patch(rt5645->regmap, init_list,
 				    ARRAY_SIZE(init_list));
 	if (ret != 0)
diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c
index b036c9dc0c8c..d329bf719d80 100644
--- a/sound/soc/codecs/rt5663.c
+++ b/sound/soc/codecs/rt5663.c
@@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert)
 			RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN);
 		snd_soc_update_bits(codec, RT5663_IRQ_1,
 			RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN);
+		snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+			RT5663_EM_JD_MASK, RT5663_EM_JD_RST);
+		snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+			RT5663_EM_JD_MASK, RT5663_EM_JD_NOR);
 
 		while (true) {
 			regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val);
diff --git a/sound/soc/codecs/rt5663.h b/sound/soc/codecs/rt5663.h
index c5a9b69579ad..03adc8004ba9 100644
--- a/sound/soc/codecs/rt5663.h
+++ b/sound/soc/codecs/rt5663.h
@@ -1029,6 +1029,10 @@
 #define RT5663_POL_EXT_JD_SHIFT			10
 #define RT5663_POL_EXT_JD_EN			(0x1 << 10)
 #define RT5663_POL_EXT_JD_DIS			(0x0 << 10)
+#define RT5663_EM_JD_MASK			(0x1 << 7)
+#define RT5663_EM_JD_SHIFT			7
+#define RT5663_EM_JD_NOR			(0x1 << 7)
+#define RT5663_EM_JD_RST			(0x0 << 7)
 
 /* DACREF LDO Control (0x0112)*/
 #define RT5663_PWR_LDO_DACREFL_MASK	(0x1 << 9)
diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h
index 730fb2058869..1ff3edb7bbb6 100644
--- a/sound/soc/codecs/tlv320aic31xx.h
+++ b/sound/soc/codecs/tlv320aic31xx.h
@@ -116,7 +116,7 @@ struct aic31xx_pdata {
 /* INT2 interrupt control */
 #define AIC31XX_INT2CTRL	AIC31XX_REG(0, 49)
 /* GPIO1 control */
-#define AIC31XX_GPIO1		AIC31XX_REG(0, 50)
+#define AIC31XX_GPIO1		AIC31XX_REG(0, 51)
 
 #define AIC31XX_DACPRB		AIC31XX_REG(0, 60)
 /* ADC Instruction Set Register */
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index c482b2e7a7d2..cfe72b9d4356 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
 	struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev);
 	struct device_node *twl4030_codec_node = NULL;
 
-	twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node,
+	twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node,
 						  "codec");
 
 	if (!pdata && twl4030_codec_node) {
@@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
 				     GFP_KERNEL);
 		if (!pdata) {
 			dev_err(codec->dev, "Can not allocate memory\n");
+			of_node_put(twl4030_codec_node);
 			return NULL;
 		}
 		twl4030_setup_pdata_of(pdata, twl4030_codec_node);
+		of_node_put(twl4030_codec_node);
 	}
 
 	return pdata;
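The twl4030 hunk above plugs a device-tree node refcount leak: of_get_child_by_name() hands back the child node with its refcount raised, so every exit path afterwards has to drop it with of_node_put(). A minimal sketch of that pattern, assuming only the standard OF API (the "codec" child name here is illustrative, not part of this patch):

    #include <linux/of.h>
    #include <linux/errno.h>

    static int parse_codec_child(struct device_node *parent)
    {
    	/* of_get_child_by_name() returns the node with a reference held */
    	struct device_node *child = of_get_child_by_name(parent, "codec");

    	if (!child)
    		return -ENODEV;

    	/* ... read properties; any early return must put the node first ... */

    	of_node_put(child);	/* balance the get on every exit path */
    	return 0;
    }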
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 65c059b5ffd7..66e32f5d2917 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
 		 le64_to_cpu(footer->timestamp));
 
 	while (pos < firmware->size &&
-	       pos - firmware->size > sizeof(*region)) {
+	       sizeof(*region) < firmware->size - pos) {
 		region = (void *)&(firmware->data[pos]);
 		region_name = "Unknown";
 		reg = 0;
@@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp)
 			 regions, le32_to_cpu(region->len), offset,
 			 region_name);
 
-		if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
-		    firmware->size) {
+		if (le32_to_cpu(region->len) >
+		    firmware->size - pos - sizeof(*region)) {
 			adsp_err(dsp,
 				 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
 				 file, regions, region_name,
@@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 
 	blocks = 0;
 	while (pos < firmware->size &&
-	       pos - firmware->size > sizeof(*blk)) {
+	       sizeof(*blk) < firmware->size - pos) {
 		blk = (void *)(&firmware->data[pos]);
 
 		type = le16_to_cpu(blk->type);
@@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 		}
 
 		if (reg) {
-			if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
-			    firmware->size) {
+			if (le32_to_cpu(blk->len) >
+			    firmware->size - pos - sizeof(*blk)) {
 				adsp_err(dsp,
 					 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
 					 file, blocks, region_name,
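All four wm_adsp hunks swap an additive bounds check for a subtractive one: pos + len + sizeof(*region) can wrap around when len is a huge untrusted 32-bit value from the firmware file, while len > size - pos - sizeof(*region) cannot, because the reworked loop condition already guarantees sizeof(*region) < size - pos. A standalone sketch of the same idea, using a hypothetical record header rather than the driver's real structures:

    #include <stddef.h>
    #include <stdint.h>

    struct blob_hdr { uint32_t len; };

    /* Returns nonzero if the record starting at 'pos' fits in 'size' bytes. */
    static int record_fits(const uint8_t *data, size_t size, size_t pos)
    {
    	const struct blob_hdr *hdr;

    	/* Mirrors the patched while-loop condition: after this check,
    	 * size - pos > sizeof(*hdr), so the subtraction below cannot
    	 * underflow. */
    	if (pos >= size || sizeof(*hdr) >= size - pos)
    		return 0;

    	hdr = (const void *)(data + pos);

    	/* Subtractive form: no wrap-around even for a huge hdr->len. */
    	return hdr->len <= size - pos - sizeof(*hdr);
    }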
diff --git a/sound/soc/fsl/fsl_asrc.h b/sound/soc/fsl/fsl_asrc.h
index 0f163abe4ba3..52c27a358933 100644
--- a/sound/soc/fsl/fsl_asrc.h
+++ b/sound/soc/fsl/fsl_asrc.h
@@ -260,8 +260,8 @@
 #define ASRFSTi_OUTPUT_FIFO_SHIFT	12
 #define ASRFSTi_OUTPUT_FIFO_MASK	(((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT)
 #define ASRFSTi_IAEi_SHIFT		11
-#define ASRFSTi_IAEi_MASK		(1 << ASRFSTi_OAFi_SHIFT)
-#define ASRFSTi_IAEi			(1 << ASRFSTi_OAFi_SHIFT)
+#define ASRFSTi_IAEi_MASK		(1 << ASRFSTi_IAEi_SHIFT)
+#define ASRFSTi_IAEi			(1 << ASRFSTi_IAEi_SHIFT)
 #define ASRFSTi_INPUT_FIFO_WIDTH	7
 #define ASRFSTi_INPUT_FIFO_SHIFT	0
 #define ASRFSTi_INPUT_FIFO_MASK		((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1)
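The fsl_asrc.h fix is a pure copy-paste bug: the IAEi mask and bit were built from the neighbouring OAFi field's shift. Deriving every mask from its own _SHIFT, or using the BIT()/GENMASK() helpers, makes that class of error harder to write. An illustrative sketch, not the driver's actual macros:

    #include <linux/bits.h>

    #define DEMO_IAE_SHIFT		11
    #define DEMO_IAE_MASK		BIT(DEMO_IAE_SHIFT)	/* always from its own shift */
    #define DEMO_OUT_FIFO_SHIFT	12
    #define DEMO_OUT_FIFO_MASK	GENMASK(18, DEMO_OUT_FIFO_SHIFT)	/* 7-bit field */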
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index f2f51e06e22c..424bafaf51ef 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -38,6 +38,7 @@
 #include <linux/ctype.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/of.h>
@@ -265,6 +266,8 @@ struct fsl_ssi_private {
 
 	u32 fifo_watermark;
 	u32 dma_maxburst;
+
+	struct mutex ac97_reg_lock;
 };
 
 /*
@@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
 	if (reg > 0x7f)
 		return;
 
+	mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
 	ret = clk_prepare_enable(fsl_ac97_data->clk);
 	if (ret) {
 		pr_err("ac97 write clk_prepare_enable failed: %d\n",
 			ret);
-		return;
+		goto ret_unlock;
 	}
 
 	lreg = reg << 12;
@@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
 	udelay(100);
 
 	clk_disable_unprepare(fsl_ac97_data->clk);
+
+ret_unlock:
+	mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
 }
 
 static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
@@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 {
 	struct regmap *regs = fsl_ac97_data->regs;
 
-	unsigned short val = -1;
+	unsigned short val = 0;
 	u32 reg_val;
 	unsigned int lreg;
 	int ret;
 
+	mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
 	ret = clk_prepare_enable(fsl_ac97_data->clk);
 	if (ret) {
 		pr_err("ac97 read clk_prepare_enable failed: %d\n",
 			ret);
-		return -1;
+		goto ret_unlock;
 	}
 
 	lreg = (reg & 0x7f) << 12;
@@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 
 	clk_disable_unprepare(fsl_ac97_data->clk);
 
+ret_unlock:
+	mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
 	return val;
 }
 
@@ -1458,12 +1470,6 @@ static int fsl_ssi_probe(struct platform_device *pdev)
 		       sizeof(fsl_ssi_ac97_dai));
 
 		fsl_ac97_data = ssi_private;
-
-		ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
-		if (ret) {
-			dev_err(&pdev->dev, "could not set AC'97 ops\n");
-			return ret;
-		}
 	} else {
 		/* Initialize this copy of the CPU DAI driver structure */
 		memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template,
@@ -1574,6 +1580,15 @@ static int fsl_ssi_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	if (fsl_ssi_is_ac97(ssi_private)) {
+		mutex_init(&ssi_private->ac97_reg_lock);
+		ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
+		if (ret) {
+			dev_err(&pdev->dev, "could not set AC'97 ops\n");
+			goto error_ac97_ops;
+		}
+	}
+
 	ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component,
 					      &ssi_private->cpu_dai_drv, 1);
 	if (ret) {
@@ -1657,6 +1672,13 @@ error_sound_card:
 	fsl_ssi_debugfs_remove(&ssi_private->dbg_stats);
 
 error_asoc_register:
+	if (fsl_ssi_is_ac97(ssi_private))
+		snd_soc_set_ac97_ops(NULL);
+
+error_ac97_ops:
+	if (fsl_ssi_is_ac97(ssi_private))
+		mutex_destroy(&ssi_private->ac97_reg_lock);
+
 	if (ssi_private->soc->imx)
 		fsl_ssi_imx_clean(pdev, ssi_private);
 
@@ -1675,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev)
 	if (ssi_private->soc->imx)
 		fsl_ssi_imx_clean(pdev, ssi_private);
 
-	if (fsl_ssi_is_ac97(ssi_private))
+	if (fsl_ssi_is_ac97(ssi_private)) {
 		snd_soc_set_ac97_ops(NULL);
+		mutex_destroy(&ssi_private->ac97_reg_lock);
+	}
 
 	return 0;
 }
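The fsl_ssi changes serialize the AC'97 register accessors with a mutex and route every failure through an unlock label; they also move snd_soc_set_ac97_ops_of_reset() after the mutex_init() so the ops can never run against an uninitialized lock. The lock/goto-unlock shape, reduced to a sketch (struct dev_priv and the bus_* helpers below are hypothetical stand-ins for the clock and register plumbing):

    #include <linux/mutex.h>

    struct dev_priv {
    	struct mutex reg_lock;	/* serializes register access */
    };

    /* Hypothetical bus helpers, standing in for clk enable plus MMIO writes. */
    int bus_clock_on(struct dev_priv *priv);
    void bus_clock_off(struct dev_priv *priv);
    void bus_write_reg(struct dev_priv *priv, unsigned short reg, unsigned short val);

    static void dev_reg_write(struct dev_priv *priv, unsigned short reg,
    			  unsigned short val)
    {
    	mutex_lock(&priv->reg_lock);

    	/* on failure the lock must still be dropped, hence the goto */
    	if (bus_clock_on(priv))
    		goto out_unlock;

    	bus_write_reg(priv, reg, val);
    	bus_clock_off(priv);

    out_unlock:
    	mutex_unlock(&priv->reg_lock);
    }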
diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c
index 6f9a8bcf20f3..6dcad0a8a0d0 100644
--- a/sound/soc/intel/boards/kbl_rt5663_max98927.c
+++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c
@@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
101 { "ssp0 Tx", NULL, "spk_out" }, 101 { "ssp0 Tx", NULL, "spk_out" },
102 102
103 { "AIF Playback", NULL, "ssp1 Tx" }, 103 { "AIF Playback", NULL, "ssp1 Tx" },
104 { "ssp1 Tx", NULL, "hs_out" }, 104 { "ssp1 Tx", NULL, "codec1_out" },
105 105
106 { "hs_in", NULL, "ssp1 Rx" }, 106 { "hs_in", NULL, "ssp1 Rx" },
107 { "ssp1 Rx", NULL, "AIF Capture" }, 107 { "ssp1 Rx", NULL, "AIF Capture" },
diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
index 6072164f2d43..271ae3c2c535 100644
--- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
+++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
@@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
109 { "ssp0 Tx", NULL, "spk_out" }, 109 { "ssp0 Tx", NULL, "spk_out" },
110 110
111 { "AIF Playback", NULL, "ssp1 Tx" }, 111 { "AIF Playback", NULL, "ssp1 Tx" },
112 { "ssp1 Tx", NULL, "hs_out" }, 112 { "ssp1 Tx", NULL, "codec1_out" },
113 113
114 { "hs_in", NULL, "ssp1 Rx" }, 114 { "hs_in", NULL, "ssp1 Rx" },
115 { "ssp1 Rx", NULL, "AIF Capture" }, 115 { "ssp1 Rx", NULL, "AIF Capture" },
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index d14c50a60289..3eaac41090ca 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt,
 
 	if ((epnt->virtual_bus_id == instance_id) &&
 			(epnt->linktype == link_type) &&
-			(epnt->direction == dirn) &&
-			(epnt->device_type == dev_type))
-		return true;
-	else
-		return false;
+			(epnt->direction == dirn)) {
+		/* do not check dev_type for DMIC link type */
+		if (epnt->linktype == NHLT_LINK_DMIC)
+			return true;
+
+		if (epnt->device_type == dev_type)
+			return true;
+	}
+
+	return false;
 }
 
 struct nhlt_specific_cfg
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index a072bcf209d2..81923da18ac2 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
 		break;
 
 	default:
-		dev_warn(bus->dev, "Control load not supported %d:%d:%d\n",
+		dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n",
 			hdr->ops.get, hdr->ops.put, hdr->ops.info);
 		break;
 	}
diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c
index ee5055d47d13..a89fe9b6463b 100644
--- a/sound/soc/rockchip/rockchip_spdif.c
+++ b/sound/soc/rockchip/rockchip_spdif.c
@@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev)
 	spdif->mclk = devm_clk_get(&pdev->dev, "mclk");
 	if (IS_ERR(spdif->mclk)) {
 		dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n");
-		return PTR_ERR(spdif->mclk);
+		ret = PTR_ERR(spdif->mclk);
+		goto err_disable_hclk;
 	}
 
 	ret = clk_prepare_enable(spdif->mclk);
 	if (ret) {
 		dev_err(spdif->dev, "clock enable failed %d\n", ret);
-		return ret;
+		goto err_disable_clocks;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(regs))
-		return PTR_ERR(regs);
+	if (IS_ERR(regs)) {
+		ret = PTR_ERR(regs);
+		goto err_disable_clocks;
+	}
 
 	spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs,
 						  &rk_spdif_regmap_config);
 	if (IS_ERR(spdif->regmap)) {
 		dev_err(&pdev->dev,
 			"Failed to initialise managed register map\n");
-		return PTR_ERR(spdif->regmap);
+		ret = PTR_ERR(spdif->regmap);
+		goto err_disable_clocks;
 	}
 
 	spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR;
@@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev)
 
 err_pm_runtime:
 	pm_runtime_disable(&pdev->dev);
+err_disable_clocks:
+	clk_disable_unprepare(spdif->mclk);
+err_disable_hclk:
+	clk_disable_unprepare(spdif->hclk);
 
 	return ret;
 }
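The rk_spdif_probe() fix above is the classic goto-unwind shape: acquire resources in order, and on failure jump to a label that releases everything held so far, with the labels stacked in reverse order of acquisition (mclk is released before hclk here). Reduced to a sketch with hypothetical helpers standing in for the clk_prepare_enable()/clk_disable_unprepare() pairs:

    int acquire_a(void);
    int acquire_b(void);
    int start_device(void);
    void release_a(void);
    void release_b(void);

    static int demo_probe(void)
    {
    	int ret;

    	ret = acquire_a();
    	if (ret)
    		return ret;		/* nothing held yet */

    	ret = acquire_b();
    	if (ret)
    		goto err_release_a;

    	ret = start_device();
    	if (ret)
    		goto err_release_b;

    	return 0;

    err_release_b:			/* labels in reverse acquisition order */
    	release_b();
    err_release_a:
    	release_a();
    	return ret;
    }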
diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c
index 8ddb08714faa..4672688cac32 100644
--- a/sound/soc/sh/rcar/adg.c
+++ b/sound/soc/sh/rcar/adg.c
@@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod,
 			      NULL, &val, NULL);
 
 	val  = val << shift;
-	mask = 0xffff << shift;
+	mask = 0x0f1f << shift;
 
 	rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val);
 
@@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod,
 
 	in  = in << shift;
 	out = out << shift;
-	mask = 0xffff << shift;
+	mask = 0x0f1f << shift;
 
 	switch (id / 2) {
 	case 0:
@@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate)
 		ckr = 0x80000000;
 	}
 
-	rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr);
+	rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr);
 	rsnd_mod_write(adg_mod, BRRA, adg->rbga);
 	rsnd_mod_write(adg_mod, BRRB, adg->rbgb);
 
diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index c70eb2097816..f12a88a21dfa 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd)
 
 	return snd_pcm_lib_preallocate_pages_for_all(
 		rtd->pcm,
-		SNDRV_DMA_TYPE_CONTINUOUS,
-		snd_dma_continuous_data(GFP_KERNEL),
+		SNDRV_DMA_TYPE_DEV,
+		rtd->card->snd_card->dev,
 		PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
 }
 
diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c
index fd557abfe390..4d750bdf8e24 100644
--- a/sound/soc/sh/rcar/dma.c
+++ b/sound/soc/sh/rcar/dma.c
@@ -26,10 +26,7 @@
 struct rsnd_dmaen {
 	struct dma_chan		*chan;
 	dma_cookie_t		cookie;
-	dma_addr_t		dma_buf;
 	unsigned int		dma_len;
-	unsigned int		dma_period;
-	unsigned int		dma_cnt;
 };
 
 struct rsnd_dmapp {
@@ -71,38 +68,10 @@ static struct rsnd_mod mem = {
 /*
  * Audio DMAC
  */
-#define rsnd_dmaen_sync(dmaen, io, i)	__rsnd_dmaen_sync(dmaen, io, i, 1)
-#define rsnd_dmaen_unsync(dmaen, io, i)	__rsnd_dmaen_sync(dmaen, io, i, 0)
-static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io,
-			      int i, int sync)
-{
-	struct device *dev = dmaen->chan->device->dev;
-	enum dma_data_direction dir;
-	int is_play = rsnd_io_is_play(io);
-	dma_addr_t buf;
-	int len, max;
-	size_t period;
-
-	len	= dmaen->dma_len;
-	period	= dmaen->dma_period;
-	max	= len / period;
-	i	= i % max;
-	buf	= dmaen->dma_buf + (period * i);
-
-	dir	= is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-	if (sync)
-		dma_sync_single_for_device(dev, buf, period, dir);
-	else
-		dma_sync_single_for_cpu(dev, buf, period, dir);
-}
-
 static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
 				  struct rsnd_dai_stream *io)
 {
 	struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
-	struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
-	struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 	bool elapsed = false;
 	unsigned long flags;
 
@@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
 	 */
 	spin_lock_irqsave(&priv->lock, flags);
 
-	if (rsnd_io_is_working(io)) {
-		rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt);
-
-		/*
-		 * Next period is already started.
-		 * Let's sync Next Next period
-		 * see
-		 *	rsnd_dmaen_start()
-		 */
-		rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2);
-
+	if (rsnd_io_is_working(io))
 		elapsed = true;
 
-		dmaen->dma_cnt++;
-	}
-
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	if (elapsed)
@@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod,
 	struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
 	struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 
-	if (dmaen->chan) {
-		int is_play = rsnd_io_is_play(io);
-
+	if (dmaen->chan)
 		dmaengine_terminate_all(dmaen->chan);
-		dma_unmap_single(dmaen->chan->device->dev,
-				 dmaen->dma_buf, dmaen->dma_len,
-				 is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	}
 
 	return 0;
 }
@@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
 	struct device *dev = rsnd_priv_to_dev(priv);
 	struct dma_async_tx_descriptor *desc;
 	struct dma_slave_config cfg = {};
-	dma_addr_t buf;
-	size_t len;
-	size_t period;
 	int is_play = rsnd_io_is_play(io);
-	int i;
 	int ret;
 
 	cfg.direction	= is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
@@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
 	if (ret < 0)
 		return ret;
 
-	len = snd_pcm_lib_buffer_bytes(substream);
-	period = snd_pcm_lib_period_bytes(substream);
-	buf = dma_map_single(dmaen->chan->device->dev,
-			     substream->runtime->dma_area,
-			     len,
-			     is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	if (dma_mapping_error(dmaen->chan->device->dev, buf)) {
-		dev_err(dev, "dma map failed\n");
-		return -EIO;
-	}
-
 	desc = dmaengine_prep_dma_cyclic(dmaen->chan,
-					 buf, len, period,
+					 substream->runtime->dma_addr,
+					 snd_pcm_lib_buffer_bytes(substream),
+					 snd_pcm_lib_period_bytes(substream),
 					 is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
 					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
@@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
 	desc->callback		= rsnd_dmaen_complete;
 	desc->callback_param	= rsnd_mod_get(dma);
 
-	dmaen->dma_buf		= buf;
-	dmaen->dma_len		= len;
-	dmaen->dma_period	= period;
-	dmaen->dma_cnt		= 0;
-
-	/*
-	 * synchronize this and next period
-	 * see
-	 *	__rsnd_dmaen_complete()
-	 */
-	for (i = 0; i < 2; i++)
-		rsnd_dmaen_sync(dmaen, io, i);
+	dmaen->dma_len		= snd_pcm_lib_buffer_bytes(substream);
 
 	dmaen->cookie = dmaengine_submit(desc);
 	if (dmaen->cookie < 0) {
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index fece1e5f582f..cbf3bf312d23 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod,
 			    int byte)
 {
 	struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
+	bool ret = false;
+	int byte_pos;
 
-	ssi->byte_pos += byte;
+	byte_pos = ssi->byte_pos + byte;
 
-	if (ssi->byte_pos >= ssi->next_period_byte) {
+	if (byte_pos >= ssi->next_period_byte) {
 		struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
 		ssi->period_pos++;
 		ssi->next_period_byte += ssi->byte_per_period;
 
 		if (ssi->period_pos >= runtime->periods) {
-			ssi->byte_pos = 0;
+			byte_pos = 0;
 			ssi->period_pos = 0;
 			ssi->next_period_byte = ssi->byte_per_period;
 		}
 
-		return true;
+		ret = true;
 	}
 
-	return false;
+	WRITE_ONCE(ssi->byte_pos, byte_pos);
+
+	return ret;
 }
 
 /*
@@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod,
 	struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
 	struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
-	*pointer = bytes_to_frames(runtime, ssi->byte_pos);
+	*pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos));
 
 	return 0;
 }
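The ssi.c change computes the new position in a local variable, publishes it with a single WRITE_ONCE(), and has the .pointer() reader use READ_ONCE(), so the concurrent reader can never observe the transient intermediate values the old in-place read-modify-write sequence produced. The pattern in isolation, with a hypothetical state struct:

    #include <linux/compiler.h>	/* READ_ONCE / WRITE_ONCE */

    struct pos_state {
    	int byte_pos;	/* written in IRQ context, read from .pointer() */
    };

    /* Writer: compute in a local, publish exactly once. */
    static void advance(struct pos_state *s, int byte, int wrap)
    {
    	int pos = s->byte_pos + byte;

    	if (pos >= wrap)
    		pos = 0;

    	WRITE_ONCE(s->byte_pos, pos);	/* single store, no torn update */
    }

    /* Reader: one snapshot, never an intermediate value. */
    static int current_pos(struct pos_state *s)
    {
    	return READ_ONCE(s->byte_pos);
    }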
diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c
index 4d948757d300..6ff8a36c2c82 100644
--- a/sound/soc/sh/rcar/ssiu.c
+++ b/sound/soc/sh/rcar/ssiu.c
@@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 {
 	int hdmi = rsnd_ssi_hdmi_port(io);
 	int ret;
+	u32 mode = 0;
 
 	ret = rsnd_ssiu_init(mod, io, priv);
 	if (ret < 0)
@@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 		 * see
 		 *	rsnd_ssi_config_init()
 		 */
-		rsnd_mod_write(mod, SSI_MODE, 0x1);
+		mode = 0x1;
 	}
 
+	rsnd_mod_write(mod, SSI_MODE, mode);
+
 	if (rsnd_ssi_use_busif(io)) {
 		rsnd_mod_write(mod, SSI_BUSIF_ADINR,
 			       rsnd_get_adinr_bit(mod, io) |
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 7c9e361b2200..2b4ceda36291 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
 	kctl->private_value = (unsigned long)namelist;
 	kctl->private_free = usb_mixer_selector_elem_free;
 
-	nameid = uac_selector_unit_iSelector(desc);
+	/* check the static mapping table at first */
 	len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
-	if (len)
-		;
-	else if (nameid)
-		len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
-					       sizeof(kctl->id.name));
-	else
-		len = get_term_name(state, &state->oterm,
-				    kctl->id.name, sizeof(kctl->id.name), 0);
-
 	if (!len) {
-		strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
+		/* no mapping ? */
+		/* if iSelector is given, use it */
+		nameid = uac_selector_unit_iSelector(desc);
+		if (nameid)
+			len = snd_usb_copy_string_desc(state, nameid,
+						       kctl->id.name,
+						       sizeof(kctl->id.name));
+		/* ... or pick up the terminal name at next */
+		if (!len)
+			len = get_term_name(state, &state->oterm,
+				    kctl->id.name, sizeof(kctl->id.name), 0);
+		/* ... or use the fixed string "USB" as the last resort */
+		if (!len)
+			strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
 
+		/* and add the proper suffix */
 		if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
 			append_ctl_name(kctl, " Clock Source");
 		else if ((state->oterm.type & 0xff00) == 0x0100)
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 77eecaa4db1f..a66ef5777887 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id)
 /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
  * between PCM/DOP and native DSD mode
  */
-static bool is_teac_50X_dac(unsigned int id)
+static bool is_teac_dsd_dac(unsigned int id)
 {
 	switch (id) {
 	case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
+	case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
 		return true;
 	}
 	return false;
@@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
 			break;
 		}
 		mdelay(20);
-	} else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
+	} else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
 		/* Vendor mode switch cmd is required. */
 		switch (fmt->altsetting) {
 		case 3: /* DSD mode (DSD_U32) requested */
@@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 	}
 
 	/* TEAC devices with USB DAC functionality */
-	if (is_teac_50X_dac(chip->usb_id)) {
+	if (is_teac_dsd_dac(chip->usb_id)) {
 		if (fp->altsetting == 3)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 	}
diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
index cefe7c7cd4f6..0a8e37a519f2 100644
--- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
+++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -2,7 +2,7 @@
 #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
 #define _UAPI__ASM_BPF_PERF_EVENT_H__
 
-#include <asm/ptrace.h>
+#include "ptrace.h"
 
 typedef user_pt_regs bpf_user_pt_regs_t;
 
diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..d17dd9e5d516
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/perf_regs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_PERF_REGS_H
+#define _ASM_S390_PERF_REGS_H
+
+enum perf_event_s390_regs {
+	PERF_REG_S390_R0,
+	PERF_REG_S390_R1,
+	PERF_REG_S390_R2,
+	PERF_REG_S390_R3,
+	PERF_REG_S390_R4,
+	PERF_REG_S390_R5,
+	PERF_REG_S390_R6,
+	PERF_REG_S390_R7,
+	PERF_REG_S390_R8,
+	PERF_REG_S390_R9,
+	PERF_REG_S390_R10,
+	PERF_REG_S390_R11,
+	PERF_REG_S390_R12,
+	PERF_REG_S390_R13,
+	PERF_REG_S390_R14,
+	PERF_REG_S390_R15,
+	PERF_REG_S390_FP0,
+	PERF_REG_S390_FP1,
+	PERF_REG_S390_FP2,
+	PERF_REG_S390_FP3,
+	PERF_REG_S390_FP4,
+	PERF_REG_S390_FP5,
+	PERF_REG_S390_FP6,
+	PERF_REG_S390_FP7,
+	PERF_REG_S390_FP8,
+	PERF_REG_S390_FP9,
+	PERF_REG_S390_FP10,
+	PERF_REG_S390_FP11,
+	PERF_REG_S390_FP12,
+	PERF_REG_S390_FP13,
+	PERF_REG_S390_FP14,
+	PERF_REG_S390_FP15,
+	PERF_REG_S390_MASK,
+	PERF_REG_S390_PC,
+
+	PERF_REG_S390_MAX
+};
+
+#endif /* _ASM_S390_PERF_REGS_H */
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e2450c8e88e6..a8c3a33dd185 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -523,21 +523,23 @@ static int do_show(int argc, char **argv)
 			break;
 		p_err("can't get next map: %s%s", strerror(errno),
 		      errno == EINVAL ? " -- kernel too old?" : "");
-		return -1;
+		break;
 	}
 
 	fd = bpf_map_get_fd_by_id(id);
 	if (fd < 0) {
+		if (errno == ENOENT)
+			continue;
 		p_err("can't get map by id (%u): %s",
 		      id, strerror(errno));
-		return -1;
+		break;
 	}
 
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	if (err) {
 		p_err("can't get map info: %s", strerror(errno));
 		close(fd);
-		return -1;
+		break;
 	}
 
 	if (json_output)
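The bpftool change swaps return -1 for break inside the listing loop so that, in JSON mode, the array opened before the loop is always closed even when one iteration fails part-way (and ENOENT now just skips a map that vanished between the get-next-id and get-fd syscalls). The control-flow shape, reduced to a sketch with hypothetical helpers:

    /* Hypothetical helpers; the point is the break-not-return control flow. */
    int get_next_id(unsigned int *id);	/* nonzero when exhausted or on error */
    int emit_object(unsigned int id);	/* nonzero on error */
    void begin_array(void);
    void end_array(void);

    static int list_objects(void)
    {
    	unsigned int id = 0;

    	begin_array();
    	while (!get_next_id(&id)) {
    		if (emit_object(id))
    			break;	/* stop early, but still close the array */
    	}
    	end_array();	/* runs on every path, unlike an early return */
    	return 0;
    }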
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index ad619b96c276..dded77345bfb 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -382,6 +382,8 @@ static int do_show(int argc, char **argv)
 
 	fd = bpf_prog_get_fd_by_id(id);
 	if (fd < 0) {
+		if (errno == ENOENT)
+			continue;
 		p_err("can't get prog by id (%u): %s",
 		      id, strerror(errno));
 		err = -1;
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 217cf6f95c36..a5684d0968b4 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -478,7 +478,7 @@ class Provider(object):
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
-        if not fields_filter or fields_filter == "help":
+        if not fields_filter:
             return True
         return re.match(fields_filter, field) is not None
 
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
+        self.fields = [field for field in self.get_available_fields()
                         if self.is_field_wanted(fields_filter, field)]
 
     @staticmethod
     def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
             curses.nocbreak()
             curses.endwin()
 
-    def get_all_gnames(self):
+    @staticmethod
+    def get_all_gnames():
         """Returns a list of (pid, gname) tuples of all running guests"""
         res = []
         try:
@@ -963,7 +964,7 @@ class Tui(object):
                 # perform a sanity check before calling the more expensive
                 # function to possibly extract the guest name
                 if ' -name ' in line[1]:
-                    res.append((line[0], self.get_gname_from_pid(line[0])))
+                    res.append((line[0], Tui.get_gname_from_pid(line[0])))
             child.stdout.close()
 
         return res
@@ -984,7 +985,8 @@ class Tui(object):
         except Exception:
             self.screen.addstr(row + 1, 2, 'Not available')
 
-    def get_pid_from_gname(self, gname):
+    @staticmethod
+    def get_pid_from_gname(gname):
         """Fuzzy function to convert guest name to QEMU process pid.
 
         Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
 
         """
         pids = []
-        for line in self.get_all_gnames():
+        for line in Tui.get_all_gnames():
             if gname == line[1]:
                 pids.append(int(line[0]))
 
@@ -1090,15 +1092,16 @@ class Tui(object):
                 # sort by totals
                 return (0, -stats[x][0])
         total = 0.
-        for val in stats.values():
-            total += val[0]
+        for key in stats.keys():
+            if key.find('(') is -1:
+                total += stats[key][0]
         if self._sorting == SORT_DEFAULT:
             sortkey = sortCurAvg
         else:
             sortkey = sortTotal
+        tavg = 0
         for key in sorted(stats.keys(), key=sortkey):
-
-            if row >= self.screen.getmaxyx()[0]:
+            if row >= self.screen.getmaxyx()[0] - 1:
                 break
             values = stats[key]
             if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                    (key, values[0], values[0] * 100 / total,
                                     cur))
+                if cur is not '' and key.find('(') is -1:
+                    tavg += cur
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
+        else:
+            self.screen.addstr(row, 1, '%-40s %10d %8s' %
+                               ('Total', total, tavg if tavg else ''),
+                               curses.A_BOLD)
         self.screen.refresh()
 
     def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
                 if char == 'x':
                     self.update_drilldown()
                     # prevents display of current values on next refresh
-                    self.stats.get()
+                    self.stats.get(self._display_guests)
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
         try:
             pids = Tui.get_pid_from_gname(val)
         except:
-            raise optparse.OptionValueError('Error while searching for guest '
-                                            '"{}", use "-p" to specify a pid '
-                                            'instead'.format(val))
+            sys.exit('Error while searching for guest "{}". Use "-p" to '
+                     'specify a pid instead?'.format(val))
         if len(pids) == 0:
-            raise optparse.OptionValueError('No guest by the name "{}" '
-                                            'found'.format(val))
+            sys.exit('Error: No guest by the name "{}" found'.format(val))
         if len(pids) > 1:
-            raise optparse.OptionValueError('Multiple processes found (pids: '
-                                            '{}) - use "-p" to specify a pid '
-                                            'instead'.format(" ".join(pids)))
+            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
+                     'to specify the desired pid'.format(" ".join(pids)))
         parser.values.pid = pids[0]
 
1466 optparser = optparse.OptionParser(description=description_text, 1472 optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
                          help='restrict statistics to guest by name',
                          callback=cb_guest_to_pid,
                          )
-    (options, _) = optparser.parse_args(sys.argv)
+    options, unkn = optparser.parse_args(sys.argv)
+    if len(unkn) != 1:
+        sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+    try:
+        # verify that we were passed a valid regex up front
+        re.compile(options.fields)
+    except re.error:
+        sys.exit('Error: "' + options.fields + '" is not a valid regular '
+                 'expression')
+
     return options
 
 
@@ -1564,16 +1579,13 @@ def main():
 
     stats = Stats(options)
 
-    if options.fields == "help":
-        event_list = "\n"
-        s = stats.get()
-        for key in s.keys():
-            if key.find('(') != -1:
-                key = key[0:key.find('(')]
-            if event_list.find('\n' + key + '\n') == -1:
-                event_list += key + '\n'
-        sys.stdout.write(event_list)
-        return ""
+    if options.fields == 'help':
+        stats.fields_filter = None
+        event_list = []
+        for key in stats.get().keys():
+            event_list.append(key.split('(', 1)[0])
+        sys.stdout.write('  ' + '\n  '.join(sorted(set(event_list))) + '\n')
+        sys.exit(0)
 
     if options.log:
         log(stats)
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index e5cf836be8a1..b5b3810c9e94 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
 *s*::	set update interval
 
 *x*::	toggle reporting of stats for child trace events
+ ::	*Note*: The stats for the parents summarize the respective child trace
+	events
 
 Press any other key to refresh statistics immediately.
 
@@ -86,7 +88,7 @@ OPTIONS
 
 -f<fields>::
 --fields=<fields>::
-	fields to display (regex)
+	fields to display (regex), "-f help" for a list of available events
 
 -h::
 --help::
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index ae0272f9a091..e6acc281dd37 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -46,7 +46,7 @@ $(OBJTOOL_IN): fixdep FORCE
 	@$(MAKE) $(build)=objtool
 
 $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
-	@./sync-check.sh
+	@$(CONFIG_SHELL) ./sync-check.sh
 	$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
 
 
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 8acfc47af70e..540a209b78ab 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -138,7 +138,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_ADD;
 			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			op->dest.type = OP_SRC_REG;
+			op->dest.type = OP_DEST_REG;
 			op->dest.reg = CFI_SP;
 		}
 		break;
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 4c6b5c9ef073..91e8e19ff5e0 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -44,6 +44,9 @@ int cmd_orc(int argc, const char **argv)
 	const char *objname;
 
 	argc--; argv++;
+	if (argc <= 0)
+		usage_with_options(orc_usage, check_options);
+
 	if (!strncmp(argv[0], "gen", 3)) {
 		argc = parse_options(argc, argv, check_options, orc_usage, 0);
 		if (argc != 1)
@@ -52,7 +55,6 @@ int cmd_orc(int argc, const char **argv)
 		objname = argv[0];
 
 		return check(objname, no_fp, no_unreachable, true);
-
 	}
 
 	if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9b341584eb1b..f40d46e24bcc 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -428,6 +428,40 @@ static void add_ignores(struct objtool_file *file)
 }
 
 /*
+ * FIXME: For now, just ignore any alternatives which add retpolines. This is
+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
+ * But it at least allows objtool to understand the control flow *around* the
+ * retpoline.
+ */
+static int add_nospec_ignores(struct objtool_file *file)
+{
+	struct section *sec;
+	struct rela *rela;
+	struct instruction *insn;
+
+	sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
+			return -1;
+		}
+
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!insn) {
+			WARN("bad .discard.nospec entry");
+			return -1;
+		}
+
+		insn->ignore_alts = true;
+	}
+
+	return 0;
+}
+
+/*
  * Find the destination instructions for all jumps.
  */
 static int add_jump_destinations(struct objtool_file *file)
@@ -456,6 +490,13 @@ static int add_jump_destinations(struct objtool_file *file)
 		} else if (rela->sym->sec->idx) {
 			dest_sec = rela->sym->sec;
 			dest_off = rela->sym->sym.st_value + rela->addend + 4;
+		} else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+			/*
+			 * Retpoline jumps are really dynamic jumps in
+			 * disguise, so convert them accordingly.
+			 */
+			insn->type = INSN_JUMP_DYNAMIC;
+			continue;
 		} else {
 			/* sibling call */
 			insn->jump_dest = 0;
@@ -502,11 +543,18 @@ static int add_call_destinations(struct objtool_file *file)
 			dest_off = insn->offset + insn->len + insn->immediate;
 			insn->call_dest = find_symbol_by_offset(insn->sec,
 								dest_off);
+			/*
+			 * FIXME: Thanks to retpolines, it's now considered
+			 * normal for a function to call within itself. So
+			 * disable this warning for now.
+			 */
+#if 0
 			if (!insn->call_dest) {
 				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
 					  insn->sec, insn->offset, dest_off);
 				return -1;
 			}
+#endif
 		} else if (rela->sym->type == STT_SECTION) {
 			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
 								rela->addend+4);
@@ -671,12 +719,6 @@ static int add_special_section_alts(struct objtool_file *file)
 		return ret;
 
 	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
-		alt = malloc(sizeof(*alt));
-		if (!alt) {
-			WARN("malloc failed");
-			ret = -1;
-			goto out;
-		}
 
 		orig_insn = find_insn(file, special_alt->orig_sec,
 				      special_alt->orig_off);
@@ -687,6 +729,10 @@ static int add_special_section_alts(struct objtool_file *file)
 			goto out;
 		}
 
+		/* Ignore retpoline alternatives. */
+		if (orig_insn->ignore_alts)
+			continue;
+
 		new_insn = NULL;
 		if (!special_alt->group || special_alt->new_len) {
 			new_insn = find_insn(file, special_alt->new_sec,
@@ -712,6 +758,13 @@ static int add_special_section_alts(struct objtool_file *file)
 			goto out;
 		}
 
+		alt = malloc(sizeof(*alt));
+		if (!alt) {
+			WARN("malloc failed");
+			ret = -1;
+			goto out;
+		}
+
 		alt->insn = new_insn;
 		list_add_tail(&alt->list, &orig_insn->alts);
 
@@ -1028,6 +1081,10 @@ static int decode_sections(struct objtool_file *file)
 
 	add_ignores(file);
 
+	ret = add_nospec_ignores(file);
+	if (ret)
+		return ret;
+
 	ret = add_jump_destinations(file);
 	if (ret)
 		return ret;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 47d9ea70a83d..dbadb304a410 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -44,7 +44,7 @@ struct instruction {
 	unsigned int len;
 	unsigned char type;
 	unsigned long immediate;
-	bool alt_group, visited, dead_end, ignore, hint, save, restore;
+	bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct list_head alts;
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index e5ca31429c9b..e61fe703197b 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -165,6 +165,8 @@ int create_orc_sections(struct objtool_file *file)
 
 	/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
 	sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+	if (!sec)
+		return -1;
 
 	ip_relasec = elf_create_rela_section(file->elf, sec);
 	if (!ip_relasec)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ed65e82f034e..0294bfb6c5f8 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -188,9 +188,7 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
   PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
-  ifeq ($(CC_NO_CLANG), 1)
-    PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
-  endif
+  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 endif
 
@@ -576,14 +574,15 @@ ifndef NO_GTK2
576 endif 574 endif
577endif 575endif
578 576
579
580ifdef NO_LIBPERL 577ifdef NO_LIBPERL
581 CFLAGS += -DNO_LIBPERL 578 CFLAGS += -DNO_LIBPERL
582else 579else
583 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) 580 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
584 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) 581 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
585 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) 582 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
586 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` 583 PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
584 PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
585 PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
587 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) 586 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
588 587
589 ifneq ($(feature-libperl), 1) 588 ifneq ($(feature-libperl), 1)
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
index d2df54a6bc5a..bcfbaed78cc2 100644
--- a/tools/perf/arch/s390/include/perf_regs.h
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -3,7 +3,7 @@
3 3
4#include <stdlib.h> 4#include <stdlib.h>
5#include <linux/types.h> 5#include <linux/types.h>
6#include <../../../../arch/s390/include/uapi/asm/perf_regs.h> 6#include <asm/perf_regs.h>
7 7
8void perf_regs_load(u64 *regs); 8void perf_regs_load(u64 *regs);
9 9
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 6db9d809fe97..3e64f10b6d66 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -21,6 +21,7 @@ arch/x86/include/asm/cpufeatures.h
21arch/arm/include/uapi/asm/perf_regs.h 21arch/arm/include/uapi/asm/perf_regs.h
22arch/arm64/include/uapi/asm/perf_regs.h 22arch/arm64/include/uapi/asm/perf_regs.h
23arch/powerpc/include/uapi/asm/perf_regs.h 23arch/powerpc/include/uapi/asm/perf_regs.h
24arch/s390/include/uapi/asm/perf_regs.h
24arch/x86/include/uapi/asm/perf_regs.h 25arch/x86/include/uapi/asm/perf_regs.h
25arch/x86/include/uapi/asm/kvm.h 26arch/x86/include/uapi/asm/kvm.h
26arch/x86/include/uapi/asm/kvm_perf.h 27arch/x86/include/uapi/asm/kvm_perf.h
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index cf36de7ea255..0c6d1002b524 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -384,13 +384,13 @@ jvmti_write_code(void *agent, char const *sym,
384} 384}
385 385
386int 386int
387jvmti_write_debug_info(void *agent, uint64_t code, const char *file, 387jvmti_write_debug_info(void *agent, uint64_t code,
388 jvmti_line_info_t *li, int nr_lines) 388 int nr_lines, jvmti_line_info_t *li,
389 const char * const * file_names)
389{ 390{
390 struct jr_code_debug_info rec; 391 struct jr_code_debug_info rec;
391 size_t sret, len, size, flen; 392 size_t sret, len, size, flen = 0;
392 uint64_t addr; 393 uint64_t addr;
393 const char *fn = file;
394 FILE *fp = agent; 394 FILE *fp = agent;
395 int i; 395 int i;
396 396
@@ -405,7 +405,9 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
405 return -1; 405 return -1;
406 } 406 }
407 407
408 flen = strlen(file) + 1; 408 for (i = 0; i < nr_lines; ++i) {
409 flen += strlen(file_names[i]) + 1;
410 }
409 411
410 rec.p.id = JIT_CODE_DEBUG_INFO; 412 rec.p.id = JIT_CODE_DEBUG_INFO;
411 size = sizeof(rec); 413 size = sizeof(rec);
@@ -421,7 +423,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
421 * file[] : source file name 423 * file[] : source file name
422 */ 424 */
423 size += nr_lines * sizeof(struct debug_entry); 425 size += nr_lines * sizeof(struct debug_entry);
424 size += flen * nr_lines; 426 size += flen;
425 rec.p.total_size = size; 427 rec.p.total_size = size;
426 428
427 /* 429 /*
@@ -452,7 +454,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
452 if (sret != 1) 454 if (sret != 1)
453 goto error; 455 goto error;
454 456
455 sret = fwrite_unlocked(fn, flen, 1, fp); 457 sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp);
456 if (sret != 1) 458 if (sret != 1)
457 goto error; 459 goto error;
458 } 460 }
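The jvmti_agent.c change replaces the single shared source file with one file name per line entry, so the record's string payload becomes the sum of strlen()+1 over all entries, each written NUL-terminated. A small self-contained sketch of that sizing logic; the names below are illustrative, not the agent's real data:

#include <stdio.h>
#include <string.h>

static size_t names_payload(const char * const *names, int nr_lines)
{
	size_t flen = 0;
	int i;

	for (i = 0; i < nr_lines; i++)
		flen += strlen(names[i]) + 1;	/* keep the trailing NUL */
	return flen;
}

int main(void)
{
	const char *names[] = { "Foo.java", "Bar.java" };
	int i;

	printf("payload: %zu bytes\n", names_payload(names, 2)); /* 9 + 9 */
	for (i = 0; i < 2; i++)		/* one write per name, NUL included */
		fwrite(names[i], strlen(names[i]) + 1, 1, stdout);
	return 0;
}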
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
index fe32d8344a82..6ed82f6c06dd 100644
--- a/tools/perf/jvmti/jvmti_agent.h
+++ b/tools/perf/jvmti/jvmti_agent.h
@@ -14,6 +14,7 @@ typedef struct {
14 unsigned long pc; 14 unsigned long pc;
15 int line_number; 15 int line_number;
16 int discrim; /* discriminator -- 0 for now */ 16 int discrim; /* discriminator -- 0 for now */
17 jmethodID methodID;
17} jvmti_line_info_t; 18} jvmti_line_info_t;
18 19
19void *jvmti_open(void); 20void *jvmti_open(void);
@@ -22,11 +23,9 @@ int jvmti_write_code(void *agent, char const *symbol_name,
22 uint64_t vma, void const *code, 23 uint64_t vma, void const *code,
23 const unsigned int code_size); 24 const unsigned int code_size);
24 25
25int jvmti_write_debug_info(void *agent, 26int jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines,
26 uint64_t code,
27 const char *file,
28 jvmti_line_info_t *li, 27 jvmti_line_info_t *li,
29 int nr_lines); 28 const char * const * file_names);
30 29
31#if defined(__cplusplus) 30#if defined(__cplusplus)
32} 31}
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index c62c9fc9a525..6add3e982614 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -47,6 +47,7 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
47 tab[lines].pc = (unsigned long)pc; 47 tab[lines].pc = (unsigned long)pc;
48 tab[lines].line_number = loc_tab[i].line_number; 48 tab[lines].line_number = loc_tab[i].line_number;
49 tab[lines].discrim = 0; /* not yet used */ 49 tab[lines].discrim = 0; /* not yet used */
50 tab[lines].methodID = m;
50 lines++; 51 lines++;
51 } else { 52 } else {
52 break; 53 break;
@@ -125,6 +126,99 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
125 return JVMTI_ERROR_NONE; 126 return JVMTI_ERROR_NONE;
126} 127}
127 128
129static void
130copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length)
131{
132 /*
133 * Assume path name is class hierarchy, this is a common practice with Java programs
134 */
135 if (*class_sign == 'L') {
136 int j, i = 0;
137 char *p = strrchr(class_sign, '/');
138 if (p) {
139 /* drop the 'L' prefix and copy up to the final '/' */
140 for (i = 0; i < (p - class_sign); i++)
141 result[i] = class_sign[i+1];
142 }
143 /*
144 * append file name, we use loops and not string ops to avoid modifying
145 * class_sign which is used later for the symbol name
146 */
147 for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++)
148 result[i] = file_name[j];
149
150 result[i] = '\0';
151 } else {
152 /* fallback case */
153 size_t file_name_len = strlen(file_name);
154 strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length);
155 }
156}
157
158static jvmtiError
159get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer)
160{
161 jvmtiError ret;
162 jclass decl_class;
163 char *file_name = NULL;
164 char *class_sign = NULL;
165 char fn[PATH_MAX];
166 size_t len;
167
168 ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class);
169 if (ret != JVMTI_ERROR_NONE) {
170 print_error(jvmti, "GetMethodDeclaringClass", ret);
171 return ret;
172 }
173
174 ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
175 if (ret != JVMTI_ERROR_NONE) {
176 print_error(jvmti, "GetSourceFileName", ret);
177 return ret;
178 }
179
180 ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL);
181 if (ret != JVMTI_ERROR_NONE) {
182 print_error(jvmti, "GetClassSignature", ret);
183 goto free_file_name_error;
184 }
185
186 copy_class_filename(class_sign, file_name, fn, PATH_MAX);
187 len = strlen(fn);
188 *buffer = malloc((len + 1) * sizeof(char));
189 if (!*buffer) {
190 print_error(jvmti, "GetClassSignature", ret);
191 ret = JVMTI_ERROR_OUT_OF_MEMORY;
192 goto free_class_sign_error;
193 }
194 strcpy(*buffer, fn);
195 ret = JVMTI_ERROR_NONE;
196
197free_class_sign_error:
198 (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
199free_file_name_error:
200 (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
201
202 return ret;
203}
204
205static jvmtiError
206fill_source_filenames(jvmtiEnv *jvmti, int nr_lines,
207 const jvmti_line_info_t * line_tab,
208 char ** file_names)
209{
210 int index;
211 jvmtiError ret;
212
213 for (index = 0; index < nr_lines; ++index) {
214 ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index]));
215 if (ret != JVMTI_ERROR_NONE)
216 return ret;
217 }
218
219 return JVMTI_ERROR_NONE;
220}
221
128static void JNICALL 222static void JNICALL
129compiled_method_load_cb(jvmtiEnv *jvmti, 223compiled_method_load_cb(jvmtiEnv *jvmti,
130 jmethodID method, 224 jmethodID method,
@@ -135,16 +229,18 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
135 const void *compile_info) 229 const void *compile_info)
136{ 230{
137 jvmti_line_info_t *line_tab = NULL; 231 jvmti_line_info_t *line_tab = NULL;
232 char ** line_file_names = NULL;
138 jclass decl_class; 233 jclass decl_class;
139 char *class_sign = NULL; 234 char *class_sign = NULL;
140 char *func_name = NULL; 235 char *func_name = NULL;
141 char *func_sign = NULL; 236 char *func_sign = NULL;
142 char *file_name= NULL; 237 char *file_name = NULL;
143 char fn[PATH_MAX]; 238 char fn[PATH_MAX];
144 uint64_t addr = (uint64_t)(uintptr_t)code_addr; 239 uint64_t addr = (uint64_t)(uintptr_t)code_addr;
145 jvmtiError ret; 240 jvmtiError ret;
146 int nr_lines = 0; /* in line_tab[] */ 241 int nr_lines = 0; /* in line_tab[] */
147 size_t len; 242 size_t len;
243 int output_debug_info = 0;
148 244
149 ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, 245 ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
150 &decl_class); 246 &decl_class);
@@ -158,6 +254,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
158 if (ret != JVMTI_ERROR_NONE) { 254 if (ret != JVMTI_ERROR_NONE) {
159 warnx("jvmti: cannot get line table for method"); 255 warnx("jvmti: cannot get line table for method");
160 nr_lines = 0; 256 nr_lines = 0;
257 } else if (nr_lines > 0) {
258 line_file_names = malloc(sizeof(char*) * nr_lines);
259 if (!line_file_names) {
260 warnx("jvmti: cannot allocate space for line table method names");
261 } else {
262 memset(line_file_names, 0, sizeof(char*) * nr_lines);
263 ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names);
264 if (ret != JVMTI_ERROR_NONE) {
265 warnx("jvmti: fill_source_filenames failed");
266 } else {
267 output_debug_info = 1;
268 }
269 }
161 } 270 }
162 } 271 }
163 272
@@ -181,33 +290,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
181 goto error; 290 goto error;
182 } 291 }
183 292
184 /* 293 copy_class_filename(class_sign, file_name, fn, PATH_MAX);
185 * Assume path name is class hierarchy, this is a common practice with Java programs 294
186 */
187 if (*class_sign == 'L') {
188 int j, i = 0;
189 char *p = strrchr(class_sign, '/');
190 if (p) {
191 /* drop the 'L' prefix and copy up to the final '/' */
192 for (i = 0; i < (p - class_sign); i++)
193 fn[i] = class_sign[i+1];
194 }
195 /*
196 * append file name, we use loops and not string ops to avoid modifying
197 * class_sign which is used later for the symbol name
198 */
199 for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
200 fn[i] = file_name[j];
201 fn[i] = '\0';
202 } else {
203 /* fallback case */
204 strcpy(fn, file_name);
205 }
206 /* 295 /*
207 * write source line info record if we have it 296 * write source line info record if we have it
208 */ 297 */
209 if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) 298 if (output_debug_info)
210 warnx("jvmti: write_debug_info() failed"); 299 if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names))
300 warnx("jvmti: write_debug_info() failed");
211 301
212 len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; 302 len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
213 { 303 {
@@ -223,6 +313,13 @@ error:
223 (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); 313 (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
224 (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); 314 (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
225 free(line_tab); 315 free(line_tab);
316 while (line_file_names && (nr_lines > 0)) {
317 if (line_file_names[nr_lines - 1]) {
318 free(line_file_names[nr_lines - 1]);
319 }
320 nr_lines -= 1;
321 }
322 free(line_file_names);
226} 323}
227 324
228static void JNICALL 325static void JNICALL
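The libjvmti.c refactor factors the signature-to-path logic into copy_class_filename() so it can run once per line entry. A standalone version of the same transformation, mapping a JVM class signature such as "Lfoo/bar/Baz;" plus the JVMTI source name "Baz.java" to "foo/bar/Baz.java"; the inputs are made up for illustration:

#include <stdio.h>
#include <string.h>

static void class_to_path(const char *sig, const char *file,
			  char *out, size_t max)
{
	size_t i = 0, j;

	if (*sig == 'L') {
		const char *p = strrchr(sig, '/');
		if (p) {
			/* drop the 'L' prefix, copy up to the final '/' */
			for (; i < max - 1 && i < (size_t)(p - sig); i++)
				out[i] = sig[i + 1];
		}
	}
	/* append the file name without modifying sig */
	for (j = 0; file && i < max - 1 && file[j]; j++, i++)
		out[i] = file[j];
	out[i] = '\0';
}

int main(void)
{
	char fn[256];

	class_to_path("Lfoo/bar/Baz;", "Baz.java", fn, sizeof(fn));
	printf("%s\n", fn);	/* foo/bar/Baz.java */
	return 0;
}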
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 792af7c3b74f..9316e648a880 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
11endif 11endif
12 12
13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include 13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
14LDLIBS += -lcap -lelf 14LDLIBS += -lcap -lelf -lrt
15 15
16TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 16TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
17 test_align test_verifier_log test_dev_cgroup 17 test_align test_verifier_log test_dev_cgroup
@@ -39,7 +39,7 @@ $(BPFOBJ): force
39CLANG ?= clang 39CLANG ?= clang
40LLC ?= llc 40LLC ?= llc
41 41
42PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) 42PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
43 43
44# Let newer LLVM versions transparently probe the kernel for availability 44# Let newer LLVM versions transparently probe the kernel for availability
45# of full BPF instruction set. 45# of full BPF instruction set.
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 8591c89c0828..471bbbdb94db 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -474,27 +474,7 @@ static struct bpf_align_test tests[] = {
474 .result = REJECT, 474 .result = REJECT,
475 .matches = { 475 .matches = {
476 {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, 476 {4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
477 /* ptr & 0x40 == either 0 or 0x40 */ 477 /* R5 bitwise operator &= on pointer prohibited */
478 {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
479 /* ptr << 2 == unknown, (4n) */
480 {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
481 /* (4n) + 14 == (4n+2). We blow our bounds, because
482 * the add could overflow.
483 */
484 {8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
485 /* Checked s>=0 */
486 {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
487 /* packet pointer + nonnegative (4n+2) */
488 {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
489 {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
490 /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
491 * We checked the bounds, but it might have been able
492 * to overflow if the packet pointer started in the
493 * upper half of the address space.
494 * So we did not get a 'range' on R6, and the access
495 * attempt will fail.
496 */
497 {16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
498 } 478 }
499 }, 479 },
500 { 480 {
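The deleted match lines in test_align.c show why the expectation shrinks to a single rejection string: the verifier used to model ptr & 0x40 as a value in {0, 0x40}, but any such result encodes bits of the underlying kernel pointer, so the operation is now refused outright. A userspace illustration of the leak (plain C, nothing BPF-specific):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int x;
	uintptr_t p = (uintptr_t)&x;

	/* either 0 or 0x40, revealing one bit of the address */
	printf("p & 0x40 = %#lx\n", (unsigned long)(p & 0x40));
	return 0;
}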
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 69427531408d..6761be18a91f 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -351,7 +351,7 @@ static void test_bpf_obj_id(void)
351 info_len != sizeof(struct bpf_map_info) || 351 info_len != sizeof(struct bpf_map_info) ||
352 strcmp((char *)map_infos[i].name, expected_map_name), 352 strcmp((char *)map_infos[i].name, expected_map_name),
353 "get-map-info(fd)", 353 "get-map-info(fd)",
354 "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", 354 "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
355 err, errno, 355 err, errno,
356 map_infos[i].type, BPF_MAP_TYPE_ARRAY, 356 map_infos[i].type, BPF_MAP_TYPE_ARRAY,
357 info_len, sizeof(struct bpf_map_info), 357 info_len, sizeof(struct bpf_map_info),
@@ -395,7 +395,7 @@ static void test_bpf_obj_id(void)
395 *(int *)prog_infos[i].map_ids != map_infos[i].id || 395 *(int *)prog_infos[i].map_ids != map_infos[i].id ||
396 strcmp((char *)prog_infos[i].name, expected_prog_name), 396 strcmp((char *)prog_infos[i].name, expected_prog_name),
397 "get-prog-info(fd)", 397 "get-prog-info(fd)",
398 "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", 398 "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
399 err, errno, i, 399 err, errno, i,
400 prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, 400 prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
401 info_len, sizeof(struct bpf_prog_info), 401 info_len, sizeof(struct bpf_prog_info),
@@ -463,7 +463,7 @@ static void test_bpf_obj_id(void)
463 memcmp(&prog_info, &prog_infos[i], info_len) || 463 memcmp(&prog_info, &prog_infos[i], info_len) ||
464 *(int *)prog_info.map_ids != saved_map_id, 464 *(int *)prog_info.map_ids != saved_map_id,
465 "get-prog-info(next_id->fd)", 465 "get-prog-info(next_id->fd)",
466 "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", 466 "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
467 err, errno, info_len, sizeof(struct bpf_prog_info), 467 err, errno, info_len, sizeof(struct bpf_prog_info),
468 memcmp(&prog_info, &prog_infos[i], info_len), 468 memcmp(&prog_info, &prog_infos[i], info_len),
469 *(int *)prog_info.map_ids, saved_map_id); 469 *(int *)prog_info.map_ids, saved_map_id);
@@ -509,7 +509,7 @@ static void test_bpf_obj_id(void)
509 memcmp(&map_info, &map_infos[i], info_len) || 509 memcmp(&map_info, &map_infos[i], info_len) ||
510 array_value != array_magic_value, 510 array_value != array_magic_value,
511 "check get-map-info(next_id->fd)", 511 "check get-map-info(next_id->fd)",
512 "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n", 512 "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
513 err, errno, info_len, sizeof(struct bpf_map_info), 513 err, errno, info_len, sizeof(struct bpf_map_info),
514 memcmp(&map_info, &map_infos[i], info_len), 514 memcmp(&map_info, &map_infos[i], info_len),
515 array_value, array_magic_value); 515 array_value, array_magic_value);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3c64f30cf63c..b51017404c62 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -422,9 +422,7 @@ static struct bpf_test tests[] = {
422 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), 422 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
423 BPF_EXIT_INSN(), 423 BPF_EXIT_INSN(),
424 }, 424 },
425 .errstr_unpriv = "R1 subtraction from stack pointer", 425 .errstr = "R1 subtraction from stack pointer",
426 .result_unpriv = REJECT,
427 .errstr = "R1 invalid mem access",
428 .result = REJECT, 426 .result = REJECT,
429 }, 427 },
430 { 428 {
@@ -606,7 +604,6 @@ static struct bpf_test tests[] = {
606 }, 604 },
607 .errstr = "misaligned stack access", 605 .errstr = "misaligned stack access",
608 .result = REJECT, 606 .result = REJECT,
609 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
610 }, 607 },
611 { 608 {
612 "invalid map_fd for function call", 609 "invalid map_fd for function call",
@@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = {
1797 }, 1794 },
1798 .result = REJECT, 1795 .result = REJECT,
1799 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", 1796 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
1800 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
1801 }, 1797 },
1802 { 1798 {
1803 "PTR_TO_STACK store/load - bad alignment on reg", 1799 "PTR_TO_STACK store/load - bad alignment on reg",
@@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = {
1810 }, 1806 },
1811 .result = REJECT, 1807 .result = REJECT,
1812 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", 1808 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
1813 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
1814 }, 1809 },
1815 { 1810 {
1816 "PTR_TO_STACK store/load - out of bounds low", 1811 "PTR_TO_STACK store/load - out of bounds low",
@@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = {
1862 BPF_MOV64_IMM(BPF_REG_0, 0), 1857 BPF_MOV64_IMM(BPF_REG_0, 0),
1863 BPF_EXIT_INSN(), 1858 BPF_EXIT_INSN(),
1864 }, 1859 },
1865 .result = ACCEPT, 1860 .result = REJECT,
1866 .result_unpriv = REJECT, 1861 .errstr = "R1 pointer += pointer",
1867 .errstr_unpriv = "R1 pointer += pointer",
1868 }, 1862 },
1869 { 1863 {
1870 "unpriv: neg pointer", 1864 "unpriv: neg pointer",
@@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = {
2592 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 2586 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
2593 offsetof(struct __sk_buff, data)), 2587 offsetof(struct __sk_buff, data)),
2594 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), 2588 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
2595 BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), 2589 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
2590 offsetof(struct __sk_buff, len)),
2596 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), 2591 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
2597 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), 2592 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
2598 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), 2593 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = {
2899 BPF_MOV64_IMM(BPF_REG_0, 0), 2894 BPF_MOV64_IMM(BPF_REG_0, 0),
2900 BPF_EXIT_INSN(), 2895 BPF_EXIT_INSN(),
2901 }, 2896 },
2902 .errstr = "invalid access to packet", 2897 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
2903 .result = REJECT, 2898 .result = REJECT,
2904 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2899 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
2905 }, 2900 },
@@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = {
3885 BPF_EXIT_INSN(), 3880 BPF_EXIT_INSN(),
3886 }, 3881 },
3887 .fixup_map2 = { 3, 11 }, 3882 .fixup_map2 = { 3, 11 },
3888 .errstr_unpriv = "R0 pointer += pointer", 3883 .errstr = "R0 pointer += pointer",
3889 .errstr = "R0 invalid mem access 'inv'",
3890 .result_unpriv = REJECT,
3891 .result = REJECT, 3884 .result = REJECT,
3892 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, 3885 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
3893 }, 3886 },
@@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = {
3928 BPF_EXIT_INSN(), 3921 BPF_EXIT_INSN(),
3929 }, 3922 },
3930 .fixup_map1 = { 4 }, 3923 .fixup_map1 = { 4 },
3931 .errstr = "R4 invalid mem access", 3924 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3932 .result = REJECT, 3925 .result = REJECT,
3933 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3926 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3934 }, 3927 },
@@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = {
3949 BPF_EXIT_INSN(), 3942 BPF_EXIT_INSN(),
3950 }, 3943 },
3951 .fixup_map1 = { 4 }, 3944 .fixup_map1 = { 4 },
3952 .errstr = "R4 invalid mem access", 3945 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3953 .result = REJECT, 3946 .result = REJECT,
3954 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3947 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3955 }, 3948 },
@@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = {
3970 BPF_EXIT_INSN(), 3963 BPF_EXIT_INSN(),
3971 }, 3964 },
3972 .fixup_map1 = { 4 }, 3965 .fixup_map1 = { 4 },
3973 .errstr = "R4 invalid mem access", 3966 .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
3974 .result = REJECT, 3967 .result = REJECT,
3975 .prog_type = BPF_PROG_TYPE_SCHED_CLS 3968 .prog_type = BPF_PROG_TYPE_SCHED_CLS
3976 }, 3969 },
@@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = {
5195 BPF_EXIT_INSN(), 5188 BPF_EXIT_INSN(),
5196 }, 5189 },
5197 .fixup_map2 = { 3 }, 5190 .fixup_map2 = { 3 },
5198 .errstr_unpriv = "R0 bitwise operator &= on pointer", 5191 .errstr = "R0 bitwise operator &= on pointer",
5199 .errstr = "invalid mem access 'inv'",
5200 .result = REJECT, 5192 .result = REJECT,
5201 .result_unpriv = REJECT,
5202 }, 5193 },
5203 { 5194 {
5204 "map element value illegal alu op, 2", 5195 "map element value illegal alu op, 2",
@@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = {
5214 BPF_EXIT_INSN(), 5205 BPF_EXIT_INSN(),
5215 }, 5206 },
5216 .fixup_map2 = { 3 }, 5207 .fixup_map2 = { 3 },
5217 .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited", 5208 .errstr = "R0 32-bit pointer arithmetic prohibited",
5218 .errstr = "invalid mem access 'inv'",
5219 .result = REJECT, 5209 .result = REJECT,
5220 .result_unpriv = REJECT,
5221 }, 5210 },
5222 { 5211 {
5223 "map element value illegal alu op, 3", 5212 "map element value illegal alu op, 3",
@@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = {
5233 BPF_EXIT_INSN(), 5222 BPF_EXIT_INSN(),
5234 }, 5223 },
5235 .fixup_map2 = { 3 }, 5224 .fixup_map2 = { 3 },
5236 .errstr_unpriv = "R0 pointer arithmetic with /= operator", 5225 .errstr = "R0 pointer arithmetic with /= operator",
5237 .errstr = "invalid mem access 'inv'",
5238 .result = REJECT, 5226 .result = REJECT,
5239 .result_unpriv = REJECT,
5240 }, 5227 },
5241 { 5228 {
5242 "map element value illegal alu op, 4", 5229 "map element value illegal alu op, 4",
@@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = {
6019 BPF_EXIT_INSN(), 6006 BPF_EXIT_INSN(),
6020 }, 6007 },
6021 .fixup_map_in_map = { 3 }, 6008 .fixup_map_in_map = { 3 },
6022 .errstr = "R1 type=inv expected=map_ptr", 6009 .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
6023 .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
6024 .result = REJECT, 6010 .result = REJECT,
6025 }, 6011 },
6026 { 6012 {
@@ -6117,6 +6103,30 @@ static struct bpf_test tests[] = {
6117 .result = ACCEPT, 6103 .result = ACCEPT,
6118 }, 6104 },
6119 { 6105 {
6106 "ld_abs: tests on r6 and skb data reload helper",
6107 .insns = {
6108 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
6109 BPF_LD_ABS(BPF_B, 0),
6110 BPF_LD_ABS(BPF_H, 0),
6111 BPF_LD_ABS(BPF_W, 0),
6112 BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
6113 BPF_MOV64_IMM(BPF_REG_6, 0),
6114 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
6115 BPF_MOV64_IMM(BPF_REG_2, 1),
6116 BPF_MOV64_IMM(BPF_REG_3, 2),
6117 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6118 BPF_FUNC_skb_vlan_push),
6119 BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
6120 BPF_LD_ABS(BPF_B, 0),
6121 BPF_LD_ABS(BPF_H, 0),
6122 BPF_LD_ABS(BPF_W, 0),
6123 BPF_MOV64_IMM(BPF_REG_0, 42),
6124 BPF_EXIT_INSN(),
6125 },
6126 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6127 .result = ACCEPT,
6128 },
6129 {
6120 "ld_ind: check calling conv, r1", 6130 "ld_ind: check calling conv, r1",
6121 .insns = { 6131 .insns = {
6122 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), 6132 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
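The new ld_abs test saves the skb context in r7, deliberately clobbers r6, restores it before the next BPF_LD_ABS (which implicitly reads the skb from r6), and reloads packet data after the helper call invalidates it. A loose userspace analogy of that save/restore discipline, with invented names and no BPF semantics, just the "re-derive pointers after an invalidating call" idea:

#include <stdio.h>
#include <string.h>

struct pkt { char *data; size_t len; };

/* may move p->data, like a helper that reallocates packet headroom */
static void grow(struct pkt *p)
{
	static char storage[64];
	memcpy(storage, p->data, p->len);
	p->data = storage;
}

int main(void)
{
	char buf[4] = "abc";
	struct pkt pkt = { buf, sizeof(buf) };
	struct pkt *saved = &pkt;	/* r7 = r6 */

	grow(saved);			/* call clobbers cached pointers */
	printf("%s\n", saved->data);	/* reload via the saved handle */
	return 0;
}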
@@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = {
6300 BPF_EXIT_INSN(), 6310 BPF_EXIT_INSN(),
6301 }, 6311 },
6302 .fixup_map1 = { 3 }, 6312 .fixup_map1 = { 3 },
6303 .errstr = "R0 min value is negative", 6313 .errstr = "unbounded min value",
6304 .result = REJECT, 6314 .result = REJECT,
6305 }, 6315 },
6306 { 6316 {
@@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = {
6324 BPF_EXIT_INSN(), 6334 BPF_EXIT_INSN(),
6325 }, 6335 },
6326 .fixup_map1 = { 3 }, 6336 .fixup_map1 = { 3 },
6327 .errstr = "R0 min value is negative", 6337 .errstr = "unbounded min value",
6328 .result = REJECT, 6338 .result = REJECT,
6329 }, 6339 },
6330 { 6340 {
@@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = {
6350 BPF_EXIT_INSN(), 6360 BPF_EXIT_INSN(),
6351 }, 6361 },
6352 .fixup_map1 = { 3 }, 6362 .fixup_map1 = { 3 },
6353 .errstr = "R8 invalid mem access 'inv'", 6363 .errstr = "unbounded min value",
6354 .result = REJECT, 6364 .result = REJECT,
6355 }, 6365 },
6356 { 6366 {
@@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = {
6375 BPF_EXIT_INSN(), 6385 BPF_EXIT_INSN(),
6376 }, 6386 },
6377 .fixup_map1 = { 3 }, 6387 .fixup_map1 = { 3 },
6378 .errstr = "R8 invalid mem access 'inv'", 6388 .errstr = "unbounded min value",
6379 .result = REJECT, 6389 .result = REJECT,
6380 }, 6390 },
6381 { 6391 {
@@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = {
6423 BPF_EXIT_INSN(), 6433 BPF_EXIT_INSN(),
6424 }, 6434 },
6425 .fixup_map1 = { 3 }, 6435 .fixup_map1 = { 3 },
6426 .errstr = "R0 min value is negative", 6436 .errstr = "unbounded min value",
6427 .result = REJECT, 6437 .result = REJECT,
6428 }, 6438 },
6429 { 6439 {
@@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = {
6494 BPF_EXIT_INSN(), 6504 BPF_EXIT_INSN(),
6495 }, 6505 },
6496 .fixup_map1 = { 3 }, 6506 .fixup_map1 = { 3 },
6497 .errstr = "R0 min value is negative", 6507 .errstr = "unbounded min value",
6498 .result = REJECT, 6508 .result = REJECT,
6499 }, 6509 },
6500 { 6510 {
@@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = {
6545 BPF_EXIT_INSN(), 6555 BPF_EXIT_INSN(),
6546 }, 6556 },
6547 .fixup_map1 = { 3 }, 6557 .fixup_map1 = { 3 },
6548 .errstr = "R0 min value is negative", 6558 .errstr = "unbounded min value",
6549 .result = REJECT, 6559 .result = REJECT,
6550 }, 6560 },
6551 { 6561 {
@@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = {
6572 BPF_EXIT_INSN(), 6582 BPF_EXIT_INSN(),
6573 }, 6583 },
6574 .fixup_map1 = { 3 }, 6584 .fixup_map1 = { 3 },
6575 .errstr = "R0 min value is negative", 6585 .errstr = "unbounded min value",
6576 .result = REJECT, 6586 .result = REJECT,
6577 }, 6587 },
6578 { 6588 {
@@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = {
6598 BPF_EXIT_INSN(), 6608 BPF_EXIT_INSN(),
6599 }, 6609 },
6600 .fixup_map1 = { 3 }, 6610 .fixup_map1 = { 3 },
6601 .errstr = "R0 min value is negative", 6611 .errstr = "unbounded min value",
6602 .result = REJECT, 6612 .result = REJECT,
6603 }, 6613 },
6604 { 6614 {
@@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = {
6627 BPF_EXIT_INSN(), 6637 BPF_EXIT_INSN(),
6628 }, 6638 },
6629 .fixup_map1 = { 3 }, 6639 .fixup_map1 = { 3 },
6630 .errstr = "R0 min value is negative", 6640 .errstr = "unbounded min value",
6631 .result = REJECT, 6641 .result = REJECT,
6632 }, 6642 },
6633 { 6643 {
@@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = {
6657 BPF_JMP_IMM(BPF_JA, 0, 0, -7), 6667 BPF_JMP_IMM(BPF_JA, 0, 0, -7),
6658 }, 6668 },
6659 .fixup_map1 = { 4 }, 6669 .fixup_map1 = { 4 },
6660 .errstr = "R0 min value is negative", 6670 .errstr = "unbounded min value",
6661 .result = REJECT, 6671 .result = REJECT,
6662 }, 6672 },
6663 { 6673 {
@@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = {
6685 BPF_EXIT_INSN(), 6695 BPF_EXIT_INSN(),
6686 }, 6696 },
6687 .fixup_map1 = { 3 }, 6697 .fixup_map1 = { 3 },
6688 .errstr_unpriv = "R0 pointer comparison prohibited", 6698 .errstr = "unbounded min value",
6689 .errstr = "R0 min value is negative",
6690 .result = REJECT, 6699 .result = REJECT,
6691 .result_unpriv = REJECT, 6700 .result_unpriv = REJECT,
6692 }, 6701 },
@@ -6742,6 +6751,462 @@ static struct bpf_test tests[] = {
6742 .result = REJECT, 6751 .result = REJECT,
6743 }, 6752 },
6744 { 6753 {
6754 "bounds check based on zero-extended MOV",
6755 .insns = {
6756 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6757 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6758 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6759 BPF_LD_MAP_FD(BPF_REG_1, 0),
6760 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6761 BPF_FUNC_map_lookup_elem),
6762 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6763 /* r2 = 0x0000'0000'ffff'ffff */
6764 BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
6765 /* r2 = 0 */
6766 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
6767 /* no-op */
6768 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6769 /* access at offset 0 */
6770 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6771 /* exit */
6772 BPF_MOV64_IMM(BPF_REG_0, 0),
6773 BPF_EXIT_INSN(),
6774 },
6775 .fixup_map1 = { 3 },
6776 .result = ACCEPT
6777 },
6778 {
6779 "bounds check based on sign-extended MOV. test1",
6780 .insns = {
6781 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6782 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6783 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6784 BPF_LD_MAP_FD(BPF_REG_1, 0),
6785 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6786 BPF_FUNC_map_lookup_elem),
6787 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6788 /* r2 = 0xffff'ffff'ffff'ffff */
6789 BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
6790 /* r2 = 0xffff'ffff */
6791 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
6792 /* r0 = <oob pointer> */
6793 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6794 /* access to OOB pointer */
6795 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6796 /* exit */
6797 BPF_MOV64_IMM(BPF_REG_0, 0),
6798 BPF_EXIT_INSN(),
6799 },
6800 .fixup_map1 = { 3 },
6801 .errstr = "map_value pointer and 4294967295",
6802 .result = REJECT
6803 },
6804 {
6805 "bounds check based on sign-extended MOV. test2",
6806 .insns = {
6807 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6808 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6809 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6810 BPF_LD_MAP_FD(BPF_REG_1, 0),
6811 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6812 BPF_FUNC_map_lookup_elem),
6813 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6814 /* r2 = 0xffff'ffff'ffff'ffff */
6815 BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
6816 /* r2 = 0xfff'ffff */
6817 BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
6818 /* r0 = <oob pointer> */
6819 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
6820 /* access to OOB pointer */
6821 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6822 /* exit */
6823 BPF_MOV64_IMM(BPF_REG_0, 0),
6824 BPF_EXIT_INSN(),
6825 },
6826 .fixup_map1 = { 3 },
6827 .errstr = "R0 min value is outside of the array range",
6828 .result = REJECT
6829 },
6830 {
6831 "bounds check based on reg_off + var_off + insn_off. test1",
6832 .insns = {
6833 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
6834 offsetof(struct __sk_buff, mark)),
6835 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6836 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6837 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6838 BPF_LD_MAP_FD(BPF_REG_1, 0),
6839 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6840 BPF_FUNC_map_lookup_elem),
6841 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6842 BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
6843 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
6844 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
6845 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
6846 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
6847 BPF_MOV64_IMM(BPF_REG_0, 0),
6848 BPF_EXIT_INSN(),
6849 },
6850 .fixup_map1 = { 4 },
6851 .errstr = "value_size=8 off=1073741825",
6852 .result = REJECT,
6853 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6854 },
6855 {
6856 "bounds check based on reg_off + var_off + insn_off. test2",
6857 .insns = {
6858 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
6859 offsetof(struct __sk_buff, mark)),
6860 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6861 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6862 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6863 BPF_LD_MAP_FD(BPF_REG_1, 0),
6864 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6865 BPF_FUNC_map_lookup_elem),
6866 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
6867 BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
6868 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
6869 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
6870 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
6871 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
6872 BPF_MOV64_IMM(BPF_REG_0, 0),
6873 BPF_EXIT_INSN(),
6874 },
6875 .fixup_map1 = { 4 },
6876 .errstr = "value 1073741823",
6877 .result = REJECT,
6878 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
6879 },
6880 {
6881 "bounds check after truncation of non-boundary-crossing range",
6882 .insns = {
6883 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6884 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6885 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6886 BPF_LD_MAP_FD(BPF_REG_1, 0),
6887 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6888 BPF_FUNC_map_lookup_elem),
6889 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6890 /* r1 = [0x00, 0xff] */
6891 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6892 BPF_MOV64_IMM(BPF_REG_2, 1),
6893 /* r2 = 0x10'0000'0000 */
6894 BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
6895 /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
6896 BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
6897 /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
6898 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
6899 /* r1 = [0x00, 0xff] */
6900 BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
6901 /* r1 = 0 */
6902 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6903 /* no-op */
6904 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6905 /* access at offset 0 */
6906 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6907 /* exit */
6908 BPF_MOV64_IMM(BPF_REG_0, 0),
6909 BPF_EXIT_INSN(),
6910 },
6911 .fixup_map1 = { 3 },
6912 .result = ACCEPT
6913 },
6914 {
6915 "bounds check after truncation of boundary-crossing range (1)",
6916 .insns = {
6917 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6918 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6919 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6920 BPF_LD_MAP_FD(BPF_REG_1, 0),
6921 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6922 BPF_FUNC_map_lookup_elem),
6923 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6924 /* r1 = [0x00, 0xff] */
6925 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6926 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6927 /* r1 = [0xffff'ff80, 0x1'0000'007f] */
6928 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6929 /* r1 = [0xffff'ff80, 0xffff'ffff] or
6930 * [0x0000'0000, 0x0000'007f]
6931 */
6932 BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
6933 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6934 /* r1 = [0x00, 0xff] or
6935 * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
6936 */
6937 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6938 /* r1 = 0 or
6939 * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
6940 */
6941 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6942 /* no-op or OOB pointer computation */
6943 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6944 /* potentially OOB access */
6945 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6946 /* exit */
6947 BPF_MOV64_IMM(BPF_REG_0, 0),
6948 BPF_EXIT_INSN(),
6949 },
6950 .fixup_map1 = { 3 },
6951 /* not actually fully unbounded, but the bound is very high */
6952 .errstr = "R0 unbounded memory access",
6953 .result = REJECT
6954 },
6955 {
6956 "bounds check after truncation of boundary-crossing range (2)",
6957 .insns = {
6958 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
6959 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6960 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6961 BPF_LD_MAP_FD(BPF_REG_1, 0),
6962 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6963 BPF_FUNC_map_lookup_elem),
6964 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
6965 /* r1 = [0x00, 0xff] */
6966 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
6967 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6968 /* r1 = [0xffff'ff80, 0x1'0000'007f] */
6969 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
6970 /* r1 = [0xffff'ff80, 0xffff'ffff] or
6971 * [0x0000'0000, 0x0000'007f]
6972 * difference to previous test: truncation via MOV32
6973 * instead of ALU32.
6974 */
6975 BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
6976 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6977 /* r1 = [0x00, 0xff] or
6978 * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
6979 */
6980 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
6981 /* r1 = 0 or
6982 * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
6983 */
6984 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
6985 /* no-op or OOB pointer computation */
6986 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
6987 /* potentially OOB access */
6988 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
6989 /* exit */
6990 BPF_MOV64_IMM(BPF_REG_0, 0),
6991 BPF_EXIT_INSN(),
6992 },
6993 .fixup_map1 = { 3 },
6994 /* not actually fully unbounded, but the bound is very high */
6995 .errstr = "R0 unbounded memory access",
6996 .result = REJECT
6997 },
6998 {
6999 "bounds check after wrapping 32-bit addition",
7000 .insns = {
7001 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7002 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7003 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7004 BPF_LD_MAP_FD(BPF_REG_1, 0),
7005 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7006 BPF_FUNC_map_lookup_elem),
7007 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
7008 /* r1 = 0x7fff'ffff */
7009 BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
7010 /* r1 = 0xffff'fffe */
7011 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7012 /* r1 = 0 */
7013 BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
7014 /* no-op */
7015 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7016 /* access at offset 0 */
7017 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7018 /* exit */
7019 BPF_MOV64_IMM(BPF_REG_0, 0),
7020 BPF_EXIT_INSN(),
7021 },
7022 .fixup_map1 = { 3 },
7023 .result = ACCEPT
7024 },
7025 {
7026 "bounds check after shift with oversized count operand",
7027 .insns = {
7028 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7029 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7030 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7031 BPF_LD_MAP_FD(BPF_REG_1, 0),
7032 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7033 BPF_FUNC_map_lookup_elem),
7034 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
7035 BPF_MOV64_IMM(BPF_REG_2, 32),
7036 BPF_MOV64_IMM(BPF_REG_1, 1),
7037 /* r1 = (u32)1 << (u32)32 = ? */
7038 BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
7039 /* r1 = [0x0000, 0xffff] */
7040 BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
7041 /* computes unknown pointer, potentially OOB */
7042 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7043 /* potentially OOB access */
7044 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7045 /* exit */
7046 BPF_MOV64_IMM(BPF_REG_0, 0),
7047 BPF_EXIT_INSN(),
7048 },
7049 .fixup_map1 = { 3 },
7050 .errstr = "R0 max value is outside of the array range",
7051 .result = REJECT
7052 },
7053 {
7054 "bounds check after right shift of maybe-negative number",
7055 .insns = {
7056 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7057 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7058 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7059 BPF_LD_MAP_FD(BPF_REG_1, 0),
7060 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7061 BPF_FUNC_map_lookup_elem),
7062 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
7063 /* r1 = [0x00, 0xff] */
7064 BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7065 /* r1 = [-0x01, 0xfe] */
7066 BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
7067 /* r1 = 0 or 0xff'ffff'ffff'ffff */
7068 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
7069 /* r1 = 0 or 0xffff'ffff'ffff */
7070 BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
7071 /* computes unknown pointer, potentially OOB */
7072 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7073 /* potentially OOB access */
7074 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
7075 /* exit */
7076 BPF_MOV64_IMM(BPF_REG_0, 0),
7077 BPF_EXIT_INSN(),
7078 },
7079 .fixup_map1 = { 3 },
7080 .errstr = "R0 unbounded memory access",
7081 .result = REJECT
7082 },
7083 {
7084 "bounds check map access with off+size signed 32bit overflow. test1",
7085 .insns = {
7086 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7087 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7088 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7089 BPF_LD_MAP_FD(BPF_REG_1, 0),
7090 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7091 BPF_FUNC_map_lookup_elem),
7092 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7093 BPF_EXIT_INSN(),
7094 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
7095 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7096 BPF_JMP_A(0),
7097 BPF_EXIT_INSN(),
7098 },
7099 .fixup_map1 = { 3 },
7100 .errstr = "map_value pointer and 2147483646",
7101 .result = REJECT
7102 },
7103 {
7104 "bounds check map access with off+size signed 32bit overflow. test2",
7105 .insns = {
7106 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7107 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7108 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7109 BPF_LD_MAP_FD(BPF_REG_1, 0),
7110 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7111 BPF_FUNC_map_lookup_elem),
7112 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7113 BPF_EXIT_INSN(),
7114 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7115 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7116 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
7117 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7118 BPF_JMP_A(0),
7119 BPF_EXIT_INSN(),
7120 },
7121 .fixup_map1 = { 3 },
7122 .errstr = "pointer offset 1073741822",
7123 .result = REJECT
7124 },
7125 {
7126 "bounds check map access with off+size signed 32bit overflow. test3",
7127 .insns = {
7128 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7129 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7130 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7131 BPF_LD_MAP_FD(BPF_REG_1, 0),
7132 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7133 BPF_FUNC_map_lookup_elem),
7134 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7135 BPF_EXIT_INSN(),
7136 BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
7137 BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
7138 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
7139 BPF_JMP_A(0),
7140 BPF_EXIT_INSN(),
7141 },
7142 .fixup_map1 = { 3 },
7143 .errstr = "pointer offset -1073741822",
7144 .result = REJECT
7145 },
7146 {
7147 "bounds check map access with off+size signed 32bit overflow. test4",
7148 .insns = {
7149 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7150 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7151 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7152 BPF_LD_MAP_FD(BPF_REG_1, 0),
7153 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7154 BPF_FUNC_map_lookup_elem),
7155 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
7156 BPF_EXIT_INSN(),
7157 BPF_MOV64_IMM(BPF_REG_1, 1000000),
7158 BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
7159 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
7160 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
7161 BPF_JMP_A(0),
7162 BPF_EXIT_INSN(),
7163 },
7164 .fixup_map1 = { 3 },
7165 .errstr = "map_value pointer and 1000000000000",
7166 .result = REJECT
7167 },
7168 {
7169 "pointer/scalar confusion in state equality check (way 1)",
7170 .insns = {
7171 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7172 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7173 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7174 BPF_LD_MAP_FD(BPF_REG_1, 0),
7175 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7176 BPF_FUNC_map_lookup_elem),
7177 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
7178 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7179 BPF_JMP_A(1),
7180 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
7181 BPF_JMP_A(0),
7182 BPF_EXIT_INSN(),
7183 },
7184 .fixup_map1 = { 3 },
7185 .result = ACCEPT,
7186 .result_unpriv = REJECT,
7187 .errstr_unpriv = "R0 leaks addr as return value"
7188 },
7189 {
7190 "pointer/scalar confusion in state equality check (way 2)",
7191 .insns = {
7192 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7193 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
7194 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
7195 BPF_LD_MAP_FD(BPF_REG_1, 0),
7196 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7197 BPF_FUNC_map_lookup_elem),
7198 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
7199 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
7200 BPF_JMP_A(1),
7201 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
7202 BPF_EXIT_INSN(),
7203 },
7204 .fixup_map1 = { 3 },
7205 .result = ACCEPT,
7206 .result_unpriv = REJECT,
7207 .errstr_unpriv = "R0 leaks addr as return value"
7208 },
7209 {
6745 "variable-offset ctx access", 7210 "variable-offset ctx access",
6746 .insns = { 7211 .insns = {
6747 /* Get an unknown value */ 7212 /* Get an unknown value */
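The MOV bounds tests above hinge on one extension rule: BPF_MOV64_IMM sign-extends its 32-bit immediate, while a 32-bit MOV zero-extends, so the same 0xffffffff constant becomes either -1 or 4294967295 before it feeds 64-bit pointer arithmetic. A userspace illustration of the two cases:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t zext = (uint32_t)0xffffffff;		/* MOV32 semantics */
	uint64_t sext = (uint64_t)(int32_t)0xffffffff;	/* MOV64_IMM semantics */

	printf("zero-extended: 0x%016" PRIx64 "\n", zext);
	printf("sign-extended: 0x%016" PRIx64 "\n", sext);

	/* After >> 32: 0 (the ACCEPTed zero-extension test) versus
	 * 0xffffffff (the "map_value pointer and 4294967295" rejection). */
	printf("0x%" PRIx64 " 0x%" PRIx64 "\n", zext >> 32, sext >> 32);
	return 0;
}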
@@ -6783,6 +7248,71 @@ static struct bpf_test tests[] = {
6783 .prog_type = BPF_PROG_TYPE_LWT_IN, 7248 .prog_type = BPF_PROG_TYPE_LWT_IN,
6784 }, 7249 },
6785 { 7250 {
7251 "indirect variable-offset stack access",
7252 .insns = {
7253 /* Fill the top 8 bytes of the stack */
7254 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
7255 /* Get an unknown value */
7256 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
7257 /* Make it small and 4-byte aligned */
7258 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
7259 BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
7260 /* add it to fp. We now have either fp-4 or fp-8, but
7261 * we don't know which
7262 */
7263 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
7264 /* dereference it indirectly */
7265 BPF_LD_MAP_FD(BPF_REG_1, 0),
7266 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7267 BPF_FUNC_map_lookup_elem),
7268 BPF_MOV64_IMM(BPF_REG_0, 0),
7269 BPF_EXIT_INSN(),
7270 },
7271 .fixup_map1 = { 5 },
7272 .errstr = "variable stack read R2",
7273 .result = REJECT,
7274 .prog_type = BPF_PROG_TYPE_LWT_IN,
7275 },
7276 {
7277 "direct stack access with 32-bit wraparound. test1",
7278 .insns = {
7279 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7280 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7281 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
7282 BPF_MOV32_IMM(BPF_REG_0, 0),
7283 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7284 BPF_EXIT_INSN()
7285 },
7286 .errstr = "fp pointer and 2147483647",
7287 .result = REJECT
7288 },
7289 {
7290 "direct stack access with 32-bit wraparound. test2",
7291 .insns = {
7292 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7293 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
7294 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
7295 BPF_MOV32_IMM(BPF_REG_0, 0),
7296 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7297 BPF_EXIT_INSN()
7298 },
7299 .errstr = "fp pointer and 1073741823",
7300 .result = REJECT
7301 },
7302 {
7303 "direct stack access with 32-bit wraparound. test3",
7304 .insns = {
7305 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
7306 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
7307 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
7308 BPF_MOV32_IMM(BPF_REG_0, 0),
7309 BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
7310 BPF_EXIT_INSN()
7311 },
7312 .errstr = "fp pointer offset 1073741822",
7313 .result = REJECT
7314 },
7315 {
6786 "liveness pruning and write screening", 7316 "liveness pruning and write screening",
6787 .insns = { 7317 .insns = {
6788 /* Get an unknown value */ 7318 /* Get an unknown value */
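The "32-bit wraparound" stack tests reject frame-pointer offsets large enough that, as the error strings suggest, later 32-bit offset-plus-size arithmetic could wrap. The hazard itself, in plain C: a sum that is perfectly representable in 64 bits collapses to a small value in 32-bit arithmetic, so an access can look in-bounds while the real pointer moved by gigabytes.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t off32 = 0x7fffffffu + 0x7fffffffu + 2;		/* wraps to 0 */
	uint64_t off64 = (uint64_t)0x7fffffff + 0x7fffffff + 2;	/* 2^32 */

	printf("32-bit sum: %u\n", off32);
	printf("64-bit sum: 0x%llx\n", (unsigned long long)off64);
	return 0;
}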
@@ -7104,6 +7634,19 @@ static struct bpf_test tests[] = {
7104 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 7634 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
7105 }, 7635 },
7106 { 7636 {
7637 "pkt_end - pkt_start is allowed",
7638 .insns = {
7639 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
7640 offsetof(struct __sk_buff, data_end)),
7641 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
7642 offsetof(struct __sk_buff, data)),
7643 BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
7644 BPF_EXIT_INSN(),
7645 },
7646 .result = ACCEPT,
7647 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
7648 },
7649 {
7107 "XDP pkt read, pkt_end mangling, bad access 1", 7650 "XDP pkt read, pkt_end mangling, bad access 1",
7108 .insns = { 7651 .insns = {
7109 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 7652 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
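The "pkt_end - pkt_start is allowed" test marks the one packet-pointer operation that stays legal: subtracting two pointers into the same packet yields a plain length and leaks nothing, unlike arithmetic on pkt_end itself, which the neighbouring XDP tests now reject with "R3 pointer arithmetic on PTR_TO_PACKET_END". The C-level equivalent:

#include <stdio.h>

int main(void)
{
	char pkt[64];
	char *data = pkt, *data_end = pkt + sizeof(pkt);

	printf("len = %td\n", data_end - data);	/* 64: a scalar, not a pointer */
	return 0;
}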
@@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = {
7118 BPF_MOV64_IMM(BPF_REG_0, 0), 7661 BPF_MOV64_IMM(BPF_REG_0, 0),
7119 BPF_EXIT_INSN(), 7662 BPF_EXIT_INSN(),
7120 }, 7663 },
7121 .errstr = "R1 offset is outside of the packet", 7664 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
7122 .result = REJECT, 7665 .result = REJECT,
7123 .prog_type = BPF_PROG_TYPE_XDP, 7666 .prog_type = BPF_PROG_TYPE_XDP,
7124 }, 7667 },
@@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = {
7137 BPF_MOV64_IMM(BPF_REG_0, 0), 7680 BPF_MOV64_IMM(BPF_REG_0, 0),
7138 BPF_EXIT_INSN(), 7681 BPF_EXIT_INSN(),
7139 }, 7682 },
7140 .errstr = "R1 offset is outside of the packet", 7683 .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
7141 .result = REJECT, 7684 .result = REJECT,
7142 .prog_type = BPF_PROG_TYPE_XDP, 7685 .prog_type = BPF_PROG_TYPE_XDP,
7143 }, 7686 },
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index e57b4ac40e72..7177bea1fdfa 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,3 +1,4 @@
1CONFIG_USER_NS=y 1CONFIG_USER_NS=y
2CONFIG_BPF_SYSCALL=y 2CONFIG_BPF_SYSCALL=y
3CONFIG_TEST_BPF=m 3CONFIG_TEST_BPF=m
4CONFIG_NUMA=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 939a337128db..5d4f10ac2af2 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -7,7 +7,7 @@ include ../lib.mk
7 7
8TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ 8TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
9 check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ 9 check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
10 protection_keys test_vdso 10 protection_keys test_vdso test_vsyscall
11TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ 11TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
12 test_FCMOV test_FCOMI test_FISTTP \ 12 test_FCMOV test_FCOMI test_FISTTP \
13 vdso_restorer 13 vdso_restorer
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 66e5ce5b91f0..1aef72df20a1 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t index, int ldt,
122 * NB: Different Linux versions do different things with the 122 * NB: Different Linux versions do different things with the
123 * accessed bit in set_thread_area(). 123 * accessed bit in set_thread_area().
124 */ 124 */
125 if (ar != expected_ar && 125 if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
126 (ldt || ar != (expected_ar | AR_ACCESSED))) {
127 printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", 126 printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
128 (ldt ? "LDT" : "GDT"), index, ar, expected_ar); 127 (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
129 nerrs++; 128 nerrs++;
@@ -627,13 +626,10 @@ static void do_multicpu_tests(void)
627static int finish_exec_test(void) 626static int finish_exec_test(void)
628{ 627{
629 /* 628 /*
630 * In a sensible world, this would be check_invalid_segment(0, 1); 629 * Older kernel versions did inherit the LDT on exec() which is
631 * For better or for worse, though, the LDT is inherited across exec. 630 * wrong because exec() starts from a clean state.
632 * We can probably change this safely, but for now we test it.
633 */ 631 */
634 check_valid_segment(0, 1, 632 check_invalid_segment(0, 1);
635 AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
636 42, true);
637 633
638 return nerrs ? 1 : 0; 634 return nerrs ? 1 : 0;
639} 635}
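The ldt_gdt.c change flips finish_exec_test() to expect an empty LDT, since exec() now starts from a clean state. A hypothetical x86-only probe for that behaviour, assuming the usual modify_ldt read semantics where an empty LDT yields no descriptor bytes; this is a sketch, not part of the selftest:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	char buf[16 * 8];	/* room for a few 8-byte descriptors */
	long n = syscall(SYS_modify_ldt, 0 /* read */, buf, sizeof(buf));

	/* 0 bytes read == empty LDT, matching check_invalid_segment(0, 1) */
	printf("LDT read returned %ld bytes\n", n);
	return 0;
}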
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644
index 000000000000..7a744fa7b786
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -0,0 +1,500 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#define _GNU_SOURCE
4
5#include <stdio.h>
6#include <sys/time.h>
7#include <time.h>
8#include <stdlib.h>
9#include <sys/syscall.h>
10#include <unistd.h>
11#include <dlfcn.h>
12#include <string.h>
13#include <inttypes.h>
14#include <signal.h>
15#include <sys/ucontext.h>
16#include <errno.h>
17#include <err.h>
18#include <sched.h>
19#include <stdbool.h>
20#include <setjmp.h>
21
22#ifdef __x86_64__
23# define VSYS(x) (x)
24#else
25# define VSYS(x) 0
26#endif
27
28#ifndef SYS_getcpu
29# ifdef __x86_64__
30# define SYS_getcpu 309
31# else
32# define SYS_getcpu 318
33# endif
34#endif
35
36static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
37 int flags)
38{
39 struct sigaction sa;
40 memset(&sa, 0, sizeof(sa));
41 sa.sa_sigaction = handler;
42 sa.sa_flags = SA_SIGINFO | flags;
43 sigemptyset(&sa.sa_mask);
44 if (sigaction(sig, &sa, 0))
45 err(1, "sigaction");
46}
47
48/* vsyscalls and vDSO */
49bool should_read_vsyscall = false;
50
51typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
52gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
53gtod_t vdso_gtod;
54
55typedef int (*vgettime_t)(clockid_t, struct timespec *);
56vgettime_t vdso_gettime;
57
58typedef long (*time_func_t)(time_t *t);
59time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
60time_func_t vdso_time;
61
62typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
63getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
64getcpu_t vdso_getcpu;
65
66static void init_vdso(void)
67{
68 void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
69 if (!vdso)
70 vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
71 if (!vdso) {
72 printf("[WARN]\tfailed to find vDSO\n");
73 return;
74 }
75
76 vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
77 if (!vdso_gtod)
78 printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
79
80 vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
81 if (!vdso_gettime)
82 printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
83
84 vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
85 if (!vdso_time)
86 printf("[WARN]\tfailed to find time in vDSO\n");
87
88 vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
89 if (!vdso_getcpu) {
90 /* getcpu() was never wired up in the 32-bit vDSO. */
91 printf("[%s]\tfailed to find getcpu in vDSO\n",
92 sizeof(long) == 8 ? "WARN" : "NOTE");
93 }
94}
95
96static int init_vsys(void)
97{
98#ifdef __x86_64__
99 int nerrs = 0;
100 FILE *maps;
101 char line[128];
102 bool found = false;
103
104 maps = fopen("/proc/self/maps", "r");
105 if (!maps) {
106 printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
107 should_read_vsyscall = true;
108 return 0;
109 }
110
111 while (fgets(line, sizeof(line), maps)) {
112 char r, x;
113 void *start, *end;
114 char name[128];
115 if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
116 &start, &end, &r, &x, name) != 5)
117 continue;
118
119 if (strcmp(name, "[vsyscall]"))
120 continue;
121
122 printf("\tvsyscall map: %s", line);
123
124 if (start != (void *)0xffffffffff600000 ||
125 end != (void *)0xffffffffff601000) {
126 printf("[FAIL]\taddress range is nonsense\n");
127 nerrs++;
128 }
129
130 printf("\tvsyscall permissions are %c-%c\n", r, x);
131 should_read_vsyscall = (r == 'r');
132 if (x != 'x') {
133 vgtod = NULL;
134 vtime = NULL;
135 vgetcpu = NULL;
136 }
137
138 found = true;
139 break;
140 }
141
142 fclose(maps);
143
144 if (!found) {
145 printf("\tno vsyscall map in /proc/self/maps\n");
146 should_read_vsyscall = false;
147 vgtod = NULL;
148 vtime = NULL;
149 vgetcpu = NULL;
150 }
151
152 return nerrs;
153#else
154 return 0;
155#endif
156}
157
158/* syscalls */
159static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
160{
161 return syscall(SYS_gettimeofday, tv, tz);
162}
163
164static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
165{
166 return syscall(SYS_clock_gettime, id, ts);
167}
168
169static inline long sys_time(time_t *t)
170{
171 return syscall(SYS_time, t);
172}
173
174static inline long sys_getcpu(unsigned * cpu, unsigned * node,
175 void* cache)
176{
177 return syscall(SYS_getcpu, cpu, node, cache);
178}
179
180static jmp_buf jmpbuf;
181
182static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
183{
184 siglongjmp(jmpbuf, 1);
185}
186
187static double tv_diff(const struct timeval *a, const struct timeval *b)
188{
189 return (double)(a->tv_sec - b->tv_sec) +
190 (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
191}
192
193static int check_gtod(const struct timeval *tv_sys1,
194 const struct timeval *tv_sys2,
195 const struct timezone *tz_sys,
196 const char *which,
197 const struct timeval *tv_other,
198 const struct timezone *tz_other)
199{
200 int nerrs = 0;
201 double d1, d2;
202
203 if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
204 printf("[FAIL] %s tz mismatch\n", which);
205 nerrs++;
206 }
207
208 d1 = tv_diff(tv_other, tv_sys1);
209 d2 = tv_diff(tv_sys2, tv_other);
210 printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
211
212 if (d1 < 0 || d2 < 0) {
213 printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
214 nerrs++;
215 } else {
216 printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
217 }
218
219 return nerrs;
220}
221
222static int test_gtod(void)
223{
224 struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
225 struct timezone tz_sys, tz_vdso, tz_vsys;
226 long ret_vdso = -1;
227 long ret_vsys = -1;
228 int nerrs = 0;
229
230 printf("[RUN]\ttest gettimeofday()\n");
231
232 if (sys_gtod(&tv_sys1, &tz_sys) != 0)
233 err(1, "syscall gettimeofday");
234 if (vdso_gtod)
235 ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
236 if (vgtod)
237 ret_vsys = vgtod(&tv_vsys, &tz_vsys);
238 if (sys_gtod(&tv_sys2, &tz_sys) != 0)
239 err(1, "syscall gettimeofday");
240
241 if (vdso_gtod) {
242 if (ret_vdso == 0) {
243 nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
244 } else {
245 printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
246 nerrs++;
247 }
248 }
249
250 if (vgtod) {
251 if (ret_vsys == 0) {
252 nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
253 } else {
254 printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
255 nerrs++;
256 }
257 }
258
259 return nerrs;
260}
261
262static int test_time(void) {
263 int nerrs = 0;
264
265 printf("[RUN]\ttest time()\n");
266 long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
267 long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
268 t_sys1 = sys_time(&t2_sys1);
269 if (vdso_time)
270 t_vdso = vdso_time(&t2_vdso);
271 if (vtime)
272 t_vsys = vtime(&t2_vsys);
273 t_sys2 = sys_time(&t2_sys2);
274 if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
275 printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
276 nerrs++;
277 return nerrs;
278 }
279
280 if (vdso_time) {
281 if (t_vdso < 0 || t_vdso != t2_vdso) {
282 printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
283 nerrs++;
284 } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
285 printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
286 nerrs++;
287 } else {
288 printf("[OK]\tvDSO time() is okay\n");
289 }
290 }
291
292 if (vtime) {
293 if (t_vsys < 0 || t_vsys != t2_vsys) {
294 printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
295 nerrs++;
296 } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
297 printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
298 nerrs++;
299 } else {
300 printf("[OK]\tvsyscall time() is okay\n");
301 }
302 }
303
304 return nerrs;
305}
306
307static int test_getcpu(int cpu)
308{
309 int nerrs = 0;
310 long ret_sys, ret_vdso = -1, ret_vsys = -1;
311
312 printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
313
314 cpu_set_t cpuset;
315 CPU_ZERO(&cpuset);
316 CPU_SET(cpu, &cpuset);
317 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
318 printf("[SKIP]\tfailed to force CPU %d\n", cpu);
319 return nerrs;
320 }
321
322 unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
323 unsigned node = 0;
324 bool have_node = false;
325 ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
326 if (vdso_getcpu)
327 ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
328 if (vgetcpu)
329 ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
330
331 if (ret_sys == 0) {
332 if (cpu_sys != cpu) {
333 printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
334 nerrs++;
335 }
336
337 have_node = true;
338 node = node_sys;
339 }
340
341 if (vdso_getcpu) {
342 if (ret_vdso) {
343 printf("[FAIL]\tvDSO getcpu() failed\n");
344 nerrs++;
345 } else {
346 if (!have_node) {
347 have_node = true;
348 node = node_vdso;
349 }
350
351 if (cpu_vdso != cpu) {
352 printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
353 nerrs++;
354 } else {
355 printf("[OK]\tvDSO reported correct CPU\n");
356 }
357
358 if (node_vdso != node) {
359 printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
360 nerrs++;
361 } else {
362 printf("[OK]\tvDSO reported correct node\n");
363 }
364 }
365 }
366
367 if (vgetcpu) {
368 if (ret_vsys) {
369 printf("[FAIL]\tvsyscall getcpu() failed\n");
370 nerrs++;
371 } else {
372 if (!have_node) {
373 have_node = true;
374 node = node_vsys;
375 }
376
377 if (cpu_vsys != cpu) {
378 printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
379 nerrs++;
380 } else {
381 printf("[OK]\tvsyscall reported correct CPU\n");
382 }
383
384 if (node_vsys != node) {
385 printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
386 nerrs++;
387 } else {
388 printf("[OK]\tvsyscall reported correct node\n");
389 }
390 }
391 }
392
393 return nerrs;
394}
395
396static int test_vsys_r(void)
397{
398#ifdef __x86_64__
399 printf("[RUN]\tChecking read access to the vsyscall page\n");
400 bool can_read;
401 if (sigsetjmp(jmpbuf, 1) == 0) {
402 *(volatile int *)0xffffffffff600000;
403 can_read = true;
404 } else {
405 can_read = false;
406 }
407
408 if (can_read && !should_read_vsyscall) {
409 printf("[FAIL]\tWe have read access, but we shouldn't\n");
410 return 1;
411 } else if (!can_read && should_read_vsyscall) {
412 printf("[FAIL]\tWe don't have read access, but we should\n");
413 return 1;
414 } else {
415 printf("[OK]\tgot expected result\n");
416 }
417#endif
418
419 return 0;
420}
421
422
423#ifdef __x86_64__
424#define X86_EFLAGS_TF (1UL << 8)
425static volatile sig_atomic_t num_vsyscall_traps;
426
427static unsigned long get_eflags(void)
428{
429 unsigned long eflags;
430 asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
431 return eflags;
432}
433
434static void set_eflags(unsigned long eflags)
435{
436 asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
437}
438
439static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
440{
441 ucontext_t *ctx = (ucontext_t *)ctx_void;
442 unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
443
444 if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
445 num_vsyscall_traps++;
446}
447
448static int test_native_vsyscall(void)
449{
450 time_t tmp;
451 bool is_native;
452
453 if (!vtime)
454 return 0;
455
456 printf("[RUN]\tchecking for native vsyscall\n");
457 sethandler(SIGTRAP, sigtrap, 0);
458 set_eflags(get_eflags() | X86_EFLAGS_TF);
459 vtime(&tmp);
460 set_eflags(get_eflags() & ~X86_EFLAGS_TF);
461
462 /*
463 * If vsyscalls are emulated, we expect a single trap in the
464 * vsyscall page -- the call instruction will trap with RIP
465 * pointing to the entry point before emulation takes over.
466 * In native mode, we expect two traps, since whatever code
467 * the vsyscall page contains will be more than just a ret
468 * instruction.
469 */
470 is_native = (num_vsyscall_traps > 1);
471
472 printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
473 (is_native ? "native" : "emulated"),
474 (int)num_vsyscall_traps);
475
476 return 0;
477}
478#endif
479
480int main(int argc, char **argv)
481{
482 int nerrs = 0;
483
484 init_vdso();
485 nerrs += init_vsys();
486
487 nerrs += test_gtod();
488 nerrs += test_time();
489 nerrs += test_getcpu(0);
490 nerrs += test_getcpu(1);
491
492 sethandler(SIGSEGV, sigsegv, 0);
493 nerrs += test_vsys_r();
494
495#ifdef __x86_64__
496 nerrs += test_native_vsyscall();
497#endif
498
499 return nerrs ? 1 : 0;
500}
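
The new test compares three paths to the same clock: the real syscall, the vDSO entry point, and the legacy vsyscall page. The vDSO lookup above follows the usual userspace fallback pattern, sketched here in isolation with a hypothetical gettimeofday_fast() wrapper (link with -ldl):

#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <unistd.h>

typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);

/* Prefer the vDSO entry point when the dynamic loader exposes it;
 * otherwise fall back to the real syscall. */
static long gettimeofday_fast(struct timeval *tv, struct timezone *tz)
{
	static gtod_t vdso_gtod;

	if (!vdso_gtod) {
		void *vdso = dlopen("linux-vdso.so.1",
				    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
		if (vdso)
			vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
	}
	if (vdso_gtod)
		return vdso_gtod(tv, tz);	/* no kernel entry needed */
	return syscall(SYS_gettimeofday, tv, tz);
}

int main(void)
{
	struct timeval tv;

	if (gettimeofday_fast(&tv, NULL) == 0)
		printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}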
diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c
index 2b3d6d235015..3d7b42e77299 100644
--- a/tools/usb/usbip/src/utils.c
+++ b/tools/usb/usbip/src/utils.c
@@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add)
 	char command[SYSFS_BUS_ID_SIZE + 4];
 	char match_busid_attr_path[SYSFS_PATH_MAX];
 	int rc;
+	int cmd_size;
 
 	snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
 		 "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add)
 		 attr_name);
 
 	if (add)
-		snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid);
+		cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+				    busid);
 	else
-		snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid);
+		cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+				    busid);
 
 	rc = write_sysfs_attribute(match_busid_attr_path, command,
-				   sizeof(command));
+				   cmd_size);
 	if (rc < 0) {
 		dbg("failed to write match_busid: %s", strerror(errno));
 		return -1;
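
The usbip fix writes only the bytes snprintf() actually formatted instead of the whole buffer, whose tail is uninitialized. A minimal illustration of the difference (hypothetical values; note that snprintf() returns the would-be length, which can exceed the buffer size on truncation, so a production caller should also check for that):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char command[32];
	int cmd_size = snprintf(command, sizeof(command), "add %s", "1-1.4");

	/* cmd_size == 9 ("add 1-1.4"); command[10..31] are indeterminate,
	 * so writing sizeof(command) bytes would hand stack garbage to the
	 * sysfs attribute. */
	printf("formatted %d of %zu bytes\n", cmd_size, sizeof(command));
	return !(cmd_size == (int)strlen(command));
}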
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f9555b1e7f15..cc29a8148328 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
 	struct arch_timer_context *vtimer;
+	u32 cnt_ctl;
 
-	if (!vcpu) {
-		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
-		return IRQ_NONE;
-	}
-	vtimer = vcpu_vtimer(vcpu);
+	/*
+	 * We may see a timer interrupt after vcpu_put() has been called which
+	 * sets the CPU's vcpu pointer to NULL, because even though the timer
+	 * has been disabled in vtimer_save_state(), the hardware interrupt
+	 * signal may not have been retired from the interrupt controller yet.
+	 */
+	if (!vcpu)
+		return IRQ_HANDLED;
 
+	vtimer = vcpu_vtimer(vcpu);
 	if (!vtimer->irq.level) {
-		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-		if (kvm_timer_irq_can_fire(vtimer))
+		cnt_ctl = read_sysreg_el0(cntv_ctl);
+		cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
+			   ARCH_TIMER_CTRL_IT_MASK;
+		if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
 			kvm_timer_update_irq(vcpu, true, vtimer);
 	}
 
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 
 	/* Disable the virtual timer */
 	write_sysreg_el0(0, cntv_ctl);
+	isb();
 
 	vtimer->loaded = false;
 out:
@@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
-int kvm_timer_hyp_init(void)
+int kvm_timer_hyp_init(bool has_gic)
 {
 	struct arch_timer_kvm_info *info;
 	int err;
@@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void)
 		return err;
 	}
 
-	err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
-	if (err) {
-		kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
-		goto out_free_irq;
+	if (has_gic) {
+		err = irq_set_vcpu_affinity(host_vtimer_irq,
+					    kvm_get_running_vcpus());
+		if (err) {
+			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+			goto out_free_irq;
+		}
 	}
 
 	kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
 	preempt_disable();
 	timer->enabled = 1;
-	if (!irqchip_in_kernel(vcpu->kvm))
-		kvm_timer_vcpu_load_user(vcpu);
-	else
-		kvm_timer_vcpu_load_vgic(vcpu);
+	kvm_timer_vcpu_load(vcpu);
 	preempt_enable();
 
 	return 0;
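
The reworked interrupt handler masks CNTV_CTL down to three bits and treats the timer as firing only when it is enabled, its interrupt status is set, and the interrupt is not masked. Restated as a standalone predicate (bit positions follow the ARM generic timer control register layout; vtimer_should_fire() is an invented name for this sketch):

#include <stdbool.h>
#include <stdint.h>

#define ARCH_TIMER_CTRL_ENABLE   (1u << 0)	/* timer enabled */
#define ARCH_TIMER_CTRL_IT_MASK  (1u << 1)	/* interrupt masked */
#define ARCH_TIMER_CTRL_IT_STAT  (1u << 2)	/* interrupt asserted */

static bool vtimer_should_fire(uint32_t cntv_ctl)
{
	uint32_t m = cntv_ctl & (ARCH_TIMER_CTRL_ENABLE |
				 ARCH_TIMER_CTRL_IT_STAT |
				 ARCH_TIMER_CTRL_IT_MASK);

	/* Fires only if enabled and asserted with the mask bit clear. */
	return m == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT);
}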
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 6b60c98a6e22..2e43f9d42bd5 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1326,7 +1326,7 @@ static int init_subsystems(void)
 	/*
 	 * Init HYP architected timer support
 	 */
-	err = kvm_timer_hyp_init();
+	err = kvm_timer_hyp_init(vgic_present);
 	if (err)
 		goto out;
 
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715fd3c90..dac7ceb1a677 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		}
 
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-			       data);
+			       &data);
 		data = vcpu_data_host_to_guest(vcpu, data, len);
 		vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
 	}
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
 					       len);
 
-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
 		kvm_mmio_write_buf(data_buf, len, data);
 
 		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
 				       data_buf);
 	} else {
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-			       fault_ipa, 0);
+			       fault_ipa, NULL);
 
 		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
 				       data_buf);
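
The mmio change passes &data, or NULL for a read whose data is not yet available, because the kvm_mmio tracepoint copies the traced value through a pointer; passing the value itself made the tracer treat guest data as a host address. A hypothetical sketch of that contract (trace_kvm_mmio_sketch is invented for illustration):

#include <stdio.h>
#include <string.h>

/* Stand-in for the kvm_mmio tracepoint: it copies the value through the
 * pointer when one is supplied, and tolerates NULL for reads whose data
 * is not available yet. */
static void trace_kvm_mmio_sketch(const char *kind, unsigned len,
				  unsigned long long gpa, const void *val)
{
	unsigned long long v = 0;

	if (val)
		memcpy(&v, val, len < sizeof(v) ? len : sizeof(v));
	printf("kvm_mmio: %s len %u gpa %#llx val %#llx%s\n",
	       kind, len, gpa, v, val ? "" : " (pending)");
}

int main(void)
{
	unsigned long long data = 0xabcd;

	trace_kvm_mmio_sketch("write", 4, 0x9000000, &data);	/* &data, not data */
	trace_kvm_mmio_sketch("read unsatisfied", 4, 0x9000000, NULL);
	return 0;
}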
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b36945d49986..b4b69c2d1012 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
  */
 void free_hyp_pgds(void)
 {
-	unsigned long addr;
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
 	if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
 
 	if (hyp_pgd) {
 		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+		unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
+				(uintptr_t)high_memory - PAGE_OFFSET);
+		unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
+				VMALLOC_END - VMALLOC_START);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;